diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2656,14 +2656,12 @@
     return true;
   }
 
-  BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_ON))
-    .addReg(IdxReg)
-    .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE);
-  BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), DstReg)
-    .addReg(SrcReg, 0, SubReg)
-    .addReg(SrcReg, RegState::Implicit)
-    .addReg(AMDGPU::M0, RegState::Implicit);
-  BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_OFF));
+  const MCInstrDesc &GPRIDXDesc =
+      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true);
+  BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)
+      .addReg(SrcReg)
+      .addReg(IdxReg)
+      .addImm(SubReg);
 
   MI.eraseFromParent();
   return true;
@@ -2717,25 +2715,27 @@
   MachineBasicBlock *BB = MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
 
-  if (IndexMode) {
-    BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_ON))
-      .addReg(IdxReg)
-      .addImm(AMDGPU::VGPRIndexMode::DST_ENABLE);
-  } else {
+  if (!IndexMode) {
     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
       .addReg(IdxReg);
-  }
 
-  const MCInstrDesc &RegWriteOp
-    = TII.getIndirectRegWritePseudo(VecSize, ValSize,
-                                    VecRB->getID() == AMDGPU::SGPRRegBankID);
-  BuildMI(*BB, MI, DL, RegWriteOp, DstReg)
-    .addReg(VecReg)
-    .addReg(ValReg)
-    .addImm(SubReg);
+    const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
+        VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
+    BuildMI(*BB, MI, DL, RegWriteOp, DstReg)
+        .addReg(VecReg)
+        .addReg(ValReg)
+        .addImm(SubReg);
+    MI.eraseFromParent();
+    return true;
+  }
 
-  if (IndexMode)
-    BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_OFF));
+  const MCInstrDesc &GPRIDXDesc =
+      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
+  BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)
+      .addReg(VecReg)
+      .addReg(ValReg)
+      .addReg(IdxReg)
+      .addImm(SubReg);
 
   MI.eraseFromParent();
   return true;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3461,20 +3461,14 @@
 // will only do one iteration. In the worst case, this will loop 64 times.
 //
 // TODO: Just use v_readlane_b32 if we know the VGPR has a uniform value.
-static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
-  const SIInstrInfo *TII,
-  MachineRegisterInfo &MRI,
-  MachineBasicBlock &OrigBB,
-  MachineBasicBlock &LoopBB,
-  const DebugLoc &DL,
-  const MachineOperand &IdxReg,
-  unsigned InitReg,
-  unsigned ResultReg,
-  unsigned PhiReg,
-  unsigned InitSaveExecReg,
-  int Offset,
-  bool UseGPRIdxMode,
-  bool IsIndirectSrc) {
+static MachineBasicBlock::iterator
+emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI,
+                       MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
+                       const DebugLoc &DL, const MachineOperand &Idx,
+                       unsigned InitReg, unsigned ResultReg, unsigned PhiReg,
+                       unsigned InitSaveExecReg, int Offset, bool UseGPRIdxMode,
+                       Register &SGPRIdxReg) {
+
   MachineFunction *MF = OrigBB.getParent();
   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -3500,12 +3494,12 @@
   // Read the next variant <- also loop target.
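  // Each trip of the waterfall loop handles every lane that shares one index
  // value: readfirstlane grabs a candidate index, the compare builds the mask
  // of lanes holding that value, and exec is narrowed to just those lanes for
  // the indexed access. Roughly (a sketch, not the exact emitted MIR; the
  // register names here are invented):
  //
  //   %cur  = V_READFIRSTLANE_B32 %idx      ; scalar index from one lane
  //   %cond = V_CMP_EQ_U32 %cur, %idx       ; all lanes with the same index
  //   $exec = S_AND_SAVEEXEC_B64 %cond      ; run only the matching lanes
  //   ...indexed access for the active lanes...
  //   $exec = S_XOR_B64_term $exec, %saved  ; retire the lanes just handled
  //   S_CBRANCH_EXECNZ loop                 ; repeat while any lane remains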
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentIdxReg)
-    .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
+      .addReg(Idx.getReg(), getUndefRegState(Idx.isUndef()));
 
   // Compare the just read M0 value to all possible Idx values.
   BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e64), CondReg)
-    .addReg(CurrentIdxReg)
-    .addReg(IdxReg.getReg(), 0, IdxReg.getSubReg());
+      .addReg(CurrentIdxReg)
+      .addReg(Idx.getReg(), 0, Idx.getSubReg());
 
   // Update EXEC, save the original EXEC value to VCC.
   BuildMI(LoopBB, I, DL, TII->get(ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32
@@ -3516,22 +3510,14 @@
   MRI.setSimpleHint(NewExec, CondReg);
 
   if (UseGPRIdxMode) {
-    unsigned IdxReg;
     if (Offset == 0) {
-      IdxReg = CurrentIdxReg;
+      SGPRIdxReg = CurrentIdxReg;
     } else {
-      IdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-      BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), IdxReg)
-        .addReg(CurrentIdxReg, RegState::Kill)
-        .addImm(Offset);
+      SGPRIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+      BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SGPRIdxReg)
+          .addReg(CurrentIdxReg, RegState::Kill)
+          .addImm(Offset);
     }
-    unsigned IdxMode = IsIndirectSrc ?
-      AMDGPU::VGPRIndexMode::SRC0_ENABLE : AMDGPU::VGPRIndexMode::DST_ENABLE;
-    MachineInstr *SetOn =
-      BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
-      .addReg(IdxReg, RegState::Kill)
-      .addImm(IdxMode);
-    SetOn->getOperand(3).setIsUndef();
   } else {
     // Move index from VCC into M0
     if (Offset == 0) {
@@ -3567,14 +3553,10 @@
 // per-workitem, so is kept alive for the whole loop so we end up not re-using a
 // subregister from it, using 1 more VGPR than necessary. This was saved when
 // this was expanded after register allocation.
-static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII,
-                                                  MachineBasicBlock &MBB,
-                                                  MachineInstr &MI,
-                                                  unsigned InitResultReg,
-                                                  unsigned PhiReg,
-                                                  int Offset,
-                                                  bool UseGPRIdxMode,
-                                                  bool IsIndirectSrc) {
+static MachineBasicBlock::iterator
+loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI,
+               unsigned InitResultReg, unsigned PhiReg, int Offset,
+               bool UseGPRIdxMode, Register &SGPRIdxReg) {
   MachineFunction *MF = MBB.getParent();
   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -3603,7 +3585,8 @@
 
   auto InsPt = emitLoadM0FromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, *Idx,
                                       InitResultReg, DstReg, PhiReg, TmpExec,
-                                      Offset, UseGPRIdxMode, IsIndirectSrc);
+                                      Offset, UseGPRIdxMode, SGPRIdxReg);
+
   MachineBasicBlock* LandingPad = MF->CreateMachineBasicBlock();
   MachineFunction::iterator MBBI(LoopBB);
   ++MBBI;
@@ -3634,64 +3617,45 @@
   return std::make_pair(SIRegisterInfo::getSubRegFromChannel(Offset), 0);
 }
 
-// Return true if the index is an SGPR and was set.
-static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
-                                 MachineRegisterInfo &MRI,
-                                 MachineInstr &MI,
-                                 int Offset,
-                                 bool UseGPRIdxMode,
-                                 bool IsIndirectSrc) {
+static void setM0ToIndexFromSGPR(const SIInstrInfo *TII,
+                                 MachineRegisterInfo &MRI, MachineInstr &MI,
+                                 int Offset) {
   MachineBasicBlock *MBB = MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
   MachineBasicBlock::iterator I(&MI);
 
   const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
-  const TargetRegisterClass *IdxRC = MRI.getRegClass(Idx->getReg());
 
   assert(Idx->getReg() != AMDGPU::NoRegister);
 
-  if (!TII->getRegisterInfo().isSGPRClass(IdxRC))
-    return false;
-
-  if (UseGPRIdxMode) {
-    unsigned IdxMode = IsIndirectSrc ?
- AMDGPU::VGPRIndexMode::SRC0_ENABLE : AMDGPU::VGPRIndexMode::DST_ENABLE; - if (Offset == 0) { - MachineInstr *SetOn = - BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON)) - .add(*Idx) - .addImm(IdxMode); + if (Offset == 0) { + BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0).add(*Idx); + } else { + BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) + .add(*Idx) + .addImm(Offset); + } +} - SetOn->getOperand(3).setIsUndef(); - } else { - Register Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp) - .add(*Idx) - .addImm(Offset); - MachineInstr *SetOn = - BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON)) - .addReg(Tmp, RegState::Kill) - .addImm(IdxMode); +static Register getIndirectSGPRIdx(const SIInstrInfo *TII, + MachineRegisterInfo &MRI, MachineInstr &MI, + int Offset) { + MachineBasicBlock *MBB = MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + MachineBasicBlock::iterator I(&MI); - SetOn->getOperand(3).setIsUndef(); - } + const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx); - return true; - } + if (Offset == 0) + return Idx->getReg(); - if (Offset == 0) { - BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) - .add(*Idx); - } else { - BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) + Register Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp) .add(*Idx) .addImm(Offset); - } - - return true; + return Tmp; } -// Control flow needs to be inserted if indexing with a VGPR. static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI, MachineBasicBlock &MBB, const GCNSubtarget &ST) { @@ -3701,10 +3665,12 @@ MachineRegisterInfo &MRI = MF->getRegInfo(); Register Dst = MI.getOperand(0).getReg(); + const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx); Register SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg(); int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm(); const TargetRegisterClass *VecRC = MRI.getRegClass(SrcReg); + const TargetRegisterClass *IdxRC = MRI.getRegClass(Idx->getReg()); unsigned SubReg; std::tie(SubReg, Offset) @@ -3712,7 +3678,8 @@ const bool UseGPRIdxMode = ST.useVGPRIndexMode(); - if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, true)) { + // Check for a SGPR index. + if (TII->getRegisterInfo().isSGPRClass(IdxRC)) { MachineBasicBlock::iterator I(&MI); const DebugLoc &DL = MI.getDebugLoc(); @@ -3720,12 +3687,17 @@ // TODO: Look at the uses to avoid the copy. This may require rescheduling // to avoid interfering with other uses, so probably requires a new // optimization pass. - BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst) - .addReg(SrcReg, 0, SubReg) - .addReg(SrcReg, RegState::Implicit) - .addReg(AMDGPU::M0, RegState::Implicit); - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF)); + Register Idx = getIndirectSGPRIdx(TII, MRI, MI, Offset); + + const MCInstrDesc &GPRIDXDesc = + TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), true); + BuildMI(MBB, I, DL, GPRIDXDesc, Dst) + .addReg(SrcReg) + .addReg(Idx) + .addImm(SubReg); } else { + setM0ToIndexFromSGPR(TII, MRI, MI, Offset); + BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) .addReg(SrcReg, 0, SubReg) .addReg(SrcReg, RegState::Implicit); @@ -3736,6 +3708,7 @@ return &MBB; } + // Control flow needs to be inserted if indexing with a VGPR. 
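+  // The divergent-index case falls through to the waterfall loop built by
+  // loadM0FromVGPR below. With UseGPRIdxMode the loop body is a single
+  // V_INDIRECT_REG_READ_GPR_IDX pseudo, which expandPostRAPseudo later turns
+  // into a bundle along these lines (sketch only, not verbatim output):
+  //
+  //   S_SET_GPR_IDX_ON %sgpr_idx, gpr_idx(SRC0)
+  //   %dst = V_MOV_B32_e32 %vec.sub0
+  //   S_SET_GPR_IDX_OFF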
const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock::iterator I(&MI); @@ -3744,16 +3717,20 @@ BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), InitReg); - auto InsPt = loadM0FromVGPR(TII, MBB, MI, InitReg, PhiReg, - Offset, UseGPRIdxMode, true); + Register SGPRIdxReg; + auto InsPt = loadM0FromVGPR(TII, MBB, MI, InitReg, PhiReg, Offset, + UseGPRIdxMode, SGPRIdxReg); + MachineBasicBlock *LoopBB = InsPt->getParent(); if (UseGPRIdxMode) { - BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst) - .addReg(SrcReg, 0, SubReg) - .addReg(SrcReg, RegState::Implicit) - .addReg(AMDGPU::M0, RegState::Implicit); - BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF)); + const MCInstrDesc &GPRIDXDesc = + TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), true); + + BuildMI(*LoopBB, InsPt, DL, GPRIDXDesc, Dst) + .addReg(SrcReg) + .addReg(SGPRIdxReg) + .addImm(SubReg); } else { BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) .addReg(SrcReg, 0, SubReg) @@ -3779,6 +3756,7 @@ const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val); int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm(); const TargetRegisterClass *VecRC = MRI.getRegClass(SrcVec->getReg()); + const TargetRegisterClass *IdxRC = MRI.getRegClass(Idx->getReg()); // This can be an immediate, but will be folded later. assert(Val->getReg()); @@ -3804,23 +3782,36 @@ return &MBB; } - const MCInstrDesc &MovRelDesc - = TII->getIndirectRegWritePseudo(TRI.getRegSizeInBits(*VecRC), 32, false); - - if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, false)) { + // Check for a SGPR index. + if (TII->getRegisterInfo().isSGPRClass(IdxRC)) { MachineBasicBlock::iterator I(&MI); const DebugLoc &DL = MI.getDebugLoc(); - BuildMI(MBB, I, DL, MovRelDesc, Dst) - .addReg(SrcVec->getReg()) - .add(*Val) - .addImm(SubReg); - if (UseGPRIdxMode) - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF)); + if (UseGPRIdxMode) { + Register Idx = getIndirectSGPRIdx(TII, MRI, MI, Offset); + + const MCInstrDesc &GPRIDXDesc = + TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false); + BuildMI(MBB, I, DL, GPRIDXDesc, Dst) + .addReg(SrcVec->getReg()) + .add(*Val) + .addReg(Idx) + .addImm(SubReg); + } else { + setM0ToIndexFromSGPR(TII, MRI, MI, Offset); + + const MCInstrDesc &MovRelDesc = TII->getIndirectRegWriteMovRelPseudo( + TRI.getRegSizeInBits(*VecRC), 32, false); + BuildMI(MBB, I, DL, MovRelDesc, Dst) + .addReg(SrcVec->getReg()) + .add(*Val) + .addImm(SubReg); + } MI.eraseFromParent(); return &MBB; } + // Control flow needs to be inserted if indexing with a VGPR. 
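+  // Same waterfall scheme for the write: with UseGPRIdxMode the loop body is
+  // one V_INDIRECT_REG_WRITE_GPR_IDX pseudo, later expanded to a bundle of
+  // roughly (sketch only):
+  //
+  //   S_SET_GPR_IDX_ON %sgpr_idx, gpr_idx(DST)
+  //   %vec.sub0 = V_MOV_B32_indirect %val
+  //   S_SET_GPR_IDX_OFF
+  //
+  // Without GPR indexing mode, the updated vector is instead carried around
+  // the loop in PhiReg and written with a MovRel pseudo.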
if (Val->isReg()) MRI.clearKillFlags(Val->getReg()); @@ -3828,16 +3819,28 @@ Register PhiReg = MRI.createVirtualRegister(VecRC); - auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg, - Offset, UseGPRIdxMode, false); + Register SGPRIdxReg; + auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg, Offset, + UseGPRIdxMode, SGPRIdxReg); MachineBasicBlock *LoopBB = InsPt->getParent(); - BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst) - .addReg(PhiReg) - .add(*Val) - .addImm(AMDGPU::sub0); - if (UseGPRIdxMode) - BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF)); + if (UseGPRIdxMode) { + const MCInstrDesc &GPRIDXDesc = + TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false); + + BuildMI(*LoopBB, InsPt, DL, GPRIDXDesc, Dst) + .addReg(PhiReg) + .add(*Val) + .addReg(SGPRIdxReg) + .addImm(AMDGPU::sub0); + } else { + const MCInstrDesc &MovRelDesc = TII->getIndirectRegWriteMovRelPseudo( + TRI.getRegSizeInBits(*VecRC), 32, false); + BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst) + .addReg(PhiReg) + .add(*Val) + .addImm(AMDGPU::sub0); + } MI.eraseFromParent(); return LoopBB; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -245,9 +245,12 @@ // DstRC, then AMDGPU::COPY is returned. unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; - const MCInstrDesc &getIndirectRegWritePseudo( - unsigned VecSize, unsigned EltSize, bool IsSGPR) const; + const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize, + unsigned EltSize, + bool IsSGPR) const; + const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize, + bool IsIndirectSrc) const; LLVM_READONLY int commuteOpcode(unsigned Opc) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1195,78 +1195,123 @@ return AMDGPU::COPY; } -static unsigned getIndirectVGPRWritePseudoOpc(unsigned VecSize) { +const MCInstrDesc & +SIInstrInfo::getIndirectGPRIDXPseudo(unsigned VecSize, + bool IsIndirectSrc) const { + if (IsIndirectSrc) { + if (VecSize <= 32) // 4 bytes + return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1); + if (VecSize <= 64) // 8 bytes + return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2); + if (VecSize <= 96) // 12 bytes + return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3); + if (VecSize <= 128) // 16 bytes + return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4); + if (VecSize <= 160) // 20 bytes + return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5); + if (VecSize <= 256) // 32 bytes + return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8); + if (VecSize <= 512) // 64 bytes + return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16); + if (VecSize <= 1024) // 128 bytes + return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32); + + llvm_unreachable("unsupported size for IndirectRegReadGPRIDX pseudos"); + } + if (VecSize <= 32) // 4 bytes - return AMDGPU::V_INDIRECT_REG_WRITE_B32_V1; + return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1); if (VecSize <= 64) // 8 bytes - return AMDGPU::V_INDIRECT_REG_WRITE_B32_V2; + return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2); if (VecSize <= 96) // 12 bytes - return AMDGPU::V_INDIRECT_REG_WRITE_B32_V3; + return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3); if (VecSize <= 128) // 16 bytes - return AMDGPU::V_INDIRECT_REG_WRITE_B32_V4; + return 
get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4); if (VecSize <= 160) // 20 bytes - return AMDGPU::V_INDIRECT_REG_WRITE_B32_V5; + return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5); if (VecSize <= 256) // 32 bytes - return AMDGPU::V_INDIRECT_REG_WRITE_B32_V8; + return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8); if (VecSize <= 512) // 64 bytes - return AMDGPU::V_INDIRECT_REG_WRITE_B32_V16; + return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16); if (VecSize <= 1024) // 128 bytes - return AMDGPU::V_INDIRECT_REG_WRITE_B32_V32; + return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32); + + llvm_unreachable("unsupported size for IndirectRegWriteGPRIDX pseudos"); +} + +static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize) { + if (VecSize <= 32) // 4 bytes + return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1; + if (VecSize <= 64) // 8 bytes + return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2; + if (VecSize <= 96) // 12 bytes + return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3; + if (VecSize <= 128) // 16 bytes + return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4; + if (VecSize <= 160) // 20 bytes + return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5; + if (VecSize <= 256) // 32 bytes + return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8; + if (VecSize <= 512) // 64 bytes + return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16; + if (VecSize <= 1024) // 128 bytes + return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32; llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); } -static unsigned getIndirectSGPRWritePseudo32(unsigned VecSize) { +static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize) { if (VecSize <= 32) // 4 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B32_V1; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1; if (VecSize <= 64) // 8 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B32_V2; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2; if (VecSize <= 96) // 12 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B32_V3; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3; if (VecSize <= 128) // 16 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B32_V4; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4; if (VecSize <= 160) // 20 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B32_V5; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5; if (VecSize <= 256) // 32 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B32_V8; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8; if (VecSize <= 512) // 64 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B32_V16; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16; if (VecSize <= 1024) // 128 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B32_V32; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32; llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); } -static unsigned getIndirectSGPRWritePseudo64(unsigned VecSize) { +static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize) { if (VecSize <= 64) // 8 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B64_V1; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1; if (VecSize <= 128) // 16 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B64_V2; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2; if (VecSize <= 256) // 32 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B64_V4; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4; if (VecSize <= 512) // 64 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B64_V8; + return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8; if (VecSize <= 1024) // 128 bytes - return AMDGPU::S_INDIRECT_REG_WRITE_B64_V16; + return 
AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16; llvm_unreachable("unsupported size for IndirectRegWrite pseudos"); } -const MCInstrDesc &SIInstrInfo::getIndirectRegWritePseudo( - unsigned VecSize, unsigned EltSize, bool IsSGPR) const { +const MCInstrDesc & +SIInstrInfo::getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, + bool IsSGPR) const { if (IsSGPR) { switch (EltSize) { case 32: - return get(getIndirectSGPRWritePseudo32(VecSize)); + return get(getIndirectSGPRWriteMovRelPseudo32(VecSize)); case 64: - return get(getIndirectSGPRWritePseudo64(VecSize)); + return get(getIndirectSGPRWriteMovRelPseudo64(VecSize)); default: llvm_unreachable("invalid reg indexing elt size"); } } assert(EltSize == 32 && "invalid reg indexing elt size"); - return get(getIndirectVGPRWritePseudoOpc(VecSize)); + return get(getIndirectVGPRWriteMovRelPseudoOpc(VecSize)); } static unsigned getSGPRSpillSaveOpcode(unsigned Size) { @@ -1680,36 +1725,35 @@ MI.eraseFromParent(); break; } - case AMDGPU::V_INDIRECT_REG_WRITE_B32_V1: - case AMDGPU::V_INDIRECT_REG_WRITE_B32_V2: - case AMDGPU::V_INDIRECT_REG_WRITE_B32_V3: - case AMDGPU::V_INDIRECT_REG_WRITE_B32_V4: - case AMDGPU::V_INDIRECT_REG_WRITE_B32_V5: - case AMDGPU::V_INDIRECT_REG_WRITE_B32_V8: - case AMDGPU::V_INDIRECT_REG_WRITE_B32_V16: - case AMDGPU::V_INDIRECT_REG_WRITE_B32_V32: - case AMDGPU::S_INDIRECT_REG_WRITE_B32_V1: - case AMDGPU::S_INDIRECT_REG_WRITE_B32_V2: - case AMDGPU::S_INDIRECT_REG_WRITE_B32_V3: - case AMDGPU::S_INDIRECT_REG_WRITE_B32_V4: - case AMDGPU::S_INDIRECT_REG_WRITE_B32_V5: - case AMDGPU::S_INDIRECT_REG_WRITE_B32_V8: - case AMDGPU::S_INDIRECT_REG_WRITE_B32_V16: - case AMDGPU::S_INDIRECT_REG_WRITE_B32_V32: - case AMDGPU::S_INDIRECT_REG_WRITE_B64_V1: - case AMDGPU::S_INDIRECT_REG_WRITE_B64_V2: - case AMDGPU::S_INDIRECT_REG_WRITE_B64_V4: - case AMDGPU::S_INDIRECT_REG_WRITE_B64_V8: - case AMDGPU::S_INDIRECT_REG_WRITE_B64_V16: { + case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1: + case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2: + case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3: + case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4: + case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5: + case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8: + case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16: + case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8: + case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: { const TargetRegisterClass *EltRC = getOpRegClass(MI, 2); unsigned Opc; if (RI.hasVGPRs(EltRC)) { - Opc = ST.useVGPRIndexMode() ? - AMDGPU::V_MOV_B32_indirect : AMDGPU::V_MOVRELD_B32_e32; + Opc = AMDGPU::V_MOVRELD_B32_e32; } else { - Opc = RI.getRegSizeInBits(*EltRC) == 64 ? - AMDGPU::S_MOVRELD_B64 : AMDGPU::S_MOVRELD_B32; + Opc = RI.getRegSizeInBits(*EltRC) == 64 ? 
AMDGPU::S_MOVRELD_B64 + : AMDGPU::S_MOVRELD_B32; } const MCInstrDesc &OpDesc = get(Opc); @@ -1732,6 +1776,78 @@ MI.eraseFromParent(); break; } + case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1: + case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2: + case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3: + case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4: + case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5: + case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8: + case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16: + case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: { + assert(ST.useVGPRIndexMode()); + Register VecReg = MI.getOperand(0).getReg(); + bool IsUndef = MI.getOperand(1).isUndef(); + Register Idx = MI.getOperand(3).getReg(); + Register SubReg = MI.getOperand(4).getImm(); + + MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON)) + .addReg(Idx) + .addImm(AMDGPU::VGPRIndexMode::DST_ENABLE); + SetOn->getOperand(3).setIsUndef(); + + const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect); + MachineInstrBuilder MIB = + BuildMI(MBB, MI, DL, OpDesc) + .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) + .add(MI.getOperand(2)) + .addReg(VecReg, RegState::ImplicitDefine) + .addReg(VecReg, + RegState::Implicit | (IsUndef ? RegState::Undef : 0)); + + const int ImpDefIdx = OpDesc.getNumOperands() + OpDesc.getNumImplicitUses(); + const int ImpUseIdx = ImpDefIdx + 1; + MIB->tieOperands(ImpDefIdx, ImpUseIdx); + + MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF)); + + finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); + + MI.eraseFromParent(); + break; + } + case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1: + case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2: + case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3: + case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4: + case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5: + case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8: + case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16: + case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: { + assert(ST.useVGPRIndexMode()); + Register Dst = MI.getOperand(0).getReg(); + Register VecReg = MI.getOperand(1).getReg(); + bool IsUndef = MI.getOperand(1).isUndef(); + Register Idx = MI.getOperand(2).getReg(); + Register SubReg = MI.getOperand(3).getImm(); + + MachineInstr *SetOn = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_ON)) + .addReg(Idx) + .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE); + SetOn->getOperand(3).setIsUndef(); + + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32)) + .addDef(Dst) + .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef) + .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0)) + .addReg(AMDGPU::M0, RegState::Implicit); + + MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF)); + + finalizeBundle(MBB, SetOn->getIterator(), std::next(SetOff->getIterator())); + + MI.eraseFromParent(); + break; + } case AMDGPU::SI_PC_ADD_REL_OFFSET: { MachineFunction &MF = *MBB.getParent(); Register Reg = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -545,64 +545,97 @@ } // End Uses = [EXEC], Defs = [M0, EXEC] - -// This is a pseudo variant of the v_movreld_b32 (or v_mov_b32 -// expecting to be executed with gpr indexing mode enabled) -// instruction in which the vector operand appears only twice, once as -// def and once as use. 
Using this pseudo avoids problems with the Two
-// Address instructions pass.
-class INDIRECT_REG_WRITE_pseudo<RegisterClass rc,
-                                RegisterOperand val_ty> : PseudoInstSI <
+class INDIRECT_REG_WRITE_MOVREL_pseudo<RegisterClass rc,
+                                       RegisterOperand val_ty> : PseudoInstSI <
   (outs rc:$vdst), (ins rc:$vsrc, val_ty:$val, i32imm:$subreg)> {
   let Constraints = "$vsrc = $vdst";
   let Uses = [M0];
 }
 
-class V_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> :
-  INDIRECT_REG_WRITE_pseudo<rc, VSrc_b32> {
+class V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<RegisterClass rc> :
+  INDIRECT_REG_WRITE_MOVREL_pseudo<rc, VSrc_b32> {
   let VALU = 1;
   let VOP1 = 1;
   let Uses = [M0, EXEC];
 }
 
-class S_INDIRECT_REG_WRITE_pseudo<RegisterClass rc,
-                                  RegisterOperand val_ty> :
-  INDIRECT_REG_WRITE_pseudo<rc, val_ty> {
+class S_INDIRECT_REG_WRITE_MOVREL_pseudo<RegisterClass rc,
+                                         RegisterOperand val_ty> :
+  INDIRECT_REG_WRITE_MOVREL_pseudo<rc, val_ty> {
   let SALU = 1;
   let SOP1 = 1;
   let Uses = [M0];
 }
 
-class S_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> :
-  S_INDIRECT_REG_WRITE_pseudo<rc, SSrc_b32>;
-class S_INDIRECT_REG_WRITE_B64_pseudo<RegisterClass rc> :
-  S_INDIRECT_REG_WRITE_pseudo<rc, SSrc_b64>;
-
-
-def V_INDIRECT_REG_WRITE_B32_V1 : V_INDIRECT_REG_WRITE_B32_pseudo<VGPR_32>;
-def V_INDIRECT_REG_WRITE_B32_V2 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_64>;
-def V_INDIRECT_REG_WRITE_B32_V3 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_96>;
-def V_INDIRECT_REG_WRITE_B32_V4 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_128>;
-def V_INDIRECT_REG_WRITE_B32_V5 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_160>;
-def V_INDIRECT_REG_WRITE_B32_V8 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_256>;
-def V_INDIRECT_REG_WRITE_B32_V16 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_512>;
-def V_INDIRECT_REG_WRITE_B32_V32 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_1024>;
-
-def S_INDIRECT_REG_WRITE_B32_V1 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_32>;
-def S_INDIRECT_REG_WRITE_B32_V2 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_64>;
-def S_INDIRECT_REG_WRITE_B32_V3 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_96>;
-def S_INDIRECT_REG_WRITE_B32_V4 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_128>;
-def S_INDIRECT_REG_WRITE_B32_V5 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_160>;
-def S_INDIRECT_REG_WRITE_B32_V8 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_256>;
-def S_INDIRECT_REG_WRITE_B32_V16 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_512>;
-def S_INDIRECT_REG_WRITE_B32_V32 : S_INDIRECT_REG_WRITE_B32_pseudo<SReg_1024>;
-
-def S_INDIRECT_REG_WRITE_B64_V1 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_64>;
-def S_INDIRECT_REG_WRITE_B64_V2 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_128>;
-def S_INDIRECT_REG_WRITE_B64_V4 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_256>;
-def S_INDIRECT_REG_WRITE_B64_V8 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_512>;
-def S_INDIRECT_REG_WRITE_B64_V16 : S_INDIRECT_REG_WRITE_B64_pseudo<SReg_1024>;
+class S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<RegisterClass rc> :
+  S_INDIRECT_REG_WRITE_MOVREL_pseudo<rc, SSrc_b32>;
+class S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<RegisterClass rc> :
+  S_INDIRECT_REG_WRITE_MOVREL_pseudo<rc, SSrc_b64>;
+
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V1 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VGPR_32>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V2 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_64>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V3 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_96>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V4 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_128>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V5 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_160>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V8 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_256>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V16 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_512>;
+def V_INDIRECT_REG_WRITE_MOVREL_B32_V32 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_1024>;
+
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V1 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_32>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V2 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_64>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V3 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_96>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V4 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_128>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V5 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_160>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V8 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_256>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V16 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_512>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V32 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_1024>;
+
+def S_INDIRECT_REG_WRITE_MOVREL_B64_V1 : S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<SReg_64>;
+def S_INDIRECT_REG_WRITE_MOVREL_B64_V2 : S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<SReg_128>;
+def S_INDIRECT_REG_WRITE_MOVREL_B64_V4 : S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<SReg_256>;
+def S_INDIRECT_REG_WRITE_MOVREL_B64_V8 : S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<SReg_512>;
+def S_INDIRECT_REG_WRITE_MOVREL_B64_V16 : S_INDIRECT_REG_WRITE_MOVREL_B64_pseudo<SReg_1024>;
+
+// These variants of V_INDIRECT_REG_READ/WRITE use VGPR indexing. By using these
+// pseudos we avoid spills or copies being inserted within indirect sequences
+// that switch the VGPR indexing mode. Spills to accvgprs could be affected by
+// this mode switching.
+
+class V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<RegisterClass rc> : PseudoInstSI <
+  (outs rc:$vdst), (ins rc:$vsrc, VSrc_b32:$val, SSrc_b32:$idx, i32imm:$subreg)> {
+  let Constraints = "$vsrc = $vdst";
+  let VALU = 1;
+  let Uses = [M0, EXEC];
+  let Defs = [M0];
+}
+
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VGPR_32>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_64>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_96>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_128>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_160>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_256>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_512>;
+def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_1024>;
+
+class V_INDIRECT_REG_READ_GPR_IDX_pseudo<RegisterClass rc> : PseudoInstSI <
+  (outs VGPR_32:$vdst), (ins rc:$vsrc, SSrc_b32:$idx, i32imm:$subreg)> {
+  let VALU = 1;
+  let Uses = [M0, EXEC];
+  let Defs = [M0];
+}
+
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V1 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VGPR_32>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V2 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_64>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V3 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_96>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V4 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_128>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V5 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_160>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V8 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_256>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V16 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_512>;
+def V_INDIRECT_REG_READ_GPR_IDX_B32_V32 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_1024>;
 
 multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
   let UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC] in {
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -1334,7 +1334,8 @@
   for (auto &MBB : MF) {
     for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
 
-      if (MI->getOpcode() == TargetOpcode::BUNDLE && MI->mayLoadOrStore()) {
+      // Unbundle instructions after the post-RA scheduler.
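+      // The GPR_IDX pseudo expansion in SIInstrInfo now also emits non-memory
+      // bundles (S_SET_GPR_IDX_ON ... S_SET_GPR_IDX_OFF), so every bundle is
+      // flattened here, not just load/store bundles. A sketch of the input
+      // this now handles (illustrative, not verbatim MIR):
+      //
+      //   BUNDLE implicit-def $m0, implicit-def $mode, ... {
+      //     S_SET_GPR_IDX_ON killed $sgpr2, 1
+      //     $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
+      //     S_SET_GPR_IDX_OFF
+      //   }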
+ if (MI->isBundle()) { MachineBasicBlock::instr_iterator II(MI->getIterator()); for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end(); I != E && I->isBundledWithPred(); ++I) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll @@ -4,32 +4,14 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(<4 x i128> addrspace(4)* inreg %ptr, i32 inreg %idx) { -; GFX9-LABEL: extractelement_sgpr_v4i128_sgpr_idx: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0 -; GFX9-NEXT: s_lshl_b32 m0, s4, 1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_movrels_b64 s[0:1], s[8:9] -; GFX9-NEXT: s_movrels_b64 s[2:3], s[10:11] -; GFX9-NEXT: ; return to shader part epilog -; -; GFX8-LABEL: extractelement_sgpr_v4i128_sgpr_idx: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0 -; GFX8-NEXT: s_lshl_b32 m0, s4, 1 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_movrels_b64 s[0:1], s[8:9] -; GFX8-NEXT: s_movrels_b64 s[2:3], s[10:11] -; GFX8-NEXT: ; return to shader part epilog -; -; GFX7-LABEL: extractelement_sgpr_v4i128_sgpr_idx: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0 -; GFX7-NEXT: s_lshl_b32 m0, s4, 1 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_movrels_b64 s[0:1], s[8:9] -; GFX7-NEXT: s_movrels_b64 s[2:3], s[10:11] -; GFX7-NEXT: ; return to shader part epilog +; GCN-LABEL: extractelement_sgpr_v4i128_sgpr_idx: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0 +; GCN-NEXT: s_lshl_b32 m0, s4, 1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_movrels_b64 s[0:1], s[8:9] +; GCN-NEXT: s_movrels_b64 s[2:3], s[10:11] +; GCN-NEXT: ; return to shader part epilog %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr %element = extractelement <4 x i128> %vector, i32 %idx ret i128 %element @@ -43,8 +25,8 @@ ; GFX9-NEXT: global_load_dwordx4 v[10:13], v[0:1], off offset:32 ; GFX9-NEXT: global_load_dwordx4 v[14:17], v[0:1], off offset:48 ; GFX9-NEXT: s_lshl_b32 s0, s2, 1 -; GFX9-NEXT: s_lshl_b32 s0, s0, 1 -; GFX9-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) +; GFX9-NEXT: s_lshl_b32 s2, s0, 1 +; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir @@ -507,10 +507,8 @@ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v2s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V2_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V2 [[COPY]], 
[[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V2_]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr2 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -535,10 +533,8 @@ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v3s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V3_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V3 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V3_]] %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:sgpr(s32) = COPY $sgpr2 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -563,10 +559,8 @@ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:sgpr(s32) = COPY $sgpr4 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -591,10 +585,8 @@ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -619,10 +611,8 @@ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v16s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 - ; GPRIDX: 
S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]] %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:sgpr(s32) = COPY $sgpr8 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -647,10 +637,8 @@ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v32s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_]] %0:vgpr(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 %1:sgpr(s32) = COPY $sgpr40 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -675,10 +663,8 @@ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub1, implicit $exec, implicit [[COPY]], implicit $m0 - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 11, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 1 @@ -709,10 +695,8 @@ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[S_ADD_I32_]], 3, 
implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 -1 @@ -739,10 +723,8 @@ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub7, implicit $exec, implicit [[COPY]], implicit $m0 - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], 71, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 7 @@ -773,10 +755,8 @@ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V8 [[COPY]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 8 @@ -830,10 +810,8 @@ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32_const_idx ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GPRIDX: S_SET_GPR_IDX_ON [[S_MOV_B32_]], 1, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[COPY]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = G_CONSTANT i32 0 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir @@ -16,15 +16,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = 
COPY $sgpr3 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V2_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v2s32 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V2_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:sreg_64 = S_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V2_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 %2:sgpr(s32) = COPY $sgpr3 @@ -46,15 +46,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V3_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v3s32 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V3_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:sgpr_96 = S_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V3_]] %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s32) = COPY $sgpr3 %2:sgpr(s32) = COPY $sgpr4 @@ -76,15 +76,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v4s32 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]] + ; GPRIDX: 
[[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr3 %2:sgpr(s32) = COPY $sgpr4 @@ -106,15 +106,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V5_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v5s32 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_160 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V5_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:sgpr_160 = S_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V5_]] %0:sgpr(<5 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 %1:sgpr(s32) = COPY $sgpr5 %2:sgpr(s32) = COPY $sgpr6 @@ -136,15 +136,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = COPY $sgpr9 @@ -166,15 +166,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr16 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr17 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_B32_V16 [[COPY]], [[COPY1]], 3, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V16_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B32_V16 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL: S_ENDPGM 0, 
implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v16s32 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr16 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr17 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_B32_V16 [[COPY]], [[COPY1]], 3, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V16_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B32_V16 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V16_]] %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr16 %2:sgpr(s32) = COPY $sgpr17 @@ -196,15 +196,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr41 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_B32_V32 [[COPY]], [[COPY1]], 3, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V32_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B32_V32 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v32s32 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr41 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_B32_V32 [[COPY]], [[COPY1]], 3, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V32_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B32_V32 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V32_]] %0:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s32) = COPY $sgpr40 %2:sgpr(s32) = COPY $sgpr41 @@ -226,15 +226,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B64_V2 [[COPY]], [[COPY1]], 4, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V2_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B64_V2 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s64_v2s64 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GPRIDX: $m0 = COPY [[COPY2]] - ; 
GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B64_V2 [[COPY]], [[COPY1]], 4, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V2_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B64_V2 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V2_]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s64) = COPY $sgpr4_sgpr5 %2:sgpr(s32) = COPY $sgpr6 @@ -256,15 +256,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_B64_V4 [[COPY]], [[COPY1]], 4, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V4_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B64_V4 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s64_v4s64 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_B64_V4 [[COPY]], [[COPY1]], 4, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V4_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B64_V4 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V4_]] %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s64) = COPY $sgpr8_sgpr9 %2:sgpr(s32) = COPY $sgpr10 @@ -286,15 +286,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr16_sgpr17 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr18 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_B64_V8 [[COPY]], [[COPY1]], 4, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V8_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B64_V8 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s64_v8s64 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr16_sgpr17 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr18 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_B64_V8 [[COPY]], [[COPY1]], 4, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V8_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_:%[0-9]+]]:sgpr_512 = S_INDIRECT_REG_WRITE_MOVREL_B64_V8 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V8_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s64) = COPY $sgpr16_sgpr17 %2:sgpr(s32) = COPY $sgpr18 @@ -316,15 +316,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr40_sgpr41 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY 
$sgpr42 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_B64_V16 [[COPY]], [[COPY1]], 4, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V16_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B64_V16 [[COPY]], [[COPY1]], 4, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v16s64 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr40_sgpr41 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr42 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_B64_V16 [[COPY]], [[COPY1]], 4, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B64_V16_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_:%[0-9]+]]:sgpr_1024 = S_INDIRECT_REG_WRITE_MOVREL_B64_V16 [[COPY]], [[COPY1]], 4, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B64_V16_]] %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s64) = COPY $sgpr40_sgpr41 %2:sgpr(s32) = COPY $sgpr42 @@ -346,16 +346,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V2_]] + ; MOVREL: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_MOVREL_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V2_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v2s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_B32_V2 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V2_]] + ; GPRIDX: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2_:%[0-9]+]]:vreg_64 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2_]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:sgpr(s32) = COPY $sgpr3 @@ -377,16 +375,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V3_]] + ; MOVREL: 
[[V_INDIRECT_REG_WRITE_MOVREL_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_MOVREL_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V3_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v3s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_B32_V3 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V3_]] + ; GPRIDX: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3_:%[0-9]+]]:vreg_96 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3_]] %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:vgpr(s32) = COPY $vgpr3 %2:sgpr(s32) = COPY $sgpr4 @@ -408,16 +404,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]] + ; MOVREL: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v4s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]] + ; GPRIDX: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:vgpr(s32) = COPY $vgpr3 %2:sgpr(s32) = COPY $sgpr4 @@ -439,16 +433,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V5_]] + ; MOVREL: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_MOVREL_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V5_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v5s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_160 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5 ; GPRIDX: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_B32_V5 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V5_]] + ; GPRIDX: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5_:%[0-9]+]]:vreg_160 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5_]] %0:vgpr(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 %1:vgpr(s32) = COPY $vgpr5 %2:sgpr(s32) = COPY $sgpr6 @@ -470,16 +462,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] + ; MOVREL: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] + ; GPRIDX: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[COPY2]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s32) = COPY $vgpr8 %2:sgpr(s32) = COPY $sgpr9 @@ -501,16 +491,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0, implicit $exec - ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] + ; MOVREL: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0, implicit $exec + ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_1 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 - ; GPRIDX: S_SET_GPR_IDX_ON [[COPY2]], 8, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0, implicit $exec - ; GPRIDX: 
S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] + ; GPRIDX: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[COPY2]], 11, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s32) = COPY $vgpr8 %2:sgpr(s32) = COPY $sgpr9 @@ -536,18 +524,16 @@ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc ; MOVREL: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] + ; MOVREL: [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_8 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 8, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] + ; GPRIDX: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 [[COPY]], [[COPY1]], [[S_ADD_I32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:vgpr(s32) = COPY $vgpr8 %2:sgpr(s32) = COPY $sgpr9 @@ -571,15 +557,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: $m0 = COPY [[COPY2]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_1 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX: $m0 = COPY [[COPY2]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 11, implicit $m0 + ; 
GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = COPY $sgpr9 @@ -605,8 +591,8 @@ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc ; MOVREL: $m0 = COPY [[S_ADD_I32_]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_8 ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 @@ -614,8 +600,8 @@ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc ; GPRIDX: $m0 = COPY [[S_ADD_I32_]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_:%[0-9]+]]:sgpr_256 = S_INDIRECT_REG_WRITE_MOVREL_B32_V8 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = COPY $sgpr9 @@ -641,15 +627,15 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; MOVREL: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 - ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]] + ; MOVREL: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v4s32_const_idx ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GPRIDX: $m0 = COPY [[S_MOV_B32_]] - ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 - ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]] + ; GPRIDX: [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0 + ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 %2:sgpr(s32) = G_CONSTANT i32 0 @@ -671,16 +657,14 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; MOVREL: $m0 = COPY [[S_MOV_B32_]] - ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]] + ; MOVREL: 
[[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_MOVREL_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec + ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_MOVREL_B32_V4_]] ; GPRIDX-LABEL: name: insert_vector_elt_v_s32_v4s32_const_idx ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GPRIDX: S_SET_GPR_IDX_ON [[S_MOV_B32_]], 8, implicit-def $m0, implicit-def $mode, implicit $m0, implicit $mode - ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 3, implicit $m0, implicit $exec - ; GPRIDX: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]] + ; GPRIDX: [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 [[COPY]], [[COPY1]], [[S_MOV_B32_]], 3, implicit-def $m0, implicit $m0, implicit $exec + ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4_]] %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 %2:sgpr(s32) = G_CONSTANT i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir b/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/expand-si-indirect.mir @@ -0,0 +1,102 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=twoaddressinstruction %s -o - | FileCheck %s -check-prefix=GCN + +# Wait to expand SI_INDIRECT sequences that use VGPR indexing until after +# register allocation. We don't want to reschedule the mode switching or to +# have any instructions inserted within the sequence. The two-address +# instruction pass could insert bad copies here if the sequence is expanded too early.
+ +--- +# GCN-LABEL: expand_si_indirect +# GCN: s_set_gpr_idx_on +# GCN-NEXT: v_mov_b32_e32 +# GCN-NEXT: s_set_gpr_idx_off + +# GCN: s_set_gpr_idx_on +# GCN-NEXT: v_mov_b32_e32 +# GCN-NOT: v_mov_b32_e32 +# GCN-NEXT: s_set_gpr_idx_off + +name: expand_si_indirect +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $sgpr0_sgpr1 + + %0:sgpr_64 = COPY killed $sgpr0_sgpr1 + %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0(p4), 36, 0, 0 + %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0(p4), 44, 0, 0 + %4:sreg_32 = S_ADD_I32 %3, 1, implicit-def dead $scc + %5:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + %6:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + %7:vgpr_32 = V_MOV_B32_e32 1077936128, implicit $exec + %8:vgpr_32 = V_MOV_B32_e32 1082130432, implicit $exec + %9:vgpr_32 = V_MOV_B32_e32 1084227584, implicit $exec + %10:vgpr_32 = V_MOV_B32_e32 1086324736, implicit $exec + %11:vgpr_32 = V_MOV_B32_e32 1088421888, implicit $exec + %12:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec + %13:vgpr_32 = V_MOV_B32_e32 1091567616, implicit $exec + %14:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec + %15:vgpr_32 = V_MOV_B32_e32 1093664768, implicit $exec + %16:vgpr_32 = V_MOV_B32_e32 1094713344, implicit $exec + %17:vgpr_32 = V_MOV_B32_e32 1095761920, implicit $exec + %18:vgpr_32 = V_MOV_B32_e32 1096810496, implicit $exec + %19:vgpr_32 = V_MOV_B32_e32 1097859072, implicit $exec + %20:vgpr_32 = V_MOV_B32_e32 1098907648, implicit $exec + %21:vreg_512 = REG_SEQUENCE killed %5, %subreg.sub0, killed %6, %subreg.sub1, killed %7, %subreg.sub2, killed %8, %subreg.sub3, killed %9, %subreg.sub4, killed %10, %subreg.sub5, killed %11, %subreg.sub6, killed %12, %subreg.sub7, killed %13, %subreg.sub8, killed %14, %subreg.sub9, killed %15, %subreg.sub10, killed %16, %subreg.sub11, killed %17, %subreg.sub12, killed %18, %subreg.sub13, killed %19, %subreg.sub14, killed %20, %subreg.sub15 + %22:vgpr_32 = V_MOV_B32_e32 1099431936, implicit $exec + %23:vreg_512 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16 killed %21, %22, killed %4, 3, implicit-def $m0, implicit $m0, implicit $exec + %24:sreg_32 = S_ADD_I32 killed %3, 2, implicit-def dead $scc + %25:vreg_512 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16 %23, killed %22, killed %24, 3, implicit-def $m0, implicit $m0, implicit $exec + %26:vgpr_32 = COPY %23.sub15 + %27:vgpr_32 = COPY %23.sub14 + %28:vgpr_32 = COPY %23.sub13 + %29:vgpr_32 = COPY %23.sub12 + %30:vreg_128 = REG_SEQUENCE killed %29, %subreg.sub0, killed %28, %subreg.sub1, killed %27, %subreg.sub2, killed %26, %subreg.sub3 + GLOBAL_STORE_DWORDX4_SADDR %1, killed %30, %2, 48, 0, 0, 0, implicit $exec + %31:vgpr_32 = COPY %23.sub11 + %32:vgpr_32 = COPY %23.sub10 + %33:vgpr_32 = COPY %23.sub9 + %34:vgpr_32 = COPY %23.sub8 + %35:vreg_128 = REG_SEQUENCE killed %34, %subreg.sub0, killed %33, %subreg.sub1, killed %32, %subreg.sub2, killed %31, %subreg.sub3 + GLOBAL_STORE_DWORDX4_SADDR %1, killed %35, %2, 32, 0, 0, 0, implicit $exec + %36:vgpr_32 = COPY %23.sub7 + %37:vgpr_32 = COPY %23.sub6 + %38:vgpr_32 = COPY %23.sub5 + %39:vgpr_32 = COPY %23.sub4 + %40:vreg_128 = REG_SEQUENCE killed %39, %subreg.sub0, killed %38, %subreg.sub1, killed %37, %subreg.sub2, killed %36, %subreg.sub3 + GLOBAL_STORE_DWORDX4_SADDR %1, killed %40, %2, 16, 0, 0, 0, implicit $exec + %41:vgpr_32 = COPY %23.sub3 + %42:vgpr_32 = COPY %23.sub2 + %43:vgpr_32 = COPY %23.sub1 + %44:vgpr_32 = COPY killed %23.sub0 + %45:vreg_128 = REG_SEQUENCE killed %44, %subreg.sub0, killed %43, %subreg.sub1, killed 
%42, %subreg.sub2, killed %41, %subreg.sub3 + GLOBAL_STORE_DWORDX4_SADDR %1, killed %45, %2, 0, 0, 0, 0, implicit $exec + %46:vgpr_32 = COPY %25.sub15 + %47:vgpr_32 = COPY %25.sub14 + %48:vgpr_32 = COPY %25.sub13 + %49:vgpr_32 = COPY %25.sub12 + %50:vreg_128 = REG_SEQUENCE killed %49, %subreg.sub0, killed %48, %subreg.sub1, killed %47, %subreg.sub2, killed %46, %subreg.sub3 + GLOBAL_STORE_DWORDX4_SADDR %1, killed %50, %2, 112, 0, 0, 0, implicit $exec + %51:vgpr_32 = COPY %25.sub11 + %52:vgpr_32 = COPY %25.sub10 + %53:vgpr_32 = COPY %25.sub9 + %54:vgpr_32 = COPY %25.sub8 + %55:vreg_128 = REG_SEQUENCE killed %54, %subreg.sub0, killed %53, %subreg.sub1, killed %52, %subreg.sub2, killed %51, %subreg.sub3 + GLOBAL_STORE_DWORDX4_SADDR %1, killed %55, %2, 96, 0, 0, 0, implicit $exec + %56:vgpr_32 = COPY %25.sub7 + %57:vgpr_32 = COPY %25.sub6 + %58:vgpr_32 = COPY %25.sub5 + %59:vgpr_32 = COPY %25.sub4 + %60:vreg_128 = REG_SEQUENCE killed %59, %subreg.sub0, killed %58, %subreg.sub1, killed %57, %subreg.sub2, killed %56, %subreg.sub3 + GLOBAL_STORE_DWORDX4_SADDR %1, killed %60, %2, 80, 0, 0, 0, implicit $exec + %61:vgpr_32 = COPY %25.sub3 + %62:vgpr_32 = COPY %25.sub2 + %63:vgpr_32 = COPY %25.sub1 + %64:vgpr_32 = COPY killed %25.sub0 + %65:vreg_128 = REG_SEQUENCE killed %64, %subreg.sub0, killed %63, %subreg.sub1, killed %62, %subreg.sub2, killed %61, %subreg.sub3 + GLOBAL_STORE_DWORDX4_SADDR killed %1, killed %65, killed %2, 64, 0, 0, 0, implicit $exec + S_ENDPGM 0 + +... + diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll @@ -40,9 +40,33 @@ ret void } +; Avoid inserting extra v_mov from copies within the vgpr indexing sequence. The +; gpr_idx mode switching sequence is expanded late for this reason. 
+ +; GCN-LABEL: {{^}}insert_w_offset_multiple_in_block + +; GCN: s_set_gpr_idx_on +; GCN-NEXT: v_mov_b32_e32 +; GCN-NEXT: s_set_gpr_idx_off + +; GCN: s_set_gpr_idx_on +; GCN-NEXT: v_mov_b32_e32 +; GCN-NOT: v_mov_b32_e32 +; GCN-NEXT: s_set_gpr_idx_off +define amdgpu_kernel void @insert_w_offset_multiple_in_block(<16 x float> addrspace(1)* %out1, i32 %in) #0 { +entry: + %add1 = add i32 %in, 1 + %ins1 = insertelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, float 17.0, i32 %add1 + %add2 = add i32 %in, 2 + %ins2 = insertelement <16 x float> %ins1, float 17.0, i32 %add2 + store <16 x float> %ins1, <16 x float> addrspace(1)* %out1 + %out2 = getelementptr <16 x float>, <16 x float> addrspace(1)* %out1, i32 1 + store <16 x float> %ins2, <16 x float> addrspace(1)* %out2 + + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 declare void @llvm.amdgcn.s.barrier() #2 attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } -attributes #2 = { nounwind convergent } diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -99,8 +99,8 @@ ; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} ; MOVREL: v_movrels_b32_e32 v{{[0-9]}}, v0 -; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}} -; IDXMODE: v_mov_b32_e32 v0, +; IDXMODE-DAG: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}} +; IDXMODE-DAG: v_mov_b32_e32 v0, ; IDXMODE: v_mov_b32_e32 v1, ; IDXMODE: v_mov_b32_e32 v2, ; IDXMODE: v_mov_b32_e32 v3, @@ -273,8 +273,8 @@ ; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} ; MOVREL: v_movreld_b32_e32 v0, 5 -; IDXMODE: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00{{$}} -; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, gpr_idx(DST) +; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}} +; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], gpr_idx(DST) ; IDXMODE-NEXT: v_mov_b32_e32 v0, 5 ; IDXMODE-NEXT: s_set_gpr_idx_off define amdgpu_kernel void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <16 x i32> addrspace(1)* %out, <16 x i32> %vec, i32 %offset) { diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -85,10 +85,8 @@ ; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec ; GCN: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, $vgpr16, implicit $exec ; GCN: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN: S_SET_GPR_IDX_ON killed renamable $sgpr2, 1, implicit-def $m0, implicit-def undef $mode, implicit $m0, implicit $mode - ; GCN: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0 + ; GCN: renamable $vgpr0 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec ; GCN: SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) - ; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr32, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) ; GCN: renamable $sgpr2_sgpr3
= COPY renamable $sgpr0_sgpr1 ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store 8 into %stack.4, align 4, addrspace 5)