Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3462,21 +3462,27 @@
   return LoopBB;
 }
 
-static unsigned getMOVRELDPseudo(const SIRegisterInfo &TRI,
+static unsigned getIndirectRegWritePseudo(const SIRegisterInfo &TRI,
                                  const TargetRegisterClass *VecRC) {
   switch (TRI.getRegSizeInBits(*VecRC)) {
   case 32: // 4 bytes
-    return AMDGPU::V_MOVRELD_B32_V1;
+    return AMDGPU::V_INDIRECT_REG_WRITE_B32_V1;
   case 64: // 8 bytes
-    return AMDGPU::V_MOVRELD_B32_V2;
+    return AMDGPU::V_INDIRECT_REG_WRITE_B32_V2;
+  case 96: // 12 bytes
+    return AMDGPU::V_INDIRECT_REG_WRITE_B32_V3;
   case 128: // 16 bytes
-    return AMDGPU::V_MOVRELD_B32_V4;
+    return AMDGPU::V_INDIRECT_REG_WRITE_B32_V4;
+  case 160: // 20 bytes
+    return AMDGPU::V_INDIRECT_REG_WRITE_B32_V5;
   case 256: // 32 bytes
-    return AMDGPU::V_MOVRELD_B32_V8;
+    return AMDGPU::V_INDIRECT_REG_WRITE_B32_V8;
   case 512: // 64 bytes
-    return AMDGPU::V_MOVRELD_B32_V16;
+    return AMDGPU::V_INDIRECT_REG_WRITE_B32_V16;
+  case 1024: // 128 bytes
+    return AMDGPU::V_INDIRECT_REG_WRITE_B32_V32;
   default:
-    llvm_unreachable("unsupported size for MOVRELD pseudos");
+    llvm_unreachable("unsupported size for IndirectRegWrite pseudos");
   }
 }
 
@@ -3523,24 +3529,14 @@
   MachineBasicBlock::iterator I(&MI);
   const DebugLoc &DL = MI.getDebugLoc();
 
-  if (UseGPRIdxMode) {
-    BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
-        .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
-        .add(*Val)
-        .addReg(Dst, RegState::ImplicitDefine)
-        .addReg(SrcVec->getReg(), RegState::Implicit)
-        .addReg(AMDGPU::M0, RegState::Implicit);
-
+  const MCInstrDesc &MovRelDesc
+    = TII->get(getIndirectRegWritePseudo(TRI, VecRC));
+  BuildMI(MBB, I, DL, MovRelDesc, Dst)
+      .addReg(SrcVec->getReg())
+      .add(*Val)
+      .addImm(SubReg);
+  if (UseGPRIdxMode)
     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
-  } else {
-    const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC));
-
-    BuildMI(MBB, I, DL, MovRelDesc)
-        .addReg(Dst, RegState::Define)
-        .addReg(SrcVec->getReg())
-        .add(*Val)
-        .addImm(SubReg - AMDGPU::sub0);
-  }
 
   MI.eraseFromParent();
   return &MBB;
@@ -3557,26 +3553,15 @@
                                Offset, UseGPRIdxMode, false);
   MachineBasicBlock *LoopBB = InsPt->getParent();
 
-  if (UseGPRIdxMode) {
-    BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
-        .addReg(PhiReg, RegState::Undef, SubReg) // vdst
-        .add(*Val)                               // src0
-        .addReg(Dst, RegState::ImplicitDefine)
-        .addReg(PhiReg, RegState::Implicit)
-        .addReg(AMDGPU::M0, RegState::Implicit);
+  const MCInstrDesc &MovRelDesc = TII->get(getIndirectRegWritePseudo(TRI, VecRC));
+  BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst)
+      .addReg(PhiReg)
+      .add(*Val)
+      .addImm(SubReg);
+  if (UseGPRIdxMode)
     BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
-  } else {
-    const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC));
-
-    BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
-        .addReg(Dst, RegState::Define)
-        .addReg(PhiReg)
-        .add(*Val)
-        .addImm(SubReg - AMDGPU::sub0);
-  }
 
   MI.eraseFromParent();
-
   return LoopBB;
 }
 
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1480,30 +1480,33 @@
     MI.eraseFromParent();
     break;
   }
-  case AMDGPU::V_MOVRELD_B32_V1:
-  case AMDGPU::V_MOVRELD_B32_V2:
-  case AMDGPU::V_MOVRELD_B32_V4:
-  case AMDGPU::V_MOVRELD_B32_V8:
-  case AMDGPU::V_MOVRELD_B32_V16: {
-    const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
+  case AMDGPU::V_INDIRECT_REG_WRITE_B32_V1:
+  case AMDGPU::V_INDIRECT_REG_WRITE_B32_V2:
+  case AMDGPU::V_INDIRECT_REG_WRITE_B32_V3:
+  case AMDGPU::V_INDIRECT_REG_WRITE_B32_V4:
+  case AMDGPU::V_INDIRECT_REG_WRITE_B32_V5:
+  case AMDGPU::V_INDIRECT_REG_WRITE_B32_V8:
+  case AMDGPU::V_INDIRECT_REG_WRITE_B32_V16:
+  case AMDGPU::V_INDIRECT_REG_WRITE_B32_V32: {
+    unsigned Opc = ST.useVGPRIndexMode() ?
+        AMDGPU::V_MOV_B32_indirect : AMDGPU::V_MOVRELD_B32_e32;
+    const MCInstrDesc &OpDesc = get(Opc);
     Register VecReg = MI.getOperand(0).getReg();
     bool IsUndef = MI.getOperand(1).isUndef();
-    unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
+    unsigned SubReg = MI.getOperand(3).getImm();
     assert(VecReg == MI.getOperand(1).getReg());
 
-    MachineInstr *MovRel =
-        BuildMI(MBB, MI, DL, MovRelDesc)
-            .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
-            .add(MI.getOperand(2))
-            .addReg(VecReg, RegState::ImplicitDefine)
-            .addReg(VecReg,
-                    RegState::Implicit | (IsUndef ? RegState::Undef : 0));
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MI, DL, OpDesc)
+            .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
+            .add(MI.getOperand(2))
+            .addReg(VecReg, RegState::ImplicitDefine)
+            .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
 
     const int ImpDefIdx =
-        MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
+        OpDesc.getNumOperands() + OpDesc.getNumImplicitUses();
     const int ImpUseIdx = ImpDefIdx + 1;
-    MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
-
+    MIB->tieOperands(ImpDefIdx, ImpUseIdx);
     MI.eraseFromParent();
     break;
   }
Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -812,25 +812,28 @@
   let SubtargetPredicate = isGFX8GFX9;
 }
 
-// This is a pseudo variant of the v_movreld_b32 instruction in which the
-// vector operand appears only twice, once as def and once as use. Using this
-// pseudo avoids problems with the Two Address instructions pass.
-class V_MOVRELD_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
+// This is a pseudo variant of the v_movreld_b32 (or v_mov_b32
+// expecting to be executed with gpr indexing mode enabled)
+// instruction in which the vector operand appears only twice, once as
+// def and once as use. Using this pseudo avoids problems with the Two
+// Address instructions pass.
+class V_INDIRECT_REG_WRITE_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
   (outs rc:$vdst),
-  (ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> {
+  (ins rc:$vsrc, VSrc_b32:$val, i32imm:$subreg)> {
   let VOP1 = 1;
 
   let Constraints = "$vsrc = $vdst";
   let Uses = [M0, EXEC];
-
-  let SubtargetPredicate = HasMovrel;
 }
 
-def V_MOVRELD_B32_V1 : V_MOVRELD_B32_pseudo<VGPR_32>;
-def V_MOVRELD_B32_V2 : V_MOVRELD_B32_pseudo<VReg_64>;
-def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>;
-def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>;
-def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
+def V_INDIRECT_REG_WRITE_B32_V1 : V_INDIRECT_REG_WRITE_B32_pseudo<VGPR_32>;
+def V_INDIRECT_REG_WRITE_B32_V2 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_64>;
+def V_INDIRECT_REG_WRITE_B32_V3 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_96>;
+def V_INDIRECT_REG_WRITE_B32_V4 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_128>;
+def V_INDIRECT_REG_WRITE_B32_V5 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_160>;
+def V_INDIRECT_REG_WRITE_B32_V8 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_256>;
+def V_INDIRECT_REG_WRITE_B32_V16 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_512>;
+def V_INDIRECT_REG_WRITE_B32_V32 : V_INDIRECT_REG_WRITE_B32_pseudo<VReg_1024>;
 
 let OtherPredicates = [isGFX8Plus] in {