Summary of this patch (text before the first "diff --git" header is ignored
by git-apply and patch):
  * SIFoldOperands::isUseSafeToFold drops the UseMO.isUndef() test and the
    per-opcode V_MOV "implicit use of M0" test; only the SDWA test remains.
  * SIShrinkInstructions.cpp drops three hasRegisterImplicitUseOperand(M0)
    bail-outs (on MovT, on MovY, and on instructions scanned in between).
  * v_swap_b32.mir: the indirect_mov_{t,x,y} tests use
    V_MOV_B32_indirect_read instead of V_MOV_B32_e32 with an implicit $m0.

NOTE(review): line breaks were reconstructed so that every hunk matches the
old/new line counts in its @@ header. Leading indentation follows LLVM / MIR
conventions, and runs of spaces inside a line (e.g. YAML key padding such as
"body: |") could not be recovered; the final blank context line of the last
hunk is likewise inferred from the header count. Verify against the tree or
apply with whitespace tolerance.

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -449,24 +449,10 @@
   return true;
 }
 
-// If the use operand doesn't care about the value, this may be an operand only
-// used for register indexing, in which case it is unsafe to fold.
 bool SIFoldOperands::isUseSafeToFold(const MachineInstr &MI,
                                      const MachineOperand &UseMO) const {
-  if (UseMO.isUndef() || TII->isSDWA(MI))
-    return false;
-
-  switch (MI.getOpcode()) {
-  case AMDGPU::V_MOV_B32_e32:
-  case AMDGPU::V_MOV_B32_e64:
-  case AMDGPU::V_MOV_B64_PSEUDO:
-  case AMDGPU::V_MOV_B64_e32:
-  case AMDGPU::V_MOV_B64_e64:
-    // Do not fold into an indirect mov.
-    return !MI.hasRegisterImplicitUseOperand(AMDGPU::M0);
-  }
-
-  return true;
+  // Operands of SDWA instructions must be registers.
+  return !TII->isSDWA(MI);
 }
 
 // Find a def of the UseReg, check if it is a reg_sequence and find initializers
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -637,9 +637,6 @@
   if (!TRI->isVGPR(*MRI, X))
     return nullptr;
 
-  if (MovT.hasRegisterImplicitUseOperand(AMDGPU::M0))
-    return nullptr;
-
   const unsigned SearchLimit = 16;
   unsigned Count = 0;
   bool KilledT = false;
@@ -654,8 +651,7 @@
          MovY->getOpcode() != AMDGPU::COPY) ||
         !MovY->getOperand(1).isReg() ||
         MovY->getOperand(1).getReg() != T ||
-        MovY->getOperand(1).getSubReg() != Tsub ||
-        MovY->hasRegisterImplicitUseOperand(AMDGPU::M0))
+        MovY->getOperand(1).getSubReg() != Tsub)
       continue;
 
     Register Y = MovY->getOperand(0).getReg();
@@ -688,9 +684,6 @@
         MovX = nullptr;
         break;
       }
-      // Implicit use of M0 is an indirect move.
-      if (I->hasRegisterImplicitUseOperand(AMDGPU::M0))
-        continue;
 
       if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
         continue;
diff --git a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
--- a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
+++ b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
@@ -848,7 +848,7 @@
 ...
 
 # GCN-LABEL: name: indirect_mov_t
-# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0
+# GCN: $vgpr2 = V_MOV_B32_indirect_read killed $vgpr0, implicit $exec, implicit $m0
 # GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
 # GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
 # GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
@@ -859,7 +859,7 @@
 body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
-    $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0
+    $vgpr2 = V_MOV_B32_indirect_read killed $vgpr0, implicit $exec, implicit $m0
     $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
     $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
     S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
@@ -867,7 +867,7 @@
 
 # GCN-LABEL: name: indirect_mov_x
 # GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
-# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0
+# GCN-NEXT: $vgpr0 = V_MOV_B32_indirect_read killed $vgpr1, implicit $exec, implicit $m0
 # GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
 # GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
 
@@ -878,7 +878,7 @@
   bb.0:
     liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
     $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
-    $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0
+    $vgpr0 = V_MOV_B32_indirect_read killed $vgpr1, implicit $exec, implicit $m0
     $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
     S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
 ...
@@ -886,7 +886,7 @@
 # GCN-LABEL: name: indirect_mov_y
 # GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
 # GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
-# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0
+# GCN-NEXT: $vgpr1 = V_MOV_B32_indirect_read killed $vgpr2, implicit $exec, implicit $m0
 # GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
 
 ---
@@ -897,7 +897,7 @@
     liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
     $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
     $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
-    $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0
+    $vgpr1 = V_MOV_B32_indirect_read killed $vgpr2, implicit $exec, implicit $m0
     S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
 ...
 