diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -491,6 +491,7 @@ const unsigned SearchLimit = 16; unsigned Count = 0; bool KilledT = false; + bool UsedT = false; for (auto Iter = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end(); Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) { @@ -498,6 +499,9 @@ MachineInstr *MovY = &*Iter; KilledT = MovY->killsRegister(T, &TRI); + bool PrevUsedT = UsedT; + UsedT = UsedT || instReadsReg(MovY, T, Tsub, TRI); + if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 && MovY->getOpcode() != AMDGPU::COPY) || !MovY->getOperand(1).isReg() || @@ -573,7 +577,10 @@ dropInstructionKeepingImpDefs(*MovY, TII); MachineInstr *Next = &*std::next(MovT.getIterator()); - if (MRI.use_nodbg_empty(T)) { + if (T.isVirtual() ? MRI.use_nodbg_empty(T) + : (MovY->getOperand(1).isKill() && !PrevUsedT)) { + // For physical registers: the use of T in MovY is killed, and we didn't + // see any other uses of T before MovY. dropInstructionKeepingImpDefs(MovT, TII); } else { Xop.setIsKill(false); diff --git a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir --- a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir +++ b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir @@ -46,13 +46,13 @@ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec - $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr2, implicit $vgpr1 ... -# FIXME: should not remove the def of $vgpr2 because $vgpr2_vgpr3 is live out # GCN-LABEL: name: swap_phys_liveout_superreg # GCN: bb.0: +# GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec # GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec # GCN-NEXT: S_SETPC_B64_return --- @@ -62,7 +62,7 @@ liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec - $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec + $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr2_vgpr3, implicit $vgpr1 ...