diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -404,6 +404,21 @@
   return true;
 }
 
+// Returns true when every operand that follows the explicit def/use pair of
+// the copy instruction \p MI is a reserved physical register. An operand that
+// is not a physical register cannot be reserved, so it makes the result false
+// instead of being handed to getReg()/isReserved().
+static bool isCopyWithReservedImplicitOpnds(const MachineInstr &MI,
+                                            const MachineRegisterInfo &MRI) {
+  // Operands 0 and 1 are the explicit destination and source of the COPY.
+  for (unsigned I = 2, E = MI.getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI.getOperand(I);
+    if (!MO.isReg() || !MO.getReg().isPhysical() ||
+        !MRI.isReserved(MO.getReg()))
+      return false;
+  }
+  return true;
+}
 void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) {
   if (!MI.isIdentityCopy())
     return;
@@ -424,8 +439,11 @@
   // %al = COPY %al, implicit-def %eax
   // give us additional liveness information: The target (super-)register
   // must not be valid before this point. Replace the COPY with a KILL
-  // instruction to maintain this information.
-  if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
+  // instruction to maintain this information. Do not insert KILL when the
+  // implicit operands are all reserved registers.
+ if (MI.getOperand(1).isUndef() || + ((MI.getNumOperands() > 2) && + !isCopyWithReservedImplicitOpnds(MI, *MRI))) { MI.setDesc(TII->get(TargetOpcode::KILL)); LLVM_DEBUG(dbgs() << " replace by: " << MI); return; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll @@ -688,7 +688,6 @@ ; SI-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SI-NEXT: s_nop 1 ; SI-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; SI-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; SI-NEXT: s_and_b64 exec, exec, s[0:1] ; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 ; SI-NEXT: s_and_b64 s[2:3], s[0:1], vcc @@ -736,7 +735,6 @@ ; GFX9-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX9-NEXT: s_nop 1 ; GFX9-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX9-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 ; GFX9-NEXT: s_and_b64 s[2:3], s[0:1], vcc @@ -782,7 +780,6 @@ ; GFX10-32-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-32-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-32-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s0 ; GFX10-32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0 ; GFX10-32-NEXT: s_and_b32 s1, s0, vcc_lo @@ -828,7 +825,6 @@ ; GFX10-64-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-64-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-64-NEXT: v_subrev_f32_dpp v0, v0, v1 
quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-64-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX10-64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 ; GFX10-64-NEXT: s_and_b64 s[2:3], s[0:1], vcc @@ -932,7 +928,6 @@ ; SI-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SI-NEXT: s_nop 1 ; SI-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; SI-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 ; SI-NEXT: s_and_b64 s[4:5], s[0:1], vcc ; SI-NEXT: s_xor_b64 s[4:5], s[4:5], -1 @@ -998,7 +993,6 @@ ; GFX9-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX9-NEXT: s_nop 1 ; GFX9-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX9-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 ; GFX9-NEXT: s_and_b64 s[4:5], s[0:1], vcc ; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], -1 @@ -1061,7 +1055,6 @@ ; GFX10-32-NEXT: v_mov_b32_e32 v3, v2 ; GFX10-32-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-32-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-32-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; GFX10-32-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2 ; GFX10-32-NEXT: s_and_b32 s2, s0, vcc_lo ; GFX10-32-NEXT: s_xor_b32 s2, s2, -1 @@ -1125,7 +1118,6 @@ ; GFX10-64-NEXT: v_mov_b32_e32 v3, v2 ; GFX10-64-NEXT: v_mov_b32_dpp v3, v3 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-64-NEXT: v_subrev_f32_dpp v2, v2, v3 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-64-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; GFX10-64-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2 ; GFX10-64-NEXT: s_and_b64 s[4:5], s[0:1], vcc ; GFX10-64-NEXT: s_xor_b64 
s[4:5], s[4:5], -1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll @@ -12,7 +12,6 @@ ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v0, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -33,7 +32,6 @@ ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v0, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -58,7 +56,6 @@ ; CHECK-NEXT: v_add_f32_e32 v1, v1, v2 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 idxen ; CHECK-NEXT: v_add_f32_e32 v0, v1, v1 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: ; return to shader part epilog main_body: @@ -124,7 +121,6 @@ ; CHECK-NEXT: s_mov_b64 exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v0, v1 ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -156,7 +152,6 @@ ; CHECK-NEXT: s_mov_b64 exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v0, v1 ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -191,7 +186,6 @@ 
; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v2, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; CHECK-NEXT: .LBB6_4: ; %END ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: v_mov_b32_e32 v0, v2 @@ -246,7 +240,6 @@ ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v2, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; CHECK-NEXT: .LBB7_4: ; %END ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: s_and_b64 exec, exec, s[14:15] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll @@ -684,7 +684,6 @@ ; SI-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SI-NEXT: s_nop 1 ; SI-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; SI-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; SI-NEXT: s_and_b64 exec, exec, s[0:1] ; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; SI-NEXT: s_or_b64 s[2:3], s[2:3], vcc @@ -732,7 +731,6 @@ ; GFX9-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX9-NEXT: s_nop 1 ; GFX9-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX9-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX9-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], vcc @@ -777,7 +775,6 @@ ; GFX10-32-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-32-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-32-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-32-NEXT: ; kill: def $vgpr0 killed $vgpr0 
killed $exec ; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s0 ; GFX10-32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 ; GFX10-32-NEXT: s_xor_b32 s1, s0, -1 @@ -823,7 +820,6 @@ ; GFX10-64-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-64-NEXT: v_mov_b32_dpp v1, v1 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-64-NEXT: v_subrev_f32_dpp v0, v0, v1 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-64-NEXT: s_and_b64 exec, exec, s[0:1] ; GFX10-64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; GFX10-64-NEXT: s_xor_b64 s[2:3], s[0:1], -1 @@ -921,7 +917,6 @@ ; SI-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; SI-NEXT: s_nop 1 ; SI-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; SI-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; SI-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; SI-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; SI-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] @@ -987,7 +982,6 @@ ; GFX9-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX9-NEXT: s_nop 1 ; GFX9-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX9-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX9-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] @@ -1050,7 +1044,6 @@ ; GFX10-32-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-32-NEXT: v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-32-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-32-NEXT: v_cmp_neq_f32_e32 vcc_lo, 0, v0 ; GFX10-32-NEXT: s_or_b32 s3, s3, vcc_lo ; GFX10-32-NEXT: s_and_saveexec_b32 s4, s3 @@ -1113,7 +1106,6 @@ ; GFX10-64-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-64-NEXT: 
v_mov_b32_dpp v2, v2 quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; GFX10-64-NEXT: v_subrev_f32_dpp v0, v0, v2 quad_perm:[0,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 -; GFX10-64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-64-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 ; GFX10-64-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX10-64-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/optimize-kill.mir b/llvm/test/CodeGen/AMDGPU/optimize-kill.mir --- a/llvm/test/CodeGen/AMDGPU/optimize-kill.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-kill.mir @@ -23,7 +23,6 @@ ; GCN-NEXT: $exec = S_WQM_B64 $exec, implicit-def $scc ; GCN-NEXT: renamable $vgpr0 = COPY renamable $sgpr0 ; GCN-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_IDXEN killed renamable $vgpr0, undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: renamable $vgpr0 = KILL killed renamable $vgpr0, implicit $exec ; GCN-NEXT: $exec = S_AND_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0 renamable $sgpr2_sgpr3 = COPY $exec diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -204,7 +204,6 @@ ; GFX9-W64-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -219,7 +218,6 @@ ; GFX10-W32-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: @@ -243,7 +241,6 @@ ; GFX9-W64-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; 
GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -258,7 +255,6 @@ ; GFX10-W32-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: @@ -496,7 +492,6 @@ ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -518,7 +513,6 @@ ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog @@ -962,7 +956,6 @@ ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -986,7 +979,6 @@ ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog @@ -1176,7 +1168,6 @@ ; GFX9-W64-NEXT: s_nop 0 ; GFX9-W64-NEXT: buffer_load_dword v2, v2, s[0:3], 0 idxen ; GFX9-W64-NEXT: ; 
kill: def $vgpr1 killed $vgpr1 def $scc killed $exec -; GFX9-W64-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_u32_e32 v1, v2, v1 @@ -1193,7 +1184,6 @@ ; GFX10-W32-NEXT: buffer_load_dword v2, v0, s[0:3], 0 idxen ; GFX10-W32-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $scc killed $exec -; GFX10-W32-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_nc_u32_e32 v1, v1, v2 @@ -2499,7 +2489,6 @@ ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -2521,7 +2510,6 @@ ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog