Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -919,6 +919,10 @@ ArrayRef SubIndices = RI.getRegSplitParts(RC, 4); + // If there is an overlap, we can't kill the super-register on the last + // instruction, since it will also kill the components made live by this def. + const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg); + for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { unsigned SubIdx; if (Forward) @@ -926,7 +930,7 @@ else SubIdx = SubIndices[SubIndices.size() - Idx - 1]; - bool UseKill = KillSrc && Idx == SubIndices.size() - 1; + bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1; if (Opcode == AMDGPU::INSTRUCTION_LIST_END) { Register ImpDefSuper = Idx == 0 ? Register(DestReg) : Register(); Index: llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir +++ llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir @@ -26,7 +26,9 @@ define amdgpu_kernel void @a_to_a() #0 { ret void } define amdgpu_kernel void @a2_to_a2() #0 { ret void } - define amdgpu_kernel void @a3_to_a3() #0 { ret void } + define amdgpu_kernel void @a2_to_a2_kill() #0 { ret void } + define amdgpu_kernel void @a3_to_a3_nonoverlap_kill() #0 { ret void } + define amdgpu_kernel void @a3_to_a3_overlap_kill() #0 { ret void } define amdgpu_kernel void @a4_to_a4() #0 { ret void } define amdgpu_kernel void @a4_to_a4_overlap() #0 { ret void } define amdgpu_kernel void @a8_to_a8() #0 { ret void } @@ -455,39 +457,61 @@ ... --- -name: a2_to_a2 +name: a2_to_a2_kill tracksRegLiveness: true body: | bb.0: - ; GCN-LABEL: name: a2_to_a2 - ; GCN: $agpr0_agpr1 = IMPLICIT_DEF + liveins: $agpr0_agpr1 + ; GCN-LABEL: name: a2_to_a2_kill + ; GCN: liveins: $agpr0_agpr1 ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1 - ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit-def $agpr1_agpr2 - ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1 + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr1_agpr2 + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1 ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $exec - ; GCN: S_ENDPGM 0, implicit $agpr1_agpr2 - $agpr0_agpr1 = IMPLICIT_DEF + ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 $agpr1_agpr2 = COPY killed $agpr0_agpr1, implicit $exec - S_ENDPGM 0, implicit $agpr1_agpr2 + $agpr3 = COPY $agpr2 + S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 ... --- -name: a3_to_a3 +name: a3_to_a3_nonoverlap_kill tracksRegLiveness: true body: | bb.0: - ; GCN-LABEL: name: a3_to_a3 - ; GCN: $agpr0_agpr1_agpr2 = IMPLICIT_DEF - ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2 - ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4 - ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 - ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec - ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 - ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $exec - ; GCN: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4 - $agpr0_agpr1_agpr2 = IMPLICIT_DEF - $agpr2_agpr3_agpr4 = COPY killed $agpr0_agpr1_agpr2, implicit $exec - S_ENDPGM 0, implicit $agpr2_agpr3_agpr4 + liveins: $agpr4_agpr5_agpr6 + ; GCN-LABEL: name: a3_to_a3_nonoverlap_kill + ; GCN: liveins: $agpr4_agpr5_agpr6 + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr4, implicit $exec, implicit $agpr4_agpr5_agpr6 + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr5, implicit $exec, implicit $agpr4_agpr5_agpr6 + ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec + ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr6, implicit $exec, implicit killed $agpr4_agpr5_agpr6 + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 + $agpr0_agpr1_agpr2 = COPY killed $agpr4_agpr5_agpr6 + S_ENDPGM 0, implicit $agpr0_agpr1_agpr2 +... + +--- +name: a3_to_a3_overlap_kill +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr1_agpr2_agpr3 + ; GCN-LABEL: name: a3_to_a3_overlap_kill + ; GCN: liveins: $agpr1_agpr2_agpr3 + ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GCN: $vgpr4 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr4, implicit $exec + ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec + ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1 + $agpr0_agpr1_agpr2 = COPY killed $agpr1_agpr2_agpr3 + $vgpr1 = COPY $agpr1 + S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1 ... --- @@ -502,7 +526,7 @@ ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec - ; GCN: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 + ; GCN: $vgpr3 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr3, implicit $exec, implicit $exec ; GCN: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5 $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF Index: llvm/test/CodeGen/AMDGPU/copy-overlap-vgpr-kill.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/copy-overlap-vgpr-kill.mir @@ -0,0 +1,87 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=postrapseudos -o - %s | FileCheck %s + +# Don't set a kill of the super register on the last instruction with +# an overlapping copy. This would kill part of the values in the +# result copies. + +--- +name: overlapping_copy_kill_undef_reg_after_copy +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr30_sgpr31, $vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: overlapping_copy_kill_undef_reg_after_copy + ; CHECK: liveins: $sgpr30_sgpr31, $vgpr1_vgpr2_vgpr3 + ; CHECK: $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr1_vgpr2_vgpr3 + ; CHECK: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $vgpr1_vgpr2_vgpr3 + ; CHECK: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr1_vgpr2_vgpr3 + ; CHECK: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec + ; CHECK: S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + renamable $vgpr0_vgpr1_vgpr2 = COPY killed renamable $vgpr1_vgpr2_vgpr3 + renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec + S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + +... + +--- +name: overlapping_copy_kill_undef_reg_after_copy_1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr30_sgpr31, $vgpr2_vgpr3_vgpr4 + + ; CHECK-LABEL: name: overlapping_copy_kill_undef_reg_after_copy_1 + ; CHECK: liveins: $sgpr30_sgpr31, $vgpr2_vgpr3_vgpr4 + ; CHECK: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr2_vgpr3_vgpr4 + ; CHECK: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3_vgpr4 + ; CHECK: $vgpr2 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit $vgpr2_vgpr3_vgpr4 + ; CHECK: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec + ; CHECK: S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + renamable $vgpr0_vgpr1_vgpr2 = COPY killed renamable $vgpr2_vgpr3_vgpr4 + renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec + S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + +... + +--- +name: nonoverlapping_copy_kill +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr30_sgpr31, $vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: nonoverlapping_copy_kill + ; CHECK: liveins: $sgpr30_sgpr31, $vgpr3_vgpr4_vgpr5 + ; CHECK: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr3_vgpr4_vgpr5 + ; CHECK: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit $vgpr3_vgpr4_vgpr5 + ; CHECK: $vgpr2 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit killed $vgpr3_vgpr4_vgpr5 + ; CHECK: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec + ; CHECK: S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + renamable $vgpr0_vgpr1_vgpr2 = COPY killed renamable $vgpr3_vgpr4_vgpr5 + renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec + S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + +... + +--- +name: overlapping_copy_kill_half_s128 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr30_sgpr31, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: overlapping_copy_kill_half_s128 + ; CHECK: liveins: $sgpr30_sgpr31, $vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: $vgpr2 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: $vgpr3 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: renamable $vgpr1 = V_OR_B32_e32 1, $vgpr1, implicit $exec + ; CHECK: S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5 + renamable $vgpr1 = V_OR_B32_e32 1, $vgpr1, implicit $exec + S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + +...