diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -580,11 +580,21 @@ if (!RegsOverlap) { for (auto Def = MI, E = MBB.begin(); Def != E; ) { --Def; - if (!Def->definesRegister(SrcReg, &RI)) + + if (!Def->modifiesRegister(SrcReg, &RI)) continue; + if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64) break; + // Operand 0 of ACCVGPR_WRITE on gfx908 is always the explicitly written + // register, i.e. the one that may hold the bits we are interested in. + if (!Def->getOperand(0).isReg()) + break; + + if (Def->getOperand(0).getReg() != SrcReg) + break; + MachineOperand &DefOp = Def->getOperand(1); assert(DefOp.isReg() || DefOp.isImm()); diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir --- a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir @@ -29,6 +29,7 @@ define amdgpu_kernel void @a_to_a() #0 { ret void } define amdgpu_kernel void @a2_to_a2() #0 { ret void } define amdgpu_kernel void @a2_to_a2_kill() #0 { ret void } + define amdgpu_kernel void @a2_to_a2_implicit_defs() #0 { ret void } define amdgpu_kernel void @a3_to_a3_nonoverlap_kill() #0 { ret void } define amdgpu_kernel void @a3_to_a3_overlap_kill() #0 { ret void } define amdgpu_kernel void @a4_to_a4() #0 { ret void } @@ -887,6 +888,48 @@ S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3 ... 
+--- +name: a2_to_a2_implicit_defs +tracksRegLiveness: true +body: | + bb.0: + liveins: $agpr0_agpr1 + ; GFX908-LABEL: name: a2_to_a2_implicit_defs + ; GFX908: liveins: $agpr0_agpr1 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr1_agpr2 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr3_agpr4 + ; GFX908-NEXT: $vgpr255 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr1_agpr2 + ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr255, implicit $exec, implicit $exec + ; GFX90A-LABEL: name: a2_to_a2_implicit_defs + ; GFX90A: liveins: $agpr0_agpr1 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1 + ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2 + ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1 + ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2 + ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit-def $agpr3_agpr4, implicit $agpr1_agpr2 + ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit killed $agpr1_agpr2, implicit $exec + ; GFX940-LABEL: name: a2_to_a2_implicit_defs + ; GFX940: liveins: $agpr0_agpr1 + ; GFX940-NEXT: {{ $}} + ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1 + ; GFX940-NEXT: 
$agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2 + ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1 + ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2 + ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit-def $agpr3_agpr4, implicit $agpr1_agpr2 + ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit killed $agpr1_agpr2, implicit $exec + $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1 + $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2 + $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1 + $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2 + $agpr3_agpr4 = COPY killed $agpr1_agpr2, implicit $exec +... + --- name: a3_to_a3_nonoverlap_kill tracksRegLiveness: true