diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1429,6 +1429,11 @@ return findRegisterDefOperandIdx(Reg, false, false, TRI) != -1; } + bool explicitlyDefinesRegister(Register Reg, + const TargetRegisterInfo *TRI = nullptr) const { + return findRegisterDefOperandIdx(Reg, false, false, TRI, true) != -1; + } + /// Return true if the MachineInstr modifies (fully define or partially /// define) the specified register. /// NOTE: It's ignoring subreg indices on virtual registers. @@ -1478,7 +1483,8 @@ /// This may also return a register mask operand when Overlap is true. int findRegisterDefOperandIdx(Register Reg, bool isDead = false, bool Overlap = false, - const TargetRegisterInfo *TRI = nullptr) const; + const TargetRegisterInfo *TRI = nullptr, + bool isExplicit = false) const; /// Wrapper for findRegisterDefOperandIdx, it returns /// a pointer to the MachineOperand rather than an index. diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1052,7 +1052,7 @@ /// also checks if there is a def of a super-register. int MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, bool isExplicit) const { bool isPhys = Register::isPhysicalRegister(Reg); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -1060,7 +1060,7 @@ // Ignore them when looking for a specific def operand (Overlap == false). if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg)) return i; - if (!MO.isReg() || !MO.isDef()) + if (!MO.isReg() || !MO.isDef() || (isExplicit && MO.isImplicit())) continue; Register MOReg = MO.getReg(); bool Found = (MOReg == Reg); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -574,7 +574,7 @@ // First try to find defining accvgpr_write to avoid temporary registers. for (auto Def = MI, E = MBB.begin(); Def != E; ) { --Def; - if (!Def->definesRegister(SrcReg, &RI)) + if (!Def->explicitlyDefinesRegister(SrcReg, &RI)) continue; if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64) break; diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-copy-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-copy-implicit-def.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-copy-implicit-def.mir @@ -0,0 +1,27 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX908 %s + +--- +name: no_free_vgprs_for_copy_a32_to_a32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-LABEL: name: no_free_vgprs_for_copy_a32_to_a32 + ; GFX908: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: $vgpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 260, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr127, implicit $exec, implicit-def $agpr0_agpr1 + ; GFX908-NEXT: $vgpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 264, 0, 0, implicit $exec + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr127, implicit $exec, implicit-def $agpr0_agpr1 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr127, implicit $exec, implicit-def $agpr4_agpr5, implicit $agpr0_agpr1 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec + ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs + $vgpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 260, 0, 0, implicit $exec + $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr127, implicit $exec, implicit-def $agpr0_agpr1 + $vgpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 264, 0, 0, implicit $exec + $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr127, implicit $exec, implicit-def $agpr0_agpr1 + $agpr4_agpr5 = COPY killed $agpr0_agpr1, implicit $exec + S_ENDPGM 0, amdgpu_allvgprs +... diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir --- a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir @@ -931,8 +931,9 @@ ; GFX908: liveins: $agpr1_agpr2_agpr3 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: $vgpr255 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr255, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr255, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr255, implicit $exec, implicit-def $agpr0_agpr1_agpr2 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3 ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec @@ -966,12 +967,13 @@ ; GFX908-LABEL: name: a4_to_a4 ; GFX908: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; GFX908-NEXT: $vgpr255 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr255, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5 ; GFX90A-LABEL: name: a4_to_a4 ; GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF @@ -1002,12 +1004,13 @@ ; GFX908: liveins: $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 - ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4_agpr5 + ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec ; GFX908-NEXT: $vgpr255 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr255, implicit $exec - ; GFX908-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 - ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $exec + ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2, implicit $agpr3, implicit $agpr4, implicit $agpr5 ; GFX90A-LABEL: name: a4_to_a4_overlap ; GFX90A: liveins: $agpr0_agpr1_agpr2_agpr3