diff --git a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
--- a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
@@ -280,7 +280,11 @@
 
   const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
   unsigned Size = TRI->getRegSizeInBits(*RC);
-  if (Size > 32)
+  // Normalize Reg to a 32 bit register: the one holding a 16 bit register,
+  // or sub0 of a wider one.
+  if (Size == 16)
+    Reg = TRI->get32BitRegister(Reg);
+  else if (Size > 32)
     Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
 
   if (TRI->hasVGPRs(RC)) {
@@ -306,9 +310,18 @@
   }
 
   const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-  unsigned Size = TRI->getRegSizeInBits(*RC) / 32;
-  if (Size > 1)
-    Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+  unsigned Size = TRI->getRegSizeInBits(*RC);
+
+  // Turn Size into a count of 32 bit registers. A 16 bit register counts as
+  // the single 32 bit register holding it.
+  if (Size == 16) {
+    Reg = TRI->get32BitRegister(Reg);
+    Size = 1;
+  } else {
+    Size /= 32;
+    if (Size > 1)
+      Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+  }
 
   if (TRI->hasVGPRs(RC)) {
     // VGPRs have 4 banks assigned in a round-robin fashion.
@@ -440,10 +453,16 @@
   }
 
   const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
+  unsigned Size = TRI->getRegSizeInBits(*RC);
+
+  // TODO: Support 16 bit registers. Those need to be moved with their
+  //       parent VGPR_32 and potentially a sibling 16 bit sub-register.
+  if (Size < 32)
+    return false;
+
   if (TRI->hasVGPRs(RC))
     return true;
 
-  unsigned Size = TRI->getRegSizeInBits(*RC);
   if (Size > 32)
     PhysReg = TRI->getSubReg(PhysReg, AMDGPU::sub0);
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -694,18 +694,8 @@
     bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
                   AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
                   AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
-    const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass
-                                     : IsAGPRDst ? &AMDGPU::AGPR_32RegClass
-                                                 : &AMDGPU::VGPR_32RegClass;
-    const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
-                                     : IsAGPRSrc ? &AMDGPU::AGPR_32RegClass
-                                                 : &AMDGPU::VGPR_32RegClass;
-    MCRegister NewDestReg =
-      RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
-                             DstRC);
-    MCRegister NewSrcReg =
-      RI.getMatchingSuperReg(SrcReg, SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
-                             SrcRC);
+    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
+    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
 
     if (IsSGPRDst) {
       if (!IsSGPRSrc) {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -283,7 +283,7 @@
 
   // \returns a DWORD offset of a \p SubReg
   unsigned getChannelFromSubReg(unsigned SubReg) const {
-    return SubReg ? divideCeil(getSubRegIdxOffset(SubReg), 32) : 0;
+    return SubReg ? (getSubRegIdxOffset(SubReg) + 31) / 32 : 0;
   }
 
   // \returns a DWORD size of a \p SubReg
@@ -291,6 +291,10 @@
     return getNumCoveredRegs(getSubRegIndexLaneMask(SubReg));
   }
 
+  // For a given 16 bit \p Reg \returns a 32 bit register holding it.
+  // \returns \p Reg otherwise.
+  MCPhysReg get32BitRegister(MCPhysReg Reg) const;
+
 private:
   void buildSpillLoadStore(MachineBasicBlock::iterator MI,
                            unsigned LoadStoreOp,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1798,3 +1798,28 @@
 
   return Def;
 }
+
+MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
+  const unsigned Size = getRegSizeInBits(*getPhysRegClass(Reg));
+  assert(Size <= 32 && "Expected a register of at most 32 bits");
+
+  // A 32 bit register is returned as is, as the declaration documents.
+  if (Size == 32)
+    return Reg;
+
+  // Try Reg as the lo16 half of a VGPR_32, SReg_32 or AGPR_32 register. The
+  // classes are taken by pointer so the braced list holds no copies of them.
+  for (const TargetRegisterClass *RC : { &AMDGPU::VGPR_32RegClass,
+                                         &AMDGPU::SReg_32RegClass,
+                                         &AMDGPU::AGPR_32RegClass } ) {
+    if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, RC))
+      return Super;
+  }
+  // Otherwise try Reg as the hi16 half of a VGPR_32 register.
+  if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
+                                            &AMDGPU::VGPR_32RegClass)) {
+    return Super;
+  }
+
+  return AMDGPU::NoRegister;
+}
diff --git a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir
--- a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir
+++ b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir
@@ -364,3 +364,135 @@
     DS_WRITE2_B32_gfx9 %2, %1.sub14, %1.sub15, 14, 15, 0, implicit $exec
     S_ENDPGM 0
 ...
+
+# The V_AND_B32 source that prefers $vgpr5 is expected to end up in $vgpr3,
+# and the lo16 COPY to read from the 32 bit result register $vgpr0.
+# GCN-LABEL: vgpr_lo16_sub{{$}}
+# GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec
+# GCN: renamable $vgpr1_lo16 = COPY renamable $vgpr0_lo16
+---
+name: vgpr_lo16_sub
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '$vgpr1' }
+  - { id: 1, class: vgpr_32, preferred-register: '$vgpr5' }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: vgpr_lo16 }
+body: |
+  bb.0:
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = V_AND_B32_e32 %1, %0, implicit $exec
+    %3 = COPY %2.lo16
+    $vgpr1_lo16 = COPY %3
+    SI_RETURN_TO_EPILOG $vgpr1_lo16
+...
+
+# GCN-LABEL: vgpr_lo16{{$}}
+# GCN: $vgpr1_lo16 = COPY killed renamable $vgpr0_lo16
+---
+name: vgpr_lo16 # 16 bit VGPR lo half; the remaining COPY is expected to read $vgpr0_lo16.
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_lo16, preferred-register: '$vgpr4_lo16' }
+body: |
+  bb.0:
+    liveins: $vgpr0_lo16
+
+    %0 = COPY $vgpr0_lo16
+    $vgpr1_lo16 = COPY %0
+    SI_RETURN_TO_EPILOG $vgpr1_lo16
+...
+
+# GCN-LABEL: vgpr_hi16_sub{{$}}
+# GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec
+# GCN: renamable $vgpr1_hi16 = COPY renamable $vgpr0_hi16
+---
+name: vgpr_hi16_sub # hi16 sub-register copy; the source preferring $vgpr5 is expected in $vgpr3.
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '$vgpr1' }
+  - { id: 1, class: vgpr_32, preferred-register: '$vgpr5' }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: vgpr_hi16 }
+body: |
+  bb.0:
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = V_AND_B32_e32 %1, %0, implicit $exec
+    %3 = COPY %2.hi16
+    $vgpr1_hi16 = COPY %3
+    SI_RETURN_TO_EPILOG $vgpr1_hi16
+...
+
+# GCN-LABEL: vgpr_hi16{{$}}
+# GCN: $vgpr1_hi16 = COPY killed renamable $vgpr0_hi16
+---
+name: vgpr_hi16 # 16 bit VGPR hi half; the remaining COPY is expected to read $vgpr0_hi16.
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_hi16, preferred-register: '$vgpr4_hi16' }
+body: |
+  bb.0:
+    liveins: $vgpr0_hi16
+
+    %0 = COPY $vgpr0_hi16
+    $vgpr1_hi16 = COPY %0
+    SI_RETURN_TO_EPILOG $vgpr1_hi16
+...
+
+# GCN-LABEL: sgpr_lo16_sub{{$}}
+# GCN: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr14, $sgpr0, implicit-def $scc
+# GCN: renamable $sgpr1_lo16 = COPY renamable $sgpr0_lo16
+---
+name: sgpr_lo16_sub # lo16 sub-register copy; the source preferring $sgpr16 is expected in $sgpr14.
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_32, preferred-register: '$sgpr16' }
+  - { id: 1, class: sgpr_32 }
+  - { id: 2, class: sgpr_lo16 }
+body: |
+  bb.0:
+    %0 = IMPLICIT_DEF
+    $sgpr0 = IMPLICIT_DEF
+    %1 = S_AND_B32 %0, $sgpr0, implicit-def $scc
+    %2 = COPY %1.lo16
+    $sgpr1_lo16 = COPY %2
+    SI_RETURN_TO_EPILOG $sgpr1_lo16
+...
+
+# GCN-LABEL: sgpr_lo16{{$}}
+# GCN: $sgpr1_lo16 = COPY killed renamable $sgpr0_lo16
+---
+name: sgpr_lo16 # 16 bit SGPR lo half; the remaining COPY is expected to read $sgpr0_lo16.
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_lo16, preferred-register: '$sgpr4_lo16' }
+body: |
+  bb.0:
+    liveins: $sgpr0_lo16
+
+    %0 = COPY $sgpr0_lo16
+    $sgpr1_lo16 = COPY %0
+    SI_RETURN_TO_EPILOG $sgpr1_lo16
+...
+
+# Check that we do not use VGPR3, which we would use otherwise.
+# It interferes with the live VGPR3_LO16, so VGPR7 is expected instead.
+# GCN-LABEL: v1_vs_v5_src_interence{{$}}
+# GCN: V_AND_B32_e32 killed $vgpr7, killed $vgpr1,
+---
+name: v1_vs_v5_src_interence
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '$vgpr1' }
+  - { id: 1, class: vgpr_32, preferred-register: '$vgpr5' }
+  - { id: 2, class: vgpr_32 }
+body: |
+  bb.0:
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    $vgpr3_lo16 = IMPLICIT_DEF
+    %2 = V_AND_B32_e32 %1, %0, implicit $exec
+    S_ENDPGM 0
+...