diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -274,9 +274,13 @@ case AMDGPU::M0_LO16: case AMDGPU::M0_HI16: case AMDGPU::SRC_SHARED_BASE: + case AMDGPU::SRC_SHARED_BASE64: case AMDGPU::SRC_SHARED_LIMIT: + case AMDGPU::SRC_SHARED_LIMIT64: case AMDGPU::SRC_PRIVATE_BASE: + case AMDGPU::SRC_PRIVATE_BASE64: case AMDGPU::SRC_PRIVATE_LIMIT: + case AMDGPU::SRC_PRIVATE_LIMIT64: case AMDGPU::SGPR_NULL: case AMDGPU::SGPR_NULL64: case AMDGPU::MODE: diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -565,9 +565,13 @@ // Reserve the memory aperture registers. reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE); + reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE64); reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT); + reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT64); reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE); + reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE64); reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT); + reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT64); // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen. 
reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -217,27 +217,29 @@ defm M0_gfx11plus : SIRegLoHi16 <"m0", 125>; defm M0 : SIRegLoHi16 <"m0", 0>; +multiclass Constant32and64BitSGPR<string asmName, bits<16> regIdx> { + let isConstant = true in { + defm "" : SIRegLoHi16 <asmName, regIdx>; + defm _HI : SIRegLoHi16 <"", regIdx>; + + def "64" : RegisterWithSubRegs<asmName, [!cast<Register>(NAME), !cast<Register>(NAME#_HI)]> { + let Namespace = "AMDGPU"; + let SubRegIndices = [sub0, sub1]; + let HWEncoding = !cast<Register>(NAME).HWEncoding; + } + } // isConstant = true +} + defm SGPR_NULL_gfxpre11 : SIRegLoHi16 <"null", 125>; defm SGPR_NULL_gfx11plus : SIRegLoHi16 <"null", 124>; -let isConstant = true in { -defm SGPR_NULL : SIRegLoHi16 <"null", 0>; -defm SGPR_NULL_HI : SIRegLoHi16 <"", 0>; -} // isConstant = true -def SGPR_NULL64 : - RegisterWithSubRegs<"null", [SGPR_NULL, SGPR_NULL_HI]> { - let Namespace = "AMDGPU"; - let SubRegIndices = [sub0, sub1]; - let HWEncoding = SGPR_NULL.HWEncoding; - let isConstant = true; -} +defm SGPR_NULL : Constant32and64BitSGPR<"null", 0>; + +defm SRC_SHARED_BASE : Constant32and64BitSGPR<"src_shared_base", 235>; +defm SRC_SHARED_LIMIT : Constant32and64BitSGPR<"src_shared_limit", 236>; +defm SRC_PRIVATE_BASE : Constant32and64BitSGPR<"src_private_base", 237>; +defm SRC_PRIVATE_LIMIT : Constant32and64BitSGPR<"src_private_limit", 238>; -let isConstant = true in { -defm SRC_SHARED_BASE : SIRegLoHi16<"src_shared_base", 235>; -defm SRC_SHARED_LIMIT : SIRegLoHi16<"src_shared_limit", 236>; -defm SRC_PRIVATE_BASE : SIRegLoHi16<"src_private_base", 237>; -defm SRC_PRIVATE_LIMIT : SIRegLoHi16<"src_private_limit", 238>; -} // isConstant = true defm SRC_POPS_EXITING_WAVE_ID : SIRegLoHi16<"src_pops_exiting_wave_id", 239>; // Not addressable @@ -665,7 +667,8 @@ def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, 
v2i16, v2f16, i1], 32, (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, - SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID, + SRC_SHARED_BASE_HI, SRC_SHARED_LIMIT, SRC_SHARED_LIMIT_HI, SRC_PRIVATE_BASE, + SRC_PRIVATE_BASE_HI, SRC_PRIVATE_LIMIT, SRC_PRIVATE_LIMIT_HI, SRC_POPS_EXITING_WAVE_ID, SRC_VCCZ, SRC_EXECZ, SRC_SCC)> { let AllocationPriority = 0; } @@ -675,7 +678,9 @@ XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16, TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16, SRC_SHARED_LIMIT_LO16, SRC_PRIVATE_BASE_LO16, SRC_PRIVATE_LIMIT_LO16, - SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16)> { + SRC_SHARED_BASE_HI_LO16, SRC_SHARED_LIMIT_HI_LO16, SRC_PRIVATE_BASE_HI_LO16, + SRC_PRIVATE_LIMIT_HI_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, + SRC_SCC_LO16)> { let Size = 16; let AllocationPriority = 0; } @@ -737,7 +742,8 @@ } def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, - (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, TTMP_64, TBA, TMA)> { + (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64, SRC_SHARED_BASE64, + SRC_SHARED_LIMIT64, SRC_PRIVATE_BASE64, SRC_PRIVATE_LIMIT64, TTMP_64, TBA, TMA)> { let CopyCost = 1; let AllocationPriority = 1; let HasSGPR = 1;