Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -489,6 +489,22 @@ RI->getHWRegIndex(MFI->getScratchRSrcReg()); } + // Check the addressable register limit before we add ExtraSGPRs. + if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && + !STM.hasSGPRInitBug()) { + unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs(); + if (MaxSGPR + 1 > MaxAddressableNumSGPRs) { + // This can happen due to a compiler bug or when using inline asm. + LLVMContext &Ctx = MF.getFunction()->getContext(); + DiagnosticInfoResourceLimit Diag(*MF.getFunction(), + "addressable scalar registers", + MaxSGPR + 1, DS_Error, + DK_ResourceLimit, MaxAddressableNumSGPRs); + Ctx.diagnose(Diag); + MaxSGPR = MaxAddressableNumSGPRs - 1; + } + } + // Account for extra SGPRs and VGPRs reserved for debugger use. MaxSGPR += ExtraSGPRs; MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM); @@ -505,19 +521,22 @@ ProgInfo.NumVGPRsForWavesPerEU = std::max( ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU())); - unsigned MaxNumSGPRs = STM.getMaxNumSGPRs(); - if (ProgInfo.NumSGPR > MaxNumSGPRs) { - // This can happen due to a compiler bug or when using inline asm to use the - // registers which are usually reserved for vcc etc. - - LLVMContext &Ctx = MF.getFunction()->getContext(); - DiagnosticInfoResourceLimit Diag(*MF.getFunction(), - "scalar registers", - ProgInfo.NumSGPR, DS_Error, - DK_ResourceLimit, MaxNumSGPRs); - Ctx.diagnose(Diag); - ProgInfo.NumSGPR = MaxNumSGPRs; - ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs; + if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS || + STM.hasSGPRInitBug()) { + unsigned MaxNumSGPRs = STM.getMaxNumSGPRs(); + if (ProgInfo.NumSGPR > MaxNumSGPRs) { + // This can happen due to a compiler bug or when using inline asm to use the + // registers which are usually reserved for vcc etc. 
+ + LLVMContext &Ctx = MF.getFunction()->getContext(); + DiagnosticInfoResourceLimit Diag(*MF.getFunction(), + "scalar registers", + ProgInfo.NumSGPR, DS_Error, + DK_ResourceLimit, MaxNumSGPRs); + Ctx.diagnose(Diag); + ProgInfo.NumSGPR = MaxNumSGPRs; + ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs; + } } if (STM.hasSGPRInitBug()) { Index: lib/Target/AMDGPU/GCNSchedStrategy.cpp =================================================================== --- lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -144,7 +144,7 @@ unsigned VGPRExcessLimit = Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass); unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF); - unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves); + unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true); unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves); ReadyQueue &Q = Zone.Available; Index: lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.h +++ lib/Target/AMDGPU/SIRegisterInfo.h @@ -204,7 +204,8 @@ /// \returns Maximum number of SGPRs that meets given number of waves per /// execution unit requirement for given subtarget. 
- unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const; + unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU, + bool Addressable) const; /// \returns Maximum number of SGPRs that meets number of waves per execution /// unit requirement for function \p MF, or number of SGPRs explicitly Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1145,14 +1145,15 @@ } unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST, - unsigned WavesPerEU) const { + unsigned WavesPerEU, + bool Addressable) const { if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { switch (WavesPerEU) { case 0: return 80; case 10: return 80; case 9: return 80; case 8: return 96; - default: return getNumAddressableSGPRs(ST); + default: return Addressable ? getNumAddressableSGPRs(ST) : 112; } } else { switch (WavesPerEU) { @@ -1177,7 +1178,8 @@ // Compute maximum number of SGPRs function can use using default/requested // minimum number of waves per execution unit. std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU(); - unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first); + unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false); + unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true); // Check if maximum number of SGPRs was explicitly requested using // "amdgpu-num-sgpr" attribute. @@ -1202,7 +1204,7 @@ // Make sure requested value is compatible with values implied by // default/requested minimum/maximum number of waves per execution unit. 
- if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first)) + if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false)) Requested = 0; if (WavesPerEU.second && Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second)) @@ -1215,7 +1217,7 @@ if (ST.hasSGPRInitBug()) MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; - return MaxNumSGPRs - getNumReservedSGPRs(ST); + return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST), MaxNumAddressableSGPRs); } unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs( Index: test/CodeGen/AMDGPU/exceed-max-sgprs.ll =================================================================== --- test/CodeGen/AMDGPU/exceed-max-sgprs.ll +++ test/CodeGen/AMDGPU/exceed-max-sgprs.ll @@ -76,7 +76,7 @@ ret void } -; ERROR: error: scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji +; ERROR: error: addressable scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji define void @use_too_many_sgprs_fiji() #3 { call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () Index: test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll =================================================================== --- test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll @@ -3,8 +3,9 @@ ; Make sure this doesn't crash. ; ALL-LABEL: {{^}}test: -; ALL: s_mov_b32 s92, SCRATCH_RSRC_DWORD0 -; ALL: s_mov_b32 s91, s3 +; ALL: s_mov_b32 s[[LO:[0-9]+]], SCRATCH_RSRC_DWORD0 +; ALL: s_mov_b32 s[[OFF:[0-9]+]], s3 +; ALL: s_mov_b32 s[[HI:[0-9]+]], 0xe80000 ; Make sure we are handling hazards correctly. ; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:12 @@ -16,27 +17,27 @@ ; Make sure scratch wave offset register is correctly incremented and ; then restored. 
-; SMEM: s_mov_b32 m0, s91{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill -; SMEM: s_add_u32 m0, s91, 0x100{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill -; SMEM: s_add_u32 m0, s91, 0x200{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill -; SMEM: s_add_u32 m0, s91, 0x300{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill +; SMEM: s_mov_b32 m0, s[[OFF]]{{$}} +; SMEM: s_buffer_store_dword s{{[0-9]+}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill +; SMEM: s_add_u32 m0, s[[OFF]], 0x100{{$}} +; SMEM: s_buffer_store_dword s{{[0-9]+}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill +; SMEM: s_add_u32 m0, s[[OFF]], 0x200{{$}} +; SMEM: s_buffer_store_dword s{{[0-9]+}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill +; SMEM: s_add_u32 m0, s[[OFF]], 0x300{{$}} +; SMEM: s_buffer_store_dword s{{[0-9]+}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill -; SMEM: s_mov_b32 m0, s91{{$}} -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload -; SMEM: s_add_u32 m0, s91, 0x100{{$}} +; SMEM: s_mov_b32 m0, s[[OFF]]{{$}} +; SMEM: s_buffer_load_dword s{{[0-9]+}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Reload +; SMEM: s_add_u32 m0, s[[OFF]], 0x100{{$}} ; SMEM: s_waitcnt lgkmcnt(0) -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload -; SMEM: s_add_u32 m0, s91, 0x200{{$}} +; SMEM: s_buffer_load_dword s{{[0-9]+}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Reload +; SMEM: s_add_u32 m0, s[[OFF]], 0x200{{$}} ; SMEM: s_waitcnt lgkmcnt(0) -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload -; SMEM: s_add_u32 m0, s91, 0x300{{$}} +; SMEM: s_buffer_load_dword s{{[0-9]+}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Reload +; SMEM: s_add_u32 m0, s[[OFF]], 0x300{{$}} ; SMEM: s_waitcnt lgkmcnt(0) -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload +; SMEM: 
s_buffer_load_dword s{{[0-9]+}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Reload ; ALL: s_endpgm define void @test(i32 addrspace(1)* %out, i32 %in) {