Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -417,16 +417,24 @@ } } - if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) { - MaxSGPR += 2; + unsigned ExtraSGPRs = 0; - if (FlatUsed) - MaxSGPR += 2; + if (VCCUsed) + ExtraSGPRs = 2; + if (STM.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (FlatUsed) + ExtraSGPRs = 4; + } else { if (STM.isXNACKEnabled()) - MaxSGPR += 2; + ExtraSGPRs = 4; + + if (FlatUsed) + ExtraSGPRs = 6; } + MaxSGPR += ExtraSGPRs; + // We found the maximum register index. They start at 0, so add one to get the // number of registers. ProgInfo.NumVGPR = MaxVGPR + 1; Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -36,18 +36,15 @@ const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget(); if (ST.hasSGPRInitBug()) { - unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4; - if (ST.isXNACKEnabled()) - BaseIdx -= 4; - + // Leave space for flat_scr, xnack_mask, vcc, and alignment + unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 8 - 4; unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and - // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down - // either way. + // 96/97 need to be reserved for flat_scr, 98/99 for xnack_mask, and + // 100/101 for vcc. This is the next sgpr128 down. return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95; } @@ -60,18 +57,20 @@ if (ST.hasSGPRInitBug()) { unsigned Idx; - if (!ST.isXNACKEnabled()) - Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; - else + if (ST.isXNACKEnabled()) { Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; + } else { + // Use the gap that is left by the unused xnack_mask. + Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 2 - 1; + } return AMDGPU::SGPR_32RegClass.getRegister(Idx); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { if (!ST.isXNACKEnabled()) { - // Next register before reservations for flat_scr and vcc. - return AMDGPU::SGPR97; + // Use the gap that is left by the unused xnack_mask. + return AMDGPU::SGPR99; } else { // Next register before reservations for flat_scr, xnack_mask, vcc, // and scratch resource. @@ -100,27 +99,28 @@ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { // SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation // for VCC/FLAT_SCR. - reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); - reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); - + reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); if (ST.isXNACKEnabled()) - reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); + reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); + reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); } // Tonga and Iceland can only allocate a fixed number of SGPRs due // to a hw bug. if (ST.hasSGPRInitBug()) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); - // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). - unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; - - if (ST.isXNACKEnabled()) - Limit -= 2; + unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); reserveRegisterTuples(Reserved, Reg); } + + // Reserve some SGPRs for FLAT_SCRATCH, XNACK_MASK, and VCC (6 SGPRs). + reserveRegisterTuples(Reserved, AMDGPU::SGPR74_SGPR75); + if (ST.isXNACKEnabled()) + reserveRegisterTuples(Reserved, AMDGPU::SGPR76_SGPR77); + reserveRegisterTuples(Reserved, AMDGPU::SGPR78_SGPR79); } const SIMachineFunctionInfo *MFI = MF.getInfo(); Index: test/CodeGen/AMDGPU/flat-scratch-reg.ll =================================================================== --- test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI --check-prefix=NO-XNACK ; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=NO-XNACK -; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=XNACK +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=XNACK ; GCN-LABEL: {{^}}no_vcc_no_flat: ; NO-XNACK: ; NumSgprs: 8 @@ -22,8 +22,7 @@ ; GCN-LABEL: {{^}}no_vcc_flat: ; CI: ; NumSgprs: 12 -; VI: ; NumSgprs: 12 -; XNACK: ; NumSgprs: 14 +; VI: ; NumSgprs: 14 define void @no_vcc_flat() { entry: call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"() @@ -32,8 +31,7 @@ ; GCN-LABEL: {{^}}vcc_flat: ; CI: ; NumSgprs: 12 -; VI: ; NumSgprs: 12 -; XNACK: ; NumSgprs: 14 +; VI: ; NumSgprs: 14 define void @vcc_flat() { entry: call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"()