Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -417,16 +417,24 @@ } } - if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) { - MaxSGPR += 2; + unsigned ExtraSGPRs = 0; - if (FlatUsed) - MaxSGPR += 2; + if (VCCUsed) + ExtraSGPRs = 2; + if (STM.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) { + if (FlatUsed) + ExtraSGPRs = 4; + } else { if (STM.isXNACKEnabled()) - MaxSGPR += 2; + ExtraSGPRs = 4; + + if (FlatUsed) + ExtraSGPRs = 6; } + MaxSGPR += ExtraSGPRs; + // We found the maximum register index. They start at 0, so add one to get the // number of registers. ProgInfo.NumVGPR = MaxVGPR + 1; Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -36,18 +36,15 @@ const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget(); if (ST.hasSGPRInitBug()) { - unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4; - if (ST.isXNACKEnabled()) - BaseIdx -= 4; - + // Leave space for flat_scr, xnack_mask, vcc, and alignment + unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 8 - 4; unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and - // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down - // either way. + // 96/97 need to be reserved for flat_scr, 98/99 for xnack_mask, and + // 100/101 for vcc. This is the next sgpr128 down. return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95; } @@ -58,25 +55,14 @@ const MachineFunction &MF) const { const AMDGPUSubtarget &ST = MF.getSubtarget(); if (ST.hasSGPRInitBug()) { - unsigned Idx; - - if (!ST.isXNACKEnabled()) - Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; - else - Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; - + unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1; return AMDGPU::SGPR_32RegClass.getRegister(Idx); } if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - if (!ST.isXNACKEnabled()) { - // Next register before reservations for flat_scr and vcc. - return AMDGPU::SGPR97; - } else { - // Next register before reservations for flat_scr, xnack_mask, vcc, - // and scratch resource. - return AMDGPU::SGPR91; - } + // Next register before reservations for flat_scr, xnack_mask, vcc, + // and scratch resource. + return AMDGPU::SGPR91; } return AMDGPU::SGPR95; @@ -99,23 +85,22 @@ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { // SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation - // for VCC/FLAT_SCR. + // for VCC/XNACK_MASK/FLAT_SCR. + // + // TODO The SGPRs that alias to XNACK_MASK could be used as general purpose + // SGPRs when the XNACK feature is not used. This is currently not done + // because the code that counts SGPRs cannot account for such holes. + reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); - - if (ST.isXNACKEnabled()) - reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97); } // Tonga and Iceland can only allocate a fixed number of SGPRs due // to a hw bug. if (ST.hasSGPRInitBug()) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); - // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). - unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4; - - if (ST.isXNACKEnabled()) - Limit -= 2; + // Reserve some SGPRs for FLAT_SCRATCH, XNACK_MASK, and VCC (6 SGPRs). + unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6; for (unsigned i = Limit; i < NumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); Index: llvm/trunk/test/CodeGen/AMDGPU/flat-scratch-reg.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ llvm/trunk/test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI --check-prefix=NO-XNACK ; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=NO-XNACK -; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=XNACK +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=XNACK ; GCN-LABEL: {{^}}no_vcc_no_flat: ; NO-XNACK: ; NumSgprs: 8 @@ -22,8 +22,7 @@ ; GCN-LABEL: {{^}}no_vcc_flat: ; CI: ; NumSgprs: 12 -; VI: ; NumSgprs: 12 -; XNACK: ; NumSgprs: 14 +; VI: ; NumSgprs: 14 define void @no_vcc_flat() { entry: call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"() @@ -32,8 +31,7 @@ ; GCN-LABEL: {{^}}vcc_flat: ; CI: ; NumSgprs: 12 -; VI: ; NumSgprs: 12 -; XNACK: ; NumSgprs: 14 +; VI: ; NumSgprs: 14 define void @vcc_flat() { entry: call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"()