Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -477,7 +477,7 @@ // update max register counts if "amdgpu-debugger-reserve-regs" attribute was // requested. ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0; - ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM); + ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF); // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue" @@ -492,7 +492,7 @@ // Check the addressable register limit before we add ExtraSGPRs. if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && !STM.hasSGPRInitBug()) { - unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs(); + unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs(); if (MaxSGPR + 1 > MaxAddressableNumSGPRs) { // This can happen due to a compiler bug or when using inline asm. LLVMContext &Ctx = MF.getFunction()->getContext(); @@ -507,7 +507,7 @@ // Account for extra SGPRs and VGPRs reserved for debugger use. MaxSGPR += ExtraSGPRs; - MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM); + MaxVGPR += STM.getReservedNumVGPRs(MF); // We found the maximum register index. They start at 0, so add one to get the // number of registers. @@ -517,13 +517,13 @@ // Adjust number of registers used to meet default/requested minimum/maximum // number of waves per execution unit request. ProgInfo.NumSGPRsForWavesPerEU = std::max( - ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU())); + ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU())); ProgInfo.NumVGPRsForWavesPerEU = std::max( - ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU())); + ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU())); if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS || STM.hasSGPRInitBug()) { - unsigned MaxNumSGPRs = STM.getMaxNumSGPRs(); + unsigned MaxNumSGPRs = STM.getAddressableNumSGPRs(); if (ProgInfo.NumSGPR > MaxNumSGPRs) { // This can happen due to a compiler bug or when using inline asm to use the // registers which are usually reserved for vcc etc. @@ -560,13 +560,13 @@ // SGPRBlocks is actual number of SGPR blocks minus 1. ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU, - RI->getSGPRAllocGranule()); - ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1; + STM.getSGPRAllocGranule()); + ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPRAllocGranule() - 1; // VGPRBlocks is actual number of VGPR blocks minus 1. ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU, - RI->getVGPRAllocGranule()); - ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1; + STM.getVGPRAllocGranule()); + ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPRAllocGranule() - 1; // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode // register. Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -642,7 +642,83 @@ return true; } - unsigned getMaxNumSGPRs() const; + /// \returns SGPR allocation granularity supported by the subtarget. + unsigned getSGPRAllocGranule() const { + return 8; + } + + /// \returns Total number of SGPRs supported by the subtarget. + unsigned getTotalNumSGPRs() const { + if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + return 800; + return 512; + } + + /// \returns Addressable number of SGPRs supported by the subtarget. + unsigned getAddressableNumSGPRs() const { + if (hasSGPRInitBug()) + return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; + if (getGeneration() >= VOLCANIC_ISLANDS) + return 102; + return 104; + } + + /// \returns Minimum number of SGPRs that meets the given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMinNumSGPRs(unsigned WavesPerEU) const; + + /// \returns Maximum number of SGPRs that meets the given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const; + + /// \returns Reserved number of SGPRs for given function \p MF. + unsigned getReservedNumSGPRs(const MachineFunction &MF) const; + + /// \returns Maximum number of SGPRs that meets number of waves per execution + /// unit requirement for function \p MF, or number of SGPRs explicitly + /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. + /// + /// \returns Value that meets number of waves per execution unit requirement + /// if explicitly requested value cannot be converted to integer, violates + /// subtarget's specifications, or does not meet number of waves per execution + /// unit requirement. + unsigned getMaxNumSGPRs(const MachineFunction &MF) const; + + /// \returns VGPR allocation granularity supported by the subtarget. + unsigned getVGPRAllocGranule() const { + return 4; + } + + /// \returns Total number of VGPRs supported by the subtarget. + unsigned getTotalNumVGPRs() const { + return 256; + } + + /// \returns Addressable number of VGPRs supported by the subtarget. + unsigned getAddressableNumVGPRs() const { + return getTotalNumVGPRs(); + } + + /// \returns Minimum number of VGPRs that meets given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMinNumVGPRs(unsigned WavesPerEU) const; + + /// \returns Maximum number of VGPRs that meets given number of waves per + /// execution unit requirement supported by the subtarget. + unsigned getMaxNumVGPRs(unsigned WavesPerEU) const; + + /// \returns Reserved number of VGPRs for given function \p MF. + unsigned getReservedNumVGPRs(const MachineFunction &MF) const; + + /// \returns Maximum number of VGPRs that meets number of waves per execution + /// unit requirement for function \p MF, or number of VGPRs explicitly + /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. + /// + /// \returns Value that meets number of waves per execution unit requirement + /// if explicitly requested value cannot be converted to integer, violates + /// subtarget's specifications, or does not meet number of waves per execution + /// unit requirement. + unsigned getMaxNumVGPRs(const MachineFunction &MF) const; }; } // end namespace llvm Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUSubtarget.h" +#include "SIMachineFunctionInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/Target/TargetFrameLowering.h" @@ -324,12 +325,185 @@ return 1; } -unsigned SISubtarget::getMaxNumSGPRs() const { +unsigned SISubtarget::getMinNumSGPRs(unsigned WavesPerEU) const { + if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + switch (WavesPerEU) { + case 0: return 0; + case 10: return 0; + case 9: return 0; + case 8: return 81; + default: return 97; + } + } else { + switch (WavesPerEU) { + case 0: return 0; + case 10: return 0; + case 9: return 49; + case 8: return 57; + case 7: return 65; + case 6: return 73; + case 5: return 81; + default: return 97; + } + } +} + +unsigned SISubtarget::getMaxNumSGPRs(unsigned WavesPerEU, + bool Addressable) const { + if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + switch (WavesPerEU) { + case 0: return 80; + case 10: return 80; + case 9: return 80; + case 8: return 96; + default: return Addressable ? getAddressableNumSGPRs() : 112; + } + } else { + switch (WavesPerEU) { + case 0: return 48; + case 10: return 48; + case 9: return 56; + case 8: return 64; + case 7: return 72; + case 6: return 80; + case 5: return 96; + default: return getAddressableNumSGPRs(); + } + } +} + +unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { + const SIMachineFunctionInfo &MFI = *MF.getInfo(); + if (MFI.hasFlatScratchInit()) { + if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + return 6; // FLAT_SCRATCH, XNACK, VCC (in that order). + if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) + return 4; // FLAT_SCRATCH, VCC (in that order). + } + + if (isXNACKEnabled()) + return 4; // XNACK, VCC (in that order). + return 2; // VCC. +} + +unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const { + const Function &F = *MF.getFunction(); + const SIMachineFunctionInfo &MFI = *MF.getInfo(); + + // Compute maximum number of SGPRs function can use using default/requested + // minimum number of waves per execution unit. + std::pair WavesPerEU = MFI.getWavesPerEU(); + unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false); + unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true); + + // Check if maximum number of SGPRs was explicitly requested using + // "amdgpu-num-sgpr" attribute. + if (F.hasFnAttribute("amdgpu-num-sgpr")) { + unsigned Requested = AMDGPU::getIntegerAttribute( + F, "amdgpu-num-sgpr", MaxNumSGPRs); + + // Make sure requested value does not violate subtarget's specifications. + if (Requested && (Requested <= getReservedNumSGPRs(MF))) + Requested = 0; + + // If more SGPRs are required to support the input user/system SGPRs, + // increase to accommodate them. + // + // FIXME: This really ends up using the requested number of SGPRs + number + // of reserved special registers in total. Theoretically you could re-use + // the last input registers for these special registers, but this would + // require a lot of complexity to deal with the weird aliasing. + unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs(); + if (Requested && Requested < InputNumSGPRs) + Requested = InputNumSGPRs; + + // Make sure requested value is compatible with values implied by + // default/requested minimum/maximum number of waves per execution unit. + if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false)) + Requested = 0; + if (WavesPerEU.second && + Requested && Requested < getMinNumSGPRs(WavesPerEU.second)) + Requested = 0; + + if (Requested) + MaxNumSGPRs = Requested; + } + if (hasSGPRInitBug()) - return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; + MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; - if (getGeneration() >= VOLCANIC_ISLANDS) - return 102; + return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF), + MaxAddressableNumSGPRs); +} + +unsigned SISubtarget::getMinNumVGPRs(unsigned WavesPerEU) const { + switch (WavesPerEU) { + case 0: return 0; + case 10: return 0; + case 9: return 25; + case 8: return 29; + case 7: return 33; + case 6: return 37; + case 5: return 41; + case 4: return 49; + case 3: return 65; + case 2: return 85; + default: return 129; + } +} + +unsigned SISubtarget::getMaxNumVGPRs(unsigned WavesPerEU) const { + switch (WavesPerEU) { + case 0: return 24; + case 10: return 24; + case 9: return 28; + case 8: return 32; + case 7: return 36; + case 6: return 40; + case 5: return 48; + case 4: return 64; + case 3: return 84; + case 2: return 128; + default: return getTotalNumVGPRs(); + } +} + +unsigned SISubtarget::getReservedNumVGPRs(const MachineFunction &MF) const { + if (debuggerReserveRegs()) + return 4; + return 0; +} + +unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const { + const Function &F = *MF.getFunction(); + const SIMachineFunctionInfo &MFI = *MF.getInfo(); + + // Compute maximum number of VGPRs function can use using default/requested + // minimum number of waves per execution unit. + std::pair WavesPerEU = MFI.getWavesPerEU(); + unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first); + + // Check if maximum number of VGPRs was explicitly requested using + // "amdgpu-num-vgpr" attribute. + if (F.hasFnAttribute("amdgpu-num-vgpr")) { + unsigned Requested = AMDGPU::getIntegerAttribute( + F, "amdgpu-num-vgpr", MaxNumVGPRs); + + // Make sure requested value does not violate subtarget's specifications. + if (Requested && Requested <= getReservedNumVGPRs(MF)) + Requested = 0; + + // Make sure requested value is compatible with values implied by + // default/requested minimum/maximum number of waves per execution unit. + if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first)) + Requested = 0; + if (WavesPerEU.second && + Requested && Requested < getMinNumVGPRs(WavesPerEU.second)) + Requested = 0; + + if (Requested) + MaxNumVGPRs = Requested; + } - return 104; + return MaxNumVGPRs - getReservedNumVGPRs(MF); } Index: lib/Target/AMDGPU/GCNSchedStrategy.cpp =================================================================== --- lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -145,8 +145,8 @@ unsigned VGPRExcessLimit = Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass); unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF); - unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true); - unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves); + unsigned SGPRCriticalLimit = ST.getMaxNumSGPRs(MaxWaves, true); + unsigned VGPRCriticalLimit = ST.getMaxNumVGPRs(MaxWaves); ReadyQueue &Q = Zone.Available; for (SUnit *SU : Q) { Index: lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIFrameLowering.cpp +++ lib/Target/AMDGPU/SIFrameLowering.cpp @@ -21,16 +21,16 @@ using namespace llvm; -static ArrayRef getAllSGPR128(const MachineFunction &MF, - const SIRegisterInfo *TRI) { +static ArrayRef getAllSGPR128(const SISubtarget &ST, + const MachineFunction &MF) { return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(), - TRI->getMaxNumSGPRs(MF) / 4); + ST.getMaxNumSGPRs(MF) / 4); } -static ArrayRef getAllSGPRs(const MachineFunction &MF, - const SIRegisterInfo *TRI) { +static ArrayRef getAllSGPRs(const SISubtarget &ST, + const MachineFunction &MF) { return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), - TRI->getMaxNumSGPRs(MF)); + ST.getMaxNumSGPRs(MF)); } void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII, @@ -111,7 +111,7 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4; - ArrayRef AllSGPR128s = getAllSGPR128(MF, TRI); + ArrayRef AllSGPR128s = getAllSGPR128(ST, MF); AllSGPR128s = AllSGPR128s.slice(std::min(static_cast(AllSGPR128s.size()), NumPreloaded)); // Skip the last 2 elements because the last one is reserved for VCC, and @@ -146,7 +146,7 @@ unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); - ArrayRef AllSGPRs = getAllSGPRs(MF, TRI); + ArrayRef AllSGPRs = getAllSGPRs(ST, MF); if (NumPreloaded > AllSGPRs.size()) return ScratchWaveOffsetReg; Index: lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.h +++ lib/Target/AMDGPU/SIRegisterInfo.h @@ -195,72 +195,6 @@ return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID); } - /// \returns SGPR allocation granularity supported by the subtarget. - unsigned getSGPRAllocGranule() const { - return 8; - } - - /// \returns Total number of SGPRs supported by the subtarget. - unsigned getTotalNumSGPRs(const SISubtarget &ST) const; - - /// \returns Number of addressable SGPRs supported by the subtarget. - unsigned getNumAddressableSGPRs(const SISubtarget &ST) const; - - /// \returns Number of reserved SGPRs supported by the subtarget. - unsigned getNumReservedSGPRs(const SISubtarget &ST, - const SIMachineFunctionInfo &MFI) const; - - /// \returns Minimum number of SGPRs that meets given number of waves per - /// execution unit requirement for given subtarget. - unsigned getMinNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const; - - /// \returns Maximum number of SGPRs that meets given number of waves per - /// execution unit requirement for given subtarget. - unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU, - bool Addressable) const; - - /// \returns Maximum number of SGPRs that meets number of waves per execution - /// unit requirement for function \p MF, or number of SGPRs explicitly - /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. - /// - /// \returns Value that meets number of waves per execution unit requirement - /// if explicitly requested value cannot be converted to integer, violates - /// subtarget's specifications, or does not meet number of waves per execution - /// unit requirement. - unsigned getMaxNumSGPRs(const MachineFunction &MF) const; - - /// \returns VGPR allocation granularity supported by the subtarget. - unsigned getVGPRAllocGranule() const { - return 4; - } - - /// \returns Total number of VGPRs supported by the subtarget. - unsigned getTotalNumVGPRs() const { - return 256; - } - - /// \returns Number of reserved VGPRs for debugger use supported by the - /// subtarget. - unsigned getNumDebuggerReservedVGPRs(const SISubtarget &ST) const; - - /// \returns Minimum number of SGPRs that meets given number of waves per - /// execution unit requirement. - unsigned getMinNumVGPRs(unsigned WavesPerEU) const; - - /// \returns Maximum number of VGPRs that meets given number of waves per - /// execution unit requirement. - unsigned getMaxNumVGPRs(unsigned WavesPerEU) const; - - /// \returns Maximum number of VGPRs that meets number of waves per execution - /// unit requirement for function \p MF, or number of VGPRs explicitly - /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. - /// - /// \returns Value that meets number of waves per execution unit requirement - /// if explicitly requested value cannot be converted to integer, violates - /// subtarget's specifications, or does not meet number of waves per execution - /// unit requirement. - unsigned getMaxNumVGPRs(const MachineFunction &MF) const; - ArrayRef getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const; Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -97,14 +97,18 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const MachineFunction &MF) const { - unsigned BaseIdx = alignDown(getMaxNumSGPRs(MF), 4) - 4; + + const SISubtarget &ST = MF.getSubtarget(); + unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4; unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { - unsigned RegCount = getMaxNumSGPRs(MF); + + const SISubtarget &ST = MF.getSubtarget(); + unsigned RegCount = ST.getMaxNumSGPRs(MF); unsigned Reg; // Try to place it in a hole after PrivateSegmentbufferReg. @@ -139,14 +143,16 @@ reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9); reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11); - unsigned MaxNumSGPRs = getMaxNumSGPRs(MF); + const SISubtarget &ST = MF.getSubtarget(); + + unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF); unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); reserveRegisterTuples(Reserved, Reg); } - unsigned MaxNumVGPRs = getMaxNumVGPRs(MF); + unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF); unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs(); for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) { unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i); @@ -1156,210 +1162,6 @@ return AMDGPU::NoRegister; } -unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const { - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - return 800; - return 512; -} - -unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const { - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - return 102; - return 104; -} - -unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST, - const SIMachineFunctionInfo &MFI) const { - if (MFI.hasFlatScratchInit()) { - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) - return 6; // FLAT_SCRATCH, XNACK, VCC (in that order) - - if (ST.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) - return 4; // FLAT_SCRATCH, VCC (in that order) - } - - if (ST.isXNACKEnabled()) - return 4; // XNACK, VCC (in that order) - - return 2; // VCC. -} - -unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST, - unsigned WavesPerEU) const { - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - switch (WavesPerEU) { - case 0: return 0; - case 10: return 0; - case 9: return 0; - case 8: return 81; - default: return 97; - } - } else { - switch (WavesPerEU) { - case 0: return 0; - case 10: return 0; - case 9: return 49; - case 8: return 57; - case 7: return 65; - case 6: return 73; - case 5: return 81; - default: return 97; - } - } -} - -unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST, - unsigned WavesPerEU, - bool Addressable) const { - if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - switch (WavesPerEU) { - case 0: return 80; - case 10: return 80; - case 9: return 80; - case 8: return 96; - default: return Addressable ? getNumAddressableSGPRs(ST) : 112; - } - } else { - switch (WavesPerEU) { - case 0: return 48; - case 10: return 48; - case 9: return 56; - case 8: return 64; - case 7: return 72; - case 6: return 80; - case 5: return 96; - default: return getNumAddressableSGPRs(ST); - } - } -} - -unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const { - const Function &F = *MF.getFunction(); - - const SISubtarget &ST = MF.getSubtarget(); - const SIMachineFunctionInfo &MFI = *MF.getInfo(); - - // Compute maximum number of SGPRs function can use using default/requested - // minimum number of waves per execution unit. - std::pair WavesPerEU = MFI.getWavesPerEU(); - unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false); - unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true); - - // Check if maximum number of SGPRs was explicitly requested using - // "amdgpu-num-sgpr" attribute. - if (F.hasFnAttribute("amdgpu-num-sgpr")) { - unsigned Requested = AMDGPU::getIntegerAttribute( - F, "amdgpu-num-sgpr", MaxNumSGPRs); - - // Make sure requested value does not violate subtarget's specifications. - if (Requested && (Requested <= getNumReservedSGPRs(ST, MFI))) - Requested = 0; - - // If more SGPRs are required to support the input user/system SGPRs, - // increase to accommodate them. - // - // FIXME: This really ends up using the requested number of SGPRs + number - // of reserved special registers in total. Theoretically you could re-use - // the last input registers for these special registers, but this would - // require a lot of complexity to deal with the weird aliasing. - unsigned NumInputSGPRs = MFI.getNumPreloadedSGPRs(); - if (Requested && Requested < NumInputSGPRs) - Requested = NumInputSGPRs; - - // Make sure requested value is compatible with values implied by - // default/requested minimum/maximum number of waves per execution unit. - if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false)) - Requested = 0; - if (WavesPerEU.second && - Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second)) - Requested = 0; - - if (Requested) - MaxNumSGPRs = Requested; - } - - if (ST.hasSGPRInitBug()) - MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; - - return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST, MFI), - MaxNumAddressableSGPRs); -} - -unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs( - const SISubtarget &ST) const { - if (ST.debuggerReserveRegs()) - return 4; - return 0; -} - -unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const { - switch (WavesPerEU) { - case 0: return 0; - case 10: return 0; - case 9: return 25; - case 8: return 29; - case 7: return 33; - case 6: return 37; - case 5: return 41; - case 4: return 49; - case 3: return 65; - case 2: return 85; - default: return 129; - } -} - -unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const { - switch (WavesPerEU) { - case 0: return 24; - case 10: return 24; - case 9: return 28; - case 8: return 32; - case 7: return 36; - case 6: return 40; - case 5: return 48; - case 4: return 64; - case 3: return 84; - case 2: return 128; - default: return getTotalNumVGPRs(); - } -} - -unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const { - const Function &F = *MF.getFunction(); - - const SISubtarget &ST = MF.getSubtarget(); - const SIMachineFunctionInfo &MFI = *MF.getInfo(); - - // Compute maximum number of VGPRs function can use using default/requested - // minimum number of waves per execution unit. - std::pair WavesPerEU = MFI.getWavesPerEU(); - unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first); - - // Check if maximum number of VGPRs was explicitly requested using - // "amdgpu-num-vgpr" attribute. - if (F.hasFnAttribute("amdgpu-num-vgpr")) { - unsigned Requested = AMDGPU::getIntegerAttribute( - F, "amdgpu-num-vgpr", MaxNumVGPRs); - - // Make sure requested value does not violate subtarget's specifications. - if (Requested && Requested <= getNumDebuggerReservedVGPRs(ST)) - Requested = 0; - - // Make sure requested value is compatible with values implied by - // default/requested minimum/maximum number of waves per execution unit. - if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first)) - Requested = 0; - if (WavesPerEU.second && - Requested && Requested < getMinNumVGPRs(WavesPerEU.second)) - Requested = 0; - - if (Requested) - MaxNumVGPRs = Requested; - } - - return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST); -} - ArrayRef SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const { if (EltSize == 4) {