diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -46,6 +46,8 @@
   // Second index is 32 different dword offsets.
   static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;
 
+  static std::array<uint8_t, AMDGPU::NUM_TARGET_REGS> PhysRegClass;
+
   void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
 
 public:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -11,13 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SIRegisterInfo.h"
 #include "AMDGPU.h"
 #include "AMDGPURegisterBankInfo.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUInstPrinter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -37,6 +37,7 @@
 std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
 std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
+std::array<uint8_t, AMDGPU::NUM_TARGET_REGS> SIRegisterInfo::PhysRegClass;
 
 // Map numbers of DWORDs to indexes in SubRegFromChannelTable.
 // Valid indexes are shifted 1, such that a 0 mapping means unsupported.
@@ -45,6 +46,85 @@
 static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
     0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
 
+static const TargetRegisterClass *const PhysRegBaseClasses[] = {
+    &AMDGPU::VGPR_LO16RegClass,
+    &AMDGPU::VGPR_HI16RegClass,
+    &AMDGPU::SReg_LO16RegClass,
+    &AMDGPU::AGPR_LO16RegClass,
+    &AMDGPU::VGPR_32RegClass,
+    &AMDGPU::SReg_32RegClass,
+    &AMDGPU::AGPR_32RegClass,
+    &AMDGPU::AGPR_32RegClass,
+    &AMDGPU::VReg_64_Align2RegClass,
+    &AMDGPU::VReg_64RegClass,
+    &AMDGPU::SReg_64RegClass,
+    &AMDGPU::AReg_64_Align2RegClass,
+    &AMDGPU::AReg_64RegClass,
+    &AMDGPU::VReg_96_Align2RegClass,
+    &AMDGPU::VReg_96RegClass,
+    &AMDGPU::SReg_96RegClass,
+    &AMDGPU::AReg_96_Align2RegClass,
+    &AMDGPU::AReg_96RegClass,
+    &AMDGPU::VReg_128_Align2RegClass,
+    &AMDGPU::VReg_128RegClass,
+    &AMDGPU::SReg_128RegClass,
+    &AMDGPU::AReg_128_Align2RegClass,
+    &AMDGPU::AReg_128RegClass,
+    &AMDGPU::VReg_160_Align2RegClass,
+    &AMDGPU::VReg_160RegClass,
+    &AMDGPU::SReg_160RegClass,
+    &AMDGPU::AReg_160_Align2RegClass,
+    &AMDGPU::AReg_160RegClass,
+    &AMDGPU::VReg_192_Align2RegClass,
+    &AMDGPU::VReg_192RegClass,
+    &AMDGPU::SReg_192RegClass,
+    &AMDGPU::AReg_192_Align2RegClass,
+    &AMDGPU::AReg_192RegClass,
+    &AMDGPU::VReg_224_Align2RegClass,
+    &AMDGPU::VReg_224RegClass,
+    &AMDGPU::SReg_224RegClass,
+    &AMDGPU::AReg_224_Align2RegClass,
+    &AMDGPU::AReg_224RegClass,
+    &AMDGPU::VReg_256_Align2RegClass,
+    &AMDGPU::VReg_256RegClass,
+    &AMDGPU::SReg_256RegClass,
+    &AMDGPU::AReg_256_Align2RegClass,
+    &AMDGPU::AReg_256RegClass,
+    &AMDGPU::VReg_288_Align2RegClass,
+    &AMDGPU::VReg_288RegClass,
+    &AMDGPU::SReg_288RegClass,
+    &AMDGPU::AReg_288_Align2RegClass,
+    &AMDGPU::AReg_288RegClass,
+    &AMDGPU::VReg_320_Align2RegClass,
+    &AMDGPU::VReg_320RegClass,
+    &AMDGPU::SReg_320RegClass,
+    &AMDGPU::AReg_320_Align2RegClass,
+    &AMDGPU::AReg_320RegClass,
+    &AMDGPU::VReg_352_Align2RegClass,
+    &AMDGPU::VReg_352RegClass,
+    &AMDGPU::SReg_352RegClass,
+    &AMDGPU::AReg_352_Align2RegClass,
+
&AMDGPU::AReg_352RegClass, + &AMDGPU::VReg_384_Align2RegClass, + &AMDGPU::VReg_384RegClass, + &AMDGPU::SReg_384RegClass, + &AMDGPU::AReg_384_Align2RegClass, + &AMDGPU::AReg_384RegClass, + &AMDGPU::VReg_512_Align2RegClass, + &AMDGPU::VReg_512RegClass, + &AMDGPU::SReg_512RegClass, + &AMDGPU::AReg_512_Align2RegClass, + &AMDGPU::AReg_512RegClass, + &AMDGPU::SReg_1024RegClass, + &AMDGPU::VReg_1024_Align2RegClass, + &AMDGPU::VReg_1024RegClass, + &AMDGPU::AReg_1024_Align2RegClass, + &AMDGPU::AReg_1024RegClass, + &AMDGPU::SCC_CLASSRegClass, + &AMDGPU::Pseudo_SReg_32RegClass, + &AMDGPU::Pseudo_SReg_128RegClass, +}; + namespace llvm { // A temporary struct to spill SGPRs. @@ -373,9 +453,26 @@ } }; + static llvm::once_flag InitializePhysRegClassesFlag; + + static auto InitializePhysRegClassesOnce = [&]() { + const uint8_t NumClasses = sizeof(PhysRegBaseClasses) / sizeof(PhysRegBaseClasses[0]); + PhysRegClass.fill(NumClasses); + // Process base classes in reverse order to apply class priority to registers + for (int Idx = NumClasses - 1; Idx >= 0; Idx--) { + const TargetRegisterClass *BaseClass = PhysRegBaseClasses[Idx]; + for (const auto Reg : BaseClass->getRegisters()) { + assert(Reg < PhysRegClass.size()); + PhysRegClass[Reg] = Idx; + } + } + }; + llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce); llvm::call_once(InitializeSubRegFromChannelTableFlag, InitializeSubRegFromChannelTableOnce); + llvm::call_once(InitializePhysRegClassesFlag, + InitializePhysRegClassesOnce); } void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, @@ -2694,95 +2791,15 @@ return nullptr; } -// FIXME: This is very slow. It might be worth creating a map from physreg to -// register class. 
const TargetRegisterClass * SIRegisterInfo::getPhysRegClass(MCRegister Reg) const { - static const TargetRegisterClass *const BaseClasses[] = { - &AMDGPU::VGPR_LO16RegClass, - &AMDGPU::VGPR_HI16RegClass, - &AMDGPU::SReg_LO16RegClass, - &AMDGPU::AGPR_LO16RegClass, - &AMDGPU::VGPR_32RegClass, - &AMDGPU::SReg_32RegClass, - &AMDGPU::AGPR_32RegClass, - &AMDGPU::AGPR_32RegClass, - &AMDGPU::VReg_64_Align2RegClass, - &AMDGPU::VReg_64RegClass, - &AMDGPU::SReg_64RegClass, - &AMDGPU::AReg_64_Align2RegClass, - &AMDGPU::AReg_64RegClass, - &AMDGPU::VReg_96_Align2RegClass, - &AMDGPU::VReg_96RegClass, - &AMDGPU::SReg_96RegClass, - &AMDGPU::AReg_96_Align2RegClass, - &AMDGPU::AReg_96RegClass, - &AMDGPU::VReg_128_Align2RegClass, - &AMDGPU::VReg_128RegClass, - &AMDGPU::SReg_128RegClass, - &AMDGPU::AReg_128_Align2RegClass, - &AMDGPU::AReg_128RegClass, - &AMDGPU::VReg_160_Align2RegClass, - &AMDGPU::VReg_160RegClass, - &AMDGPU::SReg_160RegClass, - &AMDGPU::AReg_160_Align2RegClass, - &AMDGPU::AReg_160RegClass, - &AMDGPU::VReg_192_Align2RegClass, - &AMDGPU::VReg_192RegClass, - &AMDGPU::SReg_192RegClass, - &AMDGPU::AReg_192_Align2RegClass, - &AMDGPU::AReg_192RegClass, - &AMDGPU::VReg_224_Align2RegClass, - &AMDGPU::VReg_224RegClass, - &AMDGPU::SReg_224RegClass, - &AMDGPU::AReg_224_Align2RegClass, - &AMDGPU::AReg_224RegClass, - &AMDGPU::VReg_256_Align2RegClass, - &AMDGPU::VReg_256RegClass, - &AMDGPU::SReg_256RegClass, - &AMDGPU::AReg_256_Align2RegClass, - &AMDGPU::AReg_256RegClass, - &AMDGPU::VReg_288_Align2RegClass, - &AMDGPU::VReg_288RegClass, - &AMDGPU::SReg_288RegClass, - &AMDGPU::AReg_288_Align2RegClass, - &AMDGPU::AReg_288RegClass, - &AMDGPU::VReg_320_Align2RegClass, - &AMDGPU::VReg_320RegClass, - &AMDGPU::SReg_320RegClass, - &AMDGPU::AReg_320_Align2RegClass, - &AMDGPU::AReg_320RegClass, - &AMDGPU::VReg_352_Align2RegClass, - &AMDGPU::VReg_352RegClass, - &AMDGPU::SReg_352RegClass, - &AMDGPU::AReg_352_Align2RegClass, - &AMDGPU::AReg_352RegClass, - &AMDGPU::VReg_384_Align2RegClass, - 
&AMDGPU::VReg_384RegClass, - &AMDGPU::SReg_384RegClass, - &AMDGPU::AReg_384_Align2RegClass, - &AMDGPU::AReg_384RegClass, - &AMDGPU::VReg_512_Align2RegClass, - &AMDGPU::VReg_512RegClass, - &AMDGPU::SReg_512RegClass, - &AMDGPU::AReg_512_Align2RegClass, - &AMDGPU::AReg_512RegClass, - &AMDGPU::SReg_1024RegClass, - &AMDGPU::VReg_1024_Align2RegClass, - &AMDGPU::VReg_1024RegClass, - &AMDGPU::AReg_1024_Align2RegClass, - &AMDGPU::AReg_1024RegClass, - &AMDGPU::SCC_CLASSRegClass, - &AMDGPU::Pseudo_SReg_32RegClass, - &AMDGPU::Pseudo_SReg_128RegClass, - }; - - for (const TargetRegisterClass *BaseClass : BaseClasses) { - if (BaseClass->contains(Reg)) { - return BaseClass; - } - } - return nullptr; + if (Reg >= PhysRegClass.size()) + return nullptr; + const uint8_t NumClasses = sizeof(PhysRegBaseClasses) / sizeof(PhysRegBaseClasses[0]); + uint8_t Idx = PhysRegClass[Reg]; + if (Idx >= NumClasses) + return nullptr; + return PhysRegBaseClasses[Idx]; } bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,