diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -356,7 +356,7 @@
 
       const SIRegisterInfo *TRI =
           static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
-      return TRI->getPhysRegClass(Reg);
+      return TRI->getPhysRegBaseClass(Reg);
     }
 
     return nullptr;
@@ -1429,8 +1429,10 @@
 static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
   if (Val.getOpcode() != ISD::CopyFromReg)
     return false;
-  auto RC =
-      TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
+  auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
+  if (!Reg.isPhysical())
+    return false;
+  auto RC = TRI.getPhysRegBaseClass(Reg);
   return RC && TRI.isSGPRClass(RC);
 }
 
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1210,7 +1210,7 @@
   const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
   if (!SDST) {
     for (const auto &MO : MI->implicit_operands()) {
-      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
+      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegBaseClass(MO.getReg()))) {
         SDST = &MO;
         break;
       }
@@ -1291,7 +1291,7 @@
     if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst))
       return true;
     for (auto MO : MI.implicit_operands())
-      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
+      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegBaseClass(MO.getReg())))
         return true;
   }
   if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -187,14 +187,14 @@
 
   const TargetRegisterClass *SrcRC = SrcReg.isVirtual()
                                          ? MRI.getRegClass(SrcReg)
-                                         : TRI.getPhysRegClass(SrcReg);
+                                         : TRI.getPhysRegBaseClass(SrcReg);
 
   // We don't really care about the subregister here.
   // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
 
   const TargetRegisterClass *DstRC = DstReg.isVirtual()
                                          ? MRI.getRegClass(DstReg)
-                                         : TRI.getPhysRegClass(DstReg);
+                                         : TRI.getPhysRegBaseClass(DstReg);
 
   return std::pair(SrcRC, DstRC);
 }
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -338,7 +338,7 @@
         ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
         FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
         SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), DL(DL), FrameReg(FrameReg) {
-    const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
+    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
     SplitParts = TRI.getRegSplitParts(RC, EltSize);
     NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
 
@@ -1322,7 +1322,7 @@
   // Allocate spill slots for WWM reserved VGPRs.
   if (!FuncInfo->isEntryFunction()) {
     for (Register Reg : FuncInfo->getWWMReservedRegs()) {
-      const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
+      const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
       FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
                                  TRI->getSpillAlign(*RC));
     }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12342,7 +12342,7 @@
   auto Ret = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 
   if (Ret.first)
-    Ret.second = TRI->getPhysRegClass(Ret.first);
+    Ret.second = TRI->getPhysRegBaseClass(Ret.first);
 
   return Ret;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -712,13 +712,13 @@
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, MCRegister DestReg,
                               MCRegister SrcReg, bool KillSrc) const {
-  const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
+  const TargetRegisterClass *RC = RI.getPhysRegBaseClass(DestReg);
 
   // FIXME: This is hack to resolve copies between 16 bit and 32 bit
   // registers until all patterns are fixed.
   if (Fix16BitCopies &&
       ((RI.getRegSizeInBits(*RC) == 16) ^
-       (RI.getRegSizeInBits(*RI.getPhysRegClass(SrcReg)) == 16))) {
+       (RI.getRegSizeInBits(*RI.getPhysRegBaseClass(SrcReg)) == 16))) {
     MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;
     MCRegister Super = RI.get32BitRegister(RegToFix);
     assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix);
@@ -730,7 +730,7 @@
       return;
     }
 
-    RC = RI.getPhysRegClass(DestReg);
+    RC = RI.getPhysRegBaseClass(DestReg);
   }
 
   if (RC == &AMDGPU::VGPR_32RegClass) {
@@ -920,7 +920,7 @@
     return;
   }
 
-  const TargetRegisterClass *SrcRC = RI.getPhysRegClass(SrcReg);
+  const TargetRegisterClass *SrcRC = RI.getPhysRegBaseClass(SrcReg);
   if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
     if (ST.hasMovB64()) {
       BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_e32), DestReg)
@@ -3211,7 +3211,7 @@
         Src0Inlined = true;
       } else if ((Src0->getReg().isPhysical() &&
                   (ST.getConstantBusLimit(Opc) <= 1 &&
-                   RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
+                   RI.isSGPRClass(RI.getPhysRegBaseClass(Src0->getReg())))) ||
                  (Src0->getReg().isVirtual() &&
                   (ST.getConstantBusLimit(Opc) <= 1 &&
                    RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
@@ -3228,7 +3228,7 @@
           commuteInstruction(UseMI)) {
         Src0->ChangeToImmediate(Def->getOperand(1).getImm());
       } else if ((Src1->getReg().isPhysical() &&
-                  RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
+                  RI.isSGPRClass(RI.getPhysRegBaseClass(Src1->getReg()))) ||
                  (Src1->getReg().isVirtual() &&
                   RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
         return false;
@@ -4991,7 +4991,7 @@
 
     if (Reg.isVirtual())
       return MRI.getRegClass(Reg);
-    return RI.getPhysRegClass(Reg);
+    return RI.getPhysRegBaseClass(Reg);
   }
 
   unsigned RCID = Desc.OpInfo[OpNo].RegClass;
@@ -8468,7 +8468,7 @@
   if (opcode == AMDGPU::COPY) {
     const MachineOperand &srcOp = MI.getOperand(1);
     if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
-      const TargetRegisterClass *regClass = RI.getPhysRegClass(srcOp.getReg());
+      const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
       return RI.isSGPRClass(regClass) ? InstructionUniformity::AlwaysUniform
                                       : InstructionUniformity::NeverUniform;
     }
@@ -8498,7 +8498,7 @@
   // Handling $vpgr reads
   for (auto srcOp : MI.operands()) {
     if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
-      const TargetRegisterClass *regClass = RI.getPhysRegClass(srcOp.getReg());
+      const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
       if (RI.isVGPRClass(regClass))
         return InstructionUniformity::NeverUniform;
     }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -182,10 +182,6 @@
   LLVM_READONLY
   static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
 
-  /// Return the 'base' register class for this register.
-  /// e.g. SGPR0 => SReg_32, VGPR => VGPR_32 SGPR0_SGPR1 -> SReg_32, etc.
-  const TargetRegisterClass *getPhysRegClass(MCRegister Reg) const;
-
   /// \returns true if this class contains only SGPR registers
   static bool isSGPRClass(const TargetRegisterClass *RC) {
     return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -11,13 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SIRegisterInfo.h"
 #include "AMDGPU.h"
 #include "AMDGPURegisterBankInfo.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUInstPrinter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -122,7 +122,7 @@
         Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
         MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
         IsWave32(IsWave32) {
-    const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
+    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
     SplitParts = TRI.getRegSplitParts(RC, EltSize);
     NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
 
@@ -2824,104 +2824,13 @@
   return nullptr;
 }
 
-// FIXME: This is very slow. It might be worth creating a map from physreg to
-// register class.
-const TargetRegisterClass *
-SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
-  static const TargetRegisterClass *const BaseClasses[] = {
-    &AMDGPU::VGPR_LO16RegClass,
-    &AMDGPU::VGPR_HI16RegClass,
-    &AMDGPU::SReg_LO16RegClass,
-    &AMDGPU::AGPR_LO16RegClass,
-    &AMDGPU::VGPR_32RegClass,
-    &AMDGPU::SReg_32RegClass,
-    &AMDGPU::AGPR_32RegClass,
-    &AMDGPU::AGPR_32RegClass,
-    &AMDGPU::VReg_64_Align2RegClass,
-    &AMDGPU::VReg_64RegClass,
-    &AMDGPU::SReg_64RegClass,
-    &AMDGPU::AReg_64_Align2RegClass,
-    &AMDGPU::AReg_64RegClass,
-    &AMDGPU::VReg_96_Align2RegClass,
-    &AMDGPU::VReg_96RegClass,
-    &AMDGPU::SReg_96RegClass,
-    &AMDGPU::AReg_96_Align2RegClass,
-    &AMDGPU::AReg_96RegClass,
-    &AMDGPU::VReg_128_Align2RegClass,
-    &AMDGPU::VReg_128RegClass,
-    &AMDGPU::SReg_128RegClass,
-    &AMDGPU::AReg_128_Align2RegClass,
-    &AMDGPU::AReg_128RegClass,
-    &AMDGPU::VReg_160_Align2RegClass,
-    &AMDGPU::VReg_160RegClass,
-    &AMDGPU::SReg_160RegClass,
-    &AMDGPU::AReg_160_Align2RegClass,
-    &AMDGPU::AReg_160RegClass,
-    &AMDGPU::VReg_192_Align2RegClass,
-    &AMDGPU::VReg_192RegClass,
-    &AMDGPU::SReg_192RegClass,
-    &AMDGPU::AReg_192_Align2RegClass,
-    &AMDGPU::AReg_192RegClass,
-    &AMDGPU::VReg_224_Align2RegClass,
-    &AMDGPU::VReg_224RegClass,
-    &AMDGPU::SReg_224RegClass,
-    &AMDGPU::AReg_224_Align2RegClass,
-    &AMDGPU::AReg_224RegClass,
-    &AMDGPU::VReg_256_Align2RegClass,
-    &AMDGPU::VReg_256RegClass,
-    &AMDGPU::SReg_256RegClass,
-    &AMDGPU::AReg_256_Align2RegClass,
-    &AMDGPU::AReg_256RegClass,
-    &AMDGPU::VReg_288_Align2RegClass,
-    &AMDGPU::VReg_288RegClass,
-    &AMDGPU::SReg_288RegClass,
-    &AMDGPU::AReg_288_Align2RegClass,
-    &AMDGPU::AReg_288RegClass,
-    &AMDGPU::VReg_320_Align2RegClass,
-    &AMDGPU::VReg_320RegClass,
-    &AMDGPU::SReg_320RegClass,
-    &AMDGPU::AReg_320_Align2RegClass,
-    &AMDGPU::AReg_320RegClass,
-    &AMDGPU::VReg_352_Align2RegClass,
-    &AMDGPU::VReg_352RegClass,
-    &AMDGPU::SReg_352RegClass,
-    &AMDGPU::AReg_352_Align2RegClass,
-    &AMDGPU::AReg_352RegClass,
-    &AMDGPU::VReg_384_Align2RegClass,
-    &AMDGPU::VReg_384RegClass,
-    &AMDGPU::SReg_384RegClass,
-    &AMDGPU::AReg_384_Align2RegClass,
-    &AMDGPU::AReg_384RegClass,
-    &AMDGPU::VReg_512_Align2RegClass,
-    &AMDGPU::VReg_512RegClass,
-    &AMDGPU::SReg_512RegClass,
-    &AMDGPU::AReg_512_Align2RegClass,
-    &AMDGPU::AReg_512RegClass,
-    &AMDGPU::SReg_1024RegClass,
-    &AMDGPU::VReg_1024_Align2RegClass,
-    &AMDGPU::VReg_1024RegClass,
-    &AMDGPU::AReg_1024_Align2RegClass,
-    &AMDGPU::AReg_1024RegClass,
-    &AMDGPU::SCC_CLASSRegClass,
-    &AMDGPU::Pseudo_SReg_32RegClass,
-    &AMDGPU::Pseudo_SReg_128RegClass,
-  };
-
-  for (const TargetRegisterClass *BaseClass : BaseClasses) {
-    if (BaseClass->contains(Reg)) {
-      return BaseClass;
-    }
-  }
-  return nullptr;
-}
-
 bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
                                Register Reg) const {
   const TargetRegisterClass *RC;
   if (Reg.isVirtual())
     RC = MRI.getRegClass(Reg);
   else
-    RC = getPhysRegClass(Reg);
+    RC = getPhysRegBaseClass(Reg);
   return RC ? isSGPRClass(RC) : false;
 }
 
@@ -3038,7 +2947,7 @@
 const TargetRegisterClass*
 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
                                   Register Reg) const {
-  return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegClass(Reg);
+  return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
 }
 
 const TargetRegisterClass *
@@ -3248,7 +3157,7 @@
 }
 
 MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
-  assert(getRegSizeInBits(*getPhysRegClass(Reg)) <= 32);
+  assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
 
   for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
                                          AMDGPU::SReg_32RegClass,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -373,6 +373,7 @@
   let CopyCost = -1;
   let isAllocatable = 0;
   let HasSGPR = 1;
+  let BaseClassOrder = 10000;
 }
 
 def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> {
@@ -598,6 +599,7 @@
   let AllocationPriority = 0;
   let Size = 16;
   let GeneratePressureSet = 0;
+  let BaseClassOrder = 16;
 }
 
 def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
@@ -605,6 +607,7 @@
   let AllocationPriority = 0;
   let Size = 16;
   let GeneratePressureSet = 0;
+  let BaseClassOrder = 17;
 }
 
 // VGPR 32-bit registers
@@ -614,6 +617,7 @@
   let AllocationPriority = 0;
   let Size = 32;
   let Weight = 1;
+  let BaseClassOrder = 32;
 }
 
 // Identical to VGPR_32 except it only contains the low 128 (Lo128) registers.
@@ -671,6 +675,7 @@
   let isAllocatable = 0;
   let Size = 16;
   let GeneratePressureSet = 0;
+  let BaseClassOrder = 16;
 }
 
 // AccVGPR 32-bit registers
@@ -679,6 +684,7 @@
   let AllocationPriority = 0;
   let Size = 32;
   let Weight = 1;
+  let BaseClassOrder = 32;
 }
 } // End HasAGPR = 1
 
@@ -730,6 +736,7 @@
   let isAllocatable = 0;
   let CopyCost = -1;
   let HasSGPR = 1;
+  let BaseClassOrder = 10000;
 }
 
 def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16], 32,
@@ -737,6 +744,7 @@
   let isAllocatable = 0;
   let CopyCost = -1;
   let HasSGPR = 1;
+  let BaseClassOrder = 10000;
 }
 
 def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
@@ -767,6 +775,7 @@
                  SRC_EXECZ_LO16, SRC_SCC_LO16, EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16)> {
   let Size = 16;
   let AllocationPriority = 0;
+  let BaseClassOrder = 16;
 }
 
 def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
@@ -791,6 +800,7 @@
   (add SReg_32_XM0, M0_CLASS)> {
   let AllocationPriority = 0;
   let HasSGPR = 1;
+  let BaseClassOrder = 32;
 }
 
 let GeneratePressureSet = 0 in {
@@ -826,6 +836,7 @@
   let CopyCost = 1;
   let AllocationPriority = 1;
   let HasSGPR = 1;
+  let BaseClassOrder = 64;
 }
 
 def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
@@ -869,6 +880,7 @@
                     !dag(add, [!cast<RegisterClass>(ttmpName)], ["ttmp"]),
                     (add)))> {
     let isAllocatable = 0;
+    let BaseClassOrder = !mul(numRegs, 32);
   }
 }
@@ -911,10 +923,15 @@
 multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
   let HasVGPR = 1 in {
     // Define the regular class.
-    def "" : VRegClassBase<numRegs, regTypes, regList>;
+    def "" : VRegClassBase<numRegs, regTypes, regList> {
+      let BaseClassOrder = !mul(numRegs, 32);
+    }
 
     // Define 2-aligned variant
-    def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
+    def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)> {
+      // Give aligned class higher priority in base class resolution
+      let BaseClassOrder = !sub(!mul(numRegs, 32), 1);
+    }
   }
 }
@@ -940,10 +957,15 @@
 multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
   let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
     // Define the regular class.
- def "" : VRegClassBase; + def "" : VRegClassBase { + let BaseClassOrder = !mul(numRegs, 32); + } // Define 2-aligned variant - def _Align2 : VRegClassBase; + def _Align2 : VRegClassBase { + // Give aligned class higher priority in base class resolution + let BaseClassOrder = !sub(!mul(numRegs, 32), 1); + } } } diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -607,7 +607,7 @@ Register Reg = MO.getReg(); if (!Reg.isVirtual() && - TRI->hasVectorRegisters(TRI->getPhysRegClass(Reg))) { + TRI->hasVectorRegisters(TRI->getPhysRegBaseClass(Reg))) { Flags = StateWQM; break; }