diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -14,6 +14,15 @@ namespace llvm { +namespace SIRCFlags { +// This needs to be kept in sync with the field bits in SIRegisterClass. +enum : uint8_t { + // For vector registers. + IsVGPR = 1 << 0, + IsAGPR = 1 << 1 +}; // enum +} // namespace SIRCFlags + namespace SIInstrFlags { // This needs to be kept in sync with the field bits in InstSI. enum : uint64_t { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -168,6 +168,11 @@ bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const; + /// \returns true if this class contains only VGPR registers + bool isVGPRClass(const TargetRegisterClass *RC) const { + return hasVGPRs(RC) && !hasAGPRs(RC); + } + /// \returns true if this class contains only AGPR registers bool isAGPRClass(const TargetRegisterClass *RC) const { return hasAGPRs(RC) && !hasVGPRs(RC); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2166,32 +2166,12 @@ return isSGPRClass(RC); } -// TODO: It might be helpful to have some target specific flags in -// TargetRegisterClass to mark which classes are VGPRs to make this trivial. bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { - unsigned Size = getRegSizeInBits(*RC); - if (Size == 16) { - return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr || - getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr; - } - const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size); - if (!VRC) { - assert(Size < 32 && "Invalid register class size"); - return false; - } - return getCommonSubClass(VRC, RC) != nullptr; + return RC->TSFlags & SIRCFlags::IsVGPR; } bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const { - unsigned Size = getRegSizeInBits(*RC); - if (Size < 16) - return false; - const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size); - if (!ARC) { - assert(getVGPRClassForBitWidth(Size) && "Invalid register class size"); - return false; - } - return getCommonSubClass(ARC, RC) != nullptr; + return RC->TSFlags & SIRCFlags::IsAGPR; } const TargetRegisterClass * @@ -2335,7 +2315,7 @@ Register Reg) const { const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg); // Registers without classes are unaddressable, SGPR-like registers. - return RC && hasVGPRs(RC); + return RC && isVGPRClass(RC); } bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI, @@ -2343,7 +2323,7 @@ const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg); // Registers without classes are unaddressable, SGPR-like registers. - return RC && hasAGPRs(RC); + return RC && isAGPRClass(RC); } bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -130,6 +130,16 @@ RegisterWithSubRegs { } +class SIRegisterClass rTypes, int Align, dag rList> : RegisterClass { + // For vector register classes. + field bit IsVGPR = 0; + field bit IsAGPR = 0; + + // These need to be kept in sync with the enum SIRCFlags. + let TSFlags{0} = IsVGPR; + let TSFlags{1} = IsAGPR; +} + multiclass SIRegLoHi16 regIdx, bit ArtificialHigh = 1, bit HWEncodingHigh = 0> { // There is no special encoding for 16 bit subregs, these are not real @@ -299,17 +309,17 @@ // Groupings using register classes and tuples //===----------------------------------------------------------------------===// -def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> { +def SCC_CLASS : SIRegisterClass<"AMDGPU", [i1], 1, (add SCC)> { let CopyCost = -1; let isAllocatable = 0; } -def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> { +def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> { let CopyCost = 1; let isAllocatable = 0; } -def M0_CLASS_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> { +def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> { let CopyCost = 1; let Size = 16; let isAllocatable = 0; @@ -317,14 +327,14 @@ // TODO: Do we need to set DwarfRegAlias on register tuples? -def SGPR_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, +def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add (sequence "SGPR%u_LO16", 0, 105))> { let AllocationPriority = 9; let Size = 16; let GeneratePressureSet = 0; } -def SGPR_HI16 : RegisterClass<"AMDGPU", [i16, f16], 16, +def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add (sequence "SGPR%u_HI16", 0, 105))> { let isAllocatable = 0; let Size = 16; @@ -332,7 +342,7 @@ } // SGPR 32-bit registers -def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add (sequence "SGPR%u", 0, 105))> { // Give all SGPR classes higher priority than VGPR classes, because // we want to spill SGPRs to VGPRs. @@ -368,13 +378,13 @@ def SGPR_1024Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 32, "s">; // Trap handler TMP 32-bit registers -def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, +def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, (add (sequence "TTMP%u", 0, 15))> { let isAllocatable = 0; } // Trap handler TMP 16-bit registers -def TTMP_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, +def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add (sequence "TTMP%u_LO16", 0, 15))> { let Size = 16; let isAllocatable = 0; @@ -490,14 +500,15 @@ def Reg16Types : RegisterTypes<[i16, f16]>; def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>; -def VGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16, +let IsVGPR = 1 in { +def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, (add (sequence "VGPR%u_LO16", 0, 255))> { let AllocationPriority = 1; let Size = 16; let GeneratePressureSet = 0; } -def VGPR_HI16 : RegisterClass<"AMDGPU", Reg16Types.types, 16, +def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, (add (sequence "VGPR%u_HI16", 0, 255))> { let AllocationPriority = 1; let Size = 16; @@ -506,12 +517,13 @@ // VGPR 32-bit registers // i16/f16 only on VI+ -def VGPR_32 : RegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32, +def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32, (add (sequence "VGPR%u", 0, 255))> { let AllocationPriority = 1; let Size = 32; let Weight = 1; } +} // End IsVGPR = 1 // VGPR 64-bit registers def VGPR_64 : SIRegisterTuples.ret, VGPR_32, 255, 1, 2, "v">; @@ -540,7 +552,8 @@ // VGPR 1024-bit registers def VGPR_1024 : SIRegisterTuples.ret, VGPR_32, 255, 1, 32, "v">; -def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16, +let IsAGPR = 1 in { +def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16, (add (sequence "AGPR%u_LO16", 0, 255))> { let isAllocatable = 0; let Size = 16; @@ -548,12 +561,13 @@ } // AccVGPR 32-bit registers -def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add (sequence "AGPR%u", 0, 255))> { let AllocationPriority = 1; let Size = 32; let Weight = 1; } +} // End IsAGPR = 1 // AGPR 64-bit registers def AGPR_64 : SIRegisterTuples.ret, AGPR_32, 255, 1, 2, "a">; @@ -586,19 +600,19 @@ // Register classes used as source and destination //===----------------------------------------------------------------------===// -def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add FP_REG, SP_REG)> { let isAllocatable = 0; let CopyCost = -1; } -def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32, +def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32, (add PRIVATE_RSRC_REG)> { let isAllocatable = 0; let CopyCost = -1; } -def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32, +def LDS_DIRECT_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add LDS_DIRECT)> { let isAllocatable = 0; let CopyCost = -1; @@ -607,7 +621,7 @@ let GeneratePressureSet = 0 in { // Subset of SReg_32 without M0 for SMRD instructions and alike. // See comments in SIInstructions.td for more info. -def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID, @@ -615,7 +629,7 @@ let AllocationPriority = 10; } -def SReg_LO16_XM0_XEXEC : RegisterClass<"AMDGPU", [i16, f16], 16, +def SReg_LO16_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16, XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, TTMP_LO16, TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16, @@ -625,29 +639,29 @@ let AllocationPriority = 10; } -def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> { let AllocationPriority = 10; } -def SReg_LO16_XEXEC_HI : RegisterClass<"AMDGPU", [i16, f16], 16, +def SReg_LO16_XEXEC_HI : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, M0_CLASS_LO16)> { let Size = 16; let AllocationPriority = 10; } -def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> { let AllocationPriority = 10; } -def SReg_LO16_XM0 : RegisterClass<"AMDGPU", [i16, f16], 16, +def SReg_LO16_XM0 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, EXEC_HI_LO16)> { let Size = 16; let AllocationPriority = 10; } -def SReg_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, +def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add SGPR_LO16, SReg_LO16_XM0, M0_CLASS_LO16, EXEC_LO_LO16, EXEC_HI_LO16, SReg_LO16_XEXEC_HI)> { let Size = 16; let AllocationPriority = 10; @@ -655,54 +669,54 @@ } // End GeneratePressureSet = 0 // Register class for all scalar registers (SGPRs + Special Registers) -def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> { let AllocationPriority = 10; } let GeneratePressureSet = 0 in { -def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add SReg_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; } -def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, +def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> { let CopyCost = 1; let AllocationPriority = 11; } // CCR (call clobbered registers) SGPR 64-bit registers -def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, +def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, (add (trunc SGPR_64, 16))> { let CopyCost = SGPR_64.CopyCost; let AllocationPriority = SGPR_64.AllocationPriority; } -def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, +def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add TTMP_64Regs)> { let isAllocatable = 0; } -def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, +def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> { let CopyCost = 1; let AllocationPriority = 13; } -def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, +def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, (add SReg_64_XEXEC, EXEC)> { let CopyCost = 1; let AllocationPriority = 13; } -def SReg_1_XEXEC : RegisterClass<"AMDGPU", [i1], 32, +def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32, (add SReg_64_XEXEC, SReg_32_XM0_XEXEC)> { let CopyCost = 1; let isAllocatable = 0; } -def SReg_1 : RegisterClass<"AMDGPU", [i1], 32, +def SReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add SReg_1_XEXEC, EXEC, EXEC_LO)> { let CopyCost = 1; let isAllocatable = 0; @@ -719,17 +733,17 @@ defvar ttmpName = !strconcat("TTMP_", suffix); let AllocationPriority = priority, CopyCost = copyCost in { - def "" # sgprName : RegisterClass<"AMDGPU", regTypes, 32, (add regList)> { + def "" # sgprName : SIRegisterClass<"AMDGPU", regTypes, 32, (add regList)> { } if hasTTMP then { - def "" # ttmpName : RegisterClass<"AMDGPU", regTypes, 32, (add ttmpList)> { + def "" # ttmpName : SIRegisterClass<"AMDGPU", regTypes, 32, (add ttmpList)> { let isAllocatable = 0; } } def SReg_ # suffix : - RegisterClass<"AMDGPU", regTypes, 32, + SIRegisterClass<"AMDGPU", regTypes, 32, !con(!dag(add, [!cast(sgprName)], ["sgpr"]), !if(hasTTMP, !dag(add, [!cast(ttmpName)], ["ttmp"]), @@ -748,14 +762,15 @@ defm "" : SRegClass<16, 20, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>; defm "" : SRegClass<32, 21, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>; -def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add VGPR_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; + let IsVGPR = 1; } // Register class for all vector registers (VGPRs + Interpolation Registers) class VRegClassBase regTypes, dag regList> : - RegisterClass<"AMDGPU", regTypes, 32, regList> { + SIRegisterClass<"AMDGPU", regTypes, 32, regList> { let Size = !mul(numRegs, 32); // Requires n v_mov_b32 to copy @@ -767,11 +782,13 @@ // Define a register tuple class, along with one requiring an even // aligned base register. multiclass VRegClass regTypes, dag regList> { - // Define the regular class. - def "" : VRegClassBase; + let IsVGPR = 1 in { + // Define the regular class. + def "" : VRegClassBase; - // Define 2-aligned variant - def _Align2 : VRegClassBase; + // Define 2-aligned variant + def _Align2 : VRegClassBase; + } } defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4], @@ -787,7 +804,7 @@ defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>; multiclass ARegClass regTypes, dag regList> { - let CopyCost = !add(numRegs, numRegs, 1) in { + let CopyCost = !add(numRegs, numRegs, 1), IsAGPR = 1 in { // Define the regular class. def "" : VRegClassBase; @@ -823,43 +840,56 @@ // on an empty register set, but also sorts register classes based on // the number of registerss in them. Add only one register so this is // sorted to the end and not preferred over VGPR_32. -def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> { +def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> { let Size = 1; + let IsVGPR = 1; } -def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; + let IsVGPR = 1; } -def VS_64 : RegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> { +def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> { let isAllocatable = 0; + let IsVGPR = 1; } -def AV_32 : RegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, +def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add AGPR_32, VGPR_32)> { let isAllocatable = 0; + let IsVGPR = 1; + let IsAGPR = 1; } -def AV_64 : RegisterClass<"AMDGPU", VReg_64.RegTypes, 32, +def AV_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add AReg_64, VReg_64)> { let isAllocatable = 0; + let IsVGPR = 1; + let IsAGPR = 1; } } // End GeneratePressureSet = 0 -def AV_96 : RegisterClass<"AMDGPU", VReg_96.RegTypes, 32, +def AV_96 : SIRegisterClass<"AMDGPU", VReg_96.RegTypes, 32, (add AReg_96, VReg_96)> { let isAllocatable = 0; + let IsVGPR = 1; + let IsAGPR = 1; } -def AV_128 : RegisterClass<"AMDGPU", VReg_128.RegTypes, 32, +def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32, (add AReg_128, VReg_128)> { let isAllocatable = 0; + let IsVGPR = 1; + let IsAGPR = 1; } -def AV_160 : RegisterClass<"AMDGPU", VReg_160.RegTypes, 32, +def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32, (add AReg_160, VReg_160)> { let isAllocatable = 0; + let IsVGPR = 1; + let IsAGPR = 1; } //===----------------------------------------------------------------------===//