diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td @@ -18,5 +18,5 @@ def VCCRegBank : RegisterBank <"VCC", [SReg_1]>; def AGPRRegBank : RegisterBank <"AGPR", - [AGPR_32, AReg_64, AReg_128, AReg_512, AReg_1024] + [AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_256, AReg_512, AReg_1024] >; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1307,8 +1307,14 @@ return &AMDGPU::AGPR_32RegClass; case 64: return &AMDGPU::AReg_64RegClass; + case 96: + return &AMDGPU::AReg_96RegClass; case 128: return &AMDGPU::AReg_128RegClass; + case 160: + return &AMDGPU::AReg_160RegClass; + case 256: + return &AMDGPU::AReg_256RegClass; case 512: return &AMDGPU::AReg_512RegClass; case 1024: @@ -1359,13 +1365,16 @@ &AMDGPU::AReg_64RegClass, &AMDGPU::VReg_96RegClass, &AMDGPU::SReg_96RegClass, + &AMDGPU::AReg_96RegClass, &AMDGPU::VReg_128RegClass, &AMDGPU::SReg_128RegClass, &AMDGPU::AReg_128RegClass, &AMDGPU::VReg_160RegClass, &AMDGPU::SReg_160RegClass, + &AMDGPU::AReg_160RegClass, &AMDGPU::VReg_256RegClass, &AMDGPU::SReg_256RegClass, + &AMDGPU::AReg_256RegClass, &AMDGPU::VReg_512RegClass, &AMDGPU::SReg_512RegClass, &AMDGPU::AReg_512RegClass, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -530,9 +530,18 @@ // AGPR 64-bit registers def AGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, AGPR_32, 255, 1, 2, "a">; +// AGPR 96-bit registers +def AGPR_96 : SIRegisterTuples<getSubRegs<3>.ret, AGPR_32, 255, 1, 3, "a">; + // AGPR 128-bit registers def AGPR_128 : SIRegisterTuples<getSubRegs<4>.ret, AGPR_32, 255, 1, 4, "a">; +// AGPR 160-bit registers +def AGPR_160 : SIRegisterTuples<getSubRegs<5>.ret, AGPR_32, 
255, 1, 5, "a">; + +// AGPR 256-bit registers +def AGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, AGPR_32, 255, 1, 8, "a">; + // AGPR 512-bit registers def AGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, AGPR_32, 255, 1, 16, "a">; @@ -802,6 +811,15 @@ let AllocationPriority = 2; } +def AReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, + (add AGPR_96)> { + let Size = 96; + + // Requires 3 v_accvgpr_write and 3 v_accvgpr_read to copy + burn 1 vgpr + let CopyCost = 7; + let AllocationPriority = 3; +} + def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add AGPR_128)> { let Size = 128; @@ -811,6 +829,24 @@ let AllocationPriority = 4; } +def AReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, + (add AGPR_160)> { + let Size = 160; + + // Requires 5 v_accvgpr_write and 5 v_accvgpr_read to copy + burn 1 vgpr + let CopyCost = 11; + let AllocationPriority = 5; +} + +def AReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, + (add AGPR_256)> { + let Size = 256; + + // Requires 8 v_accvgpr_write and 8 v_accvgpr_read to copy + burn 1 vgpr + let CopyCost = 17; + let AllocationPriority = 6; +} + def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add AGPR_512)> { let Size = 512; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1104,6 +1104,7 @@ case AMDGPU::SGPR_96RegClassID: case AMDGPU::SReg_96RegClassID: case AMDGPU::VReg_96RegClassID: + case AMDGPU::AReg_96RegClassID: return 96; case AMDGPU::SGPR_128RegClassID: case AMDGPU::SReg_128RegClassID: @@ -1113,9 +1114,11 @@ case AMDGPU::SGPR_160RegClassID: case AMDGPU::SReg_160RegClassID: case AMDGPU::VReg_160RegClassID: + case AMDGPU::AReg_160RegClassID: return 160; case AMDGPU::SReg_256RegClassID: case AMDGPU::VReg_256RegClassID: + case AMDGPU::AReg_256RegClassID: return 256; case AMDGPU::SReg_512RegClassID: case AMDGPU::VReg_512RegClassID: