diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -338,11 +338,9 @@ break; } - if (AMDGPU::SReg_32RegClass.contains(Reg) || - AMDGPU::SReg_LO16RegClass.contains(Reg) || + if (AMDGPU::SGPR_32RegClass.contains(Reg) || + AMDGPU::SGPR_LO16RegClass.contains(Reg) || AMDGPU::SGPR_HI16RegClass.contains(Reg)) { - assert(!AMDGPU::TTMP_32RegClass.contains(Reg) && - "trap handler registers should not be used"); IsSGPR = true; Width = 1; } else if (AMDGPU::VGPR_32RegClass.contains(Reg) || @@ -355,9 +353,7 @@ IsSGPR = false; IsAGPR = true; Width = 1; - } else if (AMDGPU::SReg_64RegClass.contains(Reg)) { - assert(!AMDGPU::TTMP_64RegClass.contains(Reg) && - "trap handler registers should not be used"); + } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) { IsSGPR = true; Width = 2; } else if (AMDGPU::VReg_64RegClass.contains(Reg)) { @@ -377,9 +373,7 @@ IsSGPR = false; IsAGPR = true; Width = 3; - } else if (AMDGPU::SReg_128RegClass.contains(Reg)) { - assert(!AMDGPU::TTMP_128RegClass.contains(Reg) && - "trap handler registers should not be used"); + } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) { IsSGPR = true; Width = 4; } else if (AMDGPU::VReg_128RegClass.contains(Reg)) { @@ -420,8 +414,6 @@ IsAGPR = true; Width = 7; } else if (AMDGPU::SReg_256RegClass.contains(Reg)) { - assert(!AMDGPU::TTMP_256RegClass.contains(Reg) && - "trap handler registers should not be used"); IsSGPR = true; Width = 8; } else if (AMDGPU::VReg_256RegClass.contains(Reg)) { @@ -472,8 +464,6 @@ IsAGPR = true; Width = 12; } else if (AMDGPU::SReg_512RegClass.contains(Reg)) { - assert(!AMDGPU::TTMP_512RegClass.contains(Reg) && - "trap handler registers should not be used"); IsSGPR = true; Width = 16; } else if (AMDGPU::VReg_512RegClass.contains(Reg)) { @@ -494,7 +484,12 @@ IsAGPR = true; Width = 32; } else { - llvm_unreachable("Unknown register class"); + assert((AMDGPU::TTMP_32RegClass.contains(Reg) || + AMDGPU::TTMP_64RegClass.contains(Reg) || + AMDGPU::TTMP_128RegClass.contains(Reg) || + AMDGPU::TTMP_256RegClass.contains(Reg) || + AMDGPU::TTMP_512RegClass.contains(Reg)) && + "Unknown register class"); } unsigned HWReg = TRI.getHWRegIndex(Reg); int MaxUsed = HWReg + Width - 1; diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-SDAG %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-GISEL %s + define amdgpu_cs void @_amdgpu_cs_main() { ; GFX9-SDAG-LABEL: _amdgpu_cs_main: ; GFX9-SDAG: ; %bb.0: ; %.entry @@ -33,6 +34,53 @@ ret void } +define amdgpu_cs void @caller() { +; GFX9-SDAG-LABEL: caller: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_getpc_b64 s[8:9] +; GFX9-SDAG-NEXT: s_mov_b32 s8, s0 +; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: s_add_u32 s8, s8, s0 +; GFX9-SDAG-NEXT: s_addc_u32 s9, s9, 0 +; GFX9-SDAG-NEXT: s_getpc_b64 s[0:1] +; GFX9-SDAG-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 +; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 +; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[8:9] +; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: caller: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_getpc_b64 s[8:9] +; GFX9-GISEL-NEXT: s_mov_b32 s8, s0 +; GFX9-GISEL-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_add_u32 s8, s8, s0 +; GFX9-GISEL-NEXT: s_addc_u32 s9, s9, 0 +; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1] +; GFX9-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 +; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 +; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[8:9] +; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-GISEL-NEXT: s_endpgm + %idx = call i32 @llvm.amdgcn.workgroup.id.x() + call amdgpu_gfx void @callee(i32 %idx) + ret void +} + +declare amdgpu_gfx void @callee(i32) + declare i32 @llvm.amdgcn.workgroup.id.x() declare i32 @llvm.amdgcn.workgroup.id.y() declare i32 @llvm.amdgcn.workgroup.id.z()