Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -151,10 +151,15 @@ } bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F); - if (isAmdHsaOrMesa) { - if (!ST.enableFlatScratch()) - PrivateSegmentBuffer = true; + if (isAmdHsaOrMesa && !ST.enableFlatScratch()) + PrivateSegmentBuffer = true; + else if (ST.isMesaGfxShader(F)) + ImplicitBufferPtr = true; + + if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr")) + KernargSegmentPtr = true; + if (!AMDGPU::isGraphics(CC)) { if (UseFixedABI) { DispatchPtr = true; QueuePtr = true; @@ -171,13 +176,8 @@ if (F.hasFnAttribute("amdgpu-dispatch-id")) DispatchID = true; } - } else if (ST.isMesaGfxShader(F)) { - ImplicitBufferPtr = true; } - if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr")) - KernargSegmentPtr = true; - // TODO: This could be refined a lot. The attribute is a poor way of // detecting calls or stack objects that may require it before argument // lowering. Index: llvm/test/CodeGen/AMDGPU/trap-abis.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/trap-abis.ll +++ llvm/test/CodeGen/AMDGPU/trap-abis.ll @@ -18,7 +18,7 @@ define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) { ; NOHSA-TRAP-GFX900-V2-LABEL: trap: ; NOHSA-TRAP-GFX900-V2: ; %bb.0: -; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 ; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 ; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) @@ -28,7 +28,7 @@ ; ; NOHSA-TRAP-GFX900-V3-LABEL: trap: ; NOHSA-TRAP-GFX900-V3: ; %bb.0: -; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 ; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 ; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) @@ -38,7 +38,7 @@ ; ; NOHSA-TRAP-GFX900-V4-LABEL: trap: ; NOHSA-TRAP-GFX900-V4: ; %bb.0: -; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 ; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 ; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) @@ -357,7 +357,7 @@ define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr { ; NOHSA-TRAP-GFX900-V2-LABEL: non_entry_trap: ; NOHSA-TRAP-GFX900-V2: ; %bb.0: ; %entry -; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 ; NOHSA-TRAP-GFX900-V2-NEXT: s_waitcnt lgkmcnt(0) ; NOHSA-TRAP-GFX900-V2-NEXT: global_load_dword v1, v0, s[0:1] glc @@ -375,7 +375,7 @@ ; ; NOHSA-TRAP-GFX900-V3-LABEL: non_entry_trap: ; NOHSA-TRAP-GFX900-V3: ; %bb.0: ; %entry -; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 ; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0) ; NOHSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc @@ -393,7 +393,7 @@ ; ; NOHSA-TRAP-GFX900-V4-LABEL: non_entry_trap: ; NOHSA-TRAP-GFX900-V4: ; %bb.0: ; %entry -; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 ; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0) ; NOHSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc @@ -805,7 +805,7 @@ define amdgpu_kernel void @debugtrap(i32 addrspace(1)* nocapture readonly %arg0) { ; NOHSA-TRAP-GFX900-V2-LABEL: debugtrap: ; NOHSA-TRAP-GFX900-V2: ; %bb.0: -; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V2-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v0, 0 ; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v1, 1 ; NOHSA-TRAP-GFX900-V2-NEXT: v_mov_b32_e32 v2, 2 @@ -818,7 +818,7 @@ ; ; NOHSA-TRAP-GFX900-V3-LABEL: debugtrap: ; NOHSA-TRAP-GFX900-V3: ; %bb.0: -; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0 ; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1 ; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2 @@ -831,7 +831,7 @@ ; ; NOHSA-TRAP-GFX900-V4-LABEL: debugtrap: ; NOHSA-TRAP-GFX900-V4: ; %bb.0: -; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 ; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0 ; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1 ; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2