Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -118,10 +118,12 @@ if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x")) WorkItemIDX = true; - if (!F.hasFnAttribute("amdgpu-no-workitem-id-y")) + if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") && + ST.getMaxWorkitemID(F, 1) != 0) WorkItemIDY = true; - if (!F.hasFnAttribute("amdgpu-no-workitem-id-z")) + if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") && + ST.getMaxWorkitemID(F, 2) != 0) WorkItemIDZ = true; if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr")) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -128,7 +128,7 @@ ; FIXME: Should be able to avoid enabling in kernel inputs ; FIXME: Packed tid should avoid the and ; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only: -; CO-V2: enable_vgpr_workitem_id = 2 +; CO-V2: enable_vgpr_workitem_id = 0 ; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 @@ -149,7 +149,7 @@ } ; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only: -; CO-V2: enable_vgpr_workitem_id = 2 +; CO-V2: enable_vgpr_workitem_id = 1 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] Index: llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll +++ llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll @@ -63,7 +63,7 @@ call void @callee() ret void } -; CHECK: .amdhsa_system_vgpr_workitem_id 2 +; CHECK: .amdhsa_system_vgpr_workitem_id 1 define amdgpu_kernel void @known_yz_0(i32 addrspace(1)* %out) !reqd_work_group_size !3 { ; CHECK-LABEL: known_yz_0: @@ -82,7 +82,7 @@ call void @callee() ret void } -; CHECK: .amdhsa_system_vgpr_workitem_id 2 +; CHECK: .amdhsa_system_vgpr_workitem_id 0 define amdgpu_kernel void @known_xz_0(i32 addrspace(1)* %out) !reqd_work_group_size !4 { ; CHECK-LABEL: known_xz_0: @@ -101,7 +101,7 @@ call void @callee() ret void } -; CHECK: .amdhsa_system_vgpr_workitem_id 2 +; CHECK: .amdhsa_system_vgpr_workitem_id 1 define amdgpu_kernel void @known_xyz_0(i32 addrspace(1)* %out) !reqd_work_group_size !5 { @@ -121,7 +121,7 @@ call void @callee() ret void } -; CHECK: .amdhsa_system_vgpr_workitem_id 2 +; CHECK: .amdhsa_system_vgpr_workitem_id 0 attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" } Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll @@ -63,10 +63,9 @@ ret void } -; FIXME: Should be able to avoid enabling in kernel inputs ; FIXME: Packed tid should avoid the and ; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only: -; CO-V2: enable_vgpr_workitem_id = 2 +; CO-V2: enable_vgpr_workitem_id = 0 ; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 @@ -87,7 +86,7 @@ } ; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only: -; CO-V2: enable_vgpr_workitem_id = 2 +; CO-V2: enable_vgpr_workitem_id = 1 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]