Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -17,25 +17,21 @@ let TargetPrefix = "r600" in { -class R600ReadPreloadRegisterIntrinsic - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, - GCCBuiltin; - -multiclass R600ReadPreloadRegisterIntrinsic_xyz { - def _x : R600ReadPreloadRegisterIntrinsic; - def _y : R600ReadPreloadRegisterIntrinsic; - def _z : R600ReadPreloadRegisterIntrinsic; +multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz { + def _x : AMDGPUReadPreloadRegisterIntrinsic; + def _y : AMDGPUReadPreloadRegisterIntrinsic; + def _z : AMDGPUReadPreloadRegisterIntrinsic; } -defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz < +defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_global_size">; -defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz < +defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_local_size">; -defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz < +defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_ngroups">; -defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz < +defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_tgid">; -defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < +defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_tidig">; def int_r600_rat_store_typed : @@ -64,6 +60,11 @@ let TargetPrefix = "amdgcn" in { +defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz < + "__builtin_amdgcn_workitem_id">; +defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz < + "__builtin_amdgcn_workgroup_id">; + def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; 
Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -89,6 +89,12 @@ static const StringRef IntrinsicToAttr[][2] = { // .x omitted + { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" }, + { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" }, + + { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" }, + { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" }, + { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" }, { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" }, Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1380,21 +1380,27 @@ // Really only 2 bits. return lowerImplicitZextParam(DAG, Op, MVT::i8, getImplicitParameterOffset(MFI, GRID_DIM)); + case Intrinsic::amdgcn_workgroup_id_x: case Intrinsic::r600_read_tgid_x: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT); + case Intrinsic::amdgcn_workgroup_id_y: case Intrinsic::r600_read_tgid_y: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT); + case Intrinsic::amdgcn_workgroup_id_z: case Intrinsic::r600_read_tgid_z: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT); + case Intrinsic::amdgcn_workitem_id_x: case Intrinsic::r600_read_tidig_x: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X), VT); + case Intrinsic::amdgcn_workitem_id_y: case Intrinsic::r600_read_tidig_y: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y), 
VT); + case Intrinsic::amdgcn_workitem_id_z: case Intrinsic::r600_read_tidig_z: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z), VT); Index: test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll @@ -0,0 +1,107 @@ +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=CI-HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=VI-HSA %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=SI-MESA %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=VI-MESA %s + +declare i32 @llvm.amdgcn.workgroup.id.x() #0 +declare i32 @llvm.amdgcn.workgroup.id.y() #0 +declare i32 @llvm.amdgcn.workgroup.id.z() #0 + +; ALL-LABEL: {{^}}test_workgroup_id_x: + +; HSA: .amd_kernel_code_t +; HSA: compute_pgm_rsrc2_user_sgpr = 6 +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 0 +; HSA: compute_pgm_rsrc2_tgid_z_en = 0 +; HSA: compute_pgm_rsrc2_tg_size_en = 0 +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; HSA: enable_sgpr_grid_workgroup_count_x = 0 +; HSA: enable_sgpr_grid_workgroup_count_y = 0 +; HSA: enable_sgpr_grid_workgroup_count_z = 0 +; HSA: .end_amd_kernel_code_t + +; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s2{{$}} +; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}} + +; ALL-NOT: [[VCOPY]] +; ALL: {{buffer|flat}}_store_dword [[VCOPY]] + +; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; MESA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; ALL: 
COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workgroup.id.x() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +; ALL-LABEL: {{^}}test_workgroup_id_y: +; HSA: compute_pgm_rsrc2_user_sgpr = 6 +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 1 +; HSA: compute_pgm_rsrc2_tgid_z_en = 0 +; HSA: compute_pgm_rsrc2_tg_size_en = 0 +; HSA: enable_sgpr_grid_workgroup_count_x = 0 +; HSA: enable_sgpr_grid_workgroup_count_y = 0 +; HSA: enable_sgpr_grid_workgroup_count_z = 0 + +; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} +; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} + +; ALL-NOT: [[VCOPY]] +; ALL: {{buffer|flat}}_store_dword [[VCOPY]] + +; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; MESA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 +; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workgroup.id.y() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +; ALL-LABEL: {{^}}test_workgroup_id_z: +; HSA: compute_pgm_rsrc2_user_sgpr = 6 +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 1 +; HSA: compute_pgm_rsrc2_tgid_z_en = 1 +; HSA: compute_pgm_rsrc2_tg_size_en = 0 +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 0 +; HSA: enable_sgpr_queue_ptr = 0 +; HSA: enable_sgpr_kernarg_segment_ptr = 1 +; HSA: enable_sgpr_dispatch_id = 0 +; HSA: enable_sgpr_flat_scratch_init = 0 +; HSA: enable_sgpr_private_segment_size = 0 +; HSA: enable_sgpr_grid_workgroup_count_x = 0 +; HSA: enable_sgpr_grid_workgroup_count_y = 0 +; HSA: enable_sgpr_grid_workgroup_count_z = 0 + +; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s4{{$}} +; HSA: v_mov_b32_e32 
[[VCOPY:v[0-9]+]], s8{{$}} + +; ALL-NOT: [[VCOPY]] +; ALL: {{buffer|flat}}_store_dword [[VCOPY]] + +; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; MESA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; ALL: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 +; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 +; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @test_workgroup_id_z(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workgroup.id.z() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll @@ -0,0 +1,56 @@ +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=CI-HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA -check-prefix=VI-HSA %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=SI-MESA %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=VI-MESA %s + +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare i32 @llvm.amdgcn.workitem.id.y() #0 +declare i32 @llvm.amdgcn.workitem.id.z() #0 + +; MESA: .section .AMDGPU.config +; MESA: .long 47180 +; MESA-NEXT: .long 132{{$}} + +; ALL-LABEL: {{^}}test_workitem_id_x: +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 + +; ALL-NOT: v0 +; ALL: {{buffer|flat}}_store_dword v0 +define void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workitem.id.x() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +; MESA: .section .AMDGPU.config +; MESA: .long 47180 +; MESA-NEXT: 
.long 2180{{$}} + +; ALL-LABEL: {{^}}test_workitem_id_y: +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1 + +; ALL-NOT: v1 +; ALL: {{buffer|flat}}_store_dword v1 +define void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workitem.id.y() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +; MESA: .section .AMDGPU.config +; MESA: .long 47180 +; MESA-NEXT: .long 4228{{$}} + +; ALL-LABEL: {{^}}test_workitem_id_z: +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2 + +; ALL-NOT: v2 +; ALL: {{buffer|flat}}_store_dword v2 +define void @test_workitem_id_z(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workitem.id.z() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/AMDGPU/work-item-intrinsics.ll =================================================================== --- test/CodeGen/AMDGPU/work-item-intrinsics.ll +++ test/CodeGen/AMDGPU/work-item-intrinsics.ll @@ -1,7 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=HSA -check-prefix=CI-HSA -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=HSA -check-prefix=VI-HSA -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s @@ -9,22 +7,6 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] ; EG: MOV {{\*? 
*}}[[VAL]], KC0[0].X -; HSA: .amd_kernel_code_t - -; HSA: enable_sgpr_private_segment_buffer = 1 -; HSA: enable_sgpr_dispatch_ptr = 0 -; HSA: enable_sgpr_queue_ptr = 0 -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: enable_sgpr_dispatch_id = 0 -; HSA: enable_sgpr_flat_scratch_init = 0 -; HSA: enable_sgpr_private_segment_size = 0 -; HSA: enable_sgpr_grid_workgroup_count_x = 0 -; HSA: enable_sgpr_grid_workgroup_count_y = 0 -; HSA: enable_sgpr_grid_workgroup_count_z = 0 - -; HSA: .end_amd_kernel_code_t - - ; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; GCN-NOHSA: buffer_store_dword [[VVAL]] @@ -115,24 +97,9 @@ ; sgprs. ; FUNC-LABEL: {{^}}tgid_x: -; HSA: .amd_kernel_code_t -; HSA: compute_pgm_rsrc2_user_sgpr = 6 -; HSA: compute_pgm_rsrc2_tgid_x_en = 1 -; HSA: compute_pgm_rsrc2_tgid_y_en = 0 -; HSA: compute_pgm_rsrc2_tgid_z_en = 0 -; HSA: compute_pgm_rsrc2_tg_size_en = 0 -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 -; HSA: enable_sgpr_grid_workgroup_count_x = 0 -; HSA: enable_sgpr_grid_workgroup_count_y = 0 -; HSA: enable_sgpr_grid_workgroup_count_z = 0 -; HSA: .end_amd_kernel_code_t - ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} -; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6{{$}} ; GCN-NOHSA: buffer_store_dword [[VVAL]] -; HSA: flat_store_dword [[VVAL]] -; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 @@ -146,25 +113,14 @@ } ; FUNC-LABEL: {{^}}tgid_y: -; HSA: compute_pgm_rsrc2_user_sgpr = 6 -; HSA: compute_pgm_rsrc2_tgid_x_en = 1 -; HSA: compute_pgm_rsrc2_tgid_y_en = 1 -; HSA: compute_pgm_rsrc2_tgid_z_en = 0 -; HSA: compute_pgm_rsrc2_tg_size_en = 0 -; HSA: enable_sgpr_grid_workgroup_count_x = 0 -; HSA: enable_sgpr_grid_workgroup_count_y = 0 -; HSA: enable_sgpr_grid_workgroup_count_z = 0 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 -; GCN-HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7 ; GCN-NOHSA: 
buffer_store_dword [[VVAL]] -; HSA: flat_store_dword [[VVAL]] -; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 ; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 ; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 define void @tgid_y(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.y() #0 @@ -173,29 +129,9 @@ } ; FUNC-LABEL: {{^}}tgid_z: -; HSA: compute_pgm_rsrc2_user_sgpr = 6 -; HSA: compute_pgm_rsrc2_tgid_x_en = 1 -; HSA: compute_pgm_rsrc2_tgid_y_en = 1 -; HSA: compute_pgm_rsrc2_tgid_z_en = 1 -; HSA: compute_pgm_rsrc2_tg_size_en = 0 -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 -; HSA: enable_sgpr_private_segment_buffer = 1 -; HSA: enable_sgpr_dispatch_ptr = 0 -; HSA: enable_sgpr_queue_ptr = 0 -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: enable_sgpr_dispatch_id = 0 -; HSA: enable_sgpr_flat_scratch_init = 0 -; HSA: enable_sgpr_private_segment_size = 0 -; HSA: enable_sgpr_grid_workgroup_count_x = 0 -; HSA: enable_sgpr_grid_workgroup_count_y = 0 -; HSA: enable_sgpr_grid_workgroup_count_z = 0 - ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4{{$}} -; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s8{{$}} ; GCN-NOHSA: buffer_store_dword [[VVAL]] -; HSA: flat_store_dword [[VVAL]] -; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 ; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 @@ -213,9 +149,7 @@ ; GCN-NOHSA-NEXT: .long 132{{$}} ; FUNC-LABEL: {{^}}tidig_x: -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 ; GCN-NOHSA: buffer_store_dword v0 -; HSA: flat_store_dword v0 define void @tidig_x(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.x() #0 @@ -229,9 +163,7 @@ ; FUNC-LABEL: {{^}}tidig_y: -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1 ; GCN-NOHSA: buffer_store_dword v1 -; HSA: flat_store_dword v1 define void @tidig_y(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.y() #0 
@@ -244,9 +176,7 @@ ; GCN-NOHSA-NEXT: .long 4228{{$}} ; FUNC-LABEL: {{^}}tidig_z: -; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2 ; GCN-NOHSA: buffer_store_dword v2 -; HSA: flat_store_dword v2 define void @tidig_z(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.z() #0