Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -17,25 +17,21 @@ let TargetPrefix = "r600" in { -class R600ReadPreloadRegisterIntrinsic<string name> - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, - GCCBuiltin<name>; - -multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> { - def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>; - def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>; - def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>; +multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz<string prefix> { + def _x : AMDGPUReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>; + def _y : AMDGPUReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>; + def _z : AMDGPUReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>; } -defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz < +defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_global_size">; -defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz < +defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_local_size">; -defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz < +defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_ngroups">; -defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz < +defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_tgid">; -defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < +defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_tidig">; def int_r600_rat_store_typed : @@ -64,6 +60,11 @@ let TargetPrefix = "amdgcn" in { +defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz < + "__builtin_amdgcn_workitem_id">; +defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz < + "__builtin_amdgcn_workgroup_id">; + def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; 
Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -89,6 +89,12 @@ static const StringRef IntrinsicToAttr[][2] = { // .x omitted + { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" }, + { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" }, + + { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" }, + { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" }, + { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" }, { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" }, Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1380,21 +1380,27 @@ // Really only 2 bits. return lowerImplicitZextParam(DAG, Op, MVT::i8, getImplicitParameterOffset(MFI, GRID_DIM)); + case Intrinsic::amdgcn_workgroup_id_x: case Intrinsic::r600_read_tgid_x: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT); + case Intrinsic::amdgcn_workgroup_id_y: case Intrinsic::r600_read_tgid_y: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT); + case Intrinsic::amdgcn_workgroup_id_z: case Intrinsic::r600_read_tgid_z: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT); + case Intrinsic::amdgcn_workitem_id_x: case Intrinsic::r600_read_tidig_x: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X), VT); + case Intrinsic::amdgcn_workitem_id_y: case Intrinsic::r600_read_tidig_y: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y), 
VT); + case Intrinsic::amdgcn_workitem_id_z: case Intrinsic::r600_read_tidig_z: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z), VT); Index: test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll @@ -0,0 +1,58 @@ +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA %s + +declare i32 @llvm.amdgcn.workgroup.id.x() #0 +declare i32 @llvm.amdgcn.workgroup.id.y() #0 +declare i32 @llvm.amdgcn.workgroup.id.z() #0 + +; ALL-LABEL: {{^}}test_workgroup_id_x: + +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 0 +; HSA: compute_pgm_rsrc2_tgid_z_en = 0 + +; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s2{{$}} +; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}} + +; ALL-NOT: [[VCOPY]] +; ALL: {{buffer|flat}}_store_dword [[VCOPY]] +define void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workgroup.id.x() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +; ALL-LABEL: {{^}}test_workgroup_id_y: +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 1 +; HSA: compute_pgm_rsrc2_tgid_z_en = 0 + +; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s3{{$}} +; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} + +; ALL-NOT: [[VCOPY]] +; ALL: {{buffer|flat}}_store_dword [[VCOPY]] +define void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workgroup.id.y() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +; ALL-LABEL: {{^}}test_workgroup_id_z: +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 1 +; HSA: compute_pgm_rsrc2_tgid_z_en = 1 + +; MESA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s4{{$}} +; 
HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s8{{$}} + +; ALL-NOT: [[VCOPY]] +; ALL: {{buffer|flat}}_store_dword [[VCOPY]] +define void @test_workgroup_id_z(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workgroup.id.z() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll @@ -0,0 +1,42 @@ +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA %s + +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare i32 @llvm.amdgcn.workitem.id.y() #0 +declare i32 @llvm.amdgcn.workitem.id.z() #0 + +; ALL-LABEL: {{^}}test_workitem_id_x: +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 + +; ALL-NOT: v0 +; ALL: {{buffer|flat}}_store_dword v0 +define void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workitem.id.x() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +; ALL-LABEL: {{^}}test_workitem_id_y: +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1 + +; ALL-NOT: v1 +; ALL: {{buffer|flat}}_store_dword v1 +define void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workitem.id.y() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +; ALL-LABEL: {{^}}test_workitem_id_z: +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2 + +; ALL-NOT: v2 +; ALL: {{buffer|flat}}_store_dword v2 +define void @test_workitem_id_z(i32 addrspace(1)* %out) #1 { + %id = call i32 @llvm.amdgcn.workitem.id.z() + store i32 %id, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind }