Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -33,6 +33,8 @@ "__builtin_r600_read_tgid">; defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz < "__builtin_r600_read_tidig">; +defm int_r600_read_global_offset : AMDGPUReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_global_offset">; def int_r600_rat_store_typed : // 1st parameter: Data Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -208,7 +208,9 @@ enum ImplicitParameter { GRID_DIM, - GRID_OFFSET + GRID_OFFSET_X, + GRID_OFFSET_Y, + GRID_OFFSET_Z, }; /// \brief Helper function that returns the byte offset of the given Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2639,8 +2639,12 @@ switch (Param) { case GRID_DIM: return ArgOffset; - case GRID_OFFSET: + case GRID_OFFSET_X: return ArgOffset + 4; + case GRID_OFFSET_Y: + return ArgOffset + 8; + case GRID_OFFSET_Z: + return ArgOffset + 12; } llvm_unreachable("unexpected implicit parameter type"); } Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -791,6 +791,18 @@ uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM); return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4); } + case Intrinsic::r600_read_global_offset_x: { + uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_OFFSET_X); + return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4); + } + case Intrinsic::r600_read_global_offset_y: { + uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_OFFSET_Y); + return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4); + } + case Intrinsic::r600_read_global_offset_z: { + uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_OFFSET_Z); + return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4); + } case Intrinsic::r600_read_tgid_x: return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Index: test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll @@ -0,0 +1,406 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s + + +; FUNC-LABEL: {{^}}workdim: + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +define void @workdim (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; The workgroup.id values are stored in sgprs offset by the number of user +; sgprs. + +; FUNC-LABEL: {{^}}workgroup_id_x: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @workgroup_id_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workgroup.id.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}workgroup_id_y: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +define void @workgroup_id_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workgroup.id.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}workgroup_id_z: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @workgroup_id_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workgroup.id.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 132{{$}} + +; FUNC-LABEL: {{^}}workitem_id_x: +; GCN-NOHSA: buffer_store_dword v0 +define void @workitem_id_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workitem.id.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 2180{{$}} + +; FUNC-LABEL: {{^}}workitem_id_y: + +; GCN-NOHSA: buffer_store_dword v1 +define void @workitem_id_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workitem.id.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 4228{{$}} + +; FUNC-LABEL: {{^}}workitem_id_z: +; GCN-NOHSA: buffer_store_dword v2 +define void @workitem_id_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workitem.id.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.amdgcn.ngroups.x() #0 +declare i32 @llvm.amdgcn.ngroups.y() #0 +declare i32 @llvm.amdgcn.ngroups.z() #0 + +declare i32 @llvm.amdgcn.global.offset.x() #0 +declare i32 @llvm.amdgcn.global.offset.y() #0 +declare i32 @llvm.amdgcn.global.offset.z() #0 + +declare i32 @llvm.amdgcn.global.size.x() #0 +declare i32 @llvm.amdgcn.global.size.y() #0 +declare i32 @llvm.amdgcn.global.size.z() #0 + +declare i32 @llvm.amdgcn.local.size.x() #0 +declare i32 @llvm.amdgcn.local.size.y() #0 +declare i32 @llvm.amdgcn.local.size.z() #0 + +declare i32 @llvm.amdgcn.workgroup.id.x() #0 +declare i32 @llvm.amdgcn.workgroup.id.y() #0 +declare i32 @llvm.amdgcn.workgroup.id.z() #0 + +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare i32 @llvm.amdgcn.workitem.id.y() #0 +declare i32 @llvm.amdgcn.workitem.id.z() #0 + +declare i32 @llvm.amdgcn.read.workdim() #0 + + +;; Legacy (r600/AMDGPU) intrinsics + +; FUNC-LABEL: {{^}}workdim_legacy: + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +define void @workdim_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.AMDGPU.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ngroups_x_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @ngroups_x_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.ngroups.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ngroups_y_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @ngroups_y_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.ngroups.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ngroups_z_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @ngroups_z_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.ngroups.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_offset_x_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x30 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @global_offset_x_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.offset.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_offset_y_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xd +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x34 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @global_offset_y_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.offset.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_offset_z_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xe +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x38 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @global_offset_z_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.offset.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_size_x_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @global_size_x_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.size.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_size_y_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @global_size_y_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.size.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_size_z_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @global_size_z_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.size.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_x_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @local_size_x_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_y_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @local_size_y_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_z_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @local_size_z_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; The tgid values are stored in sgprs offset by the number of user +; sgprs. + +; FUNC-LABEL: {{^}}tgid_x_legacy: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @tgid_x_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tgid_y_legacy: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +define void @tgid_y_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tgid_z_legacy: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @tgid_z_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 132{{$}} + +; FUNC-LABEL: {{^}}tidig_x_legacy: +; GCN-NOHSA: buffer_store_dword v0 +define void @tidig_x_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 2180{{$}} + +; FUNC-LABEL: {{^}}tidig_y_legacy: + +; GCN-NOHSA: buffer_store_dword v1 +define void @tidig_y_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 4228{{$}} + +; FUNC-LABEL: {{^}}tidig_z_legacy: +; GCN-NOHSA: buffer_store_dword v2 +define void @tidig_z_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.ngroups.x() #0 +declare i32 @llvm.r600.read.ngroups.y() #0 +declare i32 @llvm.r600.read.ngroups.z() #0 + +declare i32 @llvm.r600.read.global.offset.x() #0 +declare i32 @llvm.r600.read.global.offset.y() #0 +declare i32 @llvm.r600.read.global.offset.z() #0 + +declare i32 @llvm.r600.read.global.size.x() #0 +declare i32 @llvm.r600.read.global.size.y() #0 +declare i32 @llvm.r600.read.global.size.z() #0 + +declare i32 @llvm.r600.read.local.size.x() #0 +declare i32 @llvm.r600.read.local.size.y() #0 +declare i32 @llvm.r600.read.local.size.z() #0 + +declare i32 @llvm.r600.read.tgid.x() #0 +declare i32 @llvm.r600.read.tgid.y() #0 +declare i32 @llvm.r600.read.tgid.z() #0 + +declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.r600.read.tidig.y() #0 +declare i32 @llvm.r600.read.tidig.z() #0 + +declare i32 @llvm.AMDGPU.read.workdim() #0 + +attributes #0 = { readnone } Index: test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll @@ -0,0 +1,226 @@ +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}workdim: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[2].Z +define void @workdim (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}workdim_legacy: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[2].Z +define void @workdim_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.AMDGPU.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ngroups_x: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[0].X +define void @ngroups_x (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.ngroups.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ngroups_y: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y +define void @ngroups_y (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.ngroups.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ngroups_z: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z +define void @ngroups_z (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.ngroups.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_offset_x: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[2].W +define void @global_offset_x (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.offset.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_offset_y: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[3].X +define void @global_offset_y (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.offset.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_offset_z: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[3].Y +define void @global_offset_z (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.offset.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_size_x: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[0].W +define void @global_size_x (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.size.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_size_y: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[1].X +define void @global_size_y (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.size.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_size_z: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y +define void @global_size_z (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.size.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_x: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z +define void @local_size_x (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_y: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[1].W +define void @local_size_y (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_z: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[2].X +define void @local_size_z (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; The tgid values are stored in registers + +; FUNC-LABEL: {{^}}tgid_x: +; EG: MEM_RAT_CACHELESS STORE_RAW T1.X +define void @tgid_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tgid_y: +; EG: MEM_RAT_CACHELESS STORE_RAW T1.Y +define void @tgid_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tgid_z: +; EG: MEM_RAT_CACHELESS STORE_RAW T1.Z +define void @tgid_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tidig_x: +; EG: MEM_RAT_CACHELESS STORE_RAW T0.X +define void @tidig_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tidig_y: +; EG: MEM_RAT_CACHELESS STORE_RAW T0.Y +define void @tidig_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tidig_z: +; EG: MEM_RAT_CACHELESS STORE_RAW T0.Z +define void @tidig_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.ngroups.x() #0 +declare i32 @llvm.r600.read.ngroups.y() #0 +declare i32 @llvm.r600.read.ngroups.z() #0 + +declare i32 @llvm.r600.read.global.offset.x() #0 +declare i32 @llvm.r600.read.global.offset.y() #0 +declare i32 @llvm.r600.read.global.offset.z() #0 + +declare i32 @llvm.r600.read.global.size.x() #0 +declare i32 @llvm.r600.read.global.size.y() #0 +declare i32 @llvm.r600.read.global.size.z() #0 + +declare i32 @llvm.r600.read.local.size.x() #0 +declare i32 @llvm.r600.read.local.size.y() #0 +declare i32 @llvm.r600.read.local.size.z() #0 + +declare i32 @llvm.r600.read.tgid.x() #0 +declare i32 @llvm.r600.read.tgid.y() #0 +declare i32 @llvm.r600.read.tgid.z() #0 + +declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.r600.read.tidig.y() #0 +declare i32 @llvm.r600.read.tidig.z() #0 + +declare i32 @llvm.r600.read.workdim() #0 +declare i32 @llvm.AMDGPU.read.workdim() #0 + +attributes #0 = { readnone } Index: test/CodeGen/AMDGPU/work-item-intrinsics.ll =================================================================== --- test/CodeGen/AMDGPU/work-item-intrinsics.ll +++ /dev/null @@ -1,201 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - - -; FUNC-LABEL: {{^}}ngroups_x: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].X - -; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0 -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] - -define void @ngroups_x (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.ngroups.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}ngroups_y: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y - -; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 -; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] -define void @ngroups_y (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.ngroups.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}ngroups_z: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z - -; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 -; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] -define void @ngroups_z (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.ngroups.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}global_size_x: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].W - -; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 -; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] -define void @global_size_x (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.global.size.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}global_size_y: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[1].X - -; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 -; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] -define void @global_size_y (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.global.size.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}global_size_z: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y - -; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 -; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] -define void @global_size_z (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.global.size.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; The tgid values are stored in sgprs offset by the number of user -; sgprs. - -; FUNC-LABEL: {{^}}tgid_x: -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} -; GCN-NOHSA: buffer_store_dword [[VVAL]] - -; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 -; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 -; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 -; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 -; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define void @tgid_x(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tgid.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}tgid_y: -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 -; GCN-NOHSA: buffer_store_dword [[VVAL]] - -; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 -define void @tgid_y(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tgid.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}tgid_z: -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}} -; GCN-NOHSA: buffer_store_dword [[VVAL]] - -; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 -; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 -; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 -; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 -; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define void @tgid_z(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tgid.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; GCN-NOHSA: .section .AMDGPU.config -; GCN-NOHSA: .long 47180 -; GCN-NOHSA-NEXT: .long 132{{$}} - -; FUNC-LABEL: {{^}}tidig_x: -; GCN-NOHSA: buffer_store_dword v0 -define void @tidig_x(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tidig.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; GCN-NOHSA: .section .AMDGPU.config -; GCN-NOHSA: .long 47180 -; GCN-NOHSA-NEXT: .long 2180{{$}} - -; FUNC-LABEL: {{^}}tidig_y: - -; GCN-NOHSA: buffer_store_dword v1 -define void @tidig_y(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tidig.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; GCN-NOHSA: .section .AMDGPU.config -; GCN-NOHSA: .long 47180 -; GCN-NOHSA-NEXT: .long 4228{{$}} - -; FUNC-LABEL: {{^}}tidig_z: -; GCN-NOHSA: buffer_store_dword v2 -define void @tidig_z(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tidig.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -declare i32 @llvm.r600.read.ngroups.x() #0 -declare i32 @llvm.r600.read.ngroups.y() #0 -declare i32 @llvm.r600.read.ngroups.z() #0 - -declare i32 @llvm.r600.read.global.size.x() #0 -declare i32 @llvm.r600.read.global.size.y() #0 -declare i32 @llvm.r600.read.global.size.z() #0 - -declare i32 @llvm.r600.read.tgid.x() #0 -declare i32 @llvm.r600.read.tgid.y() #0 -declare i32 @llvm.r600.read.tgid.z() #0 - -declare i32 @llvm.r600.read.tidig.x() #0 -declare i32 @llvm.r600.read.tidig.y() #0 -declare i32 @llvm.r600.read.tidig.z() #0 - -declare i32 @llvm.AMDGPU.read.workdim() #0 - -attributes #0 = { readnone }