Index: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td +++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td @@ -43,6 +43,12 @@ def int_r600_read_workdim : AMDGPUReadPreloadRegisterIntrinsic; + +// AS 7 is PARAM_I_ADDRESS, used for kernel arguments +def int_r600_implicitarg_ptr : + GCCBuiltin<"__builtin_r600_implicitarg_ptr">, + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], [IntrNoMem]>; + def int_r600_rat_store_typed : // 1st parameter: Data // 2nd parameter: Index Index: llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td @@ -210,23 +210,23 @@ // VTX Read from parameter memory space //===----------------------------------------------------------------------===// -def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, +def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <3, [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, +def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <3, [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, +def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <3, [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0, +def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <3, [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; -def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, +def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <3, [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] >; Index: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -782,6 
+782,11 @@ return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args); } + case Intrinsic::r600_implicitarg_ptr: { + MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS); + uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT); + return DAG.getConstant(ByteOffset, DL, PtrVT); + } case Intrinsic::r600_read_ngroups_x: return LowerImplicitParameter(DAG, VT, DL, 0); case Intrinsic::r600_read_ngroups_y: Index: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td @@ -329,7 +329,8 @@ class LoadParamFrag <PatFrag load_type> : PatFrag < (ops node:$ptr), (load_type node:$ptr), - [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }] + [{ return isConstantLoad(cast<LoadSDNode>(N), 0) || + (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }] >; def load_param : LoadParamFrag<load>; Index: llvm/trunk/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdgcn.work-item-intrinsics.ll @@ -0,0 +1,114 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s + + +; FUNC-LABEL: {{^}}workdim: + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +define void @workdim (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; 
The workgroup.id values are stored in sgprs offset by the number of user +; sgprs. + +; FUNC-LABEL: {{^}}workgroup_id_x: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @workgroup_id_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workgroup.id.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}workgroup_id_y: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +define void @workgroup_id_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workgroup.id.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}workgroup_id_z: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @workgroup_id_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workgroup.id.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 132{{$}} + +; FUNC-LABEL: {{^}}workitem_id_x: +; GCN-NOHSA: buffer_store_dword v0 +define void @workitem_id_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workitem.id.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 2180{{$}} + +; FUNC-LABEL: {{^}}workitem_id_y: + +; GCN-NOHSA: buffer_store_dword v1 +define void @workitem_id_y(i32 addrspace(1)* 
%out) { +entry: + %0 = call i32 @llvm.amdgcn.workitem.id.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 4228{{$}} + +; FUNC-LABEL: {{^}}workitem_id_z: +; GCN-NOHSA: buffer_store_dword v2 +define void @workitem_id_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.amdgcn.workitem.id.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.amdgcn.workgroup.id.x() #0 +declare i32 @llvm.amdgcn.workgroup.id.y() #0 +declare i32 @llvm.amdgcn.workgroup.id.z() #0 + +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare i32 @llvm.amdgcn.workitem.id.y() #0 +declare i32 @llvm.amdgcn.workitem.id.z() #0 + +declare i32 @llvm.amdgcn.read.workdim() #0 Index: llvm/trunk/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll +++ llvm/trunk/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll @@ -0,0 +1,268 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; Legacy intrinsics that just read implicit parameters + +; FUNC-LABEL: {{^}}workdim_legacy: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? 
*}}[[VAL]], KC0[2].Z +define void @workdim_legacy (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.AMDGPU.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ngroups_x: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[0].X +define void @ngroups_x (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.ngroups.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ngroups_y: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y +define void @ngroups_y (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.ngroups.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ngroups_z: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z +define void @ngroups_z (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.ngroups.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_size_x: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? 
*}}[[VAL]], KC0[0].W +define void @global_size_x (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.size.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_size_y: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[1].X +define void @global_size_y (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.size.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_size_z: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y +define void @global_size_z (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.global.size.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_x: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? 
*}}[[VAL]], KC0[1].Z +define void @local_size_x (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_y: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[1].W +define void @local_size_y (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_z: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[2].X +define void @local_size_z (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; Legacy use of r600 intrinsics by GCN + +; The tgid values are stored in sgprs offset by the number of user +; sgprs. 
+ +; FUNC-LABEL: {{^}}tgid_x_legacy: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @tgid_x_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tgid_y_legacy: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +define void @tgid_y_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tgid_z_legacy: +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @tgid_z_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 132{{$}} + +; FUNC-LABEL: {{^}}tidig_x_legacy: +; GCN-NOHSA: buffer_store_dword v0 +define void @tidig_x_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 2180{{$}} + +; FUNC-LABEL: {{^}}tidig_y_legacy: + +; GCN-NOHSA: buffer_store_dword v1 +define void @tidig_y_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret 
void +} + +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 4228{{$}} + +; FUNC-LABEL: {{^}}tidig_z_legacy: +; GCN-NOHSA: buffer_store_dword v2 +define void @tidig_z_legacy(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.ngroups.x() #0 +declare i32 @llvm.r600.read.ngroups.y() #0 +declare i32 @llvm.r600.read.ngroups.z() #0 + +declare i32 @llvm.r600.read.global.size.x() #0 +declare i32 @llvm.r600.read.global.size.y() #0 +declare i32 @llvm.r600.read.global.size.z() #0 + +declare i32 @llvm.r600.read.local.size.x() #0 +declare i32 @llvm.r600.read.local.size.y() #0 +declare i32 @llvm.r600.read.local.size.z() #0 + +declare i32 @llvm.r600.read.tgid.x() #0 +declare i32 @llvm.r600.read.tgid.y() #0 +declare i32 @llvm.r600.read.tgid.z() #0 + +declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.r600.read.tidig.y() #0 +declare i32 @llvm.r600.read.tidig.z() #0 + +declare i32 @llvm.AMDGPU.read.workdim() #0 + +attributes #0 = { readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll +++ llvm/trunk/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll @@ -0,0 +1,107 @@ +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}tgid_x: +; EG: MEM_RAT_CACHELESS STORE_RAW T1.X +define void @tgid_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tgid_y: +; EG: MEM_RAT_CACHELESS STORE_RAW T1.Y +define void @tgid_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tgid_z: +; EG: MEM_RAT_CACHELESS STORE_RAW T1.Z +define void 
@tgid_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tgid.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tidig_x: +; EG: MEM_RAT_CACHELESS STORE_RAW T0.X +define void @tidig_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tidig_y: +; EG: MEM_RAT_CACHELESS STORE_RAW T0.Y +define void @tidig_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}tidig_z: +; EG: MEM_RAT_CACHELESS STORE_RAW T0.Z +define void @tidig_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.tidig.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_implicit: +; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56 +; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 56 +define void @test_implicit(i32 addrspace(1)* %out) #1 { + %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr() + %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)* + %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 4 + %value = load i32, i32 addrspace(7)* %gep + store i32 %value, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_implicit_dyn: +; 36 prepended implicit bytes + 8(out pointer + in) = 44 +; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44 +define void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 { + %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr() + %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)* + %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 %in + %value = load i32, i32 addrspace(7)* %gep + store i32 %value, i32 addrspace(1)* %out + ret void +} + + + +; DEPRECATED but R600 only + +; FUNC-LABEL: {{^}}workdim: +; EG: 
MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? *}}[[VAL]], KC0[2].Z +define void @workdim (i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.workdim() #0 + +declare i8 addrspace(7)* @llvm.r600.implicitarg.ptr() #0 + +declare i32 @llvm.r600.read.tgid.x() #0 +declare i32 @llvm.r600.read.tgid.y() #0 +declare i32 @llvm.r600.read.tgid.z() #0 + +declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.r600.read.tidig.y() #0 +declare i32 @llvm.r600.read.tidig.z() #0 + +attributes #0 = { readnone } Index: llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll +++ llvm/trunk/test/CodeGen/AMDGPU/work-item-intrinsics.ll @@ -1,201 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - - -; FUNC-LABEL: {{^}}ngroups_x: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].X - -; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0 -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] - -define void @ngroups_x (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.ngroups.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}ngroups_y: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? 
*}}[[VAL]], KC0[0].Y - -; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 -; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] -define void @ngroups_y (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.ngroups.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}ngroups_z: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z - -; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 -; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] -define void @ngroups_z (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.ngroups.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}global_size_x: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[0].W - -; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 -; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] -define void @global_size_x (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.global.size.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}global_size_y: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? *}}[[VAL]], KC0[1].X - -; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 -; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] -define void @global_size_y (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.global.size.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}global_size_z: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV {{\*? 
*}}[[VAL]], KC0[1].Y - -; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 -; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NOHSA: buffer_store_dword [[VVAL]] -define void @global_size_z (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.global.size.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; The tgid values are stored in sgprs offset by the number of user -; sgprs. - -; FUNC-LABEL: {{^}}tgid_x: -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} -; GCN-NOHSA: buffer_store_dword [[VVAL]] - -; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 -; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 -; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 -; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 -; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define void @tgid_x(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tgid.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}tgid_y: -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 -; GCN-NOHSA: buffer_store_dword [[VVAL]] - -; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 -define void @tgid_y(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tgid.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}tgid_z: -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}} -; GCN-NOHSA: buffer_store_dword [[VVAL]] - -; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 -; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 -; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 -; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 -; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define void @tgid_z(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tgid.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; GCN-NOHSA: .section .AMDGPU.config -; GCN-NOHSA: .long 47180 -; GCN-NOHSA-NEXT: .long 132{{$}} - -; FUNC-LABEL: {{^}}tidig_x: -; GCN-NOHSA: buffer_store_dword v0 -define void @tidig_x(i32 addrspace(1)* %out) { -entry: - %0 = 
call i32 @llvm.r600.read.tidig.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; GCN-NOHSA: .section .AMDGPU.config -; GCN-NOHSA: .long 47180 -; GCN-NOHSA-NEXT: .long 2180{{$}} - -; FUNC-LABEL: {{^}}tidig_y: - -; GCN-NOHSA: buffer_store_dword v1 -define void @tidig_y(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tidig.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; GCN-NOHSA: .section .AMDGPU.config -; GCN-NOHSA: .long 47180 -; GCN-NOHSA-NEXT: .long 4228{{$}} - -; FUNC-LABEL: {{^}}tidig_z: -; GCN-NOHSA: buffer_store_dword v2 -define void @tidig_z(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tidig.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -declare i32 @llvm.r600.read.ngroups.x() #0 -declare i32 @llvm.r600.read.ngroups.y() #0 -declare i32 @llvm.r600.read.ngroups.z() #0 - -declare i32 @llvm.r600.read.global.size.x() #0 -declare i32 @llvm.r600.read.global.size.y() #0 -declare i32 @llvm.r600.read.global.size.z() #0 - -declare i32 @llvm.r600.read.tgid.x() #0 -declare i32 @llvm.r600.read.tgid.y() #0 -declare i32 @llvm.r600.read.tgid.z() #0 - -declare i32 @llvm.r600.read.tidig.x() #0 -declare i32 @llvm.r600.read.tidig.y() #0 -declare i32 @llvm.r600.read.tidig.z() #0 - -declare i32 @llvm.AMDGPU.read.workdim() #0 - -attributes #0 = { readnone }