Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -70,10 +70,42 @@ let TargetPrefix = "amdgcn" in { +//===----------------------------------------------------------------------===// +// ABI Special Intrinsics +//===----------------------------------------------------------------------===// + defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz; defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named <"__builtin_amdgcn_workgroup_id">; +def int_amdgcn_dispatch_ptr : + GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; + +def int_amdgcn_queue_ptr : + GCCBuiltin<"__builtin_amdgcn_queue_ptr">, + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; + +def int_amdgcn_kernarg_segment_ptr : + GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; + +def int_amdgcn_implicitarg_ptr : + GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">, + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; + +def int_amdgcn_groupstaticsize : + GCCBuiltin<"__builtin_amdgcn_groupstaticsize">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + +def int_amdgcn_dispatch_id : + GCCBuiltin<"__builtin_amdgcn_dispatch_id">, + Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + +//===----------------------------------------------------------------------===// +// Instruction Intrinsics +//===----------------------------------------------------------------------===// + def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; @@ -331,26 +363,6 @@ GCCBuiltin<"__builtin_amdgcn_s_getreg">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>; -def int_amdgcn_groupstaticsize : - GCCBuiltin<"__builtin_amdgcn_groupstaticsize">, - Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; - -def int_amdgcn_dispatch_ptr : - 
GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, - Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; - -def int_amdgcn_queue_ptr : - GCCBuiltin<"__builtin_amdgcn_queue_ptr">, - Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; - -def int_amdgcn_kernarg_segment_ptr : - GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, - Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; - -def int_amdgcn_implicitarg_ptr : - GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">, - Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; - // __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0> def int_amdgcn_interp_p1 : GCCBuiltin<"__builtin_amdgcn_interp_p1">, Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -188,7 +188,8 @@ static const StringRef HSAIntrinsicToAttr[][2] = { { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }, - { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" } + { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }, + { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" } }; // TODO: We should not add the attributes if the known compile time workgroup Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -717,6 +717,12 @@ CCInfo.AllocateReg(InputPtrReg); } + if (Info->hasDispatchID()) { + unsigned DispatchIDReg = Info->addDispatchID(*TRI); + MF.addLiveIn(DispatchIDReg, &AMDGPU::SReg_64RegClass); + CCInfo.AllocateReg(DispatchIDReg); + } + if (Info->hasFlatScratchInit()) { unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI); MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SReg_64RegClass); @@ -1671,6 +1677,10 @@ = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); } + case 
Intrinsic::amdgcn_dispatch_id: { + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_ID); + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT); + } case Intrinsic::amdgcn_rcp: return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1)); case Intrinsic::amdgcn_rsq: Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -92,8 +92,8 @@ bool PrivateSegmentBuffer : 1; bool DispatchPtr : 1; bool QueuePtr : 1; - bool DispatchID : 1; bool KernargSegmentPtr : 1; + bool DispatchID : 1; bool FlatScratchInit : 1; bool GridWorkgroupCountX : 1; bool GridWorkgroupCountY : 1; @@ -143,6 +143,7 @@ unsigned addDispatchPtr(const SIRegisterInfo &TRI); unsigned addQueuePtr(const SIRegisterInfo &TRI); unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); + unsigned addDispatchID(const SIRegisterInfo &TRI); unsigned addFlatScratchInit(const SIRegisterInfo &TRI); // Add system SGPRs. 
@@ -192,14 +193,14 @@ return QueuePtr; } - bool hasDispatchID() const { - return DispatchID; - } - bool hasKernargSegmentPtr() const { return KernargSegmentPtr; } + bool hasDispatchID() const { + return DispatchID; + } + bool hasFlatScratchInit() const { return FlatScratchInit; } Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -68,8 +68,8 @@ PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), - DispatchID(false), KernargSegmentPtr(false), + DispatchID(false), FlatScratchInit(false), GridWorkgroupCountX(false), GridWorkgroupCountY(false), @@ -127,6 +127,9 @@ if (F->hasFnAttribute("amdgpu-queue-ptr")) QueuePtr = true; + + if (F->hasFnAttribute("amdgpu-dispatch-id")) + DispatchID = true; } // We don't need to worry about accessing spills with flat instructions. @@ -174,6 +177,13 @@ return KernargSegmentPtrUserSGPR; } +unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) { + DispatchIDUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + NumUserSGPRs += 2; + return DispatchIDUserSGPR; +} + unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { FlatScratchInitUserSGPR = TRI.getMatchingSuperReg( getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -931,7 +931,8 @@ assert(MFI->hasKernargSegmentPtr()); return MFI->KernargSegmentPtrUserSGPR; case SIRegisterInfo::DISPATCH_ID: - llvm_unreachable("unimplemented"); + assert(MFI->hasDispatchID()); + return MFI->DispatchIDUserSGPR; case SIRegisterInfo::FLAT_SCRATCH_INIT: assert(MFI->hasFlatScratchInit()); return MFI->FlatScratchInitUserSGPR; Index: 
test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i64 @llvm.amdgcn.dispatch.id() #1 + +; GCN-LABEL: {{^}}dispatch_id: +; GCN: .amd_kernel_code_t +; GCN: enable_sgpr_dispatch_id = 1 + +; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6 +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7 +; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @dispatch_id(i64 addrspace(1)* %out) #0 { + %tmp0 = call i64 @llvm.amdgcn.dispatch.id() + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone }