Add the llvm.amdgcn.dispatch.ptr intrinsic.

  * IntrinsicsAMDGPU.td: declare int_amdgcn_dispatch_ptr, an IntrNoMem
    intrinsic returning i8 addrspace(2)*, exposed as
    __builtin_amdgcn_dispatch_ptr.
  * AMDGPUAnnotateKernelFeatures.cpp: map the intrinsic name to the
    "amdgpu-dispatch-ptr" attribute, like the r600.read.global.size.* entries.
  * SIISelLowering.cpp: lower the intrinsic to a live-in SReg_64 register
    obtained from getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR).
  * llvm.amdgcn.dispatch.ptr.ll: check that enable_sgpr_dispatch_ptr is set
    and that a load through the returned pointer reads from s[0:1].

Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -127,4 +127,8 @@
   GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
   Intrinsic<[], [], []>;
 
+def int_amdgcn_dispatch_ptr :
+  GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
+  Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
 }
Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -105,7 +105,8 @@
 
     { "llvm.r600.read.global.size.x", "amdgpu-dispatch-ptr" },
     { "llvm.r600.read.global.size.y", "amdgpu-dispatch-ptr" },
-    { "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" }
+    { "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" },
+    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }
   };
 
   // TODO: Intrinsics that require queue ptr.
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1083,6 +1083,10 @@
   // TODO: Should this propagate fast-math-flags?
 
   switch (IntrinsicID) {
+  case Intrinsic::amdgcn_dispatch_ptr:
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
+      TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
+
   case Intrinsic::r600_read_ngroups_x:
     return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
                           SI::KernelInputOffsets::NGROUPS_X,
Index: test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}test:
+; GCN: enable_sgpr_dispatch_ptr = 1
+; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0
+define void @test(i32 addrspace(1)* %out) {
+  %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+  %header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
+  %value = load i32, i32 addrspace(2)* %header_ptr
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
+declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+
+attributes #0 = { readnone }