Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -13313,6 +13313,21 @@ case AMDGPU::BI__builtin_amdgcn_cosf: case AMDGPU::BI__builtin_amdgcn_cosh: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); + case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: { + auto *F = CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr); + auto *Call = Builder.CreateCall(F); + Call->addAttribute( + AttributeList::ReturnIndex, + Attribute::getWithDereferenceableBytes(Call->getContext(), 64)); + Call->addAttribute( + AttributeList::ReturnIndex, + Attribute::getWithAlignment(Call->getContext(), Align(4))); + QualType BuiltinRetType = E->getType(); + auto *RetTy = cast(ConvertType(BuiltinRetType)); + if (RetTy == Call->getType()) + return Call; + return Builder.CreateAddrSpaceCast(Call, RetTy); + } case AMDGPU::BI__builtin_amdgcn_log_clampf: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); case AMDGPU::BI__builtin_amdgcn_ldexp: Index: clang/test/CodeGenCUDA/builtins-amdgcn.cu =================================================================== --- clang/test/CodeGenCUDA/builtins-amdgcn.cu +++ clang/test/CodeGenCUDA/builtins-amdgcn.cu @@ -2,8 +2,8 @@ #include "Inputs/cuda.h" // CHECK-LABEL: @_Z16use_dispatch_ptrPi( -// CHECK: %[[PTR:.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -// CHECK: %{{.*}} = addrspacecast i8 addrspace(4)* %[[PTR]] to i8 addrspace(4)** +// CHECK: %[[PTR:.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +// CHECK: %{{.*}} = addrspacecast i8 addrspace(4)* %[[PTR]] to i8* __global__ void use_dispatch_ptr(int* out) { const int* dispatch_ptr = (const int*)__builtin_amdgcn_dispatch_ptr(); *out = *dispatch_ptr; Index: clang/test/CodeGenOpenCL/builtins-amdgcn.cl =================================================================== --- clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -461,7 +461,7 @@ } // CHECK-LABEL: @test_dispatch_ptr -// CHECK: call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +// CHECK: call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() void test_dispatch_ptr(__constant unsigned char ** out) { *out = __builtin_amdgcn_dispatch_ptr(); Index: llvm/include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -141,7 +141,6 @@ <"__builtin_amdgcn_workgroup_id">; def int_amdgcn_dispatch_ptr : - GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, Intrinsic<[LLVMQualPointerType], [], [IntrNoMem, IntrSpeculatable]>;