diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -13292,6 +13292,21 @@ case AMDGPU::BI__builtin_amdgcn_cosf: case AMDGPU::BI__builtin_amdgcn_cosh: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); + case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: { + auto *F = CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr); + auto *Call = Builder.CreateCall(F); + Call->addAttribute( + AttributeList::ReturnIndex, + Attribute::getWithDereferenceableBytes(Call->getContext(), 64)); + Call->addAttribute( + AttributeList::ReturnIndex, + Attribute::getWithAlignment(Call->getContext(), Align(4))); + QualType BuiltinRetType = E->getType(); + auto *RetTy = cast(ConvertType(BuiltinRetType)); + if (RetTy == Call->getType()) + return Call; + return Builder.CreateAddrSpaceCast(Call, RetTy); + } case AMDGPU::BI__builtin_amdgcn_log_clampf: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); case AMDGPU::BI__builtin_amdgcn_ldexp: diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu --- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu +++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu @@ -2,8 +2,8 @@ #include "Inputs/cuda.h" // CHECK-LABEL: @_Z16use_dispatch_ptrPi( -// CHECK: %[[PTR:.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -// CHECK: %{{.*}} = addrspacecast i8 addrspace(4)* %[[PTR]] to i8 addrspace(4)** +// CHECK: %[[PTR:.*]] = call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +// CHECK: %{{.*}} = addrspacecast i8 addrspace(4)* %[[PTR]] to i8* __global__ void use_dispatch_ptr(int* out) { const int* dispatch_ptr = (const int*)__builtin_amdgcn_dispatch_ptr(); *out = *dispatch_ptr; diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -461,7 +461,7 @@ } // CHECK-LABEL: @test_dispatch_ptr -// CHECK: call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +// CHECK: call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() void test_dispatch_ptr(__constant unsigned char ** out) { *out = __builtin_amdgcn_dispatch_ptr(); diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -141,7 +141,6 @@ <"__builtin_amdgcn_workgroup_id">; def int_amdgcn_dispatch_ptr : - GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, Intrinsic<[LLVMQualPointerType], [], [IntrNoMem, IntrSpeculatable]>;