Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -1169,7 +1169,7 @@ if (isa(Val->getType())) { // If this is Pointer->Pointer avoid conversion to and from int. if (isa(Ty)) - return CGF.Builder.CreateBitCast(Val, Ty, "coerce.val"); + return CGF.Builder.CreatePointerCast(Val, Ty, "coerce.val"); // Convert the pointer to an integer so we can play with its width. Val = CGF.Builder.CreatePtrToInt(Val, CGF.IntPtrTy, "coerce.val.pi"); Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -7816,6 +7816,27 @@ if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + // Coerce pointer type kernel arguments in default address space to + // device address space for HIP. + QualType PointeeTy; + if (getContext().getLangOpts().HIP) { + if (auto *PT = Ty->getAs()) { + if (PT->getPointeeType().getAddressSpace() == LangAS::Default) { + PointeeTy = PT->getPointeeType(); + } + } else if (auto *RT = Ty->getAs()) { + if (RT->getPointeeType().getAddressSpace() == LangAS::Default) { + PointeeTy = RT->getPointeeType(); + } + } + + if (PointeeTy != QualType()) { + return ABIArgInfo::getDirect( + CGT.ConvertType(PointeeTy) + ->getPointerTo( + getContext().getTargetAddressSpace(LangAS::cuda_device))); + } + } // If we set CanBeFlattened to true, CodeGen will expand the struct to its // individual elements, which confuses the Clover OpenCL backend; therefore we // have to set it to false here. Other args of getDirect() are just defaults. Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -emit-llvm -x hip %s -o - | FileCheck %s +#include "Inputs/cuda.h" +// CHECK: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce) +__global__ void kernel1(int *x) { + x[0]++; +} + +// CHECK: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce) +__global__ void kernel2(int &x) { + x++; +} + +// CHECK: define amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y) +__global__ void kernel3(__attribute__((address_space(2))) int *x, + __attribute__((address_space(1))) int *y) { + y[0] = x[0]; +} + +// CHECK: define void @_Z4funcPi(i32* %x) +__device__ void func(int *x) { + x[0]++; +}