Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -8336,33 +8336,11 @@ // Coerce HIP pointer arguments from generic pointers to global ones. llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS, unsigned ToAS) const { - // Structure types. - if (auto STy = dyn_cast(Ty)) { - SmallVector EltTys; - bool Changed = false; - for (auto T : STy->elements()) { - auto NT = coerceKernelArgumentType(T, FromAS, ToAS); - EltTys.push_back(NT); - Changed |= (NT != T); - } - // Skip if there is no change in element types. - if (!Changed) - return STy; - if (STy->hasName()) - return llvm::StructType::create( - EltTys, (STy->getName() + ".coerce").str(), STy->isPacked()); - return llvm::StructType::get(getVMContext(), EltTys, STy->isPacked()); - } - // Array types. - if (auto ATy = dyn_cast(Ty)) { - auto T = ATy->getElementType(); - auto NT = coerceKernelArgumentType(T, FromAS, ToAS); - // Skip if there is no change in that element type. - if (NT == T) - return ATy; - return llvm::ArrayType::get(NT, ATy->getNumElements()); - } - // Single value types. + // TODO: This should promote generic pointers in aggregates. This used to be + // done, but was removed due to the way memory reinterpretation resulted in the + // optimizer introducing ptrtoint/inttoptr. This blocks the address space + // inference, thereby defeating the point of doing the replacement. Single + // value types. 
if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == FromAS) return llvm::PointerType::get( cast(Ty)->getElementType(), ToAS); Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -3,10 +3,12 @@ #include "Inputs/cuda.h" -// Coerced struct from `struct S` without all generic pointers lowered into -// global ones. -// CHECK: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* } -// CHECK: %struct.T.coerce = type { [2 x float addrspace(1)*] } +// TODO: These should be coerced structs from `struct S` without all generic +// pointers lowered into global ones when the optimizer doesn't introduce +// ptrtoint/inttoptr. + +// CHECK: %struct.S = type { i32*, float* } +// CHECK: %struct.T = type { [2 x float*] } // On the host-side compilation, generic pointer won't be coerced. // HOST-NOT: %struct.S.coerce @@ -42,7 +44,7 @@ }; // `by-val` struct will be coerced into a similar struct with all generic // pointers lowerd into global ones. -// CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce) +// CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S %s.coerce) // HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1) __global__ void kernel4(struct S s) { s.x[0]++; @@ -61,7 +63,7 @@ float *x[2]; }; // `by-val` array is also coerced. 
-// CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce) +// CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T %t.coerce) // HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1) __global__ void kernel6(struct T t) { t.x[0][0] += 1.f; @@ -74,3 +76,12 @@ __global__ void kernel7(int *__restrict x) { x[0]++; } + +struct SinglePtrEltStruct { + int *x; +}; + +// CHECK: define amdgpu_kernel void @_Z14single_ptr_elt18SinglePtrEltStruct(i32 addrspace(1)* %s.coerce) +__global__ void single_ptr_elt(struct SinglePtrEltStruct s) { + s.x[0]++; +}