Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -1253,8 +1253,8 @@
 
   // Otherwise do coercion through memory. This is stupid, but simple.
   Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment());
-  Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
-  Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy);
+  Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.Int8Ty->getPointerTo(Tmp.getAddressSpace()));
+  Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.Int8Ty->getPointerTo(Src.getAddressSpace()));
   CGF.Builder.CreateMemCpy(Casted, SrcCasted,
       llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
       false);
@@ -1335,8 +1335,8 @@
     // to that information.
     Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
     CGF.Builder.CreateStore(Src, Tmp);
-    Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
-    Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy);
+    Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.Int8Ty->getPointerTo(Tmp.getAddressSpace()));
+    Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.Int8Ty->getPointerTo(Dst.getAddressSpace()));
     CGF.Builder.CreateMemCpy(DstCasted, Casted,
         llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
         false);
Index: test/CodeGenCXX/address-space-cast-coerce.cpp
===================================================================
--- /dev/null
+++ test/CodeGenCXX/address-space-cast-coerce.cpp
@@ -0,0 +1,81 @@
+// RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s
+
+template<typename T, unsigned int n> struct my_vector_base;
+
+template<typename T>
+struct my_vector_base<T, 1> {
+    typedef T Native_vec_ __attribute__((ext_vector_type(1)));
+
+    union {
+        Native_vec_ data;
+        struct {
+            T x;
+        };
+    };
+};
+
+template<typename T, unsigned int rank>
+struct my_vector_type : public my_vector_base<T, rank> {
+    using my_vector_base<T, rank>::data;
+    using typename my_vector_base<T, rank>::Native_vec_;
+
+    template<typename U>
+    __attribute__((cpu)) __attribute__((hc))
+    my_vector_type(U x) noexcept
+    {
+        for (auto i = 0u; i != rank; ++i) data[i] = x;
+    }
+    __attribute__((cpu)) __attribute__((hc))
+    my_vector_type& operator+=(const my_vector_type& x) noexcept
+    {
+        data += x.data;
+        return *this;
+    }
+};
+
+template<typename T, unsigned int n>
+__attribute__((cpu)) __attribute__((hc))
+inline
+my_vector_type<T, n> operator+(
+    const my_vector_type<T, n>& x, const my_vector_type<T, n>& y) noexcept
+{
+    return my_vector_type<T, n>{x} += y;
+}
+
+using char1 = my_vector_type<char, 1>;
+
+int main() {
+
+    char1 f1{1};
+    char1 f2{1};
+
+// CHECK: %[[a:[^ ]+]] = addrspacecast i16 addrspace(5)* %{{[^ ]+}} to i16*
+// CHECK: %[[a:[^ ]+]] = addrspacecast %{{[^ ]+}} addrspace(5)* %{{[^ ]+}} to %{{[^ ]+}}
+
+    char1 f3 = f1 + f2;
+}
+
+/*
+
+Look for this:
+HECK: %4 = addrspacecast i16 addrspace(5)* %tmp to i16*
+HECK: %5 = addrspacecast %struct.my_vector_type* addrspace(5)* %this.addr.i to %struct.my_vector_type**
+
+FAIL: the '<' lines below
+pass: the '>' lines below
+38,39c38,39
+< %5 = bitcast i16* %4 to i8 addrspace(5)*
+< %6 = bitcast <1 x i8>* %coerce.dive2 to i8 addrspace(5)*
+---
+> %5 = addrspacecast i16* %4 to i8 addrspace(5)*
+> %6 = addrspacecast <1 x i8>* %coerce.dive2 to i8 addrspace(5)*
+86,87c86,87
+< %11 = bitcast i16* %4 to i8 addrspace(5)*
+< %12 = bitcast <1 x i8>* %coerce.dive2 to i8 addrspace(5)*
+---
+> %11 = addrspacecast i16* %4 to i8 addrspace(5)*
+> %12 = addrspacecast <1 x i8>* %coerce.dive2 to i8 addrspace(5)*
+
+
+*/