Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -3954,15 +3954,28 @@ } else if (I->hasLValue()) { auto LV = I->getKnownLValue(); auto AS = LV.getAddressSpace(); + if ((!ArgInfo.getIndirectByVal() && (LV.getAlignment() >= - getContext().getTypeAlignInChars(I->Ty))) || - (ArgInfo.getIndirectByVal() && - ((AS != LangAS::Default && AS != LangAS::opencl_private && - AS != CGM.getASTAllocaAddressSpace())))) { + getContext().getTypeAlignInChars(I->Ty)))) { + NeedCopy = true; + } + if (!getLangOpts().OpenCL) { + if ((ArgInfo.getIndirectByVal() && + (AS != LangAS::Default && + AS != CGM.getASTAllocaAddressSpace()))) { + NeedCopy = true; + } + } + // For OpenCL even if RV is located in default or alloca address space + // we don't want to perform address space cast for it. + else if ((ArgInfo.getIndirectByVal() && + Addr.getType()->getAddressSpace() != IRFuncTy-> + getParamType(FirstIRArg)->getPointerAddressSpace())) { NeedCopy = true; } } + if (NeedCopy) { // Create an aligned temporary, and copy to it. Address AI = CreateMemTempWithoutCast( Index: test/CodeGenOpenCL/addr-space-struct-arg.cl =================================================================== --- test/CodeGenOpenCL/addr-space-struct-arg.cl +++ test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -1,6 +1,9 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s -// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s -// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s + +typedef int int2 __attribute__((ext_vector_type(2))); typedef struct { int cells[9]; @@ -130,6 +133,12 @@ FuncOneMember(u); } +// SPIR: call void @llvm.memcpy.p0i8.p1i8.i32 +// SPIR-NOT: addrspacecast +kernel void KernelOneMemberSpir(global struct StructOneMember* u) { + FuncOneMember(*u); +} + // AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember( // AMDGCN: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) // AMDGCN: store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8