Index: include/clang/AST/Type.h
===================================================================
--- include/clang/AST/Type.h
+++ include/clang/AST/Type.h
@@ -333,13 +333,16 @@
 
   bool hasAddressSpace() const { return Mask & AddressSpaceMask; }
   unsigned getAddressSpace() const { return Mask >> AddressSpaceShift; }
+  bool hasTargetSpecificAddressSpace() const {
+    return getAddressSpace() >= LangAS::Count;
+  }
   /// Get the address space attribute value to be printed by diagnostics.
   unsigned getAddressSpaceAttributePrintValue() const {
     auto Addr = getAddressSpace();
     // This function is not supposed to be used with language specific
     // address spaces. If that happens, the diagnostic message should consider
     // printing the QualType instead of the address space value.
-    assert(Addr == 0 || Addr >= LangAS::Count);
+    assert(Addr == 0 || hasTargetSpecificAddressSpace());
     if (Addr)
       return Addr - LangAS::Count;
     // TODO: The diagnostic messages where Addr may be 0 should be fixed
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -19,6 +19,7 @@
 #include "CGOpenCLRuntime.h"
 #include "CGOpenMPRuntime.h"
 #include "CodeGenModule.h"
+#include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/Decl.h"
@@ -1102,7 +1103,8 @@
     address = Address(vla, alignment);
   }
 
-  setAddrOfLocalVar(&D, address);
+  setAddrOfLocalVar(&D, getTargetHooks().adjustAddrSpaceForAutoVar(
+      address, &D, *this));
   emission.Addr = address;
 
   // Emit debug info for local var declaration.
Index: lib/CodeGen/CodeGenTypes.cpp
===================================================================
--- lib/CodeGen/CodeGenTypes.cpp
+++ lib/CodeGen/CodeGenTypes.cpp
@@ -92,7 +92,6 @@
                                 (unsigned)Context.getTypeSize(T));
 }
 
-
 /// isRecordLayoutComplete - Return true if the specified type is already
 /// completely laid out.
 bool CodeGenTypes::isRecordLayoutComplete(const Type *Ty) const {
Index: lib/CodeGen/TargetInfo.h
===================================================================
--- lib/CodeGen/TargetInfo.h
+++ lib/CodeGen/TargetInfo.h
@@ -129,6 +129,12 @@
     return Ty;
   }
 
+  /// Adjust address space of an automatic variable.
+  virtual Address adjustAddrSpaceForAutoVar(Address A, const VarDecl *VD,
+      CodeGen::CodeGenFunction &CGF) const {
+    return A;
+  }
+
   /// Adds constraints and types for result registers.
   virtual void addReturnRegisterOutputs(
       CodeGen::CodeGenFunction &CGF, CodeGen::LValue ReturnValue,
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -7286,6 +7286,9 @@
 
   llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
       llvm::PointerType *T, QualType QT) const override;
+
+  Address adjustAddrSpaceForAutoVar(Address A, const VarDecl *VD,
+      CodeGen::CodeGenFunction &CGF) const override;
 };
 }
 
@@ -7355,6 +7358,21 @@
   return llvm::CallingConv::AMDGPU_KERNEL;
 }
 
+Address AMDGPUTargetCodeGenInfo::adjustAddrSpaceForAutoVar(Address A,
+    const VarDecl *VD, CodeGen::CodeGenFunction &CGF) const {
+  // Alloca always returns a pointer in alloca address space, which may
+  // be different from the type defined by the language. For example,
+  // in C++ the auto variables are in the default address space. Therefore
+  // cast alloca to the expected address space when necessary.
+  auto T = VD->getType();
+  assert(isa<llvm::AllocaInst>(A.getPointer()) &&
+         (T.getAddressSpace() == LangAS::Default ||
+          T.getQualifiers().hasTargetSpecificAddressSpace()));
+  auto Addr = performAddrSpaceCast(CGF, A.getPointer(), QualType(),
+      CGF.getContext().getPointerType(T));
+  return Address(Addr, A.getAlignment());
+}
+
 // Currently LLVM assumes null pointers always have value 0,
 // which results in incorrectly transformed IR. Therefore, instead of
 // emitting null pointers in private and local address spaces, a null
Index: test/CodeGen/address-space.c
===================================================================
--- test/CodeGen/address-space.c
+++ test/CodeGen/address-space.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,GIZ %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,GIZ,X86 %s
 // RUN: %clang_cc1 -triple amdgcn -emit-llvm < %s | FileCheck -check-prefixes=CHECK,PIZ %s
-// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHeCK,GIZ %s
+// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,GIZ,AMD %s
 
 // CHECK: @foo = common addrspace(1) global
 int foo __attribute__((address_space(1)));
@@ -40,8 +40,12 @@
 } MyStruct;
 
 // CHECK-LABEL: define void @test4(
-// CHECK: call void @llvm.memcpy.p0i8.p2i8
-// CHECK: call void @llvm.memcpy.p2i8.p0i8
+// X86: call void @llvm.memcpy.p0i8.p2i8
+// X86: call void @llvm.memcpy.p2i8.p0i8
+// AMD: call void @llvm.memcpy.p5i8.p2i8
+// AMD: call void @llvm.memcpy.p2i8.p0i8
+// PIZ: call void @llvm.memcpy.p0i8.p2i8
+// PIZ: call void @llvm.memcpy.p2i8.p4i8
 void test4(MyStruct __attribute__((address_space(2))) *pPtr) {
   MyStruct s = pPtr[0];
   pPtr[0] = s;
Index: test/CodeGenCXX/amdgcn-automatic-variable.cpp
===================================================================
--- /dev/null
+++ test/CodeGenCXX/amdgcn-automatic-variable.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -O0 -triple amdgcn---amdgiz -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: define void @_Z5func1Pi(i32* %x)
+void func1(int *x) {
+  // CHECK: %[[x_addr:.*]] = alloca i32*{{.*}}addrspace(5)
+  // CHECK: store i32* %x, i32* addrspace(5)* %[[x_addr]]
+  // CHECK: %[[r0:.*]] = load i32*, i32* addrspace(5)* %[[x_addr]]
+  // CHECK: store i32 1, i32* %[[r0]]
+  *x = 1;
+}
+
+// CHECK-LABEL: define void @_Z5func2v()
+void func2(void) {
+  // CHECK: %lv1 = alloca i32, align 4, addrspace(5)
+  // CHECK: %lv2 = alloca i32, align 4, addrspace(5)
+  // CHECK: %la = alloca [100 x i32], align 4, addrspace(5)
+  // CHECK: %lp1 = alloca i32*, align 4, addrspace(5)
+  // CHECK: %lp2 = alloca i32*, align 4, addrspace(5)
+  // CHECK: %lvc = alloca i32, align 4, addrspace(5)
+
+  // CHECK: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %lv1 to i32*
+  // CHECK: store i32 1, i32* %[[r0]]
+  int lv1;
+  lv1 = 1;
+  // CHECK: store i32 2, i32 addrspace(5)* %lv2
+  int lv2 = 2;
+
+  // CHECK: %[[r2:.*]] = addrspacecast [100 x i32] addrspace(5)* %la to [100 x i32]*
+  // CHECK: %[[arrayidx:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[r2]], i64 0, i64 0
+  // CHECK: store i32 3, i32* %[[arrayidx]], align 4
+  int la[100];
+  la[0] = 3;
+
+  // CHECK: store i32* %[[r0]], i32* addrspace(5)* %lp1, align 4
+  int *lp1 = &lv1;
+
+  // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[r2]], i32 0, i32 0
+  // CHECK: store i32* %[[arraydecay]], i32* addrspace(5)* %lp2, align 4
+  int *lp2 = la;
+
+  // CHECK: call void @_Z5func1Pi(i32* %[[r0]])
+  func1(&lv1);
+
+  // CHECK: store i32 4, i32 addrspace(5)* %lvc
+  // CHECK: store i32 4, i32* %[[r0]]
+  const int lvc = 4;
+  lv1 = lvc;
+}
Index: test/CodeGenOpenCL/amdgcn-automatic-variable.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/amdgcn-automatic-variable.cl
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgiz -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgiz -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s
+
+// CL12-LABEL: define void @func1(i32 addrspace(5)* %x)
+// CL20-LABEL: define void @func1(i32* %x)
+void func1(int *x) {
+  // CL12: %[[x_addr:.*]] = alloca i32 addrspace(5)*{{.*}}addrspace(5)
+  // CL12: store i32 addrspace(5)* %x, i32 addrspace(5)* addrspace(5)* %[[x_addr]]
+  // CL12: %[[r0:.*]] = load i32 addrspace(5)*, i32 addrspace(5)* addrspace(5)* %[[x_addr]]
+  // CL12: store i32 1, i32 addrspace(5)* %[[r0]]
+  // CL20: %[[x_addr:.*]] = alloca i32*{{.*}}addrspace(5)
+  // CL20: store i32* %x, i32* addrspace(5)* %[[x_addr]]
+  // CL20: %[[r0:.*]] = load i32*, i32* addrspace(5)* %[[x_addr]]
+  // CL20: store i32 1, i32* %[[r0]]
+  *x = 1;
+}
+
+// CHECK-LABEL: define void @func2()
+void func2(void) {
+  // CHECK: %lv1 = alloca i32, align 4, addrspace(5)
+  // CHECK: %lv2 = alloca i32, align 4, addrspace(5)
+  // CHECK: %la = alloca [100 x i32], align 4, addrspace(5)
+  // CL12: %lp1 = alloca i32 addrspace(5)*, align 4, addrspace(5)
+  // CL12: %lp2 = alloca i32 addrspace(5)*, align 4, addrspace(5)
+  // CL20: %lp1 = alloca i32*, align 4, addrspace(5)
+  // CL20: %lp2 = alloca i32*, align 4, addrspace(5)
+  // CHECK: %lvc = alloca i32, align 4, addrspace(5)
+
+  // CHECK: store i32 1, i32 addrspace(5)* %lv1
+  int lv1;
+  lv1 = 1;
+  // CHECK: store i32 2, i32 addrspace(5)* %lv2
+  int lv2 = 2;
+
+  // CHECK: %[[arrayidx:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i64 0, i64 0
+  // CHECK: store i32 3, i32 addrspace(5)* %[[arrayidx]], align 4
+  int la[100];
+  la[0] = 3;
+
+  // CL12: store i32 addrspace(5)* %lv1, i32 addrspace(5)* addrspace(5)* %lp1, align 4
+  // CL20: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %lv1 to i32*
+  // CL20: store i32* %[[r0]], i32* addrspace(5)* %lp1, align 4
+  int *lp1 = &lv1;
+
+  // CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %la, i32 0, i32 0
+  // CL12: store i32 addrspace(5)* %[[arraydecay]], i32 addrspace(5)* addrspace(5)* %lp2, align 4
+  // CL20: %[[r1:.*]] = addrspacecast i32 addrspace(5)* %[[arraydecay]] to i32*
+  // CL20: store i32* %[[r1]], i32* addrspace(5)* %lp2, align 4
+  int *lp2 = la;
+
+  // CL12: call void @func1(i32 addrspace(5)* %lv1)
+  // CL20: %[[r2:.*]] = addrspacecast i32 addrspace(5)* %lv1 to i32*
+  // CL20: call void @func1(i32* %[[r2]])
+  func1(&lv1);
+
+  // CHECK: store i32 4, i32 addrspace(5)* %lvc
+  // CHECK: store i32 4, i32 addrspace(5)* %lv1
+  const int lvc = 4;
+  lv1 = lvc;
+}