Index: cfe/trunk/lib/CodeGen/CGCall.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGCall.cpp +++ cfe/trunk/lib/CodeGen/CGCall.cpp @@ -3813,7 +3813,8 @@ assert(NumIRArgs == 1); if (RV.isScalar() || RV.isComplex()) { // Make a temporary alloca to pass the argument. - Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign()); + Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "indirect-arg-temp", false); IRCallArgs[FirstIRArg] = Addr.getPointer(); LValue argLV = MakeAddrLValue(Addr, I->Ty); @@ -3842,7 +3843,8 @@ < Align.getQuantity()) || (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) { // Create an aligned temporary, and copy to it. - Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign()); + Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(), + "byval-temp", false); IRCallArgs[FirstIRArg] = AI.getPointer(); EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified()); } else { Index: cfe/trunk/lib/CodeGen/CGDecl.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGDecl.cpp +++ cfe/trunk/lib/CodeGen/CGDecl.cpp @@ -954,6 +954,7 @@ CodeGenFunction::AutoVarEmission CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { QualType Ty = D.getType(); + assert(Ty.getAddressSpace() == LangAS::Default); AutoVarEmission emission(D); @@ -1046,8 +1047,7 @@ // Create the alloca. Note that we set the name separately from // building the instruction so that it's there even in no-asserts // builds. - address = CreateTempAlloca(allocaTy, allocaAlignment); - address.getPointer()->setName(D.getName()); + address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName()); // Don't emit lifetime markers for MSVC catch parameters. The lifetime of // the catch parameter starts in the catchpad instruction, and we can't @@ -1107,27 +1107,9 @@ llvm::Type *llvmTy = ConvertTypeForMem(elementType); // Allocate memory for the array. - llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla"); - vla->setAlignment(alignment.getQuantity()); - - address = Address(vla, alignment); + address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount); } - // Alloca always returns a pointer in alloca address space, which may - // be different from the type defined by the language. For example, - // in C++ the auto variables are in the default address space. Therefore - // cast alloca to the expected address space when necessary. - auto T = D.getType(); - assert(T.getAddressSpace() == LangAS::Default); - if (getASTAllocaAddressSpace() != LangAS::Default) { - auto *Addr = getTargetHooks().performAddrSpaceCast( - *this, address.getPointer(), getASTAllocaAddressSpace(), - T.getAddressSpace(), - address.getElementType()->getPointerTo( - getContext().getTargetAddressSpace(T.getAddressSpace())), - /*non-null*/ true); - address = Address(Addr, address.getAlignment()); - } setAddrOfLocalVar(&D, address); emission.Addr = address; Index: cfe/trunk/lib/CodeGen/CGExpr.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGExpr.cpp +++ cfe/trunk/lib/CodeGen/CGExpr.cpp @@ -61,18 +61,36 @@ /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, - const Twine &Name) { - auto Alloca = CreateTempAlloca(Ty, Name); + const Twine &Name, + llvm::Value *ArraySize, + bool CastToDefaultAddrSpace) { + auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); Alloca->setAlignment(Align.getQuantity()); - return Address(Alloca, Align); -} - -/// CreateTempAlloca - This creates a alloca and inserts it into the entry -/// block. + llvm::Value *V = Alloca; + // Alloca always returns a pointer in alloca address space, which may + // be different from the type defined by the language. For example, + // in C++ the auto variables are in the default address space. Therefore + // cast alloca to the default address space when necessary. + if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) { + auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); + V = getTargetHooks().performAddrSpaceCast( + *this, V, getASTAllocaAddressSpace(), LangAS::Default, + Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); + } + + return Address(V, Align); +} + +/// CreateTempAlloca - This creates an alloca and inserts it into the entry +/// block if \p ArraySize is nullptr, otherwise inserts it at the current +/// insertion point of the builder. llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, - const Twine &Name) { + const Twine &Name, + llvm::Value *ArraySize) { + if (ArraySize) + return Builder.CreateAlloca(Ty, ArraySize, Name); return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(), - nullptr, Name, AllocaInsertPt); + ArraySize, Name, AllocaInsertPt); } /// CreateDefaultAlignTempAlloca - This creates an alloca with the @@ -99,14 +117,18 @@ return CreateTempAlloca(ConvertType(Ty), Align, Name); } -Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name) { +Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name, + bool CastToDefaultAddrSpace) { // FIXME: Should we prefer the preferred type alignment here? - return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name); + return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, + CastToDefaultAddrSpace); } Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align, - const Twine &Name) { - return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name); + const Twine &Name, + bool CastToDefaultAddrSpace) { + return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, nullptr, + CastToDefaultAddrSpace); } /// EvaluateExprAsBool - Perform the usual unary conversions on the specified Index: cfe/trunk/lib/CodeGen/CodeGenFunction.h =================================================================== --- cfe/trunk/lib/CodeGen/CodeGenFunction.h +++ cfe/trunk/lib/CodeGen/CodeGenFunction.h @@ -1916,13 +1916,36 @@ LValueBaseInfo *BaseInfo = nullptr); LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy); - /// CreateTempAlloca - This creates a alloca and inserts it into the entry - /// block. The caller is responsible for setting an appropriate alignment on + /// CreateTempAlloca - This creates an alloca and inserts it into the entry + /// block if \p ArraySize is nullptr, otherwise inserts it at the current + /// insertion point of the builder. The caller is responsible for setting an + /// appropriate alignment on /// the alloca. - llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, - const Twine &Name = "tmp"); + /// + /// \p ArraySize is the number of array elements to be allocated if it + /// is not nullptr. + /// + /// LangAS::Default is the address space of pointers to local variables and + /// temporaries, as exposed in the source language. In certain + /// configurations, this is not the same as the alloca address space, and a + /// cast is needed to lift the pointer from the alloca AS into + /// LangAS::Default. This can happen when the target uses a restricted + /// address space for the stack but the source language requires + /// LangAS::Default to be a generic address space. The latter condition is + /// common for most programming languages; OpenCL is an exception in that + /// LangAS::Default is the private address space, which naturally maps + /// to the stack. + /// + /// Because the address of a temporary is often exposed to the program in + /// various ways, this function will perform the cast by default. The cast + /// may be avoided by passing false as \p CastToDefaultAddrSpace; this is + /// more efficient if the caller knows that the address will not be exposed. + llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp", + llvm::Value *ArraySize = nullptr); Address CreateTempAlloca(llvm::Type *Ty, CharUnits align, - const Twine &Name = "tmp"); + const Twine &Name = "tmp", + llvm::Value *ArraySize = nullptr, + bool CastToDefaultAddrSpace = true); /// CreateDefaultAlignedTempAlloca - This creates an alloca with the /// default ABI alignment of the given LLVM type. @@ -1957,9 +1980,12 @@ Address CreateIRTemp(QualType T, const Twine &Name = "tmp"); /// CreateMemTemp - Create a temporary memory object of the given type, with - /// appropriate alignment. - Address CreateMemTemp(QualType T, const Twine &Name = "tmp"); - Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp"); + /// appropriate alignment. Cast it to the default address space if + /// \p CastToDefaultAddrSpace is true. + Address CreateMemTemp(QualType T, const Twine &Name = "tmp", + bool CastToDefaultAddrSpace = true); + Address CreateMemTemp(QualType T, CharUnits Align, const Twine &Name = "tmp", + bool CastToDefaultAddrSpace = true); /// CreateAggTemp - Create a temporary memory object for the given /// aggregate type. Index: cfe/trunk/test/CodeGen/address-space.c =================================================================== --- cfe/trunk/test/CodeGen/address-space.c +++ cfe/trunk/test/CodeGen/address-space.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,GIZ %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck -check-prefixes=CHECK,X86,GIZ %s // RUN: %clang_cc1 -triple amdgcn -emit-llvm < %s | FileCheck -check-prefixes=CHECK,PIZ %s -// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,GIZ %s +// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck -check-prefixes=CHECK,AMDGIZ,GIZ %s // CHECK: @foo = common addrspace(1) global int foo __attribute__((address_space(1))); Index: cfe/trunk/test/CodeGen/default-address-space.c =================================================================== --- cfe/trunk/test/CodeGen/default-address-space.c +++ cfe/trunk/test/CodeGen/default-address-space.c @@ -22,9 +22,10 @@ int test1() { return foo; } // COM-LABEL: define i32 @test2(i32 %i) -// PIZ: load i32, i32 addrspace(4)* +// COM: %[[addr:.*]] = getelementptr +// PIZ: load i32, i32 addrspace(4)* %[[addr]] // PIZ-NEXT: ret i32 -// CHECK: load i32, i32* +// CHECK: load i32, i32* %[[addr]] // CHECK-NEXT: ret i32 int test2(int i) { return ban[i]; } @@ -42,15 +43,17 @@ } // PIZ-LABEL: define void @test4(i32 addrspace(4)* %a) -// PIZ: %[[a_addr:.*]] = alloca i32 addrspace(4)* -// PIZ: store i32 addrspace(4)* %a, i32 addrspace(4)** %[[a_addr]] -// PIZ: %[[r0:.*]] = load i32 addrspace(4)*, i32 addrspace(4)** %[[a_addr]] +// PIZ: %[[alloca:.*]] = alloca i32 addrspace(4)* +// PIZ: %[[a_addr:.*]] = addrspacecast{{.*}} %[[alloca]] to i32 addrspace(4)* addrspace(4)* +// PIZ: store i32 addrspace(4)* %a, i32 addrspace(4)* addrspace(4)* %[[a_addr]] +// PIZ: %[[r0:.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %[[a_addr]] // PIZ: %[[arrayidx:.*]] = getelementptr inbounds i32, i32 addrspace(4)* %[[r0]] // PIZ: store i32 0, i32 addrspace(4)* %[[arrayidx]] // CHECK-LABEL: define void @test4(i32* %a) -// CHECK: %[[a_addr:.*]] = alloca i32*, align 4, addrspace(5) -// CHECK: store i32* %a, i32* addrspace(5)* %[[a_addr]] -// CHECK: %[[r0:.*]] = load i32*, i32* addrspace(5)* %[[a_addr]] +// CHECK: %[[alloca:.*]] = alloca i32*, align 4, addrspace(5) +// CHECK: %[[a_addr:.*]] = addrspacecast{{.*}} %[[alloca]] to i32** +// CHECK: store i32* %a, i32** %[[a_addr]] +// CHECK: %[[r0:.*]] = load i32*, i32** %[[a_addr]] // CHECK: %[[arrayidx:.*]] = getelementptr inbounds i32, i32* %[[r0]] // CHECK: store i32 0, i32* %[[arrayidx]] void test4(int *a) { Index: cfe/trunk/test/CodeGen/x86_64-arguments.c =================================================================== --- cfe/trunk/test/CodeGen/x86_64-arguments.c +++ cfe/trunk/test/CodeGen/x86_64-arguments.c @@ -460,7 +460,7 @@ test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); } // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) -// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}}) +// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}}) typedef float __m512 __attribute__ ((__vector_size__ (64))); typedef struct { @@ -529,7 +529,7 @@ } // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) -// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}}) +// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}}) void f64_helper(__m512, ...); __m512 x64; void f64() { Index: cfe/trunk/test/CodeGenCXX/amdgcn-automatic-variable.cpp =================================================================== --- cfe/trunk/test/CodeGenCXX/amdgcn-automatic-variable.cpp +++ cfe/trunk/test/CodeGenCXX/amdgcn-automatic-variable.cpp @@ -3,9 +3,10 @@ // CHECK-LABEL: define void @_Z5func1Pi(i32* %x) void func1(int *x) { // CHECK: %[[x_addr:.*]] = alloca i32*{{.*}}addrspace(5) - // CHECK: store i32* %x, i32* addrspace(5)* %[[x_addr]] - // CHECK: %[[r0:.*]] = load i32*, i32* addrspace(5)* %[[x_addr]] - // CHECK: store i32 1, i32* %[[r0]] + // CHECK: %[[r0:.*]] = addrspacecast i32* addrspace(5)* %[[x_addr]] to i32** + // CHECK: store i32* %x, i32** %[[r0]] + // CHECK: %[[r1:.*]] = load i32*, i32** %[[r0]] + // CHECK: store i32 1, i32* %[[r1]] *x = 1; } @@ -70,3 +71,12 @@ // CHECK: call void @_ZN1AD1Ev(%class.A* %[[r0]]) A a; } + +// CHECK-LABEL: define void @_Z5func4i +void func4(int x) { + // CHECK: %[[x_addr:.*]] = alloca i32, align 4, addrspace(5) + // CHECK: %[[r0:.*]] = addrspacecast i32 addrspace(5)* %[[x_addr]] to i32* + // CHECK: store i32 %x, i32* %[[r0]], align 4 + // CHECK: call void @_Z5func1Pi(i32* %[[r0]]) + func1(&x); +}