Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -1599,8 +1599,8 @@
       assert(NumIRArgs == 1);
       // indirect arguments are always on the stack, which is alloca addr space.
       llvm::Type *LTy = ConvertTypeForMem(it->type);
-      ArgTypes[FirstIRArg] = LTy->getPointerTo(
-          CGM.getDataLayout().getAllocaAddrSpace());
+      ArgTypes[FirstIRArg] =
+          LTy->getPointerTo(CGM.getDataLayout().getAllocaAddrSpace());
       break;
     }
 
@@ -3818,12 +3818,20 @@
     }
 
     case ABIArgInfo::Indirect: {
+      // Indirect arguments are passed in the alloca address space; cast a
+      // default-address-space pointer to the matching alloca-space pointer.
+      auto CastToAllocaAddrSpace = [&](llvm::Value *V) {
+        auto *T = V->getType()->getPointerElementType()->getPointerTo(
+            CGM.getDataLayout().getAllocaAddrSpace());
+        return getTargetHooks().performAddrSpaceCast(
+            *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, true);
+      };
       assert(NumIRArgs == 1);
       if (RV.isScalar() || RV.isComplex()) {
         // Make a temporary alloca to pass the argument.
         Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
-                                     "indirect-arg-temp", false);
-        IRCallArgs[FirstIRArg] = Addr.getPointer();
+                                     "indirect-arg-temp");
+        IRCallArgs[FirstIRArg] = CastToAllocaAddrSpace(Addr.getPointer());
 
         LValue argLV = MakeAddrLValue(Addr, I->Ty);
         EmitInitStoreOfNonAggregate(*this, RV, argLV);
@@ -3835,15 +3843,18 @@
         // 2. If the argument is byval, RV is not sufficiently aligned, and
         //    we cannot force it to be sufficiently aligned.
         // 3. If the argument is byval, but RV is located in an address space
-        //    different than that of the argument (0).
+        //    different than that of the argument (alloca address space).
         Address Addr = RV.getAggregateAddress();
         CharUnits Align = ArgInfo.getIndirectAlign();
         const llvm::DataLayout *TD = &CGM.getDataLayout();
-        const unsigned RVAddrSpace = Addr.getType()->getAddressSpace();
+        const unsigned RVAddrSpace = Addr.getPointer()
+                                         ->stripPointerCasts()
+                                         ->getType()
+                                         ->getPointerAddressSpace();
         const unsigned ArgAddrSpace =
             (FirstIRArg < IRFuncTy->getNumParams()
                  ? IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace()
-                 : 0);
+                 : TD->getAllocaAddrSpace());
         if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) ||
             (ArgInfo.getIndirectByVal() && Addr.getAlignment() < Align &&
              llvm::getOrEnforceKnownAlignment(Addr.getPointer(),
@@ -3852,12 +3863,12 @@
             (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
           // Create an aligned temporary, and copy to it.
           Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
-                                     "byval-temp", false);
-          IRCallArgs[FirstIRArg] = AI.getPointer();
+                                     "byval-temp");
+          IRCallArgs[FirstIRArg] = CastToAllocaAddrSpace(AI.getPointer());
           EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
         } else {
           // Skip the extra memcpy call.
-          IRCallArgs[FirstIRArg] = Addr.getPointer();
+          IRCallArgs[FirstIRArg] = CastToAllocaAddrSpace(Addr.getPointer());
         }
       }
       break;
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -1819,6 +1819,19 @@
     llvm::Type *IRTy = ConvertTypeForMem(Ty)->getPointerTo(AS);
     if (DeclPtr.getType() != IRTy)
       DeclPtr = Builder.CreateBitCast(DeclPtr, IRTy, D.getName());
+    // Indirect argument is in alloca address space, which may be different
+    // from the default address space.
+    auto AllocaAS = CGM.getASTAllocaAddressSpace();
+    auto *V = DeclPtr.getPointer();
+    auto SrcAS = V->getType()->getPointerAddressSpace();
+    auto DestAS = getContext().getTargetAddressSpace(LangAS::Default);
+    if (SrcAS != DestAS) {
+      assert(SrcAS == CGM.getDataLayout().getAllocaAddrSpace());
+      auto *T = V->getType()->getPointerElementType()->getPointerTo(DestAS);
+      DeclPtr = Address(getTargetHooks().performAddrSpaceCast(
+                            *this, V, AllocaAS, LangAS::Default, T, true),
+                        DeclPtr.getAlignment());
+    }
 
     // Push a destructor cleanup for this parameter if the ABI requires it.
     // Don't push a cleanup in a thunk for a method that will also emit a
Index: test/CodeGenCXX/amdgcn-func-arg.cpp
===================================================================
--- /dev/null
+++ test/CodeGenCXX/amdgcn-func-arg.cpp
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 -O0 -triple amdgcn---amdgiz -emit-llvm %s -o - | FileCheck %s
+
+// Verify that aggregates passed indirectly or byval on amdgcn live in the
+// alloca address space (5) and are cast to the default address space for use.
+
+class A {
+public:
+  int x;
+  A():x(0) {}
+  ~A() {}
+};
+
+class B {
+int x;
+};
+
+A g_a;
+B g_b;
+
+void func_with_ref_arg(A &a);
+void func_with_ref_arg(B &b);
+
+// CHECK-LABEL: define void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %a)
+// CHECK: %p = alloca %class.A*, align 8, addrspace(5)
+// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A*
+// CHECK: %[[r1:.+]] = addrspacecast %class.A* addrspace(5)* %p to %class.A**
+// CHECK: store %class.A* %[[r0]], %class.A** %[[r1]], align 8
+void func_with_indirect_arg(A a) {
+  A *p = &a;
+}
+
+// CHECK-LABEL: define void @_Z22test_indirect_arg_autov()
+// CHECK: %a = alloca %class.A, align 4, addrspace(5)
+// CHECK: %agg.tmp = alloca %class.A, align 4, addrspace(5)
+// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A*
+// CHECK: call void @_ZN1AC1Ev(%class.A* %[[r0]])
+// CHECK: %[[r1:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK: %[[r4:.+]] = addrspacecast %class.A* %[[r1]] to %class.A addrspace(5)*
+// CHECK: call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r4]])
+// CHECK: call void @_ZN1AD1Ev(%class.A* %[[r1]])
+// CHECK: call void @_Z17func_with_ref_argR1A(%class.A* dereferenceable(4) %[[r0]])
+// CHECK: call void @_ZN1AD1Ev(%class.A* %[[r0]])
+void test_indirect_arg_auto() {
+  A a;
+  func_with_indirect_arg(a);
+  func_with_ref_arg(a);
+}
+
+// CHECK-LABEL: define void @_Z24test_indirect_arg_globalv()
+// CHECK: %agg.tmp = alloca %class.A, align 4, addrspace(5)
+// CHECK: %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK: %[[r2:.+]] = addrspacecast %class.A* %[[r0]] to %class.A addrspace(5)*
+// CHECK: call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r2]])
+// CHECK: call void @_ZN1AD1Ev(%class.A* %[[r0]])
+// CHECK: call void @_Z17func_with_ref_argR1A(%class.A* dereferenceable(4) addrspacecast (%class.A addrspace(1)* @g_a to %class.A*))
+void test_indirect_arg_global() {
+  func_with_indirect_arg(g_a);
+  func_with_ref_arg(g_a);
+}
+
+// CHECK-LABEL: define void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %b)
+// CHECK: %p = alloca %class.B*, align 8, addrspace(5)
+// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B*
+// CHECK: %[[r1:.+]] = addrspacecast %class.B* addrspace(5)* %p to %class.B**
+// CHECK: store %class.B* %[[r0]], %class.B** %[[r1]], align 8
+void func_with_byval_arg(B b) {
+  B *p = &b;
+}
+
+// CHECK-LABEL: define void @_Z19test_byval_arg_autov()
+// CHECK: %b = alloca %class.B, align 4, addrspace(5)
+// CHECK: %agg.tmp = alloca %class.B, align 4, addrspace(5)
+// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B*
+// CHECK: %[[r1:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK: %[[r4:.+]] = addrspacecast %class.B* %[[r1]] to %class.B addrspace(5)*
+// CHECK: call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r4]])
+// CHECK: call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(4) %[[r0]])
+void test_byval_arg_auto() {
+  B b;
+  func_with_byval_arg(b);
+  func_with_ref_arg(b);
+}
+
+// CHECK-LABEL: define void @_Z21test_byval_arg_globalv()
+// CHECK: %agg.tmp = alloca %class.B, align 4, addrspace(5)
+// CHECK: %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK: %[[r2:.+]] = addrspacecast %class.B* %[[r0]] to %class.B addrspace(5)*
+// CHECK: call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r2]])
+// CHECK: call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(4) addrspacecast (%class.B addrspace(1)* @g_b to %class.B*))
+void test_byval_arg_global() {
+  func_with_byval_arg(g_b);
+  func_with_ref_arg(g_b);
+}