diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -278,6 +278,11 @@ llvm::Instruction *IsActiveIP; }; + struct EndLifetimeInfo { + llvm::Value *Addr; + llvm::Value *Size; + }; + void add(RValue rvalue, QualType type) { push_back(CallArg(rvalue, type)); } void addUncopiedAggregate(LValue LV, QualType type) { @@ -294,6 +299,9 @@ CleanupsToDeactivate.insert(CleanupsToDeactivate.end(), other.CleanupsToDeactivate.begin(), other.CleanupsToDeactivate.end()); + LifetimeCleanups.insert(LifetimeCleanups.end(), + other.LifetimeCleanups.begin(), + other.LifetimeCleanups.end()); assert(!(StackBase && other.StackBase) && "can't merge stackbases"); if (!StackBase) StackBase = other.StackBase; @@ -333,6 +341,14 @@ /// memory. bool isUsingInAlloca() const { return StackBase; } + void addLifetimeCleanup(EndLifetimeInfo Info) { + LifetimeCleanups.push_back(Info); + } + + ArrayRef getLifetimeCleanups() const { + return LifetimeCleanups; + } + private: SmallVector Writebacks; @@ -341,6 +357,10 @@ /// occurs. SmallVector CleanupsToDeactivate; + /// Lifetime information needed to call llvm.lifetime.end for any temporary + /// argument allocas. + SmallVector LifetimeCleanups; + /// The stacksave call. It dominates all of the argument evaluation. llvm::CallInst *StackBase = nullptr; }; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" +#include "llvm/Support/TypeSize.h" #include "llvm/Transforms/Utils/Local.h" #include using namespace clang; @@ -4510,6 +4511,10 @@ Args.allocateArgumentMemory(*this); } + bool ReturnsReference = false; + if (const auto *F = dyn_cast_or_null(AC.getDecl())) + ReturnsReference = F->getReturnType()->isReferenceType(); + // Evaluate each argument in the appropriate order. size_t CallArgsStart = Args.size(); for (unsigned I = 0, E = ArgTypes.size(); I != E; ++I) { @@ -4524,7 +4529,7 @@ (isa(AC.getDecl()) && isObjCMethodWithTypeParams(cast(AC.getDecl())))) && "Argument and parameter types don't match"); - EmitCallArg(Args, *Arg, ArgTypes[Idx]); + EmitCallArg(Args, *Arg, ArgTypes[Idx], ReturnsReference); // In particular, we depend on it being the last arg in Args, and the // objectsize bits depend on there only being one arg if !LeftToRight. assert(InitialArgSize + 1 == Args.size() && @@ -4615,7 +4620,8 @@ } void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, - QualType type) { + QualType type, + const bool CalleeReturnsReference) { DisableDebugLocationUpdates Dis(*this, E); if (const ObjCIndirectCopyRestoreExpr *CRE = dyn_cast(E)) { @@ -4677,7 +4683,28 @@ return; } - args.add(EmitAnyExprToTemp(E), type); + AggValueSlot ArgSlot = AggValueSlot::ignored(); + // If the callee returns a reference, skip this stack saving optimization; + // we don't want to prematurely end the lifetime of the temporary. It may be + // possible to still perform this optimization if the return type is a + // reference to a different type than the parameter. + if (!CalleeReturnsReference && hasAggregateEvaluationKind(E->getType())) { + Address ArgSlotAlloca = Address::invalid(); + ArgSlot = CreateAggTemp(E->getType(), "agg.tmp", &ArgSlotAlloca); + + // Emit a lifetime start/end for this temporary. If the type has a + // destructor, then we need to keep it alive. FIXME: We should still be able + // to end the lifetime after the destructor returns. + if (!E->getType().isDestructedType()) { + llvm::TypeSize size = + CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(E->getType())); + if (llvm::Value *lifetimeSize = + EmitLifetimeStart(size, ArgSlotAlloca.getPointer())) + args.addLifetimeCleanup({ArgSlotAlloca.getPointer(), lifetimeSize}); + } + } + + args.add(EmitAnyExpr(E, ArgSlot), type); } QualType CodeGenFunction::getVarArgType(const Expr *Arg) { @@ -5873,6 +5900,9 @@ for (CallLifetimeEnd &LifetimeEnd : CallLifetimeEndAfterCall) LifetimeEnd.Emit(*this, /*Flags=*/{}); + for (const CallArgList::EndLifetimeInfo < : CallArgs.getLifetimeCleanups()) + EmitLifetimeEnd(LT.Size, LT.Addr); + if (!ReturnValue.isExternallyDestructed() && RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct) pushDestroy(QualType::DK_nontrivial_c_struct, Ret.getAggregateAddress(), diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4714,7 +4714,8 @@ AbstractCallee AC, unsigned ParmNum); /// EmitCallArg - Emit a single call argument. - void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType); + void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType, + const bool CalleeReturnsReference); /// EmitDelegateCallArg - We are performing a delegate call; that /// is, the current function is delegating to another one. Produce diff --git a/clang/test/CodeGen/lifetime-call-temp.c b/clang/test/CodeGen/lifetime-call-temp.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/lifetime-call-temp.c @@ -0,0 +1,108 @@ +// RUN: %clang -cc1 -triple x86_64-apple-macos -O1 -disable-llvm-passes %s -S \ +// RUN: -emit-llvm -o - | FileCheck %s --implicit-check-not=llvm.lifetime +// RUN: %clang -cc1 -xc++ -std=c++17 -triple x86_64-apple-macos -O1 \ +// RUN: -disable-llvm-passes %s -S -emit-llvm -o - -Wno-return-type-c-linkage | \ +// RUN: FileCheck %s --implicit-check-not=llvm.lifetime --check-prefixes=CHECK,CXX +// RUN: %clang -cc1 -xobjective-c -triple x86_64-apple-macos -O1 \ +// RUN: -disable-llvm-passes %s -S -emit-llvm -o - | \ +// RUN: FileCheck %s --implicit-check-not=llvm.lifetime --check-prefixes=CHECK,OBJC + +typedef struct { int x[100]; } aggregate; + +#ifdef __cplusplus +extern "C" { +#endif + +void takes_aggregate(aggregate); +aggregate gives_aggregate(); + +// CHECK-LABEL: define void @t1 +void t1() { + takes_aggregate(gives_aggregate()); + + // CHECK: [[AGGTMP:%.*]] = alloca %struct.aggregate, align 8 + // CHECK: call void @llvm.lifetime.start.p0(i64 400, ptr [[AGGTMP]]) + // CHECK: call void{{.*}} @gives_aggregate(ptr sret(%struct.aggregate) align 4 [[AGGTMP]]) + // CHECK: call void @takes_aggregate(ptr noundef byval(%struct.aggregate) align 8 [[AGGTMP]]) + // CHECK: call void @llvm.lifetime.end.p0(i64 400, ptr [[AGGTMP]]) +} + +// CHECK: declare {{.*}}llvm.lifetime.start +// CHECK: declare {{.*}}llvm.lifetime.end + +#ifdef __cplusplus +// CXX: define void @t2 +void t2() { + struct S { + S(aggregate) {} + }; + S{gives_aggregate()}; + + // CXX: [[AGG:%.*]] = alloca %struct.aggregate + // CXX: call void @llvm.lifetime.start.p0(i64 400, ptr + // CXX: call void @gives_aggregate(ptr sret(%struct.aggregate) align 4 [[AGG]]) + // CXX: call void @_ZZ2t2EN1SC1E9aggregate(ptr {{.*}}, ptr {{.*}} byval(%struct.aggregate) align 8 [[AGG]]) + // CXX: call void @llvm.lifetime.end.p0(i64 400, ptr +} + +struct Dtor { + ~Dtor(); +}; + +void takes_dtor(Dtor); +Dtor gives_dtor(); + +// CXX: define void @t3 +void t3() { + takes_dtor(gives_dtor()); + + // CXX-NOT: @llvm.lifetime + // CXX: ret void +} + +struct foo { int x; }; +foo &min(foo a, foo b); + +void consume (foo&); + +// The intent of this test is that we skip the lifetime markers for the temp +// aggregates because the callee returns a reference. Maybe it's possible to +// improve this in the future? +void bar () { + foo a, b; + // CXX: call void @llvm.lifetime.start.p0(i64 4, ptr %a) + // CXX: call void @llvm.lifetime.start.p0(i64 4, ptr %b) + // CXX: call void @llvm.lifetime.end.p0(i64 4, ptr %b) + // CXX: call void @llvm.lifetime.end.p0(i64 4, ptr %a) + // CXX-NOT: call void @llvm.lifetime.start.p0(i64 4, ptr %agg.tmp) + // CXX-NOT: call void @llvm.lifetime.start.p0(i64 4, ptr %agg.tmp1) + // CXX-NOT: call void @llvm.lifetime.end.p0(i64 4, ptr %agg.tmp) + // CXX-NOT: call void @llvm.lifetime.end.p0(i64 4, ptr %agg.tmp1) + consume(min(a, b)); +} + + +#endif + +#ifdef __OBJC__ + +@interface X +-m:(aggregate)x; +@end + +// OBJC: define void @t4 +void t4(X *x) { + [x m: gives_aggregate()]; + + // OBJC: [[AGG:%.*]] = alloca %struct.aggregate + // OBJC: call void @llvm.lifetime.start.p0(i64 400, ptr + // OBJC: call void{{.*}} @gives_aggregate(ptr sret(%struct.aggregate) align 4 [[AGGTMP]]) + // OBJC: call {{.*}}@objc_msgSend + // OBJC: call void @llvm.lifetime.end.p0(i64 400, ptr +} + +#endif + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/CodeGenCXX/amdgcn-call-with-aggarg.cpp b/clang/test/CodeGenCXX/amdgcn-call-with-aggarg.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/amdgcn-call-with-aggarg.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -O3 -disable-llvm-passes -o - %s | FileCheck %s + +struct A { + float x, y, z, w; +}; + +void foo(A a); + +// CHECK-LABEL: @_Z4testv +// CHECK: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_A]], align 4, addrspace(5) +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr +// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_TMP]] to ptr +// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[A]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[AGG_TMP]]) #[[ATTR4]] +void test() { + A a; + foo(a); +} diff --git a/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp b/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp --- a/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp +++ b/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp @@ -26,6 +26,8 @@ // CHECK: [[T2:%.*]] = alloca %class.T, align 4 // CHECK: [[T3:%.*]] = alloca %class.T, align 4 // +// CHECK: [[AGG:%.*]] = alloca %class.S, align 4 +// // FIXME: We could defer starting the lifetime of the return object of concat // until the call. // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr [[T1]]) @@ -34,7 +36,9 @@ // CHECK: [[T4:%.*]] = call noundef ptr @_ZN1TC1EPKc(ptr {{[^,]*}} [[T2]], ptr noundef @.str) // // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr [[T3]]) +// CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr [[AGG]]) // CHECK: [[T5:%.*]] = call noundef ptr @_ZN1TC1E1S(ptr {{[^,]*}} [[T3]], [2 x i32] %{{.*}}) +// CHECK: call void @llvm.lifetime.end.p0(i64 8, ptr [[AGG]] // // CHECK: call void @_ZNK1T6concatERKS_(ptr sret(%class.T) align 4 [[T1]], ptr {{[^,]*}} [[T2]], ptr noundef nonnull align 4 dereferenceable(16) [[T3]]) // CHECK: [[T6:%.*]] = call noundef ptr @_ZNK1T3strEv(ptr {{[^,]*}} [[T1]]) diff --git a/clang/test/CodeGenCoroutines/pr59181.cpp b/clang/test/CodeGenCoroutines/pr59181.cpp --- a/clang/test/CodeGenCoroutines/pr59181.cpp +++ b/clang/test/CodeGenCoroutines/pr59181.cpp @@ -49,6 +49,7 @@ } // CHECK: cleanup.cont:{{.*}} +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[AGG:%agg.tmp[0-9]+]]) // CHECK-NEXT: load i8 // CHECK-NEXT: trunc // CHECK-NEXT: store i1 false @@ -56,5 +57,7 @@ // CHECK: await.suspend:{{.*}} // CHECK-NOT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF]]) +// CHECK-NOT: call void @llvm.lifetime.start.p0(i64 8, ptr [[AGG]]) // CHECK: call void @_ZZN4Task12promise_type15await_transformES_EN10Suspension13await_suspendESt16coroutine_handleIvE +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[AGG2:%agg.tmp[0-9]+]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF]])