diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -40,6 +40,14 @@ C/C++ Language Potentially Breaking Changes ------------------------------------------- +- Clang is now more precise with regard to the lifetime of temporary objects, + such as when aggregates are passed by value to a function, resulting in + better sharing of stack slots and reduced stack usage. If recompiling with + ``-fsanitize=address`` reports a use-after-scope error, this change is the + likely cause, and the printed report should help users pinpoint where the + use-after-scope occurs. Users can pass ``-Xclang + -sloppy-temporary-lifetimes`` to retain the old behavior until they are able + to find and resolve the issues in their code. C++ Specific Potentially Breaking Changes ----------------------------------------- diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -514,6 +514,10 @@ /// non-deleting destructors. (No effect on Microsoft ABI.) CODEGENOPT(CtorDtorReturnThis, 1, 0) +/// Set via -Xclang -sloppy-temporary-lifetimes to disable emission of lifetime +/// marker intrinsic calls for temporary objects. 
+CODEGENOPT(NoLifetimeMarkersForTemporaries, 1, 0) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6849,6 +6849,9 @@ NormalizedValuesScope<"CodeGenOptions::AssignmentTrackingOpts">, Values<"disabled,enabled,forced">, NormalizedValues<["Disabled","Enabled","Forced"]>, MarshallingInfoEnum<CodeGenOpts<"AssignmentTrackingMode">, "Enabled">; +def sloppy_temporary_lifetimes : Flag<["-"], "sloppy-temporary-lifetimes">, + HelpText<"Don't emit lifetime markers for temporary objects">, + MarshallingInfoFlag<CodeGenOpts<"NoLifetimeMarkersForTemporaries">>; } // let Visibility = [CC1Option] diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -278,6 +278,11 @@ llvm::Instruction *IsActiveIP; }; + struct EndLifetimeInfo { + llvm::Value *Addr; + llvm::Value *Size; + }; + void add(RValue rvalue, QualType type) { push_back(CallArg(rvalue, type)); } void addUncopiedAggregate(LValue LV, QualType type) { @@ -294,6 +299,9 @@ CleanupsToDeactivate.insert(CleanupsToDeactivate.end(), other.CleanupsToDeactivate.begin(), other.CleanupsToDeactivate.end()); + LifetimeCleanups.insert(LifetimeCleanups.end(), + other.LifetimeCleanups.begin(), + other.LifetimeCleanups.end()); assert(!(StackBase && other.StackBase) && "can't merge stackbases"); if (!StackBase) StackBase = other.StackBase; @@ -333,6 +341,14 @@ /// memory. bool isUsingInAlloca() const { return StackBase; } + void addLifetimeCleanup(EndLifetimeInfo Info) { + LifetimeCleanups.push_back(Info); + } + + ArrayRef<EndLifetimeInfo> getLifetimeCleanups() const { + return LifetimeCleanups; + } + private: SmallVector<Writeback, 1> Writebacks; @@ -341,6 +357,10 @@ /// occurs. SmallVector<CallArgCleanup, 1> CleanupsToDeactivate; + /// Lifetime information needed to call llvm.lifetime.end for any temporary + /// argument allocas. + SmallVector<EndLifetimeInfo, 2> LifetimeCleanups; + /// The stacksave call. 
It dominates all of the argument evaluation. llvm::CallInst *StackBase = nullptr; }; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" +#include "llvm/Support/TypeSize.h" #include "llvm/Transforms/Utils/Local.h" #include <optional> using namespace clang; @@ -4677,7 +4678,28 @@ return; } - args.add(EmitAnyExprToTemp(E), type); + AggValueSlot ArgSlot = AggValueSlot::ignored(); + // FIXME: If the callee returns a reference, we should skip this stack saving + // optimization so the temporary's lifetime is not ended prematurely; this is + // not checked below. The optimization may still be possible if the return + // type is a reference to a different type than the parameter. + if (hasAggregateEvaluationKind(E->getType())) { + Address ArgSlotAlloca = Address::invalid(); + ArgSlot = CreateAggTemp(E->getType(), "agg.tmp", &ArgSlotAlloca); + + // Emit a lifetime start/end for this temporary. If the type has a + // destructor, then we need to keep it alive. FIXME: We should still be able + // to end the lifetime after the destructor returns. 
+ if (!CGM.getCodeGenOpts().NoLifetimeMarkersForTemporaries && !E->getType().isDestructedType()) { + llvm::TypeSize size = + CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(E->getType())); + if (llvm::Value *lifetimeSize = + EmitLifetimeStart(size, ArgSlotAlloca.getPointer())) + args.addLifetimeCleanup({ArgSlotAlloca.getPointer(), lifetimeSize}); + } + } + + args.add(EmitAnyExpr(E, ArgSlot), type); } QualType CodeGenFunction::getVarArgType(const Expr *Arg) { @@ -5873,6 +5895,10 @@ for (CallLifetimeEnd &LifetimeEnd : CallLifetimeEndAfterCall) LifetimeEnd.Emit(*this, /*Flags=*/{}); + if (!CGM.getCodeGenOpts().NoLifetimeMarkersForTemporaries) + for (const CallArgList::EndLifetimeInfo &LT : CallArgs.getLifetimeCleanups()) + EmitLifetimeEnd(LT.Size, LT.Addr); + if (!ReturnValue.isExternallyDestructed() && RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct) pushDestroy(QualType::DK_nontrivial_c_struct, Ret.getAggregateAddress(), diff --git a/clang/test/CodeGen/lifetime-call-temp.c b/clang/test/CodeGen/lifetime-call-temp.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/lifetime-call-temp.c @@ -0,0 +1,93 @@ +// RUN: %clang -cc1 -triple x86_64-apple-macos -O1 -disable-llvm-passes %s -S \ +// RUN: -emit-llvm -o - | FileCheck %s --implicit-check-not=llvm.lifetime +// RUN: %clang -cc1 -xc++ -std=c++17 -triple x86_64-apple-macos -O1 \ +// RUN: -disable-llvm-passes %s -S -emit-llvm -o - -Wno-return-type-c-linkage | \ +// RUN: FileCheck %s --implicit-check-not=llvm.lifetime --check-prefixes=CHECK,CXX +// RUN: %clang -cc1 -xobjective-c -triple x86_64-apple-macos -O1 \ +// RUN: -disable-llvm-passes %s -S -emit-llvm -o - | \ +// RUN: FileCheck %s --implicit-check-not=llvm.lifetime --check-prefixes=CHECK,OBJC +// RUN: %clang -cc1 -triple x86_64-apple-macos -O1 -disable-llvm-passes %s -S \ +// RUN: -emit-llvm -o - -sloppy-temporary-lifetimes | \ +// RUN: FileCheck %s --implicit-check-not=llvm.lifetime --check-prefixes=SLOPPY + +typedef struct { int x[100]; 
} aggregate; + +#ifdef __cplusplus +extern "C" { +#endif + +void takes_aggregate(aggregate); +aggregate gives_aggregate(); + +// CHECK-LABEL: define void @t1 +void t1() { + takes_aggregate(gives_aggregate()); + + // CHECK: [[AGGTMP:%.*]] = alloca %struct.aggregate, align 8 + // CHECK: call void @llvm.lifetime.start.p0(i64 400, ptr [[AGGTMP]]) + // CHECK: call void{{.*}} @gives_aggregate(ptr sret(%struct.aggregate) align 4 [[AGGTMP]]) + // CHECK: call void @takes_aggregate(ptr noundef byval(%struct.aggregate) align 8 [[AGGTMP]]) + // CHECK: call void @llvm.lifetime.end.p0(i64 400, ptr [[AGGTMP]]) + + // SLOPPY: [[AGGTMP:%.*]] = alloca %struct.aggregate, align 8 + // SLOPPY-NEXT: call void (ptr, ...) @gives_aggregate(ptr sret(%struct.aggregate) align 4 [[AGGTMP]]) + // SLOPPY-NEXT: call void @takes_aggregate(ptr noundef byval(%struct.aggregate) align 8 [[AGGTMP]]) +} + +// CHECK: declare {{.*}}llvm.lifetime.start +// CHECK: declare {{.*}}llvm.lifetime.end + +#ifdef __cplusplus +// CXX: define void @t2 +void t2() { + struct S { + S(aggregate) {} + }; + S{gives_aggregate()}; + + // CXX: [[AGG:%.*]] = alloca %struct.aggregate + // CXX: call void @llvm.lifetime.start.p0(i64 400, ptr + // CXX: call void @gives_aggregate(ptr sret(%struct.aggregate) align 4 [[AGG]]) + // CXX: call void @_ZZ2t2EN1SC1E9aggregate(ptr {{.*}}, ptr {{.*}} byval(%struct.aggregate) align 8 [[AGG]]) + // CXX: call void @llvm.lifetime.end.p0(i64 400, ptr +} + +struct Dtor { + ~Dtor(); +}; + +void takes_dtor(Dtor); +Dtor gives_dtor(); + +// CXX: define void @t3 +void t3() { + takes_dtor(gives_dtor()); + + // CXX-NOT: @llvm.lifetime + // CXX: ret void +} + +#endif + +#ifdef __OBJC__ + +@interface X +-m:(aggregate)x; +@end + +// OBJC: define void @t4 +void t4(X *x) { + [x m: gives_aggregate()]; + + // OBJC: [[AGG:%.*]] = alloca %struct.aggregate + // OBJC: call void @llvm.lifetime.start.p0(i64 400, ptr + // OBJC: call void{{.*}} @gives_aggregate(ptr sret(%struct.aggregate) align 4 [[AGG]]) + // OBJC: 
call {{.*}}@objc_msgSend + // OBJC: call void @llvm.lifetime.end.p0(i64 400, ptr +} + +#endif + +#ifdef __cplusplus +} +#endif diff --git a/clang/test/CodeGenCXX/amdgcn-call-with-aggarg.cpp b/clang/test/CodeGenCXX/amdgcn-call-with-aggarg.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCXX/amdgcn-call-with-aggarg.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -O3 -disable-llvm-passes -o - %s | FileCheck %s + +struct A { + float x, y, z, w; +}; + +void foo(A a); + +// CHECK-LABEL: @_Z4testv +// CHECK: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_A]], align 4, addrspace(5) +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr +// CHECK-NEXT: [[AGG_TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_TMP]] to ptr +// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[A]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) [[AGG_TMP]]) #[[ATTR4]] +void test() { + A a; + foo(a); +} diff --git a/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp b/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp --- a/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp +++ b/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp @@ -26,6 +26,8 @@ // CHECK: [[T2:%.*]] = alloca %class.T, align 4 // CHECK: [[T3:%.*]] = alloca %class.T, align 4 // +// CHECK: [[AGG:%.*]] = alloca %class.S, align 4 +// // FIXME: We could defer starting the lifetime of the return object of concat // until the call. 
// CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr [[T1]]) @@ -34,7 +36,9 @@ // CHECK: [[T4:%.*]] = call noundef ptr @_ZN1TC1EPKc(ptr {{[^,]*}} [[T2]], ptr noundef @.str) // // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr [[T3]]) +// CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr [[AGG]]) // CHECK: [[T5:%.*]] = call noundef ptr @_ZN1TC1E1S(ptr {{[^,]*}} [[T3]], [2 x i32] %{{.*}}) +// CHECK: call void @llvm.lifetime.end.p0(i64 8, ptr [[AGG]]) // // CHECK: call void @_ZNK1T6concatERKS_(ptr sret(%class.T) align 4 [[T1]], ptr {{[^,]*}} [[T2]], ptr noundef nonnull align 4 dereferenceable(16) [[T3]]) // CHECK: [[T6:%.*]] = call noundef ptr @_ZNK1T3strEv(ptr {{[^,]*}} [[T1]]) diff --git a/clang/test/CodeGenCoroutines/pr59181.cpp b/clang/test/CodeGenCoroutines/pr59181.cpp --- a/clang/test/CodeGenCoroutines/pr59181.cpp +++ b/clang/test/CodeGenCoroutines/pr59181.cpp @@ -49,6 +49,7 @@ } // CHECK: cleanup.cont:{{.*}} +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[AGG:%agg.tmp[0-9]+]]) // CHECK-NEXT: load i8 // CHECK-NEXT: trunc // CHECK-NEXT: store i1 false @@ -56,5 +57,7 @@ // CHECK: await.suspend:{{.*}} // CHECK-NOT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF]]) +// CHECK-NOT: call void @llvm.lifetime.start.p0(i64 8, ptr [[AGG]]) // CHECK: call void @_ZZN4Task12promise_type15await_transformES_EN10Suspension13await_suspendESt16coroutine_handleIvE +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[AGG2:%agg.tmp[0-9]+]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF]])