Skip to content

Commit 4deb75d

Browse files
committed Mar 10, 2018
[CodeGen] Eagerly emit lifetime.end markers for calls
In C, we'll wait until the end of the scope to clean up aggregate temporaries used for returns from calls. This means in cases like:

  {
    // Assuming that `Bar` is large enough to warrant indirect returns
    struct Bar b = {};
    b = foo(&b);
    b = foo(&b);
    b = foo(&b);
    b = foo(&b);
  }

...We'll allocate space for 5 Bars on the stack (`b`, and 4 temporaries). This becomes painful in things like large switch statements.

If cleaning up sooner is trivial, we should do it.

llvm-svn: 327229
1 parent 4fb6f81 commit 4deb75d

File tree

2 files changed

+129
-6
lines changed

2 files changed

+129
-6
lines changed
 

‎clang/lib/CodeGen/CGExprAgg.cpp

+28-6
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/IR/Function.h"
2424
#include "llvm/IR/GlobalVariable.h"
2525
#include "llvm/IR/Intrinsics.h"
26+
#include "llvm/IR/IntrinsicInst.h"
2627
using namespace clang;
2728
using namespace CodeGen;
2829

@@ -48,7 +49,7 @@ class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
4849

4950
// Calls `Fn` with a valid return value slot, potentially creating a temporary
5051
// to do so. If a temporary is created, an appropriate copy into `Dest` will
51-
// be emitted.
52+
// be emitted, as will lifetime markers.
5253
//
5354
// The given function should take a ReturnValueSlot, and return an RValue that
5455
// points to said slot.
@@ -250,16 +251,28 @@ void AggExprEmitter::withReturnValueSlot(
250251
(RequiresDestruction && !Dest.getAddress().isValid());
251252

252253
Address RetAddr = Address::invalid();
254+
255+
EHScopeStack::stable_iterator LifetimeEndBlock;
256+
llvm::Value *LifetimeSizePtr = nullptr;
257+
llvm::IntrinsicInst *LifetimeStartInst = nullptr;
253258
if (!UseTemp) {
254259
RetAddr = Dest.getAddress();
255260
} else {
256261
RetAddr = CGF.CreateMemTemp(RetTy);
257262
uint64_t Size =
258263
CGF.CGM.getDataLayout().getTypeAllocSize(CGF.ConvertTypeForMem(RetTy));
259-
if (llvm::Value *LifetimeSizePtr =
260-
CGF.EmitLifetimeStart(Size, RetAddr.getPointer()))
264+
LifetimeSizePtr = CGF.EmitLifetimeStart(Size, RetAddr.getPointer());
265+
if (LifetimeSizePtr) {
266+
LifetimeStartInst =
267+
cast<llvm::IntrinsicInst>(std::prev(Builder.GetInsertPoint()));
268+
assert(LifetimeStartInst->getIntrinsicID() ==
269+
llvm::Intrinsic::lifetime_start &&
270+
"Last insertion wasn't a lifetime.start?");
271+
261272
CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>(
262273
NormalEHLifetimeMarker, RetAddr, LifetimeSizePtr);
274+
LifetimeEndBlock = CGF.EHStack.stable_begin();
275+
}
263276
}
264277

265278
RValue Src =
@@ -268,9 +281,18 @@ void AggExprEmitter::withReturnValueSlot(
268281
if (RequiresDestruction)
269282
CGF.pushDestroy(RetTy.isDestructedType(), Src.getAggregateAddress(), RetTy);
270283

271-
if (UseTemp) {
272-
assert(Dest.getPointer() != Src.getAggregatePointer());
273-
EmitFinalDestCopy(E->getType(), Src);
284+
if (!UseTemp)
285+
return;
286+
287+
assert(Dest.getPointer() != Src.getAggregatePointer());
288+
EmitFinalDestCopy(E->getType(), Src);
289+
290+
if (!RequiresDestruction && LifetimeStartInst) {
291+
// If there's no dtor to run, the copy was the last use of our temporary.
292+
// Since we're not guaranteed to be in an ExprWithCleanups, clean up
293+
// eagerly.
294+
CGF.DeactivateCleanupBlock(LifetimeEndBlock, LifetimeStartInst);
295+
CGF.EmitLifetimeEnd(LifetimeSizePtr, RetAddr.getPointer());
274296
}
275297
}
276298

+101
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,101 @@
1+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=O1
2+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=O0
3+
//
4+
// Ensure that we place appropriate lifetime markers around indirectly returned
5+
// temporaries, and that the lifetime.ends appear in a timely manner.
6+
//
7+
// -O1 is used so lifetime markers actually get emitted; the -O0 run checks
// that no lifetime markers are emitted at all.
8+
9+
// Aggregate of 40 ints used as the return type of every function in this
// test. The checks in baz() expect calls returning it to pass an `sret`
// pointer to a stack temporary, i.e. it is returned indirectly.
struct S {
10+
int ns[40];
11+
};
12+
13+
struct S foo(void);
14+
15+
// CHECK-LABEL: define dso_local void @bar
16+
// Straight-line case: assigns the result of foo() to `r` three times.
//
// With the -O1 prefix, the checks expect each call to get its own temporary
// whose lifetime.start precedes the call and whose lifetime.end follows the
// assignment, before the next temporary's lifetime.start. The lifetime.end
// for `r` itself comes last. With the -O0 prefix, no lifetime markers may
// appear.
//
// NOTE(review): the CHECK-LABEL directive above this function uses the default
// CHECK prefix, but both RUN lines pass --check-prefix=O1 / --check-prefix=O0,
// so it looks like that directive is never evaluated -- confirm.
struct S bar() {
17+
// O0-NOT: @llvm.lifetime.start
18+
// O0-NOT: @llvm.lifetime.end
19+
20+
struct S r;
21+
// O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[R_TMP:[^)]+]])
22+
23+
// O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP1:[^)]+]])
24+
// O1: call void @foo
25+
r = foo();
26+
// O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP1]])
27+
28+
// O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP2:[^)]+]])
29+
// O1: call void @foo
30+
r = foo();
31+
// O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP2]])
32+
33+
// O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP3:[^)]+]])
34+
// O1: call void @foo
35+
r = foo();
36+
// O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP3]])
37+
38+
// O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[R_TMP]])
39+
return r;
40+
}
41+
42+
struct S foo_int(int);
43+
44+
// Be sure that we're placing the lifetime.end so that all paths go through it.
45+
// Since this function turns out to be large-ish, optnone to hopefully keep it
46+
// stable.
47+
// CHECK-LABEL: define dso_local void @baz
48+
// Control-flow case: the first call's argument is a statement expression that
// may `break` out of the loop after the return temporary's lifetime.start has
// already been emitted.
//
// With the -O1 prefix, the checks expect the first temporary's lifetime.end on
// both the break path (IF_THEN) and the fall-through path (IF_END, after the
// memcpy into `r`), and the second temporary's lifetime.start/lifetime.end to
// bracket the second call. With the -O0 prefix, no lifetime markers may appear.
//
// i: counter that is incremented and passed to foo_int() on each call.
// j: volatile flag pointer; a non-zero *j takes the `break` path.
// Returns `r`, the last value assigned from foo_int().
__attribute__((optnone))
49+
struct S baz(int i, volatile int *j) {
50+
// O0-NOT: @llvm.lifetime.start
51+
// O0-NOT: @llvm.lifetime.end
52+
53+
struct S r;
54+
// O1: %[[RESULT_ALLOCA:[^ ]+]] = alloca %struct.S
55+
// O1: %[[TMP1_ALLOCA:[^ ]+]] = alloca %struct.S
56+
// O1: %[[TMP2_ALLOCA:[^ ]+]] = alloca %struct.S
57+
// O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[RESULT_ALLOCA]] to i8*
58+
// O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]])
59+
// O1: br label %[[DO_BODY:.+]]
60+
61+
do {
62+
// O1: [[DO_BODY]]:
63+
// O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP1_ALLOCA]] to i8*
64+
// O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]])
65+
// O1: br i1 {{[^,]+}}, label %[[IF_THEN:[^,]+]], label %[[IF_END:[^,]+]]
66+
//
67+
// O1: [[IF_THEN]]:
68+
// O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP1_ALLOCA]] to i8*
69+
// O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]])
70+
// O1: br label %[[DO_END:.*]]
71+
//
72+
// O1: [[IF_END]]:
73+
// O1: call void @foo_int(%struct.S* sret %[[TMP1_ALLOCA]],
74+
// O1: call void @llvm.memcpy
75+
// O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP1_ALLOCA]] to i8*
76+
// O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]])
77+
// O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP2_ALLOCA]] to i8*
78+
// O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]])
79+
// O1: call void @foo_int(%struct.S* sret %[[TMP2_ALLOCA]],
80+
// O1: call void @llvm.memcpy
81+
// O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP2_ALLOCA]] to i8*
82+
// O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]])
83+
// O1: br label %[[DO_COND:.*]]
84+
//
85+
// O1: [[DO_COND]]:
86+
// O1: br label %[[DO_BODY]]
87+
// The argument is a statement expression; its `break` leaves the loop while
// the first call's return temporary is already live.
r = foo_int(({
88+
if (*j)
89+
break;
90+
i++;
91+
}));
92+
93+
r = foo_int(i++);
94+
} while (1);
95+
96+
// O1: [[DO_END]]:
97+
// O1: call void @llvm.memcpy
98+
// O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[RESULT_ALLOCA]] to i8*
99+
// O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]])
100+
return r;
101+
}

0 commit comments

Comments
 (0)
Please sign in to comment.