Index: include/clang/AST/Decl.h =================================================================== --- include/clang/AST/Decl.h +++ include/clang/AST/Decl.h @@ -4008,6 +4008,13 @@ bool doesNotEscape() const { return BlockDeclBits.DoesNotEscape; } void setDoesNotEscape(bool B = true) { BlockDeclBits.DoesNotEscape = B; } + bool canAvoidCopyToHeap() const { + return BlockDeclBits.CanAvoidCopyToHeap; + } + void setCanAvoidCopyToHeap(bool B = true) { + BlockDeclBits.CanAvoidCopyToHeap = B; + } + bool capturesVariable(const VarDecl *var) const; void setCaptures(ASTContext &Context, ArrayRef<Capture> Captures, Index: include/clang/AST/DeclBase.h =================================================================== --- include/clang/AST/DeclBase.h +++ include/clang/AST/DeclBase.h @@ -1665,6 +1665,11 @@ /// A bit that indicates this block is passed directly to a function as a /// non-escaping parameter. uint64_t DoesNotEscape : 1; + + /// A bit that indicates whether it's possible to avoid copying this block to + /// the heap when it initializes or is assigned to a local variable with + /// automatic storage. + uint64_t CanAvoidCopyToHeap : 1; }; /// Number of non-inherited bits in BlockDeclBitfields. 
Index: lib/AST/Decl.cpp =================================================================== --- lib/AST/Decl.cpp +++ lib/AST/Decl.cpp @@ -4265,6 +4265,7 @@ setBlockMissingReturnType(true); setIsConversionFromLambda(false); setDoesNotEscape(false); + setCanAvoidCopyToHeap(false); } void BlockDecl::setParams(ArrayRef<ParmVarDecl *> NewParamInfo) { Index: lib/CodeGen/CGObjC.cpp =================================================================== --- lib/CodeGen/CGObjC.cpp +++ lib/CodeGen/CGObjC.cpp @@ -2870,6 +2870,7 @@ Result visit(const Expr *e); Result visitCastExpr(const CastExpr *e); Result visitPseudoObjectExpr(const PseudoObjectExpr *e); + Result visitBlockExpr(const BlockExpr *e); Result visitBinaryOperator(const BinaryOperator *e); Result visitBinAssign(const BinaryOperator *e); Result visitBinAssignUnsafeUnretained(const BinaryOperator *e); @@ -2945,6 +2946,12 @@ return result; } +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>::visitBlockExpr(const BlockExpr *e) { + // The default implementation just forwards the expression to visitExpr. + return asImpl().visitExpr(e); +} + template <typename Impl, typename Result> Result ARCExprEmitter<Impl,Result>::visitCastExpr(const CastExpr *e) { switch (e->getCastKind()) { @@ -3088,7 +3095,8 @@ // Look through pseudo-object expressions. } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) { return asImpl().visitPseudoObjectExpr(pseudo); - } + } else if (auto *be = dyn_cast<BlockExpr>(e)) + return asImpl().visitBlockExpr(be); return asImpl().visitExpr(e); } @@ -3123,6 +3131,15 @@ return TryEmitResult(result, true); } + TryEmitResult visitBlockExpr(const BlockExpr *e) { + TryEmitResult result = visitExpr(e); + // Avoid the block-retain if this is a block literal that doesn't need to be + // copied to the heap. + if (e->getBlockDecl()->canAvoidCopyToHeap()) + result.setInt(true); + return result; + } + /// Block extends are net +0. Naively, we could just recurse on /// the subexpression, but actually we need to ensure that the /// value is copied as a block, so there's a little filter here. 
Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -11257,6 +11257,11 @@ << Culprit->getSourceRange(); } } + + if (auto *E = dyn_cast<ExprWithCleanups>(Init)) + if (auto *BE = dyn_cast<BlockExpr>(E->getSubExpr()->IgnoreParens())) + if (VDecl->hasLocalStorage()) + BE->getBlockDecl()->setCanAvoidCopyToHeap(); } else if (VDecl->isStaticDataMember() && !VDecl->isInline() && VDecl->getLexicalDeclContext()->isRecord()) { // This is an in-class initialization for a static data member, e.g., Index: lib/Sema/SemaExpr.cpp =================================================================== --- lib/Sema/SemaExpr.cpp +++ lib/Sema/SemaExpr.cpp @@ -12443,6 +12443,25 @@ if (!ResultTy.isNull()) { DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc, true); DiagnoseSelfMove(LHS.get(), RHS.get(), OpLoc); + + // Avoid copying a block to the heap if the block is assigned to a local + // auto variable that is declared in the same scope as the block. This + // optimization is unsafe if the local variable is declared in an outer + // scope. For example: + // + // BlockTy b; + // { + // b = ^{...}; + // } + // // It is unsafe to invoke the block here if it wasn't copied to the + // // heap. 
+ // b(); + + if (auto *BE = dyn_cast<BlockExpr>(RHS.get()->IgnoreParens())) + if (auto *DRE = dyn_cast<DeclRefExpr>(LHS.get()->IgnoreParens())) + if (auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) + if (VD->hasLocalStorage() && getCurScope()->isDeclScope(VD)) + BE->getBlockDecl()->setCanAvoidCopyToHeap(); } RecordModifiableNonNullParam(*this, LHS.get()); break; Index: lib/Serialization/ASTReaderDecl.cpp =================================================================== --- lib/Serialization/ASTReaderDecl.cpp +++ lib/Serialization/ASTReaderDecl.cpp @@ -1479,6 +1479,7 @@ BD->setBlockMissingReturnType(Record.readInt()); BD->setIsConversionFromLambda(Record.readInt()); BD->setDoesNotEscape(Record.readInt()); + BD->setCanAvoidCopyToHeap(Record.readInt()); bool capturesCXXThis = Record.readInt(); unsigned numCaptures = Record.readInt(); Index: lib/Serialization/ASTWriterDecl.cpp =================================================================== --- lib/Serialization/ASTWriterDecl.cpp +++ lib/Serialization/ASTWriterDecl.cpp @@ -1110,6 +1110,7 @@ Record.push_back(D->blockMissingReturnType()); Record.push_back(D->isConversionFromLambda()); Record.push_back(D->doesNotEscape()); + Record.push_back(D->canAvoidCopyToHeap()); Record.push_back(D->capturesCXXThis()); Record.push_back(D->getNumCaptures()); for (const auto &capture : D->captures()) { Index: test/CodeGenObjC/arc-block-copy-escape.m =================================================================== --- test/CodeGenObjC/arc-block-copy-escape.m +++ test/CodeGenObjC/arc-block-copy-escape.m @@ -9,14 +9,14 @@ void test0(int i) { block_t block = ^{ use_int(i); }; // CHECK-LABEL: define {{.*}}void @test0( - // CHECK: call {{.*}}i8* @llvm.objc.retainBlock(i8* {{%.*}}) [[NUW:#[0-9]+]], !clang.arc.copy_on_escape + // CHECK-NOT: @llvm.objc.retainBlock( // CHECK: ret void } void test1(int i) { id block = ^{ use_int(i); }; // CHECK-LABEL: define {{.*}}void @test1( - // CHECK: call {{.*}}i8* @llvm.objc.retainBlock(i8* {{%.*}}) [[NUW]] + // CHECK: call {{.*}}i8* 
@llvm.objc.retainBlock(i8* {{%.*}}) [[NUW:#[0-9]+]] // CHECK-NOT: !clang.arc.copy_on_escape // CHECK: ret void } Index: test/CodeGenObjC/arc-blocks.m =================================================================== --- test/CodeGenObjC/arc-blocks.m +++ test/CodeGenObjC/arc-blocks.m @@ -338,20 +338,19 @@ __block void (^block)(void) = ^{ block(); }; // CHECK-LABEL: define void @test10a() // CHECK: [[BYREF:%.*]] = alloca [[BYREF_T:%.*]], + // CHECK: [[BLOCK1:%.*]] = alloca <{ i8*, i32, i32, i8*, %[[STRUCT_BLOCK_DESCRIPTOR]]*, i8* }>, align 8 // Zero-initialization before running the initializer. // CHECK: [[T0:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[BYREF]], i32 0, i32 6 // CHECK-NEXT: store void ()* null, void ()** [[T0]], align 8 // Run the initializer as an assignment. - // CHECK: [[T0:%.*]] = bitcast void ()* {{%.*}} to i8* - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainBlock(i8* [[T0]]) - // CHECK-NEXT: [[T2:%.*]] = bitcast i8* [[T1]] to void ()* + // CHECK: [[T2:%.*]] = bitcast <{ i8*, i32, i32, i8*, %[[STRUCT_BLOCK_DESCRIPTOR]]*, i8* }>* [[BLOCK1]] to void ()* // CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[BYREF]], i32 0, i32 1 // CHECK-NEXT: [[T4:%.*]] = load [[BYREF_T]]*, [[BYREF_T]]** [[T3]] // CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[T4]], i32 0, i32 6 // CHECK-NEXT: [[T6:%.*]] = load void ()*, void ()** [[T5]], align 8 - // CHECK-NEXT: store void ()* {{%.*}}, void ()** [[T5]], align 8 + // CHECK-NEXT: store void ()* [[T2]], void ()** [[T5]], align 8 // CHECK-NEXT: [[T7:%.*]] = bitcast void ()* [[T6]] to i8* // CHECK-NEXT: call void @llvm.objc.release(i8* [[T7]]) @@ -401,6 +400,7 @@ // CHECK-LABEL: define void @test10b() // CHECK: [[BYREF:%.*]] = alloca [[BYREF_T:%.*]], + // CHECK: [[BLOCK3:%.*]] = alloca <{ i8*, i32, i32, i8*, %[[STRUCT_BLOCK_DESCRIPTOR]]*, i8* }>, align 8 // Zero-initialize. 
// CHECK: [[T0:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[BYREF]], i32 0, i32 6 @@ -409,14 +409,12 @@ // CHECK-NEXT: [[SLOT:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[BYREF]], i32 0, i32 6 // The assignment. - // CHECK: [[T0:%.*]] = bitcast void ()* {{%.*}} to i8* - // CHECK-NEXT: [[T1:%.*]] = call i8* @llvm.objc.retainBlock(i8* [[T0]]) - // CHECK-NEXT: [[T2:%.*]] = bitcast i8* [[T1]] to void ()* + // CHECK: [[T2:%.*]] = bitcast <{ i8*, i32, i32, i8*, %[[STRUCT_BLOCK_DESCRIPTOR]]*, i8* }>* [[BLOCK3]] to void ()* // CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[BYREF]], i32 0, i32 1 // CHECK-NEXT: [[T4:%.*]] = load [[BYREF_T]]*, [[BYREF_T]]** [[T3]] // CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds [[BYREF_T]], [[BYREF_T]]* [[T4]], i32 0, i32 6 // CHECK-NEXT: [[T6:%.*]] = load void ()*, void ()** [[T5]], align 8 - // CHECK-NEXT: store void ()* {{%.*}}, void ()** [[T5]], align 8 + // CHECK-NEXT: store void ()* [[T2]], void ()** [[T5]], align 8 // CHECK-NEXT: [[T7:%.*]] = bitcast void ()* [[T6]] to i8* // CHECK-NEXT: call void @llvm.objc.release(i8* [[T7]]) Index: test/CodeGenObjCXX/arc-blocks.mm =================================================================== --- test/CodeGenObjCXX/arc-blocks.mm +++ test/CodeGenObjCXX/arc-blocks.mm @@ -201,3 +201,123 @@ ^{ (void)t0; (void)t1; (void)t2; (void)t3; (void)t4; (void)t5; }; } } + +// Test that calls to @llvm.objc.retainBlock aren't emitted in some cases. 
+ +namespace test_block_retain { + typedef void (^BlockTy)(); + + void foo1(id); + +// CHECK-LABEL: define void @_ZN17test_block_retain14initializationEP11objc_object( +// CHECK-NOT: @llvm.objc.retainBlock( + void initialization(id a) { + BlockTy b0 = ^{ foo1(a); }; + BlockTy b1 = (^{ foo1(a); }); + b0(); + b1(); + } + +// CHECK-LABEL: define void @_ZN17test_block_retain20initializationStaticEP11objc_object( +// CHECK: @llvm.objc.retainBlock( + void initializationStatic(id a) { + static BlockTy b0 = ^{ foo1(a); }; + b0(); + } + +// CHECK-LABEL: define void @_ZN17test_block_retain15initialization2EP11objc_object +// CHECK: %[[B0:.*]] = alloca void ()*, align 8 +// CHECK: %[[B1:.*]] = alloca void ()*, align 8 +// CHECK: load void ()*, void ()** %[[B0]], align 8 +// CHECK-NOT: @llvm.objc.retainBlock +// CHECK: %[[V9:.*]] = load void ()*, void ()** %[[B0]], align 8 +// CHECK: %[[V10:.*]] = bitcast void ()* %[[V9]] to i8* +// CHECK: %[[V11:.*]] = call i8* @llvm.objc.retainBlock(i8* %[[V10]]) +// CHECK: %[[V12:.*]] = bitcast i8* %[[V11]] to void ()* +// CHECK: store void ()* %[[V12]], void ()** %[[B1]], align 8 + void initialization2(id a) { + BlockTy b0 = ^{ foo1(a); }; + b0(); + BlockTy b1 = b0; // can't optimize this yet. + b1(); + } + +// CHECK-LABEL: define void @_ZN17test_block_retain10assignmentEP11objc_object( +// CHECK-NOT: @llvm.objc.retainBlock( + void assignment(id a) { + BlockTy b0; + (b0) = ^{ foo1(a); }; + b0(); + b0 = (^{ foo1(a); }); + b0(); + } + +// CHECK-LABEL: define void @_ZN17test_block_retain16assignmentStaticEP11objc_object( +// CHECK: @llvm.objc.retainBlock( + void assignmentStatic(id a) { + static BlockTy b0; + b0 = ^{ foo1(a); }; + b0(); + } + +// CHECK-LABEL: define void @_ZN17test_block_retain21assignmentConditionalEP11objc_objectb( +// CHECK: @llvm.objc.retainBlock( + void assignmentConditional(id a, bool c) { + BlockTy b0; + if (c) + // can't optimize this since 'b0' is declared in the outer scope. 
+ b0 = ^{ foo1(a); }; + b0(); + } + +// CHECK-LABEL: define void @_ZN17test_block_retain11assignment2EP11objc_object( +// CHECK: %[[B0:.*]] = alloca void ()*, align 8 +// CHECK: %[[B1:.*]] = alloca void ()*, align 8 +// CHECK-NOT: @llvm.objc.retainBlock +// CHECK: store void ()* null, void ()** %[[B1]], align 8 +// CHECK: %[[V9:.*]] = load void ()*, void ()** %[[B0]], align 8 +// CHECK: %[[V10:.*]] = bitcast void ()* %[[V9]] to i8* +// CHECK: %[[V11:.*]] = call i8* @llvm.objc.retainBlock(i8* %[[V10]] +// CHECK: %[[V12:.*]] = bitcast i8* %[[V11]] to void ()* +// CHECK: store void ()* %[[V12]], void ()** %[[B1]], align 8 + void assignment2(id a) { + BlockTy b0 = ^{ foo1(a); }; + b0(); + BlockTy b1; + b1 = b0; // can't optimize this yet. + b1(); + } + +// We cannot remove the call to @llvm.objc.retainBlock if the variable is of type id. + +// CHECK: define void @_ZN17test_block_retain21initializationObjCPtrEP11objc_object( +// CHECK: alloca i8*, align 8 +// CHECK: %[[B0:.*]] = alloca i8*, align 8 +// CHECK: %[[BLOCK:.*]] = alloca <{ i8*, i32, i32, i8*, %[[STRUCT_BLOCK_DESCRIPTOR]]*, i8* }>, align 8 +// CHECK: %[[V3:.*]] = bitcast <{ i8*, i32, i32, i8*, %[[STRUCT_BLOCK_DESCRIPTOR]]*, i8* }>* %[[BLOCK]] to void ()* +// CHECK: %[[V4:.*]] = bitcast void ()* %[[V3]] to i8* +// CHECK: %[[V5:.*]] = call i8* @llvm.objc.retainBlock(i8* %[[V4]]) +// CHECK: %[[V6:.*]] = bitcast i8* %[[V5]] to void ()* +// CHECK: %[[V7:.*]] = bitcast void ()* %[[V6]] to i8* +// CHECK: store i8* %[[V7]], i8** %[[B0]], align 8 + void initializationObjCPtr(id a) { + id b0 = ^{ foo1(a); }; + ((BlockTy)b0)(); + } + +// CHECK: define void @_ZN17test_block_retain17assignmentObjCPtrEP11objc_object( +// CHECK: %[[B0:.*]] = alloca void ()*, align 8 +// CHECK: %[[B1:.*]] = alloca i8*, align 8 +// CHECK: %[[V4:.*]] = load void ()*, void ()** %[[B0]], align 8 +// CHECK: %[[V5:.*]] = bitcast void ()* %[[V4]] to i8* +// CHECK: %[[V6:.*]] = call i8* @llvm.objc.retainBlock(i8* %[[V5]]) +// CHECK: %[[V7:.*]] = 
bitcast i8* %[[V6]] to void ()* +// CHECK: %[[V8:.*]] = bitcast void ()* %[[V7]] to i8* +// CHECK: store i8* %[[V8]], i8** %[[B1]], align 8 + void assignmentObjCPtr(id a) { + BlockTy b0 = ^{ foo1(a); }; + id b1; + b1 = b0; + ((BlockTy)b1)(); + } +} Index: test/PCH/arc-blocks.mm =================================================================== --- /dev/null +++ test/PCH/arc-blocks.mm @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -fobjc-arc -fblocks -std=c++1y -emit-pch %s -o %t +// RUN: %clang_cc1 -fobjc-arc -fblocks -std=c++1y -include-pch %t -emit-llvm -o - %s | FileCheck %s + +#ifndef HEADER_INCLUDED +#define HEADER_INCLUDED + +namespace test_block_retain { + typedef void (^BlockTy)(); + void foo1(id); + + inline void initialization(id a) { + // Call to @llvm.objc.retainBlock isn't needed. + BlockTy b0 = ^{ foo1(a); }; + b0(); + } + + inline void assignmentConditional(id a, bool c) { + BlockTy b0; + if (c) + // @llvm.objc.retainBlock is called since 'b0' is declared in the outer scope. + b0 = ^{ foo1(a); }; + b0(); + } +} + +#else + +// CHECK: %[[STRUCT_BLOCK_DESCRIPTOR:.*]] = type { i64, i64 } + +namespace test_block_retain { +// CHECK-LABEL: define linkonce_odr void @_ZN17test_block_retain14initializationEP11objc_object( +// CHECK-NOT: call i8* @llvm.objc.retainBlock( + + void test_initialization(id a) { + initialization(a); + } + +// CHECK-LABEL: define void @_ZN17test_block_retain26test_assignmentConditionalEP11objc_objectb( +// CHECK: %[[BLOCK:.*]] = alloca <{ i8*, i32, i32, i8*, %[[STRUCT_BLOCK_DESCRIPTOR]]*, i8* }>, align 8 +// CHECK: %[[V4:.*]] = bitcast <{ i8*, i32, i32, i8*, %[[STRUCT_BLOCK_DESCRIPTOR]]*, i8* }>* %[[BLOCK]] to void ()* +// CHECK: %[[V5:.*]] = bitcast void ()* %[[V4]] to i8* +// CHECK: call i8* @llvm.objc.retainBlock(i8* %[[V5]]) + + void test_assignmentConditional(id a, bool c) { + assignmentConditional(a, c); + } +} + +#endif