Index: lib/CodeGen/CGBlocks.cpp
===================================================================
--- lib/CodeGen/CGBlocks.cpp
+++ lib/CodeGen/CGBlocks.cpp
@@ -690,6 +690,8 @@
   // If the block has no captures, we won't have a pre-computed
   // layout for it.
   if (!blockExpr->getBlockDecl()->hasCaptures()) {
+    if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr))
+      return Block;
     CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
     computeBlockInfo(CGM, this, blockInfo);
     blockInfo.BlockExpression = blockExpr;
@@ -1051,9 +1053,19 @@
   return addr;
 }
 
+void CodeGenModule::setAddrOfGlobalBlock(const BlockExpr *BE,
+                                         llvm::Constant *Addr) {
+  bool Ok = EmittedGlobalBlocks.insert(std::make_pair(BE, Addr)).second;
+  (void)Ok;
+  assert(Ok && "Trying to replace an already-existing global block!");
+}
+
 llvm::Constant *
 CodeGenModule::GetAddrOfGlobalBlock(const BlockExpr *BE,
                                     StringRef Name) {
+  if (llvm::Constant *Block = getAddrOfGlobalBlockIfEmitted(BE))
+    return Block;
+
   CGBlockInfo blockInfo(BE->getBlockDecl(), Name);
   blockInfo.BlockExpression = BE;
 
@@ -1078,6 +1090,11 @@
                                         const CGBlockInfo &blockInfo,
                                         llvm::Constant *blockFn) {
   assert(blockInfo.CanBeGlobal);
+  // Callers should detect this case on their own: calling this function
+  // generally requires computing layout information, which is a waste of time
+  // if we've already emitted this block.
+  assert(!CGM.getAddrOfGlobalBlockIfEmitted(blockInfo.BlockExpression) &&
+         "Refusing to re-emit a global block.");
 
   // Generate the constants for the block literal initializer.
   llvm::Constant *fields[BlockHeaderSize];
@@ -1088,7 +1105,7 @@
   // __flags
   BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
   if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET;
-  
+
   fields[1] = llvm::ConstantInt::get(CGM.IntTy, flags.getBitMask());
 
   // Reserved
@@ -1112,9 +1129,12 @@
   literal->setAlignment(blockInfo.BlockAlign.getQuantity());
 
   // Return a constant of the appropriately-casted type.
-  llvm::Type *requiredType =
+  llvm::Type *RequiredType =
     CGM.getTypes().ConvertType(blockInfo.getBlockExpr()->getType());
-  return llvm::ConstantExpr::getBitCast(literal, requiredType);
+  llvm::Constant *Result =
+      llvm::ConstantExpr::getBitCast(literal, RequiredType);
+  CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result);
+  return Result;
 }
 
 void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -1353,7 +1353,6 @@
   //===--------------------------------------------------------------------===//
 
   llvm::Value *EmitBlockLiteral(const BlockExpr *);
-  llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
   static void destroyBlockInfos(CGBlockInfo *info);
 
   llvm::Function *GenerateBlockFunction(GlobalDecl GD,
@@ -2573,6 +2572,9 @@
                         OMPPrivateScope &LoopScope);
 
 private:
+  /// Helpers for blocks
+  llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
+
   /// Helpers for the OpenMP loop directives.
   void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit);
   void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false);
Index: lib/CodeGen/CodeGenModule.h
===================================================================
--- lib/CodeGen/CodeGenModule.h
+++ lib/CodeGen/CodeGenModule.h
@@ -457,6 +457,10 @@
   bool isTriviallyRecursive(const FunctionDecl *F);
   bool shouldEmitFunction(GlobalDecl GD);
 
+  /// Map of the global blocks we've emitted, so that we don't have to re-emit
+  /// them if the constexpr evaluator gets aggressive.
+  llvm::DenseMap<const BlockExpr *, llvm::Constant *> EmittedGlobalBlocks;
+
   /// @name Cache for Blocks Runtime Globals
   /// @{
@@ -774,6 +778,16 @@
   /// Gets the address of a block which requires no captures.
   llvm::Constant *GetAddrOfGlobalBlock(const BlockExpr *BE, StringRef Name);
+
+  /// Returns the address of a block which requires no captures, or null if
+  /// we've yet to emit the block for BE.
+  llvm::Constant *getAddrOfGlobalBlockIfEmitted(const BlockExpr *BE) {
+    return EmittedGlobalBlocks.lookup(BE);
+  }
+
+  /// Notes that BE's global block is available via Addr. Asserts that BE
+  /// isn't already emitted.
+  void setAddrOfGlobalBlock(const BlockExpr *BE, llvm::Constant *Addr);
 
   /// Return a pointer to a constant CFString object for the given string.
   ConstantAddress GetAddrOfConstantCFString(const StringLiteral *Literal);
Index: test/CodeGenCXX/block-in-ctor-dtor.cpp
===================================================================
--- test/CodeGenCXX/block-in-ctor-dtor.cpp
+++ test/CodeGenCXX/block-in-ctor-dtor.cpp
@@ -42,7 +42,5 @@
 // CHECK-LABEL: define internal void @___ZN4ZoneD2Ev_block_invoke_
 // CHECK-LABEL: define internal void @___ZN1XC2Ev_block_invoke
 // CHECK-LABEL: define internal void @___ZN1XC2Ev_block_invoke_
-// CHECK-LABEL: define internal void @___ZN1XC1Ev_block_invoke
-// CHECK-LABEL: define internal void @___ZN1XC1Ev_block_invoke_
 // CHECK-LABEL: define internal void @___ZN1XD2Ev_block_invoke
 // CHECK-LABEL: define internal void @___ZN1XD2Ev_block_invoke_
Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl
===================================================================
--- test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -2,6 +2,8 @@
 
 typedef void (^bl_t)(local void *);
 
+// N.B. The check here only exists to set BL_GLOBAL
+// CHECK: @block_G = {{.*}}bitcast ([[BL_GLOBAL:[^@]+@__block_literal_global(\.[0-9]+)?]]
 const bl_t block_G = (bl_t) ^ (local void *a) {};
 
 kernel void device_side_enqueue(global int *a, global int *b, int i) {
@@ -84,27 +86,23 @@
                  },
                  c);
 
+  // The full types of these expressions are long (and repeated elsewhere), so
+  // we capture them as part of the regex for convenience and clarity.
+  // CHECK: store void ()* bitcast ([[BL_A:[^@]+@__block_literal_global.[0-9]+]] to void ()*), void ()** %block_A
   void (^const block_A)(void) = ^{
     return;
   };
+  // CHECK: store void (i8 addrspace(2)*)* bitcast ([[BL_B:[^@]+@__block_literal_global.[0-9]+]] to void (i8 addrspace(2)*)*), void (i8 addrspace(2)*)** %block_B
   void (^const block_B)(local void *) = ^(local void *a) {
     return;
   };
 
-  // CHECK: [[BL:%[0-9]+]] = load void ()*, void ()** %block_A
-  // CHECK: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
-  // CHECK: call i32 @__get_kernel_work_group_size_impl(i8* [[BL_I8]])
+  // CHECK: call i32 @__get_kernel_work_group_size_impl(i8* bitcast ([[BL_A]] to i8*))
   unsigned size = get_kernel_work_group_size(block_A);
-  // CHECK: [[BL:%[0-9]+]] = load void (i8 addrspace(2)*)*, void (i8 addrspace(2)*)** %block_B
-  // CHECK: [[BL_I8:%[0-9]+]] = bitcast void (i8 addrspace(2)*)* [[BL]] to i8*
-  // CHECK: call i32 @__get_kernel_work_group_size_impl(i8* [[BL_I8]])
+  // CHECK: call i32 @__get_kernel_work_group_size_impl(i8* bitcast ([[BL_B]] to i8*))
   size = get_kernel_work_group_size(block_B);
-  // CHECK: [[BL:%[0-9]+]] = load void ()*, void ()** %block_A
-  // CHECK: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8*
-  // CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* [[BL_I8]])
+  // CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* bitcast ([[BL_A]] to i8*))
   size = get_kernel_preferred_work_group_size_multiple(block_A);
-  // CHECK: [[BL:%[0-9]+]] = load void (i8 addrspace(2)*)*, void (i8 addrspace(2)*)* addrspace(1)* @block_G
-  // CHECK: [[BL_I8:%[0-9]+]] = bitcast void (i8 addrspace(2)*)* [[BL]] to i8*
-  // CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* [[BL_I8]])
+  // CHECK: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* bitcast ([[BL_GLOBAL]] to i8*))
   size = get_kernel_preferred_work_group_size_multiple(block_G);
 }
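For reference, the caching added above reduces to a small memoization pattern: look the block literal up by its BlockExpr before doing any layout work, and record each emitted literal exactly once. The standalone C++ sketch below illustrates that pattern only; Expr, Constant, Module, and the member names are stand-ins invented for this example, and std::unordered_map replaces llvm::DenseMap, so this is not the Clang implementation itself.

// Toy memoization of "global block" emission, mirroring the patch's
// getAddrOfGlobalBlockIfEmitted / setAddrOfGlobalBlock / GetAddrOfGlobalBlock.
#include <cassert>
#include <cstdio>
#include <string>
#include <unordered_map>

struct Expr { std::string Name; };         // stand-in for clang::BlockExpr
struct Constant { std::string Literal; };  // stand-in for llvm::Constant

class Module {
  // Map of the blocks we've already "emitted", keyed by the expression.
  std::unordered_map<const Expr *, Constant *> EmittedGlobalBlocks;

public:
  // Returns the cached literal, or null if we've yet to emit one for E.
  Constant *getIfEmitted(const Expr *E) const {
    auto It = EmittedGlobalBlocks.find(E);
    return It == EmittedGlobalBlocks.end() ? nullptr : It->second;
  }

  // Records E's literal; a given expression may be recorded only once.
  void setEmitted(const Expr *E, Constant *Addr) {
    bool Ok = EmittedGlobalBlocks.emplace(E, Addr).second;
    (void)Ok;
    assert(Ok && "Trying to replace an already-existing global block!");
  }

  // Reuses the cached literal when present; otherwise "emits" it (the
  // expensive layout work in the real code) and caches the result.
  Constant *getOrEmit(const Expr *E) {
    if (Constant *C = getIfEmitted(E))
      return C;
    // Leaked deliberately to keep the toy short.
    Constant *C = new Constant{"__block_literal_global." + E->Name};
    setEmitted(E, C);
    return C;
  }
};

int main() {
  Module M;
  Expr Block{"block_A"};
  Constant *First = M.getOrEmit(&Block);
  Constant *Second = M.getOrEmit(&Block); // hits the cache; no re-emission
  std::printf("%s %s\n", First->Literal.c_str(),
              First == Second ? "(reused)" : "(duplicated!)");
}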