Index: cfe/trunk/lib/CodeGen/CGBlocks.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGBlocks.cpp +++ cfe/trunk/lib/CodeGen/CGBlocks.cpp @@ -740,27 +740,19 @@ } /// Emit a block literal expression in the current function. -llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr, - llvm::Function **InvokeF) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { // If the block has no captures, we won't have a pre-computed // layout for it. if (!blockExpr->getBlockDecl()->hasCaptures()) { // The block literal is emitted as a global variable, and the block invoke // function has to be extracted from its initializer. if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) { - if (InvokeF) { - auto *GV = cast( - cast(Block)->stripPointerCasts()); - auto *BlockInit = cast(GV->getInitializer()); - *InvokeF = cast( - BlockInit->getAggregateElement(2)->stripPointerCasts()); - } return Block; } CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName()); computeBlockInfo(CGM, this, blockInfo); blockInfo.BlockExpression = blockExpr; - return EmitBlockLiteral(blockInfo, InvokeF); + return EmitBlockLiteral(blockInfo); } // Find the block info for this block and take ownership of it. @@ -769,11 +761,10 @@ blockExpr->getBlockDecl())); blockInfo->BlockExpression = blockExpr; - return EmitBlockLiteral(*blockInfo, InvokeF); + return EmitBlockLiteral(*blockInfo); } -llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, - llvm::Function **InvokeF) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; auto GenVoidPtrTy = IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; @@ -788,8 +779,6 @@ BlockCGF.SanOpts = SanOpts; auto *InvokeFn = BlockCGF.GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); - if (InvokeF) - *InvokeF = InvokeFn; auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy); // If there is nothing to capture, we can emit this as a global block. @@ -1024,6 +1013,11 @@ llvm::Value *result = Builder.CreatePointerCast( blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType())); + if (IsOpenCL) { + CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn, + result); + } + return result; } @@ -1287,6 +1281,10 @@ llvm::Constant *Result = llvm::ConstantExpr::getPointerCast(literal, RequiredType); CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result); + if (CGM.getContext().getLangOpts().OpenCL) + CGM.getOpenCLRuntime().recordBlockInfo( + blockInfo.BlockExpression, + cast(blockFn->stripPointerCasts()), Result); return Result; } Index: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h =================================================================== --- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h +++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h @@ -23,6 +23,7 @@ namespace clang { +class BlockExpr; class Expr; class VarDecl; @@ -39,8 +40,9 @@ /// Structure for enqueued block information. struct EnqueuedBlockInfo { - llvm::Function *Kernel; /// Enqueued block kernel. - llvm::Value *BlockArg; /// The first argument to enqueued block kernel. + llvm::Function *InvokeFunc; /// Block invoke function. + llvm::Function *Kernel; /// Enqueued block kernel. + llvm::Value *BlockArg; /// The first argument to enqueued block kernel. }; /// Maps block expression to block information. llvm::DenseMap EnqueuedBlockMap; @@ -76,6 +78,15 @@ /// \return enqueued block information for enqueued block. EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E); + + /// \brief Record invoke function and block literal emitted during normal + /// codegen for a block expression. The information is used by + /// emitOpenCLEnqueuedBlock to emit wrapper kernel. + /// + /// \param InvokeF invoke function emitted for the block expression. + /// \param Block block literal emitted for the block expression. + void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, + llvm::Value *Block); }; } Index: cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp +++ cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp @@ -112,37 +112,51 @@ CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); } +/// Record emitted llvm invoke function and llvm block literal for the +/// corresponding block expression. +void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, + llvm::Function *InvokeF, + llvm::Value *Block) { + assert(EnqueuedBlockMap.find(E) == EnqueuedBlockMap.end() && + "Block expression emitted twice"); + assert(isa(InvokeF) && "Invalid invoke function"); + assert(Block->getType()->isPointerTy() && "Invalid block literal type"); + EnqueuedBlockMap[E].InvokeFunc = InvokeF; + EnqueuedBlockMap[E].BlockArg = Block; + EnqueuedBlockMap[E].Kernel = nullptr; +} + CGOpenCLRuntime::EnqueuedBlockInfo CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { + CGF.EmitScalarExpr(E); + // The block literal may be assigned to a const variable. Chasing down // to get the block literal. if (auto DR = dyn_cast(E)) { E = cast(DR->getDecl())->getInit(); } + E = E->IgnoreImplicit(); if (auto Cast = dyn_cast(E)) { E = Cast->getSubExpr(); } auto *Block = cast(E); - // The same block literal may be enqueued multiple times. Cache it if - // possible. - auto Loc = EnqueuedBlockMap.find(Block); - if (Loc != EnqueuedBlockMap.end()) { - return Loc->second; + assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && + "Block expression not emitted"); + + // Do not emit the block wrapper again if it has been emitted. + if (EnqueuedBlockMap[Block].Kernel) { + return EnqueuedBlockMap[Block]; } - // Emit block literal as a common block expression and get the block invoke - // function. - llvm::Function *Invoke; - auto *V = CGF.EmitBlockLiteral(cast(Block), &Invoke); auto *F = CGF.getTargetHooks().createEnqueuedBlockKernel( - CGF, Invoke, V->stripPointerCasts()); + CGF, EnqueuedBlockMap[Block].InvokeFunc, + EnqueuedBlockMap[Block].BlockArg->stripPointerCasts()); // The common part of the post-processing of the kernel goes here. F->addFnAttr(llvm::Attribute::NoUnwind); F->setCallingConv( CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel)); - EnqueuedBlockInfo Info{F, V}; - EnqueuedBlockMap[Block] = Info; - return Info; + EnqueuedBlockMap[Block].Kernel = F; + return EnqueuedBlockMap[Block]; } Index: cfe/trunk/lib/CodeGen/CodeGenFunction.h =================================================================== --- cfe/trunk/lib/CodeGen/CodeGenFunction.h +++ cfe/trunk/lib/CodeGen/CodeGenFunction.h @@ -1583,10 +1583,7 @@ /// \return an LLVM value which is a pointer to a struct which contains /// information about the block, including the block invoke function, the /// captured variables, etc. - /// \param InvokeF will contain the block invoke function if it is not - /// nullptr. - llvm::Value *EmitBlockLiteral(const BlockExpr *, - llvm::Function **InvokeF = nullptr); + llvm::Value *EmitBlockLiteral(const BlockExpr *); static void destroyBlockInfos(CGBlockInfo *info); llvm::Function *GenerateBlockFunction(GlobalDecl GD, @@ -3010,11 +3007,8 @@ LValue EmitOMPSharedLValue(const Expr *E); private: - /// Helpers for blocks. Returns invoke function by \p InvokeF if it is not - /// nullptr. It should be called without \p InvokeF if the caller does not - /// need invoke function to be returned. - llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info, - llvm::Function **InvokeF = nullptr); + /// Helpers for blocks. + llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info); /// struct with the values to be passed to the OpenMP loop-related functions struct OMPLoopArguments { Index: cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl =================================================================== --- cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -2,6 +2,10 @@ typedef struct {int a;} ndrange_t; +void callee(long id, global long *out) { + out[id] = id; +} + // CHECK-LABEL: define amdgpu_kernel void @test kernel void test(global char *a, char b, global long *c, long d) { queue_t default_queue; @@ -24,6 +28,12 @@ c[0] = d; ((local int*)lp)[0] = 1; }, 100); + + void (^block)(void) = ^{ + callee(d, c); + }; + + enqueue_kernel(default_queue, flags, ndrange, block); } // CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i8 }>) @@ -42,4 +52,7 @@ // CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*) // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} +// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i8*, i64, i64 addrspace(1)* }>) +// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} + // CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" } Index: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl =================================================================== --- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -29,6 +29,10 @@ // COMMON: define internal spir_func void [[INV_G]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}}) const bl_t block_G = (bl_t) ^ (local void *a) {}; +void callee(int id, __global int *out) { + out[id] = id; +} + // COMMON-LABEL: define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i) kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON: %default_queue = alloca %opencl.queue_t* @@ -282,6 +286,21 @@ // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); + void (^block_C)(void) = ^{ + callee(i, a); + }; + + // Emits block literal on stack and block kernel [[INVLK3]]. + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke + // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue + // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)* + // COMMON-LABEL: call i32 @__enqueue_kernel_basic( + // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), + // COMMON-SAME: i8 addrspace(4)* [[BL_I8]]) + enqueue_kernel(default_queue, flags, ndrange, block_C); + // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INV9]]. // COMMON: call i32 @__get_kernel_work_group_size_impl( // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), @@ -333,6 +352,7 @@ // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}}) // COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}}) // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}}) +// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}}) // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})