Index: lib/CodeGen/CGBlocks.cpp =================================================================== --- lib/CodeGen/CGBlocks.cpp +++ lib/CodeGen/CGBlocks.cpp @@ -740,27 +740,19 @@ } /// Emit a block literal expression in the current function. -llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr, - llvm::Function **InvokeF) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { // If the block has no captures, we won't have a pre-computed // layout for it. if (!blockExpr->getBlockDecl()->hasCaptures()) { // The block literal is emitted as a global variable, and the block invoke // function has to be extracted from its initializer. if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) { - if (InvokeF) { - auto *GV = cast( - cast(Block)->stripPointerCasts()); - auto *BlockInit = cast(GV->getInitializer()); - *InvokeF = cast( - BlockInit->getAggregateElement(2)->stripPointerCasts()); - } return Block; } CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName()); computeBlockInfo(CGM, this, blockInfo); blockInfo.BlockExpression = blockExpr; - return EmitBlockLiteral(blockInfo, InvokeF); + return EmitBlockLiteral(blockInfo); } // Find the block info for this block and take ownership of it. @@ -769,11 +761,10 @@ blockExpr->getBlockDecl())); blockInfo->BlockExpression = blockExpr; - return EmitBlockLiteral(*blockInfo, InvokeF); + return EmitBlockLiteral(*blockInfo); } -llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo, - llvm::Function **InvokeF) { +llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; auto GenVoidPtrTy = IsOpenCL ? 
CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; @@ -788,8 +779,6 @@ BlockCGF.SanOpts = SanOpts; auto *InvokeFn = BlockCGF.GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); - if (InvokeF) - *InvokeF = InvokeFn; auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy); // If there is nothing to capture, we can emit this as a global block. @@ -1024,6 +1013,11 @@ llvm::Value *result = Builder.CreatePointerCast( blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType())); + if (IsOpenCL) { + CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn, + result); + } + return result; } @@ -1287,6 +1281,10 @@ llvm::Constant *Result = llvm::ConstantExpr::getPointerCast(literal, RequiredType); CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result); + if (CGM.getContext().getLangOpts().OpenCL) + CGM.getOpenCLRuntime().recordBlockInfo( + blockInfo.BlockExpression, + cast(blockFn->stripPointerCasts()), Result); return Result; } Index: lib/CodeGen/CGOpenCLRuntime.h =================================================================== --- lib/CodeGen/CGOpenCLRuntime.h +++ lib/CodeGen/CGOpenCLRuntime.h @@ -23,6 +23,7 @@ namespace clang { +class BlockExpr; class Expr; class VarDecl; @@ -39,8 +40,9 @@ /// Structure for enqueued block information. struct EnqueuedBlockInfo { - llvm::Function *Kernel; /// Enqueued block kernel. - llvm::Value *BlockArg; /// The first argument to enqueued block kernel. + llvm::Function *InvokeFunc; ///< Block invoke function. + llvm::Function *Kernel; ///< Enqueued block kernel. + llvm::Value *BlockArg; ///< The first argument to enqueued block kernel. }; /// Maps block expression to block information. llvm::DenseMap EnqueuedBlockMap; @@ -76,6 +78,15 @@ /// \return enqueued block information for enqueued block. 
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E); + + /// \brief Record invoke function and block literal emitted during normal + /// codegen for a block expression. The information is used by + /// emitOpenCLEnqueuedBlock to emit the wrapper kernel. + /// + /// \param InvokeF invoke function emitted for the block expression. + /// \param Block block literal emitted for the block expression. + void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, + llvm::Value *Block); }; } Index: lib/CodeGen/CGOpenCLRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenCLRuntime.cpp +++ lib/CodeGen/CGOpenCLRuntime.cpp @@ -112,37 +112,50 @@ CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); } +/// Record the invoke function and block literal of a block expression. +void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, + llvm::Function *InvokeF, + llvm::Value *Block) { + assert(EnqueuedBlockMap.find(E) == EnqueuedBlockMap.end() && + "Block expression emitted twice"); + assert(isa(InvokeF) && "Invalid invoke function"); + assert(Block->getType()->isPointerTy() && "Invalid block literal type"); + EnqueuedBlockMap[E].InvokeFunc = InvokeF; + EnqueuedBlockMap[E].BlockArg = Block; + EnqueuedBlockMap[E].Kernel = nullptr; +} + CGOpenCLRuntime::EnqueuedBlockInfo CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { + CGF.EmitScalarExpr(E); + // The block literal may be assigned to a const variable. Chasing down // to get the block literal. if (auto DR = dyn_cast(E)) { E = cast(DR->getDecl())->getInit(); } + E = E->IgnoreImplicit(); if (auto Cast = dyn_cast(E)) { E = Cast->getSubExpr(); } auto *Block = cast(E); - // The same block literal may be enqueued multiple times. Cache it if - // possible. 
- auto Loc = EnqueuedBlockMap.find(Block); - if (Loc != EnqueuedBlockMap.end()) { - return Loc->second; + assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && + "Block expression not emitted"); + + // Do not emit the block wrapper again if it has been emitted. + if (EnqueuedBlockMap[Block].Kernel) { + return EnqueuedBlockMap[Block]; } - // Emit block literal as a common block expression and get the block invoke - // function. - llvm::Function *Invoke; - auto *V = CGF.EmitBlockLiteral(cast(Block), &Invoke); auto *F = CGF.getTargetHooks().createEnqueuedBlockKernel( - CGF, Invoke, V->stripPointerCasts()); + CGF, EnqueuedBlockMap[Block].InvokeFunc, + EnqueuedBlockMap[Block].BlockArg->stripPointerCasts()); // The common part of the post-processing of the kernel goes here. F->addFnAttr(llvm::Attribute::NoUnwind); F->setCallingConv( CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel)); - EnqueuedBlockInfo Info{F, V}; - EnqueuedBlockMap[Block] = Info; - return Info; + EnqueuedBlockMap[Block].Kernel = F; + return EnqueuedBlockMap[Block]; } Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -1583,10 +1583,7 @@ /// \return an LLVM value which is a pointer to a struct which contains /// information about the block, including the block invoke function, the /// captured variables, etc. - /// \param InvokeF will contain the block invoke function if it is not - /// nullptr. - llvm::Value *EmitBlockLiteral(const BlockExpr *, - llvm::Function **InvokeF = nullptr); + llvm::Value *EmitBlockLiteral(const BlockExpr *); static void destroyBlockInfos(CGBlockInfo *info); llvm::Function *GenerateBlockFunction(GlobalDecl GD, @@ -3010,11 +3007,8 @@ LValue EmitOMPSharedLValue(const Expr *E); private: - /// Helpers for blocks. Returns invoke function by \p InvokeF if it is not - /// nullptr. 
It should be called without \p InvokeF if the caller does not - /// need invoke function to be returned. - llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info, - llvm::Function **InvokeF = nullptr); + /// Helpers for blocks. + llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info); /// struct with the values to be passed to the OpenMP loop-related functions struct OMPLoopArguments { Index: test/CodeGenOpenCL/enqueue-block-with-captures.cl =================================================================== --- /dev/null +++ test/CodeGenOpenCL/enqueue-block-with-captures.cl @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple spir-unknown-unknown | FileCheck %s + +typedef struct {int a;} ndrange_t; + +void callee(int id, __global int* out) { + out[id] = id; +} + +kernel void test(int id, __global int* out) { + + void (^block)(void) = ^{ callee(id, out); }; + + queue_t queue; + ndrange_t ndrange; + // CHECK: call i32 @__enqueue_kernel_basic + enqueue_kernel(queue, 0, ndrange, block); +} + +// CHECK: define internal spir_kernel void @__test_block_invoke_kernel(i8 addrspace(4)*) +// CHECK: call void @__test_block_invoke(i8 addrspace(4)* %0)