Index: lib/CodeGen/CGBlocks.h =================================================================== --- lib/CodeGen/CGBlocks.h +++ lib/CodeGen/CGBlocks.h @@ -189,6 +189,8 @@ return reinterpret_cast(Data); } + ImplicitParamDecl* FunctionArgDecl; + static Capture makeIndex(unsigned index, CharUnits offset) { Capture v; v.Data = (index << 1) | 1; @@ -200,7 +202,13 @@ Capture v; v.Data = reinterpret_cast(value); return v; - } + } + + /* + ~Capture(){ + if(FunctionArgDecl) delete FunctionArgDecl; + } + */ }; /// CanBeGlobal - True if the block can be global, i.e. it has @@ -225,6 +233,8 @@ /// The mapping of allocated indexes within the block. llvm::DenseMap Captures; + llvm::DenseMap FunctionArgCaptures; + Address LocalAddress; llvm::StructType *StructureType; const BlockDecl *Block; Index: lib/CodeGen/CGBlocks.cpp =================================================================== --- lib/CodeGen/CGBlocks.cpp +++ lib/CodeGen/CGBlocks.cpp @@ -45,7 +45,8 @@ /// Build the given block as a global block. static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, const CGBlockInfo &blockInfo, - llvm::Constant *blockFn); + llvm::Constant *blockFn, + llvm::Constant *blockInvokeWrapper); /// Build the helper function to copy a block. static llvm::Constant *buildCopyHelper(CodeGenModule &CGM, @@ -309,7 +310,14 @@ assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign())); info.BlockAlign = CGM.getPointerAlign(); - info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize(); + + bool isOCL2X = (CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200); + auto blockHeaderSize = (isOCL2X)? BlockHeaderSize + 3 : BlockHeaderSize; + + info.BlockSize = (isOCL2X)? 
+ 5 * CGM.getPointerSize() + 3 * CGM.getIntSize() : + 3 * CGM.getPointerSize() + 2 * CGM.getIntSize(); assert(elementTypes.empty()); elementTypes.push_back(CGM.VoidPtrTy); @@ -317,8 +325,13 @@ elementTypes.push_back(CGM.IntTy); elementTypes.push_back(CGM.VoidPtrTy); elementTypes.push_back(CGM.getBlockDescriptorType()); + if(isOCL2X){ + elementTypes.push_back(CGM.IntTy); + elementTypes.push_back(CGM.VoidPtrTy); + elementTypes.push_back(CGM.VoidPtrTy); + } - assert(elementTypes.size() == BlockHeaderSize); + assert(elementTypes.size() == blockHeaderSize); } /// Compute the layout of the given block. Attempts to lay the block @@ -702,11 +715,27 @@ = CodeGenFunction(CGM, true).GenerateBlockFunction(CurGD, blockInfo, LocalDeclMap, isLambdaConv); + llvm::Function *targetFn = dyn_cast(blockFn); blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy); + llvm::Constant *oclCapExtractFn = nullptr; + llvm::Constant *oclInvokeWrapperFn = nullptr; + if(getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200){ + oclCapExtractFn + = CodeGenFunction(CGM, true).GenerateOCLCapturesCopyFunction(blockInfo); + oclCapExtractFn = llvm::ConstantExpr::getBitCast(oclCapExtractFn, VoidPtrTy); + + oclInvokeWrapperFn = CodeGenFunction(CGM, true) + .GenerateBlockFunctionWrapper(blockInfo, + const_cast(targetFn)); + oclInvokeWrapperFn = llvm::ConstantExpr::getBitCast(oclInvokeWrapperFn, + VoidPtrTy); + //llvm::ConstantExpr::getBitCast(oclInvokeWrapperFn, VoidPtrTy); + } + // If there is nothing to capture, we can emit this as a global block. if (blockInfo.CanBeGlobal) - return buildGlobalBlock(CGM, blockInfo, blockFn); + return buildGlobalBlock(CGM, blockInfo, blockFn, oclInvokeWrapperFn); // Otherwise, we have to emit this as a local block. 
@@ -755,6 +784,24 @@ getIntSize(), "block.reserved"); addHeaderField(blockFn, getPointerSize(), "block.invoke"); addHeaderField(descriptor, getPointerSize(), "block.descriptor"); + if(getLangOpts().OpenCL && + getLangOpts().OpenCLVersion >= 200){ + addHeaderField(llvm::ConstantInt::get(IntTy, blockInfo.Captures.size()), + getIntSize(), "block.ocl.cap_num"); + + if(oclCapExtractFn) + addHeaderField(oclCapExtractFn, getPointerSize(), "block.ocl.cap_extract"); + else + addHeaderField(llvm::ConstantPointerNull::get(VoidPtrTy), + getPointerSize(), "block.ocl.cap_extract"); + + if(oclInvokeWrapperFn) + addHeaderField(oclInvokeWrapperFn, getPointerSize(), + "block.ocl.invoke_wrapper"); + else + addHeaderField(llvm::ConstantPointerNull::get(VoidPtrTy), + getPointerSize(), "block.ocl.invoke_wrapper"); + } } // Finally, capture all the values into the block. @@ -791,7 +838,11 @@ // special; we'll simply emit it directly. src = Address::invalid(); } else if (CI.isByRef()) { - if (BlockInfo && CI.isNested()) { + if (BlockInfo && CI.isNested() && + !(CGM.getLangOpts().OpenCL && CGM.getLangOpts().OpenCLVersion >= 200 + /*OpenCL 2.x doesn't set up BlockPointer. + Captured by reference is also not allowed by spec*/)) { + // We need to use the capture from the enclosing block. 
const CGBlockInfo::Capture &enclosingCapture = BlockInfo->getCapture(variable); @@ -960,6 +1011,29 @@ return GenericBlockLiteralType; } +llvm::Type* CodeGenModule::getGenericOCLBlockLiteralType(){ + if(GenericOCLBlockLiteralType) + return GenericOCLBlockLiteralType; + + llvm::Type *BlockDescPtrTy = getBlockDescriptorType(); + + // struct.ocl.__block_literal_generic: the five standard block header fields followed by three OpenCL 2.x fields { + // void *__isa; + // int __flags; + // int __reserved; + // void (*__invoke)(void *); + // struct __block_descriptor *__descriptor; + // int ocl_cap_num; // number of captures + // unsigned (*ocl_cap_copy)(void*,unsigned,void*); // stored as void* + // void (*ocl_invoke_wrapper)(void*,...); // stored as void* + // }; + GenericOCLBlockLiteralType = + llvm::StructType::create("struct.ocl.__block_literal_generic", + VoidPtrTy, IntTy, IntTy, VoidPtrTy, BlockDescPtrTy, + IntTy, VoidPtrTy, VoidPtrTy, nullptr); + return GenericOCLBlockLiteralType; +} + RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue) { const BlockPointerType *BPT = @@ -967,17 +1041,33 @@ llvm::Value *Callee = EmitScalarExpr(E->getCallee()); + bool isOCL2X = (getLangOpts().OpenCL && + getLangOpts().OpenCLVersion >= 200); + // Get a pointer to the generic block literal. llvm::Type *BlockLiteralTy = - llvm::PointerType::getUnqual(CGM.getGenericBlockLiteralType()); + llvm::PointerType::getUnqual((isOCL2X)? + CGM.getGenericOCLBlockLiteralType() : + CGM.getGenericBlockLiteralType()); // Bitcast the callee to a block literal. llvm::Value *BlockLiteral = Builder.CreateBitCast(Callee, BlockLiteralTy, "block.literal"); // Get the function pointer from the literal. 
- llvm::Value *FuncPtr = - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockLiteral, 3); + llvm::Value *FuncPtr; + if(isOCL2X){ + // Invoke function wrapper + FuncPtr = + Builder.CreateStructGEP(CGM.getGenericOCLBlockLiteralType(), + BlockLiteral, + 7); + }else{ + FuncPtr = + Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), + BlockLiteral, + 3); + } BlockLiteral = Builder.CreateBitCast(BlockLiteral, VoidPtrTy); @@ -1012,6 +1102,21 @@ assert(BlockInfo && "evaluating block ref without block information?"); const CGBlockInfo::Capture &capture = BlockInfo->getCapture(variable); + if(CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200){ + + const ImplicitParamDecl* capFuncArgDecl = const_cast(capture.FunctionArgDecl); + auto itResult = BlockInfo->FunctionArgCaptures.find(capFuncArgDecl); + assert(itResult != BlockInfo->FunctionArgCaptures.end() && + "no entry for capture as function argument"); + llvm::Value* capValue = itResult->second; + + Address addr = CreateTempAlloca(capValue->getType(), + getContext().getDeclAlign(variable)); + Builder.CreateStore(capValue, addr); + return addr; + } + // Handle constant captures. if (capture.isConstant()) return LocalDeclMap.find(variable)->second; @@ -1019,6 +1124,7 @@ Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), capture.getOffset(), "block.capture.addr"); + if (isByRef) { // addr should be a void** right now. Load, then cast the result // to byref*. @@ -1051,45 +1157,91 @@ // Using that metadata, generate the actual block function. 
llvm::Constant *blockFn; + llvm::Constant *oclInvokeWrapper = nullptr; { CodeGenFunction::DeclMapTy LocalDeclMap; blockFn = CodeGenFunction(*this).GenerateBlockFunction(GlobalDecl(), blockInfo, LocalDeclMap, false); + if(getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200){ + llvm::Function *TFn = dyn_cast(blockFn); + oclInvokeWrapper = CodeGenFunction(*this) + .GenerateBlockFunctionWrapper(blockInfo, + const_cast(TFn)); + oclInvokeWrapper = llvm::ConstantExpr::getBitCast(oclInvokeWrapper, + VoidPtrTy); + } } blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy); - return buildGlobalBlock(*this, blockInfo, blockFn); + return buildGlobalBlock(*this, blockInfo, blockFn, oclInvokeWrapper); } static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, const CGBlockInfo &blockInfo, - llvm::Constant *blockFn) { + llvm::Constant *blockFn, + llvm::Constant *blockFnWrapper) { assert(blockInfo.CanBeGlobal); + bool isOCL2X = (CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200); + // Generate the constants for the block literal initializer. 
- llvm::Constant *fields[BlockHeaderSize]; - - // isa - fields[0] = CGM.getNSConcreteGlobalBlock(); - - // __flags + llvm::Constant *init; BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE; if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET; - - fields[1] = llvm::ConstantInt::get(CGM.IntTy, flags.getBitMask()); - // Reserved - fields[2] = llvm::Constant::getNullValue(CGM.IntTy); + if(!isOCL2X){ + llvm::Constant *fields[BlockHeaderSize]; + + // isa + fields[0] = CGM.getNSConcreteGlobalBlock(); + + // __flags + fields[1] = llvm::ConstantInt::get(CGM.IntTy, flags.getBitMask()); + + // Reserved + fields[2] = llvm::Constant::getNullValue(CGM.IntTy); - // Function - fields[3] = blockFn; + // Function + fields[3] = blockFn; - // Descriptor - fields[4] = buildBlockDescriptor(CGM, blockInfo); + // Descriptor + fields[4] = buildBlockDescriptor(CGM, blockInfo); + + init = llvm::ConstantStruct::getAnon(fields); + }else{ + llvm::Constant *fields[BlockHeaderSize + 3]; + + assert(blockFnWrapper && "Block invoke function wrapper not built yet"); + + // isa + fields[0] = CGM.getNSConcreteGlobalBlock(); + + // __flags + fields[1] = llvm::ConstantInt::get(CGM.IntTy, flags.getBitMask()); + + // Reserved + fields[2] = llvm::Constant::getNullValue(CGM.IntTy); - llvm::Constant *init = llvm::ConstantStruct::getAnon(fields); + // Function + fields[3] = blockFn; + + // Descriptor + fields[4] = buildBlockDescriptor(CGM, blockInfo); + + // Captured variables amount + fields[5] = llvm::ConstantInt::get(CGM.IntTy, 0); + + // Captured variables extraction function + fields[6] = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + + // Block invoke wrapper function + fields[7] = blockFnWrapper; + + init = llvm::ConstantStruct::getAnon(fields); + } llvm::GlobalVariable *literal = new llvm::GlobalVariable(CGM.getModule(), @@ -1111,6 +1263,21 @@ llvm::Value *arg) { assert(BlockInfo && "not emitting prologue of block invocation function?!"); + if(CGM.getLangOpts().OpenCL && + 
CGM.getLangOpts().OpenCLVersion >= 200){ + + if(IsOCLChildKernelInvoke){ + /* + * Store the llvm::Value* type captured variables, which is passed + * as arguments to the block invoke function + */ + const_cast(BlockInfo)-> + FunctionArgCaptures.insert(std::make_pair(D, arg)); + } + + return; + } + llvm::Value *localAddr = nullptr; if (CGM.getCodeGenOpts().OptimizationLevel == 0) { // Allocate a stack slot to let the debug info survive the RA. @@ -1131,6 +1298,7 @@ SourceLocation StartLoc = BlockInfo->getBlockExpr()->getBody()->getLocStart(); ApplyDebugLocation Scope(*this, StartLoc); + // Instead of messing around with LocalDeclMap, just set the value // directly as BlockPointer. BlockPointer = Builder.CreateBitCast(arg, @@ -1157,6 +1325,10 @@ BlockInfo = &blockInfo; + if(getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200){ + IsOCLChildKernelInvoke = true; + } + // Arrange for local static and local extern declarations to appear // to be local to this function as well, in case they're directly // referenced in a block. @@ -1171,14 +1343,25 @@ // Build the argument list. FunctionArgList args; + /* // The first argument is the block pointer. Just take it as a void* // and cast it later. QualType selfTy = getContext().VoidPtrTy; IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); - ImplicitParamDecl selfDecl(getContext(), const_cast(blockDecl), SourceLocation(), II, selfTy); args.push_back(&selfDecl); + */ + for(auto& capturePair : blockInfo.Captures) { + const VarDecl* capVarDecl = capturePair.getFirst(); + ImplicitParamDecl* capParamDecl = ImplicitParamDecl::Create(getContext(), const_cast(blockDecl), + SourceLocation(), + capVarDecl->getIdentifier(), + capVarDecl->getType()); + auto& capture = capturePair.getSecond(); + const_cast(capture).FunctionArgDecl = capParamDecl; + args.push_back( const_cast(capParamDecl) ); + } // Now add the rest of the parameters. 
args.append(blockDecl->param_begin(), blockDecl->param_end()); @@ -1207,7 +1390,8 @@ // At -O0 we generate an explicit alloca for the BlockPointer, so the RA // won't delete the dbg.declare intrinsics for captured variables. llvm::Value *BlockPointerDbgLoc = BlockPointer; - if (CGM.getCodeGenOpts().OptimizationLevel == 0) { + if (CGM.getCodeGenOpts().OptimizationLevel == 0 && + !(CGM.getLangOpts().OpenCL && CGM.getLangOpts().OpenCLVersion >= 200)) { // Allocate a stack slot for it, so we can point the debugger to it Address Alloca = CreateTempAlloca(BlockPointer->getType(), getPointerAlign(), @@ -1221,7 +1405,9 @@ // If we have a C++ 'this' reference, go ahead and force it into // existence now. - if (blockDecl->capturesCXXThis()) { + if (blockDecl->capturesCXXThis() && + !(CGM.getLangOpts().OpenCL && CGM.getLangOpts().OpenCLVersion >= 200 + /*OpenCL 2.x doesn't set up BlockPointer; captures are passed as invoke-function arguments instead*/)) { Address addr = Builder.CreateStructGEP(LoadBlockStruct(), blockInfo.CXXThisIndex, blockInfo.CXXThisOffset, "block.captured-this"); @@ -1298,6 +1484,88 @@ FinishFunction(cast(blockDecl->getBody())->getRBracLoc()); + if(getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200){ + IsOCLChildKernelInvoke = false; + } + + return fn; +} + +llvm::Constant* +CodeGenFunction::GenerateBlockFunctionWrapper(const CGBlockInfo &blockInfo, + const llvm::Function *invokeFunc){ + + const BlockDecl* blockDecl = blockInfo.Block; + + BlockInfo = &blockInfo; + + FunctionArgList args; + + QualType selfTy = getContext().VoidPtrTy; + IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); + ImplicitParamDecl selfDecl(getContext(), const_cast(blockDecl), + SourceLocation(), II, selfTy); + args.push_back(&selfDecl); + + args.append(blockDecl->param_begin(), blockDecl->param_end()); + + const FunctionProtoType *fnType = blockInfo.getBlockExpr()->getFunctionType(); + const CGFunctionInfo &fnInfo = + CGM.getTypes().arrangeBlockFunctionDeclaration(fnType, args); + + llvm::FunctionType 
*fnLLVMType = CGM.getTypes().GetFunctionType(fnInfo); + + llvm::Function *fn = + llvm::Function::Create(fnLLVMType, + llvm::GlobalValue::InternalLinkage, + "__ocl_block_invoke_wrapper", + &CGM.getModule()); + + IdentifierInfo *FII = &CGM.getContext().Idents.get("__ocl_block_invoke_wrapper"); + FunctionDecl *FD = FunctionDecl::Create(getContext(), + getContext().getTranslationUnitDecl(), + SourceLocation(), + SourceLocation(), FII, + fnType->getReturnType(), + nullptr, SC_Static, + false, + false); + + CGM.SetInternalFunctionAttributes(nullptr, fn, fnInfo); + + StartFunction(FD, fnType->getReturnType(), + fn, fnInfo, args); + + if(!fn->getReturnType()->isVoidTy()) + ReturnValue = CreateDefaultAlignTempAlloca(fn->getReturnType()); + + auto itBlockCtx = fn->arg_begin(); + // Used by LoadBlockStruct(): the wrapper's first argument is the block literal, viewed as this block's struct type. Each capture is then loaded from it and passed to invokeFunc, followed by the wrapper's remaining arguments. NOTE(review): capture.getIndex() is called for every entry of blockInfo.Captures - confirm it is valid for constant captures (capture.isConstant()). + BlockPointer = Builder.CreateBitCast(&(*itBlockCtx), + blockInfo.StructureType->getPointerTo(), + "block"); + + llvm::SmallVector targetArgs; + for(const auto& capPair : blockInfo.Captures){ + const auto& capture = capPair.getSecond(); + Address capturePtr = Builder.CreateStructGEP(LoadBlockStruct(), + capture.getIndex(), + capture.getOffset()); + llvm::Value* captureVal = Builder.CreateLoad(capturePtr, "block_cap"); + targetArgs.push_back(captureVal); + } + auto itParam = fn->arg_begin(); + for(++itParam; itParam != fn->arg_end(); ++itParam){ + targetArgs.push_back(&(*itParam)); + } + + auto* targetInvoke = Builder.CreateCall(const_cast(invokeFunc), + llvm::ArrayRef(targetArgs)); + if(!fn->getReturnType()->isVoidTy()) + Builder.CreateStore(targetInvoke, ReturnValue); + + FinishFunction(); + return fn; } @@ -1634,6 +1902,134 @@ return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); } +// unsigned __ocl_block_captures_copy_helper(void* block, unsigned indexOfArg, void* dst): copies capture number indexOfArg out of the block literal into dst and returns the number of bytes copied, or 0 when indexOfArg matches no capture. NOTE(review): indices follow the iteration order of blockInfo.Captures - confirm that order is deterministic. +llvm::Constant * +CodeGenFunction::GenerateOCLCapturesCopyFunction(const CGBlockInfo &blockInfo){ + ASTContext &C = getContext(); + + BlockInfo = &blockInfo; + + FunctionArgList args; + + ImplicitParamDecl 
blockCtxDecl(getContext(), + nullptr,/*Decl Ctx*/ + SourceLocation(), + nullptr,/*II*/ + C.VoidPtrTy); + args.push_back(&blockCtxDecl); + + ImplicitParamDecl indexDecl(getContext(), + nullptr/*Decl Ctx*/, + SourceLocation(), + nullptr/*II*/, + C.UnsignedIntTy); + args.push_back(&indexDecl); + + ImplicitParamDecl destDecl(getContext(), + nullptr,/*Decl Ctx*/ + SourceLocation(), + nullptr,/*II*/ + C.VoidPtrTy); + args.push_back(&destDecl); + + auto retType = C.UnsignedIntTy; + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(retType, args); + + llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); + + llvm::Function *Fn = + llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, + "__ocl_block_captures_copy_helper", &CGM.getModule()); + + IdentifierInfo *II + = &CGM.getContext().Idents.get("__ocl_block_captures_copy_helper"); + + FunctionDecl *FD = FunctionDecl::Create(C, + C.getTranslationUnitDecl(), + SourceLocation(), + SourceLocation(), II, retType, + nullptr, SC_Static, + false, + false); + + CGM.SetInternalFunctionAttributes(nullptr, Fn, FI); + + StartFunction(FD, retType, Fn, FI, args); + + ReturnValue = CreateDefaultAlignTempAlloca(IntTy); + + auto* bbRet = createBasicBlock(".ret"); + auto* bbDefault = createBasicBlock(".default"); + auto itBlock = Fn->arg_begin(); + auto itArgIndex = itBlock; + ++itArgIndex; + // Used by LoadBlockStruct(): argument 0 is the block literal, viewed as this block's struct type + BlockPointer = Builder.CreateBitCast(&(*itBlock), + blockInfo.StructureType->getPointerTo(), + "block"); + auto itDest = itArgIndex; + ++itDest; + llvm::Argument* destVal = &(*itDest); + + // A portable sizeof(): GEPs elementNum elements past a null pointer and converts the result to an integer, i.e. elementNum * sizeof(element). NOTE(review): for vector types this ignores any padding in the vector's in-memory size - confirm that is intended. + auto getTypeSize = [&](llvm::Type* type, QualType qualTy) -> llvm::Value* { + unsigned elementNum = 1; + llvm::Type* elementTy = type; + if(auto* vecType = dyn_cast(type)){ + elementNum = vecType->getNumElements(); + elementTy = vecType->getScalarType(); + } + + llvm::PointerType* typePtr + = elementTy->getPointerTo(C.getTargetAddressSpace(qualTy)); + llvm::Value* 
nilPtr = llvm::ConstantPointerNull::get(typePtr); + llvm::Value* ptrOffset = Builder.CreateGEP(nilPtr, + Builder.getInt32(elementNum)); + return Builder.CreatePtrToInt(ptrOffset, IntTy); + }; + + auto* switchInst = Builder.CreateSwitch(&(*itArgIndex), bbDefault, + blockInfo.Captures.size() + 1); + + unsigned int indexCounter = 0; + for(const auto& capturePair : blockInfo.Captures){ + auto* bbCase = createBasicBlock(".case"); + switchInst->addCase(Builder.getInt32(indexCounter), bbCase); + EmitBlock(bbCase); + + const auto* captureDecl = capturePair.getFirst(); + const auto& capture = capturePair.getSecond(); + Address capturePtr = Builder.CreateStructGEP(LoadBlockStruct(), + capture.getIndex(), + capture.getOffset()); + + llvm::Type* captureType = capturePtr.getElementType(); + llvm::Value* resultVal = getTypeSize(captureType, captureDecl->getType()); + assert(resultVal != nullptr && "Can't get size of captured variable"); + Builder.CreateMemCpy(Address(destVal, getPointerAlign()), + capturePtr, + resultVal); + + Builder.CreateStore(resultVal, ReturnValue); + EmitBranch(bbRet); + + indexCounter++; + } + + // Default BB: the index matches no capture, so nothing is copied and 0 is stored as the return value + EmitBlock(bbDefault); + Builder.CreateStore(Builder.getInt32(0), + ReturnValue); + EmitBranch(bbRet); + + EmitBlock(bbRet, true); + + FinishFunction(); + + return Fn; +} + namespace { /// Emits the copy/dispose helper functions for a __block object of id type. 
Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -2104,7 +2104,9 @@ if (E->refersToEnclosingVariableOrCapture()) { if (auto *FD = LambdaCaptureFields.lookup(VD)) return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue); - else if (CapturedStmtInfo) { + else if (CapturedStmtInfo && + !(CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200)) { auto it = LocalDeclMap.find(VD); if (it != LocalDeclMap.end()) { if (auto RefTy = VD->getType()->getAs()) { Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -288,6 +288,7 @@ const CodeGen::CGBlockInfo *BlockInfo; llvm::Value *BlockPointer; + bool IsOCLChildKernelInvoke; llvm::DenseMap LambdaCaptureFields; FieldDecl *LambdaThisCaptureField; @@ -1341,8 +1342,12 @@ const DeclMapTy &ldm, bool IsLambdaConversionToBlock); + llvm::Constant* GenerateBlockFunctionWrapper(const CGBlockInfo &blockInfo, + const llvm::Function *invokeFunc); + llvm::Constant *GenerateCopyHelperFunction(const CGBlockInfo &blockInfo); llvm::Constant *GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo); + llvm::Constant *GenerateOCLCapturesCopyFunction(const CGBlockInfo &blockInfo); llvm::Constant *GenerateObjCAtomicSetterCopyHelperFunction( const ObjCPropertyImplDecl *PID); llvm::Constant *GenerateObjCAtomicGetterCopyHelperFunction( Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -46,7 +46,8 @@ SanOpts(CGM.getLangOpts().Sanitize), IsSanitizerScope(false), CurFuncIsThunk(false), AutoreleaseResult(false), SawAsmBlock(false), IsOutlinedSEHHelper(false), - BlockInfo(nullptr), BlockPointer(nullptr), + BlockInfo(nullptr), BlockPointer(nullptr), + IsOCLChildKernelInvoke(false), 
LambdaThisCaptureField(nullptr), NormalCleanupDest(nullptr), NextCleanupDestIndex(1), FirstBlockInfo(nullptr), EHResumeBlock(nullptr), ExceptionSlot(nullptr), EHSelectorSlot(nullptr), Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -464,6 +464,7 @@ llvm::Type *BlockDescriptorType = nullptr; llvm::Type *GenericBlockLiteralType = nullptr; + llvm::Type *GenericOCLBlockLiteralType = nullptr; struct { int GlobalUniqueCount; @@ -768,6 +769,8 @@ /// The type of a generic block literal. llvm::Type *getGenericBlockLiteralType(); + llvm::Type *getGenericOCLBlockLiteralType(); + /// Gets the address of a block which requires no captures. llvm::Constant *GetAddrOfGlobalBlock(const BlockExpr *BE, const char *);