Index: lib/CodeGen/CGBlocks.h =================================================================== --- lib/CodeGen/CGBlocks.h +++ lib/CodeGen/CGBlocks.h @@ -189,6 +189,8 @@ return reinterpret_cast(Data); } + ImplicitParamDecl* FunctionArgDecl; + static Capture makeIndex(unsigned index, CharUnits offset) { Capture v; v.Data = (index << 1) | 1; @@ -200,7 +202,13 @@ Capture v; v.Data = reinterpret_cast(value); return v; - } + } + + /* + ~Capture(){ + if(FunctionArgDecl) delete FunctionArgDecl; + } + */ }; /// CanBeGlobal - True if the block can be global, i.e. it has @@ -225,6 +233,8 @@ /// The mapping of allocated indexes within the block. llvm::DenseMap Captures; + llvm::DenseMap FunctionArgCaptures; + Address LocalAddress; llvm::StructType *StructureType; const BlockDecl *Block; Index: lib/CodeGen/CGBlocks.cpp =================================================================== --- lib/CodeGen/CGBlocks.cpp +++ lib/CodeGen/CGBlocks.cpp @@ -311,14 +311,22 @@ info.BlockAlign = CGM.getPointerAlign(); info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize(); + bool isOCL2X = (CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200); + auto blockHeaderSize = (isOCL2X)? BlockHeaderSize + 2 : BlockHeaderSize; + assert(elementTypes.empty()); elementTypes.push_back(CGM.VoidPtrTy); elementTypes.push_back(CGM.IntTy); elementTypes.push_back(CGM.IntTy); elementTypes.push_back(CGM.VoidPtrTy); elementTypes.push_back(CGM.getBlockDescriptorType()); + if(isOCL2X){ + elementTypes.push_back(CGM.IntTy); + elementTypes.push_back(CGM.VoidPtrTy); + } - assert(elementTypes.size() == BlockHeaderSize); + assert(elementTypes.size() == blockHeaderSize); } /// Compute the layout of the given block. 
Attempts to lay the block @@ -704,6 +712,13 @@ isLambdaConv); blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy); + llvm::Constant *oclCapExtractFn = nullptr; + if(getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200){ + oclCapExtractFn + = CodeGenFunction(CGM, true).GenerateOCLCapturesCopyFunction(blockInfo); + oclCapExtractFn = llvm::ConstantExpr::getBitCast(oclCapExtractFn, VoidPtrTy); + } + // If there is nothing to capture, we can emit this as a global block. if (blockInfo.CanBeGlobal) return buildGlobalBlock(CGM, blockInfo, blockFn); @@ -755,6 +770,17 @@ getIntSize(), "block.reserved"); addHeaderField(blockFn, getPointerSize(), "block.invoke"); addHeaderField(descriptor, getPointerSize(), "block.descriptor"); + if(getLangOpts().OpenCL && + getLangOpts().OpenCLVersion >= 200){ + addHeaderField(llvm::ConstantInt::get(IntTy, blockInfo.Captures.size()), + getIntSize(), "block.ocl.cap_num"); + + if(oclCapExtractFn) + addHeaderField(oclCapExtractFn, getPointerSize(), "block.ocl.cap_extract"); + else + addHeaderField(llvm::ConstantPointerNull::get(VoidPtrTy), + getPointerSize(), "block.ocl.cap_extract"); + } } // Finally, capture all the values into the block. @@ -791,7 +817,11 @@ // special; we'll simply emit it directly. src = Address::invalid(); } else if (CI.isByRef()) { - if (BlockInfo && CI.isNested()) { + if (BlockInfo && CI.isNested() && + !(CGM.getLangOpts().OpenCL && CGM.getLangOpts().OpenCLVersion >= 200 + /*The OpenCL 2.x block-invoke prologue returns early, so BlockPointer is never set up. + Capturing by reference is also not allowed by the spec*/)) { + // We need to use the capture from the enclosing block. 
const CGBlockInfo::Capture &enclosingCapture = BlockInfo->getCapture(variable); @@ -1012,6 +1042,21 @@ assert(BlockInfo && "evaluating block ref without block information?"); const CGBlockInfo::Capture &capture = BlockInfo->getCapture(variable); + if(CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200){ + + const ImplicitParamDecl* capFuncArgDecl = const_cast(capture.FunctionArgDecl); + auto itResult = BlockInfo->FunctionArgCaptures.find(capFuncArgDecl); + assert(itResult != BlockInfo->FunctionArgCaptures.end() && + "no entry for capture as function argument"); + llvm::Value* capValue = itResult->second; + + Address addr = CreateTempAlloca(capValue->getType(), + getContext().getDeclAlign(variable)); + Builder.CreateStore(capValue, addr); + return addr; + } + // Handle constant captures. if (capture.isConstant()) return LocalDeclMap.find(variable)->second; @@ -1019,6 +1064,7 @@ Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), capture.getOffset(), "block.capture.addr"); + if (isByRef) { // addr should be a void** right now. Load, then cast the result // to byref*. @@ -1068,28 +1114,58 @@ llvm::Constant *blockFn) { assert(blockInfo.CanBeGlobal); + bool isOCL2X = (CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200); + // Generate the constants for the block literal initializer. 
- llvm::Constant *fields[BlockHeaderSize]; - - // isa - fields[0] = CGM.getNSConcreteGlobalBlock(); - - // __flags + llvm::Constant *init; BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE; if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET; - - fields[1] = llvm::ConstantInt::get(CGM.IntTy, flags.getBitMask()); - // Reserved - fields[2] = llvm::Constant::getNullValue(CGM.IntTy); + if(!isOCL2X){ + llvm::Constant *fields[BlockHeaderSize]; + + // isa + fields[0] = CGM.getNSConcreteGlobalBlock(); + + // __flags + fields[1] = llvm::ConstantInt::get(CGM.IntTy, flags.getBitMask()); + + // Reserved + fields[2] = llvm::Constant::getNullValue(CGM.IntTy); + + // Function + fields[3] = blockFn; + + // Descriptor + fields[4] = buildBlockDescriptor(CGM, blockInfo); + + init = llvm::ConstantStruct::getAnon(fields); + }else{ + llvm::Constant *fields[BlockHeaderSize + 2]; + + // isa + fields[0] = CGM.getNSConcreteGlobalBlock(); + + // __flags + fields[1] = llvm::ConstantInt::get(CGM.IntTy, flags.getBitMask()); - // Function - fields[3] = blockFn; + // Reserved + fields[2] = llvm::Constant::getNullValue(CGM.IntTy); - // Descriptor - fields[4] = buildBlockDescriptor(CGM, blockInfo); + // Function + fields[3] = blockFn; - llvm::Constant *init = llvm::ConstantStruct::getAnon(fields); + // Descriptor + fields[4] = buildBlockDescriptor(CGM, blockInfo); + + // Number of captured variables (a global block captures nothing) + fields[5] = llvm::ConstantInt::get(CGM.IntTy, 0); + // Capture extraction function (null: there is nothing to extract) + fields[6] = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + + init = llvm::ConstantStruct::getAnon(fields); + } llvm::GlobalVariable *literal = new llvm::GlobalVariable(CGM.getModule(), @@ -1111,6 +1187,21 @@ llvm::Value *arg) { assert(BlockInfo && "not emitting prologue of block invocation function?!"); + if(CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200){ + + if(IsOCLChildKernelInvoke){ + /* + * Record the llvm::Value* of a captured variable, which is passed + * as an argument to 
the block invoke function + */ + const_cast(BlockInfo)-> + FunctionArgCaptures.insert(std::make_pair(D, arg)); + } + + return; + } + llvm::Value *localAddr = nullptr; if (CGM.getCodeGenOpts().OptimizationLevel == 0) { // Allocate a stack slot to let the debug info survive the RA. @@ -1131,6 +1222,7 @@ SourceLocation StartLoc = BlockInfo->getBlockExpr()->getBody()->getLocStart(); ApplyDebugLocation Scope(*this, StartLoc); + // Instead of messing around with LocalDeclMap, just set the value // directly as BlockPointer. BlockPointer = Builder.CreateBitCast(arg, @@ -1157,6 +1249,10 @@ BlockInfo = &blockInfo; + if(getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200){ + IsOCLChildKernelInvoke = true; + } + // Arrange for local static and local extern declarations to appear // to be local to this function as well, in case they're directly // referenced in a block. @@ -1171,14 +1267,25 @@ // Build the argument list. FunctionArgList args; + /* // The first argument is the block pointer. Just take it as a void* // and cast it later. QualType selfTy = getContext().VoidPtrTy; IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); - ImplicitParamDecl selfDecl(getContext(), const_cast(blockDecl), SourceLocation(), II, selfTy); args.push_back(&selfDecl); + */ + for(auto& capturePair : blockInfo.Captures) { + const VarDecl* capVarDecl = capturePair.getFirst(); + ImplicitParamDecl* capParamDecl = ImplicitParamDecl::Create(getContext(), const_cast(blockDecl), + SourceLocation(), + capVarDecl->getIdentifier(), + capVarDecl->getType()); + auto& capture = capturePair.getSecond(); + const_cast(capture).FunctionArgDecl = capParamDecl; + args.push_back( const_cast(capParamDecl) ); + } // Now add the rest of the parameters. args.append(blockDecl->param_begin(), blockDecl->param_end()); @@ -1207,7 +1314,8 @@ // At -O0 we generate an explicit alloca for the BlockPointer, so the RA // won't delete the dbg.declare intrinsics for captured variables. 
llvm::Value *BlockPointerDbgLoc = BlockPointer; - if (CGM.getCodeGenOpts().OptimizationLevel == 0) { + if (CGM.getCodeGenOpts().OptimizationLevel == 0 && + !(CGM.getLangOpts().OpenCL && CGM.getLangOpts().OpenCLVersion >= 200)) { // Allocate a stack slot for it, so we can point the debugger to it Address Alloca = CreateTempAlloca(BlockPointer->getType(), getPointerAlign(), @@ -1221,7 +1329,9 @@ // If we have a C++ 'this' reference, go ahead and force it into // existence now. - if (blockDecl->capturesCXXThis()) { + if (blockDecl->capturesCXXThis() && + !(CGM.getLangOpts().OpenCL && CGM.getLangOpts().OpenCLVersion >= 200 + /*OpenCL 2.x doesn't set up BlockPointer*/)) { Address addr = Builder.CreateStructGEP(LoadBlockStruct(), blockInfo.CXXThisIndex, blockInfo.CXXThisOffset, "block.captured-this"); @@ -1298,6 +1408,10 @@ FinishFunction(cast(blockDecl->getBody())->getRBracLoc()); + if(getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200){ + IsOCLChildKernelInvoke = false; + } + return fn; } @@ -1634,6 +1748,135 @@ return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); } +// uint __ocl_block_captures_copy_helper(void* block, uint indexOfArg, void* dst): copies capture number indexOfArg from the block into dst and returns its size in bytes, or 0 if no capture has that index +llvm::Constant * +CodeGenFunction::GenerateOCLCapturesCopyFunction(const CGBlockInfo &blockInfo){ + ASTContext &C = getContext(); + + BlockInfo = &blockInfo; + + FunctionArgList args; + + ImplicitParamDecl blockCtxDecl(getContext(), + nullptr,/*Decl Ctx*/ + SourceLocation(), + nullptr,/*II*/ + C.VoidPtrTy); + args.push_back(&blockCtxDecl); + + ImplicitParamDecl indexDecl(getContext(), + nullptr/*Decl Ctx*/, + SourceLocation(), + nullptr/*II*/, + C.UnsignedIntTy); + args.push_back(&indexDecl); + + ImplicitParamDecl destDecl(getContext(), + nullptr,/*Decl Ctx*/ + SourceLocation(), + nullptr,/*II*/ + C.VoidPtrTy); + args.push_back(&destDecl); + + auto retType = C.UnsignedIntTy; + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(retType, args); + + llvm::FunctionType *LTy = 
CGM.getTypes().GetFunctionType(FI); + + llvm::Function *Fn = + llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, + "__ocl_block_captures_copy_helper", &CGM.getModule()); + + IdentifierInfo *II + = &CGM.getContext().Idents.get("__ocl_block_captures_copy_helper"); + + FunctionDecl *FD = FunctionDecl::Create(C, + C.getTranslationUnitDecl(), + SourceLocation(), + SourceLocation(), II, retType, + nullptr, SC_Static, + false, + false); + + CGM.SetInternalFunctionAttributes(nullptr, Fn, FI); + + StartFunction(FD, retType, Fn, FI, args); + + ReturnValue = CreateDefaultAlignTempAlloca(IntTy); + + auto* bbRet = createBasicBlock(".ret"); + auto* bbDefault = createBasicBlock(".default"); + auto itBlock = Fn->arg_begin(); + auto itArgIndex = itBlock; + ++itArgIndex; + // Used by LoadBlockStruct() + BlockPointer = Builder.CreateBitCast(&(*itBlock), + blockInfo.StructureType->getPointerTo(), + "block"); + auto itDest = itArgIndex; + ++itDest; + llvm::Argument* destVal = &(*itDest); + auto destAlign = CharUnits::fromQuantity(4);//FIXME: 4 is a placeholder; what is the real alignment of dst? 
+ + // A portable sizeof(): the byte offset of element [elementNum] from a null pointer; a vector is sized as elementNum scalars + auto getTypeSize = [&](llvm::Type* type, QualType qualTy) -> llvm::Value* { + unsigned elementNum = 1; + llvm::Type* elementTy = type; + if(auto* vecType = dyn_cast(type)){ + elementNum = vecType->getNumElements(); + elementTy = vecType->getScalarType(); + } + + llvm::PointerType* typePtr + = elementTy->getPointerTo(C.getTargetAddressSpace(qualTy)); + llvm::Value* nilPtr = llvm::ConstantPointerNull::get(typePtr); + llvm::Value* ptrOffset = Builder.CreateGEP(nilPtr, + Builder.getInt32(elementNum)); + return Builder.CreatePtrToInt(ptrOffset, IntTy); + }; + + auto* switchInst = Builder.CreateSwitch(&(*itArgIndex), bbDefault, + blockInfo.Captures.size() + 1); + + unsigned int indexCounter = 0; + for(const auto& capturePair : blockInfo.Captures){ + auto* bbCase = createBasicBlock(".case"); + switchInst->addCase(Builder.getInt32(indexCounter), bbCase); + EmitBlock(bbCase); + + const auto* captureDecl = capturePair.getFirst(); + const auto& capture = capturePair.getSecond(); + Address capturePtr = Builder.CreateStructGEP(LoadBlockStruct(), + capture.getIndex(), + capture.getOffset()); + + llvm::Type* captureType = capturePtr.getElementType(); + llvm::Value* resultVal = getTypeSize(captureType, captureDecl->getType()); + assert(resultVal != nullptr && "Can't get size of captured variable"); + Builder.CreateMemCpy(Address(destVal, destAlign), + capturePtr, + resultVal); + + Builder.CreateStore(resultVal, ReturnValue); + EmitBranch(bbRet); + + indexCounter++; + } + + // Default case: no capture has this index, so return a size of 0 + EmitBlock(bbDefault); + Builder.CreateStore(Builder.getInt32(0), + ReturnValue); + EmitBranch(bbRet); + + EmitBlock(bbRet, true); + + FinishFunction(); + + return Fn; +} + namespace { /// Emits the copy/dispose helper functions for a __block object of id type. 
Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -2104,7 +2104,9 @@ if (E->refersToEnclosingVariableOrCapture()) { if (auto *FD = LambdaCaptureFields.lookup(VD)) return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue); - else if (CapturedStmtInfo) { + else if (CapturedStmtInfo && + !(CGM.getLangOpts().OpenCL && + CGM.getLangOpts().OpenCLVersion >= 200)) { auto it = LocalDeclMap.find(VD); if (it != LocalDeclMap.end()) { if (auto RefTy = VD->getType()->getAs()) { Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -288,6 +288,7 @@ const CodeGen::CGBlockInfo *BlockInfo; llvm::Value *BlockPointer; + bool IsOCLChildKernelInvoke; llvm::DenseMap LambdaCaptureFields; FieldDecl *LambdaThisCaptureField; @@ -1343,6 +1344,7 @@ llvm::Constant *GenerateCopyHelperFunction(const CGBlockInfo &blockInfo); llvm::Constant *GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo); + llvm::Constant *GenerateOCLCapturesCopyFunction(const CGBlockInfo &blockInfo); llvm::Constant *GenerateObjCAtomicSetterCopyHelperFunction( const ObjCPropertyImplDecl *PID); llvm::Constant *GenerateObjCAtomicGetterCopyHelperFunction( Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -46,7 +46,8 @@ SanOpts(CGM.getLangOpts().Sanitize), IsSanitizerScope(false), CurFuncIsThunk(false), AutoreleaseResult(false), SawAsmBlock(false), IsOutlinedSEHHelper(false), - BlockInfo(nullptr), BlockPointer(nullptr), + BlockInfo(nullptr), BlockPointer(nullptr), + IsOCLChildKernelInvoke(false), LambdaThisCaptureField(nullptr), NormalCleanupDest(nullptr), NextCleanupDestIndex(1), FirstBlockInfo(nullptr), EHResumeBlock(nullptr), ExceptionSlot(nullptr), 
EHSelectorSlot(nullptr),