Index: lib/AST/Expr.cpp =================================================================== --- lib/AST/Expr.cpp +++ lib/AST/Expr.cpp @@ -1571,8 +1571,9 @@ goto CheckNoBasePath; case CK_AddressSpaceConversion: - assert(getType()->isPointerType()); - assert(getSubExpr()->getType()->isPointerType()); + assert(getType()->isPointerType() || getType()->isBlockPointerType()); + assert(getSubExpr()->getType()->isPointerType() || + getSubExpr()->getType()->isBlockPointerType()); assert(getType()->getPointeeType().getAddressSpace() != getSubExpr()->getType()->getPointeeType().getAddressSpace()); // These should not have an inheritance path. Index: lib/CodeGen/CGBlocks.cpp =================================================================== --- lib/CodeGen/CGBlocks.cpp +++ lib/CodeGen/CGBlocks.cpp @@ -718,7 +718,12 @@ // Otherwise, we have to emit this as a local block. - llvm::Constant *isa = CGM.getNSConcreteStackBlock(); + llvm::Constant *isa = + (!CGM.getContext().getLangOpts().OpenCL) + ? CGM.getNSConcreteStackBlock() + : CGM.getNullPointer(cast( + CGM.getNSConcreteStackBlock()->getType()), + QualType()); isa = llvm::ConstantExpr::getBitCast(isa, VoidPtrTy); // Build the block descriptor. @@ -906,9 +911,8 @@ // Cast to the converted block-pointer type, which happens (somewhat // unfortunately) to be a pointer to function type. - llvm::Value *result = - Builder.CreateBitCast(blockAddr.getPointer(), - ConvertType(blockInfo.getBlockExpr()->getType())); + llvm::Value *result = Builder.CreatePointerCast( + blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType())); return result; } @@ -976,21 +980,41 @@ llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); // Get a pointer to the generic block literal. + // For OpenCL we generate generic AS void ptr to be able to reuse the same + // block definition for blocks with captures generated as private AS local + // variables and without captures generated as global AS program scope + // variables. + unsigned AddrSpace = 0; + if (getLangOpts().OpenCL) + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); + llvm::Type *BlockLiteralTy = - llvm::PointerType::getUnqual(CGM.getGenericBlockLiteralType()); + llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); // Bitcast the callee to a block literal. - BlockPtr = Builder.CreateBitCast(BlockPtr, BlockLiteralTy, "block.literal"); + BlockPtr = + Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); // Get the function pointer from the literal. llvm::Value *FuncPtr = Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3); - BlockPtr = Builder.CreateBitCast(BlockPtr, VoidPtrTy); // Add the block literal. CallArgList Args; - Args.add(RValue::get(BlockPtr), getContext().VoidPtrTy); + + QualType VoidPtrQualTy = getContext().VoidPtrTy; + llvm::Type *GenericVoidPtrTy = VoidPtrTy; + if (getLangOpts().OpenCL) { + GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); + VoidPtrQualTy = + getContext().getPointerType(getContext().getAddrSpaceQualType( + getContext().VoidTy, LangAS::opencl_generic)); + } + + BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); + Args.add(RValue::get(BlockPtr), VoidPtrQualTy); QualType FnType = BPT->getPointeeType(); @@ -1097,7 +1121,12 @@ auto fields = builder.beginStruct(); // isa - fields.add(CGM.getNSConcreteGlobalBlock()); + fields.add( + (!CGM.getContext().getLangOpts().OpenCL) + ? CGM.getNSConcreteGlobalBlock() + : CGM.getNullPointer(cast( + CGM.getNSConcreteGlobalBlock()->getType()), + QualType())); // __flags BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE; @@ -1114,16 +1143,19 @@ // Descriptor fields.add(buildBlockDescriptor(CGM, blockInfo)); - llvm::Constant *literal = - fields.finishAndCreateGlobal("__block_literal_global", - blockInfo.BlockAlign, - /*constant*/ true); + unsigned AddrSpace = 0; + if (CGM.getContext().getLangOpts().OpenCL) + AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + + llvm::Constant *literal = fields.finishAndCreateGlobal( + "__block_literal_global", blockInfo.BlockAlign, + /*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace); // Return a constant of the appropriately-casted type. llvm::Type *RequiredType = CGM.getTypes().ConvertType(blockInfo.getBlockExpr()->getType()); llvm::Constant *Result = - llvm::ConstantExpr::getBitCast(literal, RequiredType); + llvm::ConstantExpr::getPointerCast(literal, RequiredType); CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result); return Result; } @@ -1155,9 +1187,13 @@ // Instead of messing around with LocalDeclMap, just set the value // directly as BlockPointer. - BlockPointer = Builder.CreateBitCast(arg, - BlockInfo->StructureType->getPointerTo(), - "block"); + BlockPointer = Builder.CreatePointerCast( + arg, + BlockInfo->StructureType->getPointerTo( + getContext().getLangOpts().OpenCL + ? getContext().getTargetAddressSpace(LangAS::opencl_generic) + : 0), + "block"); } Address CodeGenFunction::LoadBlockStruct() { @@ -1196,6 +1232,15 @@ // The first argument is the block pointer. Just take it as a void* // and cast it later. QualType selfTy = getContext().VoidPtrTy; + + // For OpenCL passed block pointer can be private AS local variable or + // global AS program scope variable (for the case with and without captures). + // Generic AS is used therefore to be able to accomodate both private and + // generic AS in one implementation. + if (getLangOpts().OpenCL) + selfTy = getContext().getPointerType(getContext().getAddrSpaceQualType( + getContext().VoidTy, LangAS::opencl_generic)); + IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor"); ImplicitParamDecl selfDecl(getContext(), const_cast(blockDecl), Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -2493,6 +2493,8 @@ llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy); + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); @@ -2502,12 +2504,12 @@ // The most basic form of the call with parameters: // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) Name = "__enqueue_kernel_basic"; - llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy}; + llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef(ArgTys, 4), false); - llvm::Value *Block = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + llvm::Value *Block = Builder.CreatePointerCast( + EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); return RValue::get(Builder.CreateCall( CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block})); @@ -2518,14 +2520,14 @@ if (E->getArg(3)->getType()->isBlockPointerType()) { // No events passed, but has variadic arguments. Name = "__enqueue_kernel_vaargs"; - llvm::Value *Block = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + llvm::Value *Block = Builder.CreatePointerCast( + EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. std::vector Args = {Queue, Flags, Range, Block, ConstantInt::get(IntTy, NumArgs - 4)}; - std::vector ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy, - IntTy}; + std::vector ArgTys = {QueueTy, IntTy, RangeTy, + GenericVoidPtrTy, IntTy}; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. @@ -2555,12 +2557,12 @@ // Convert to generic address space. EventList = Builder.CreatePointerCast(EventList, EventPtrTy); ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); - llvm::Value *Block = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy); + llvm::Value *Block = Builder.CreatePointerCast( + EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy); - std::vector ArgTys = {QueueTy, Int32Ty, RangeTy, - Int32Ty, EventPtrTy, EventPtrTy, - Int8PtrTy}; + std::vector ArgTys = { + QueueTy, Int32Ty, RangeTy, Int32Ty, + EventPtrTy, EventPtrTy, GenericVoidPtrTy}; std::vector Args = {Queue, Flags, Range, NumEvents, EventList, ClkEvent, Block}; @@ -2596,20 +2598,24 @@ // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block // parameter. case Builtin::BIget_kernel_work_group_size: { + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); Value *Arg = EmitScalarExpr(E->getArg(0)); - Arg = Builder.CreateBitCast(Arg, Int8PtrTy); - return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, Int8PtrTy, false), - "__get_kernel_work_group_size_impl"), - Arg)); + Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), + "__get_kernel_work_group_size_impl"), + Arg)); } case Builtin::BIget_kernel_preferred_work_group_size_multiple: { + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); Value *Arg = EmitScalarExpr(E->getArg(0)); - Arg = Builder.CreateBitCast(Arg, Int8PtrTy); + Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); return RValue::get(Builder.CreateCall( CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, Int8PtrTy, false), + llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), "__get_kernel_preferred_work_group_multiple_impl"), Arg)); } Index: lib/Sema/SemaExpr.cpp =================================================================== --- lib/Sema/SemaExpr.cpp +++ lib/Sema/SemaExpr.cpp @@ -7404,7 +7404,13 @@ Sema::AssignConvertType ConvTy = Sema::Compatible; // For blocks we enforce that qualifiers are identical. - if (lhptee.getLocalQualifiers() != rhptee.getLocalQualifiers()) + Qualifiers LQuals = lhptee.getLocalQualifiers(); + Qualifiers RQuals = rhptee.getLocalQualifiers(); + if (S.getLangOpts().OpenCL) { + LQuals.removeAddressSpace(); + RQuals.removeAddressSpace(); + } + if (LQuals != RQuals) ConvTy = Sema::CompatiblePointerDiscardsQualifiers; if (!S.Context.typesAreBlockPointerCompatible(LHSType, RHSType)) @@ -7629,7 +7635,12 @@ // U^ -> void* if (RHSType->getAs()) { if (LHSPointer->getPointeeType()->isVoidType()) { - Kind = CK_BitCast; + unsigned AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace(); + unsigned AddrSpaceR = RHSType->getAs() + ->getPointeeType() + .getAddressSpace(); + Kind = + AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return Compatible; } } @@ -7641,7 +7652,13 @@ if (isa(LHSType)) { // U^ -> T^ if (RHSType->isBlockPointerType()) { - Kind = CK_BitCast; + unsigned AddrSpaceL = LHSType->getAs() + ->getPointeeType() + .getAddressSpace(); + unsigned AddrSpaceR = RHSType->getAs() + ->getPointeeType() + .getAddressSpace(); + Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; return checkBlockPointerTypesForAssignment(*this, LHSType, RHSType); } Index: lib/Sema/SemaType.cpp =================================================================== --- lib/Sema/SemaType.cpp +++ lib/Sema/SemaType.cpp @@ -6893,8 +6893,10 @@ (TAL == TAL_DeclSpec || TAL == TAL_DeclChunk)) { Declarator &D = state.getDeclarator(); if (state.getCurrentChunkIndex() > 0 && - D.getTypeObject(state.getCurrentChunkIndex() - 1).Kind == - DeclaratorChunk::Pointer) { + (D.getTypeObject(state.getCurrentChunkIndex() - 1).Kind == + DeclaratorChunk::Pointer || + D.getTypeObject(state.getCurrentChunkIndex() - 1).Kind == + DeclaratorChunk::BlockPointer)) { type = state.getSema().Context.getAddrSpaceQualType( type, LangAS::opencl_generic); } else if (state.getCurrentChunkIndex() == 0 && Index: test/CodeGenOpenCL/cl20-device-side-enqueue.cl =================================================================== --- test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -2,9 +2,8 @@ // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B64 typedef void (^bl_t)(local void *); - // N.B. The check here only exists to set BL_GLOBAL -// COMMON: @block_G = {{.*}}bitcast ([[BL_GLOBAL:[^@]+@__block_literal_global(\.[0-9]+)?]] +// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_GLOBAL:@__block_literal_global(\.[0-9]+)?]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*) const bl_t block_G = (bl_t) ^ (local void *a) {}; kernel void device_side_enqueue(global int *a, global int *b, int i) { @@ -25,8 +24,8 @@ // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange // COMMON: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(2)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block to void ()* - // COMMON: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8* - // COMMON: call i32 @__enqueue_kernel_basic(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* [[BL_I8]]) + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* + // COMMON: call i32 @__enqueue_kernel_basic(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* [[BL_I8]]) enqueue_kernel(default_queue, flags, ndrange, ^(void) { a[i] = b[i]; @@ -38,8 +37,8 @@ // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(2)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* - // COMMON: [[BL_I8:%[0-9]+]] = bitcast void ()* [[BL]] to i8* - // COMMON: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8* [[BL_I8]]) + // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* + // COMMON: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8 addrspace(4)* [[BL_I8]]) enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event, ^(void) { a[i] = b[i]; @@ -48,8 +47,8 @@ // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange - // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256) - // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 256) + // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 256) + // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 256) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p) { return; @@ -60,9 +59,9 @@ // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange // B32: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32 - // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]]) + // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 [[SIZE]]) // B64: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i64 - // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]]) + // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 [[SIZE]]) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p) { return; @@ -75,8 +74,8 @@ // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* - // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 256) - // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 256) + // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 256) + // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 256) enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event, ^(local void *p) { return; @@ -90,9 +89,9 @@ // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* // B32: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i32 - // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]]) + // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 [[SIZE]]) // B64: [[SIZE:%[0-9]+]] = zext i8 {{%[0-9]+}} to i64 - // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]]) + // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i32, %opencl.clk_event_t{{.*}}* addrspace(4)*, %opencl.clk_event_t{{.*}}* addrspace(4)*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 [[SIZE]]) enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event, ^(local void *p) { return; @@ -104,9 +103,9 @@ // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange // B32: [[SIZE:%[0-9]+]] = trunc i64 {{%[0-9]+}} to i32 - // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 [[SIZE]]) + // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 [[SIZE]]) // B64: [[SIZE:%[0-9]+]] = load i64, i64* %l - // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 [[SIZE]]) + // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 [[SIZE]]) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p) { return; @@ -116,8 +115,8 @@ // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags // COMMON: [[NDR:%[0-9]+]] = load %opencl.ndrange_t*, %opencl.ndrange_t** %ndrange - // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i32 0) - // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* }* @__block_literal_global{{(.[0-9]+)?}} to i8*), i32 1, i64 4294967296) + // B32: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32 0) + // B64: call i32 (%opencl.queue_t{{.*}}*, i32, %opencl.ndrange_t*, i8 addrspace(4)*, i32, ...) @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %opencl.ndrange_t* [[NDR]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64 4294967296) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p) { return; @@ -126,22 +125,22 @@ // The full type of these expressions are long (and repeated elsewhere), so we // capture it as part of the regex for convenience and clarity. - // COMMON: store void ()* bitcast ([[BL_A:[^@]+@__block_literal_global.[0-9]+]] to void ()*), void ()** %block_A + // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_A:@__block_literal_global(\.[0-9]+)?]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A void (^const block_A)(void) = ^{ return; }; - // COMMON: store void (i8 addrspace(3)*)* bitcast ([[BL_B:[^@]+@__block_literal_global.[0-9]+]] to void (i8 addrspace(3)*)*), void (i8 addrspace(3)*)** %block_B + // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_B:@__block_literal_global(\.[0-9]+)?]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B void (^const block_B)(local void *) = ^(local void *a) { return; }; - // COMMON: call i32 @__get_kernel_work_group_size_impl(i8* bitcast ([[BL_A]] to i8*)) + // COMMON: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_A]] to i8 addrspace(1)*) to i8 addrspace(4)*)) unsigned size = get_kernel_work_group_size(block_A); - // COMMON: call i32 @__get_kernel_work_group_size_impl(i8* bitcast ([[BL_B]] to i8*)) + // COMMON: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_B]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_work_group_size(block_B); - // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* bitcast ([[BL_A]] to i8*)) + // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_A]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_preferred_work_group_size_multiple(block_A); - // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8* bitcast ([[BL_GLOBAL]] to i8*)) + // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_preferred_work_group_size_multiple(block_G); } Index: test/SemaOpenCL/invalid-block.cl =================================================================== --- test/SemaOpenCL/invalid-block.cl +++ test/SemaOpenCL/invalid-block.cl @@ -4,26 +4,34 @@ void f0(int (^const bl)()); // All blocks declarations must be const qualified and initialized. void f1() { - int (^bl1)() = ^() {return 1;}; - int (^const bl2)() = ^(){return 1;}; + int (^bl1)(void) = ^() { + return 1; + }; + int (^const bl2)(void) = ^() { + return 1; + }; f0(bl1); f0(bl2); - bl1 = bl2; // expected-error{{invalid operands to binary expression ('int (^const)()' and 'int (^const)()')}} + bl1 = bl2; // expected-error{{invalid operands to binary expression ('int (__generic ^const)(void)' and 'int (__generic ^const)(void)')}} int (^const bl3)(); // expected-error{{invalid block variable declaration - must be initialized}} } // A block with extern storage class is not allowed. -extern int (^bl)() = ^(){return 1;}; // expected-error{{invalid block variable declaration - using 'extern' storage class is disallowed}} +extern int (^bl)(void) = ^() { // expected-error{{invalid block variable declaration - using 'extern' storage class is disallowed}} + return 1; +}; void f2() { - extern int (^bl)() = ^(){return 1;}; // expected-error{{invalid block variable declaration - using 'extern' storage class is disallowed}} + extern int (^bl)(void) = ^() { // expected-error{{invalid block variable declaration - using 'extern' storage class is disallowed}} + return 1; + }; } // A block cannot be the return value of a function. typedef int (^bl_t)(void); -bl_t f3(bl_t bl); // expected-error{{declaring function return value of type 'bl_t' (aka 'int (^const)(void)') is not allowed}} +bl_t f3(bl_t bl); // expected-error{{declaring function return value of type 'bl_t' (aka 'int (__generic ^const)(void)') is not allowed}} struct bl_s { - int (^bl)(void); // expected-error {{the 'int (^const)(void)' type cannot be used to declare a structure or union field}} + int (^bl)(void); // expected-error {{the 'int (__generic ^const)(void)' type cannot be used to declare a structure or union field}} }; void f4() { @@ -45,16 +53,16 @@ bl2_t bl2 = ^(int i) { return 2; }; - bl2_t arr[] = {bl1, bl2}; // expected-error {{array of 'bl2_t' (aka 'int (^const)(int)') type is invalid in OpenCL}} + bl2_t arr[] = {bl1, bl2}; // expected-error {{array of 'bl2_t' (aka 'int (__generic ^const)(int)') type is invalid in OpenCL}} int tmp = i ? bl1(i) // expected-error {{block type cannot be used as expression in ternary expression in OpenCL}} : bl2(i); // expected-error {{block type cannot be used as expression in ternary expression in OpenCL}} } // A block pointer type and all pointer operations are disallowed -void f6(bl2_t *bl_ptr) { // expected-error{{pointer to type '__generic bl2_t' (aka 'int (^const __generic)(int)') is invalid in OpenCL}} +void f6(bl2_t *bl_ptr) { // expected-error{{pointer to type '__generic bl2_t' (aka 'int (__generic ^const __generic)(int)') is invalid in OpenCL}} bl2_t bl = ^(int i) { return 1; }; - bl2_t *p; // expected-error {{pointer to type '__generic bl2_t' (aka 'int (^const __generic)(int)') is invalid in OpenCL}} - *bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (^const)(int)') to unary expression}} - &bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (^const)(int)') to unary expression}} + bl2_t *p; // expected-error {{pointer to type '__generic bl2_t' (aka 'int (__generic ^const __generic)(int)') is invalid in OpenCL}} + *bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (__generic ^const)(int)') to unary expression}} + &bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (__generic ^const)(int)') to unary expression}} }