Index: CodeGen/CGBuiltin.cpp =================================================================== --- CodeGen/CGBuiltin.cpp +++ CodeGen/CGBuiltin.cpp @@ -2339,7 +2339,6 @@ Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); return RValue::get(Builder.CreateCall(F)); } - case Builtin::BI__builtin_coro_id: return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); case Builtin::BI__builtin_coro_promise: @@ -2685,6 +2684,25 @@ "__get_kernel_preferred_work_group_multiple_impl"), Arg)); } + case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: + case Builtin::BIget_kernel_sub_group_count_for_ndrange: { + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); + LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); + llvm::Value *NDRange = NDRangeL.getAddress().getPointer(); + Value *Block = EmitScalarExpr(E->getArg(1)); + Block = Builder.CreatePointerCast(Block, GenericVoidPtrTy); + const char *Name = + BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange + ? 
"__get_kernel_max_sub_group_size_for_ndrange_impl" + : "__get_kernel_sub_group_count_for_ndrange_impl"; + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction( + llvm::FunctionType::get( + IntTy, {NDRange->getType(), GenericVoidPtrTy}, false), + Name), + {NDRange, Block})); + } case Builtin::BIprintf: if (getTarget().getTriple().isNVPTX()) return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); Index: CodeGenOpenCL/cl20-device-side-enqueue.cl =================================================================== --- CodeGenOpenCL/cl20-device-side-enqueue.cl +++ CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -1,6 +1,8 @@ // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B32 // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B64 +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + typedef void (^bl_t)(local void *); typedef struct {int a;} ndrange_t; @@ -138,4 +140,9 @@ size = get_kernel_preferred_work_group_size_multiple(block_A); // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_preferred_work_group_size_multiple(block_G); + + // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*)) + size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){}); + // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 
addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*)) + size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){}); } Index: CodeGenOpenCL/pipe_builtin.cl =================================================================== --- CodeGenOpenCL/pipe_builtin.cl +++ CodeGenOpenCL/pipe_builtin.cl @@ -3,6 +3,8 @@ // CHECK: %opencl.pipe_t = type opaque // CHECK: %opencl.reserve_id_t = type opaque +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + void test1(read_only pipe int p, global int *ptr) { // CHECK: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4) read_pipe(p, ptr); Index: Sema/Sema.cpp =================================================================== --- Sema/Sema.cpp +++ Sema/Sema.cpp @@ -1685,7 +1685,8 @@ QT, OpenCLTypeExtMap); } -bool Sema::checkOpenCLDisabledDecl(const Decl &D, const Expr &E) { - return checkOpenCLDisabledTypeOrDecl(&D, E.getLocStart(), "", +bool Sema::checkOpenCLDisabledDecl(const FunctionDecl &D, const Expr &E) { + IdentifierInfo *FnName = D.getIdentifier(); + return checkOpenCLDisabledTypeOrDecl(&D, E.getLocStart(), FnName, OpenCLDeclExtMap, 1, D.getSourceRange()); } Index: Sema/SemaChecking.cpp =================================================================== --- Sema/SemaChecking.cpp +++ Sema/SemaChecking.cpp @@ -299,6 +299,41 @@ return IllegalParams; } +static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) { + if (!S.getOpenCLOptions().isEnabled("cl_khr_subgroups")) { + S.Diag(Call->getLocStart(), diag::err_opencl_requires_extension) + << 1 << Call->getDirectCallee() << "cl_khr_subgroups"; + return true; + } + return false; +} + +static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) { + if (checkArgCount(S, TheCall, 2)) + return true; + + if (checkOpenCLSubgroupExt(S, TheCall)) + return true; + + // First argument is an ndrange_t type. 
+ Expr *NDRangeArg = TheCall->getArg(0); + if (NDRangeArg->getType().getUnqualifiedType().getAsString() != "ndrange_t") { + S.Diag(TheCall->getArg(0)->getLocStart(), + diag::err_opencl_enqueue_kernel_expected_type) + << TheCall->getDirectCallee() << "'ndrange_t'"; + return true; + } + + Expr *BlockArg = TheCall->getArg(1); + if (!isBlockPointer(BlockArg)) { + S.Diag(BlockArg->getLocStart(), + diag::err_opencl_enqueue_kernel_expected_type) + << TheCall->getDirectCallee() << "block"; + return true; + } + return checkOpenCLBlockArgs(S, BlockArg); +} + /// OpenCL C v2.0, s6.13.17.6 - Check the argument to the /// get_kernel_work_group_size /// and get_kernel_preferred_work_group_size_multiple builtin functions. @@ -309,7 +344,8 @@ Expr *BlockArg = TheCall->getArg(0); if (!isBlockPointer(BlockArg)) { S.Diag(BlockArg->getLocStart(), - diag::err_opencl_enqueue_kernel_expected_type) << "block"; + diag::err_opencl_enqueue_kernel_expected_type) + << TheCall->getDirectCallee() << "block"; return true; } return checkOpenCLBlockArgs(S, BlockArg); @@ -395,7 +431,7 @@ if (!Arg0->getType()->isQueueT()) { S.Diag(TheCall->getArg(0)->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type) - << S.Context.OCLQueueTy; + << TheCall->getDirectCallee() << S.Context.OCLQueueTy; return true; } @@ -403,7 +439,7 @@ if (!Arg1->getType()->isIntegerType()) { S.Diag(TheCall->getArg(1)->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type) - << "'kernel_enqueue_flags_t' (i.e. uint)"; + << TheCall->getDirectCallee() << "'kernel_enqueue_flags_t' (i.e. uint)"; return true; } @@ -411,7 +447,7 @@ if (Arg2->getType().getUnqualifiedType().getAsString() != "ndrange_t") { S.Diag(TheCall->getArg(2)->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type) - << "'ndrange_t'"; + << TheCall->getDirectCallee() << "'ndrange_t'"; return true; } @@ -421,7 +457,7 @@ // check that the last argument is the right block type. 
if (!isBlockPointer(Arg3)) { S.Diag(Arg3->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type) - << "block"; + << TheCall->getDirectCallee() << "block"; return true; } // we have a block type, check the prototype @@ -444,7 +480,7 @@ Expr *Arg6 = TheCall->getArg(6); if (!isBlockPointer(Arg6)) { S.Diag(Arg6->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type) - << "block"; + << TheCall->getDirectCallee() << "block"; return true; } if (checkOpenCLBlockArgs(S, Arg6)) @@ -454,7 +490,7 @@ if (!Arg3->getType()->isIntegerType()) { S.Diag(TheCall->getArg(3)->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type) - << "integer"; + << TheCall->getDirectCallee() << "integer"; return true; } // check remaining common arguments. @@ -467,6 +503,7 @@ !Arg4->getType()->getPointeeOrArrayElementType()->isClkEventT()) { S.Diag(TheCall->getArg(4)->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type) + << TheCall->getDirectCallee() << S.Context.getPointerType(S.Context.OCLClkEventTy); return true; } @@ -478,6 +515,7 @@ Arg5->getType()->getPointeeType()->isClkEventT())) { S.Diag(TheCall->getArg(5)->getLocStart(), diag::err_opencl_enqueue_kernel_expected_type) + << TheCall->getDirectCallee() << S.Context.getPointerType(S.Context.OCLClkEventTy); return true; } @@ -628,10 +666,13 @@ // \param S Reference to the semantic analyzer. // \param Call The call to the builtin function to be analyzed. // \return True if a semantic error was found, false otherwise. -static bool SemaBuiltinReserveRWPipe(Sema &S, CallExpr *Call) { +static bool SemaBuiltinReserveRWPipe(Sema &S, CallExpr *Call, bool isSubgroup) { if (checkArgCount(S, Call, 2)) return true; + if (isSubgroup && checkOpenCLSubgroupExt(S, Call)) + return true; + if (checkOpenCLPipeArg(S, Call)) return true; @@ -652,10 +693,13 @@ // \param S Reference to the semantic analyzer. // \param Call The call to the builtin function to be analyzed. // \return True if a semantic error was found, false otherwise. 
-static bool SemaBuiltinCommitRWPipe(Sema &S, CallExpr *Call) { +static bool SemaBuiltinCommitRWPipe(Sema &S, CallExpr *Call, bool isSubgroup) { if (checkArgCount(S, Call, 2)) return true; + if (isSubgroup && checkOpenCLSubgroupExt(S, Call)) + return true; + if (checkOpenCLPipeArg(S, Call)) return true; @@ -1044,9 +1088,16 @@ case Builtin::BIreserve_write_pipe: case Builtin::BIwork_group_reserve_read_pipe: case Builtin::BIwork_group_reserve_write_pipe: + if (SemaBuiltinReserveRWPipe(*this, TheCall, false)) + return ExprError(); + // Since return type of reserve_read/write_pipe built-in function is + // reserve_id_t, which is not defined in the builtin def file , we used int + // as return type and need to override the return type of these functions. + TheCall->setType(Context.OCLReserveIDTy); + break; case Builtin::BIsub_group_reserve_read_pipe: case Builtin::BIsub_group_reserve_write_pipe: - if (SemaBuiltinReserveRWPipe(*this, TheCall)) + if (SemaBuiltinReserveRWPipe(*this, TheCall, true)) return ExprError(); // Since return type of reserve_read/write_pipe built-in function is // reserve_id_t, which is not defined in the builtin def file , we used int @@ -1057,9 +1108,12 @@ case Builtin::BIcommit_write_pipe: case Builtin::BIwork_group_commit_read_pipe: case Builtin::BIwork_group_commit_write_pipe: + if (SemaBuiltinCommitRWPipe(*this, TheCall, false)) + return ExprError(); + break; case Builtin::BIsub_group_commit_read_pipe: case Builtin::BIsub_group_commit_write_pipe: - if (SemaBuiltinCommitRWPipe(*this, TheCall)) + if (SemaBuiltinCommitRWPipe(*this, TheCall, true)) return ExprError(); break; case Builtin::BIget_pipe_num_packets: @@ -1084,6 +1138,11 @@ if (SemaOpenCLBuiltinKernelWorkGroupSize(*this, TheCall)) return ExprError(); break; + case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: + case Builtin::BIget_kernel_sub_group_count_for_ndrange: + if (SemaOpenCLBuiltinNDRangeAndBlock(*this, TheCall)) + return ExprError(); + break; case 
Builtin::BI__builtin_os_log_format: case Builtin::BI__builtin_os_log_format_buffer_size: if (SemaBuiltinOSLogFormat(TheCall)) { Index: SemaOpenCL/cl20-device-side-enqueue.cl =================================================================== --- SemaOpenCL/cl20-device-side-enqueue.cl +++ SemaOpenCL/cl20-device-side-enqueue.cl @@ -19,19 +19,19 @@ return 0; }); - enqueue_kernel(vptr, flags, ndrange, ^(void) { // expected-error{{illegal call to enqueue_kernel, expected 'queue_t' argument type}} + enqueue_kernel(vptr, flags, ndrange, ^(void) { // expected-error{{illegal call to 'enqueue_kernel', expected 'queue_t' argument type}} return 0; }); - enqueue_kernel(default_queue, vptr, ndrange, ^(void) { // expected-error{{illegal call to enqueue_kernel, expected 'kernel_enqueue_flags_t' (i.e. uint) argument type}} + enqueue_kernel(default_queue, vptr, ndrange, ^(void) { // expected-error{{illegal call to 'enqueue_kernel', expected 'kernel_enqueue_flags_t' (i.e. uint) argument type}} return 0; }); - enqueue_kernel(default_queue, flags, vptr, ^(void) { // expected-error{{illegal call to enqueue_kernel, expected 'ndrange_t' argument type}} + enqueue_kernel(default_queue, flags, vptr, ^(void) { // expected-error{{illegal call to 'enqueue_kernel', expected 'ndrange_t' argument type}} return 0; }); - enqueue_kernel(default_queue, flags, ndrange, vptr); // expected-error{{illegal call to enqueue_kernel, expected block argument}} + enqueue_kernel(default_queue, flags, ndrange, vptr); // expected-error{{illegal call to 'enqueue_kernel', expected block argument}} enqueue_kernel(default_queue, flags, ndrange, ^(int i) { // expected-error{{blocks with parameters are not accepted in this prototype of enqueue_kernel call}} return 0; @@ -46,21 +46,21 @@ return 0; }); - enqueue_kernel(default_queue, flags, ndrange, vptr, &event_wait_list, &evt, ^(void) { // expected-error{{illegal call to enqueue_kernel, expected integer argument type}} + enqueue_kernel(default_queue, flags, ndrange, 
vptr, &event_wait_list, &evt, ^(void) { // expected-error{{illegal call to 'enqueue_kernel', expected integer argument type}} return 0; }); - enqueue_kernel(default_queue, flags, ndrange, 1, vptr, &evt, ^(void) // expected-error{{illegal call to enqueue_kernel, expected 'clk_event_t *' argument type}} + enqueue_kernel(default_queue, flags, ndrange, 1, vptr, &evt, ^(void) // expected-error{{illegal call to 'enqueue_kernel', expected 'clk_event_t *' argument type}} { return 0; }); - enqueue_kernel(default_queue, flags, ndrange, 1, &event_wait_list, vptr, ^(void) // expected-error{{illegal call to enqueue_kernel, expected 'clk_event_t *' argument type}} + enqueue_kernel(default_queue, flags, ndrange, 1, &event_wait_list, vptr, ^(void) // expected-error{{illegal call to 'enqueue_kernel', expected 'clk_event_t *' argument type}} { return 0; }); - enqueue_kernel(default_queue, flags, ndrange, 1, &event_wait_list, &evt, vptr); // expected-error{{illegal call to enqueue_kernel, expected block argument}} + enqueue_kernel(default_queue, flags, ndrange, 1, &event_wait_list, &evt, vptr); // expected-error{{illegal call to 'enqueue_kernel', expected block argument}} // Testing the third overload type enqueue_kernel(default_queue, flags, ndrange, @@ -209,3 +209,33 @@ size = get_kernel_preferred_work_group_size_multiple(1); // expected-error{{expected block argument}} size = get_kernel_preferred_work_group_size_multiple(block_A, 1); // expected-error{{too many arguments to function call, expected 1, have 2}} } + +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + +kernel void foo(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); + buf[0] = get_kernel_max_sub_group_size_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected 'ndrange_t' argument type}} +} + +kernel void bar(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); + 
buf[0] = get_kernel_sub_group_count_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_sub_group_count_for_ndrange', expected 'ndrange_t' argument type}} +} + +#pragma OPENCL EXTENSION cl_khr_subgroups : disable + +kernel void foo1(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} +} + +kernel void bar1(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} +} Index: SemaOpenCL/extension-begin.cl =================================================================== --- SemaOpenCL/extension-begin.cl +++ SemaOpenCL/extension-begin.cl @@ -46,7 +46,7 @@ const struct A test_A_local; // expected-error {{use of type 'struct A' requires my_ext extension to be enabled}} TypedefOfA test_typedef_A; // expected-error {{use of type 'TypedefOfA' (aka 'struct A') requires my_ext extension to be enabled}} PointerOfA test_A_pointer; // expected-error {{use of type 'PointerOfA' (aka 'const struct A *') requires my_ext extension to be enabled}} - f(); // expected-error {{use of declaration requires my_ext extension to be enabled}} + f(); // expected-error {{use of declaration 'f' requires my_ext extension to be enabled}} g(0); // expected-error {{no matching function for call to 'g'}} // expected-note@-26 {{candidate disabled due to OpenCL extension}} // expected-note@-22 {{candidate function not viable: requires 0 arguments, but 1 was provided}} Index: SemaOpenCL/invalid-pipe-builtin-cl2.0.cl =================================================================== --- SemaOpenCL/invalid-pipe-builtin-cl2.0.cl +++ SemaOpenCL/invalid-pipe-builtin-cl2.0.cl @@ -1,5 +1,7 @@ // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only 
-cl-std=CL2.0 +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + void test1(read_only pipe int p, global int* ptr){ int tmp; reserve_id_t rid; Index: SemaOpenCL/sub-group-bifs.cl =================================================================== --- /dev/null +++ SemaOpenCL/sub-group-bifs.cl @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 %s -verify -fsyntax-only -cl-std=CL2.0 + +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + +typedef struct {} ndrange_t; + +kernel void foo(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); + buf[0] = get_kernel_max_sub_group_size_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected 'ndrange_t' argument type}} +} + +kernel void bar(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); + buf[0] = get_kernel_sub_group_count_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_sub_group_count_for_ndrange', expected 'ndrange_t' argument type}} +} + +#pragma OPENCL EXTENSION cl_khr_subgroups : disable + +kernel void foo1(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} +} + +kernel void bar1(global int *buf) +{ + ndrange_t n; + buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} +} Index: clang/Basic/Builtins.def =================================================================== --- clang/Basic/Builtins.def +++ clang/Basic/Builtins.def @@ -1399,6 +1399,8 @@ LANGBUILTIN(enqueue_kernel, "i.", "tn", OCLC20_LANG) LANGBUILTIN(get_kernel_work_group_size, "i.", "tn", OCLC20_LANG) LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "i.", "tn", OCLC20_LANG) 
+LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "i.", "tn", OCLC20_LANG) +LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "i.", "tn", OCLC20_LANG) // OpenCL v2.0 s6.13.9 - Address space qualifier functions. LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG) Index: clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/Basic/DiagnosticSemaKinds.td +++ clang/Basic/DiagnosticSemaKinds.td @@ -8375,7 +8375,7 @@ def err_opencl_variadic_function : Error< "invalid prototype, variadic arguments are not allowed in OpenCL">; def err_opencl_requires_extension : Error< - "use of %select{type |declaration}0%1 requires %2 extension to be enabled">; + "use of %select{type |declaration }0%1 requires %2 extension to be enabled">; // OpenCL v2.0 s6.13.6 -- Builtin Pipe Functions def err_opencl_builtin_pipe_first_arg : Error< @@ -8421,7 +8421,7 @@ def err_opencl_enqueue_kernel_incorrect_args : Error< "illegal call to enqueue_kernel, incorrect argument types">; def err_opencl_enqueue_kernel_expected_type : Error< - "illegal call to enqueue_kernel, expected %0 argument type">; + "illegal call to %0, expected %1 argument type">; def err_opencl_enqueue_kernel_local_size_args : Error< "mismatch in number of block parameters and local size arguments passed">; def err_opencl_enqueue_kernel_invalid_local_size_type : Error< Index: clang/Sema/Sema.h =================================================================== --- clang/Sema/Sema.h +++ clang/Sema/Sema.h @@ -8422,7 +8422,7 @@ /// is disabled due to required OpenCL extensions being disabled. If so, /// emit diagnostics. /// \return true if type is disabled. - bool checkOpenCLDisabledDecl(const Decl &D, const Expr &E); + bool checkOpenCLDisabledDecl(const FunctionDecl &D, const Expr &E); //===--------------------------------------------------------------------===// // OpenMP directives and clauses.