diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td
--- a/clang/lib/Sema/OpenCLBuiltins.td
+++ b/clang/lib/Sema/OpenCLBuiltins.td
@@ -310,6 +310,7 @@
 def Queue                 : Type<"queue_t", QualType<"Context.OCLQueueTy">>;
 def ReserveId             : Type<"reserve_id_t", QualType<"Context.OCLReserveIDTy">>;
 def MemFenceFlags         : TypedefType<"cl_mem_fence_flags">;
+def ClkProfilingInfo      : TypedefType<"clk_profiling_info">;
 
 // OpenCL v2.0 s6.13.11: Atomic integer and floating-point types.
 def AtomicInt             : Type<"atomic_int", QualType<"Context.getAtomicType(Context.IntTy)">>;
@@ -323,6 +324,7 @@
 def AtomicSize            : Type<"atomic_size_t", QualType<"Context.getAtomicType(Context.getSizeType())">>;
 def AtomicPtrDiff         : Type<"atomic_ptrdiff_t", QualType<"Context.getAtomicType(Context.getPointerDiffType())">>;
 
+def AtomicFlag            : TypedefType<"atomic_flag">;
 def MemoryOrder           : EnumType<"memory_order">;
 def MemoryScope           : EnumType<"memory_scope">;
 
@@ -913,6 +915,26 @@
 
 // OpenCL v3.0 s6.15.8 - Synchronization Functions.
 def : Builtin<"barrier", [Void, MemFenceFlags], Attr.Convergent>;
+let MinVersion = CL20 in {
+  def : Builtin<"work_group_barrier", [Void, MemFenceFlags], Attr.Convergent>;
+  def : Builtin<"work_group_barrier", [Void, MemFenceFlags, MemoryScope], Attr.Convergent>;
+}
+
+// OpenCL v3.0 s6.15.9 - Legacy Explicit Memory Fence Functions.
+def : Builtin<"mem_fence", [Void, MemFenceFlags]>;
+def : Builtin<"read_mem_fence", [Void, MemFenceFlags]>;
+def : Builtin<"write_mem_fence", [Void, MemFenceFlags]>;
+
+// OpenCL v3.0 s6.15.10 - Address Space Qualifier Functions.
+// to_global, to_local, to_private are declared in Builtins.def.
+
+let MinVersion = CL20 in {
+  // The OpenCL 3.0 specification defines these with a "gentype" argument indicating any builtin
+  // type or user-defined type, which cannot be represented currently. Hence we slightly diverge
+  // by providing only the following overloads with a void pointer.
+  def : Builtin<"get_fence", [MemFenceFlags, PointerType<Void, GenericAS>]>;
+  def : Builtin<"get_fence", [MemFenceFlags, PointerType<ConstType<Void>, GenericAS>]>;
+}
 
 //--------------------------------------------------------------------
 // OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10: Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch
@@ -1030,6 +1052,8 @@
 }
 // OpenCL v2.0 s6.13.11 - Atomic Functions.
 let MinVersion = CL20 in {
+  def : Builtin<"atomic_work_item_fence", [Void, MemFenceFlags, MemoryOrder, MemoryScope]>;
+
   foreach TypePair = [[AtomicInt, Int], [AtomicUInt, UInt],
                       [AtomicLong, Long], [AtomicULong, ULong],
                       [AtomicFloat, Float], [AtomicDouble, Double]] in {
@@ -1037,10 +1061,22 @@
         [Void, PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[1]]>;
     def : Builtin<"atomic_store",
         [Void, PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[1]]>;
+    def : Builtin<"atomic_store_explicit",
+        [Void, PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[1], MemoryOrder]>;
+    def : Builtin<"atomic_store_explicit",
+        [Void, PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[1], MemoryOrder, MemoryScope]>;
     def : Builtin<"atomic_load",
         [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>]>;
+    def : Builtin<"atomic_load_explicit",
+        [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>, MemoryOrder]>;
+    def : Builtin<"atomic_load_explicit",
+        [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>, MemoryOrder, MemoryScope]>;
     def : Builtin<"atomic_exchange",
         [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[1]]>;
+    def : Builtin<"atomic_exchange_explicit",
+        [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[1], MemoryOrder]>;
+    def : Builtin<"atomic_exchange_explicit",
+        [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[1], MemoryOrder, MemoryScope]>;
     foreach Variant = ["weak", "strong"] in {
       def : Builtin<"atomic_compare_exchange_" # Variant,
           [Bool, PointerType<VolatileType<TypePair[0]>, GenericAS>,
@@ -1061,6 +1097,10 @@
     foreach ModOp = ["add", "sub"] in {
       def : Builtin<"atomic_fetch_" # ModOp,
           [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[2]]>;
+      def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+          [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[2], MemoryOrder]>;
+      def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+          [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[2], MemoryOrder, MemoryScope]>;
     }
   }
   foreach TypePair = [[AtomicInt, Int, Int], [AtomicUInt, UInt, UInt],
@@ -1070,8 +1110,26 @@
     foreach ModOp = ["or", "xor", "and", "min", "max"] in {
       def : Builtin<"atomic_fetch_" # ModOp,
           [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[2]]>;
+      def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+          [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[2], MemoryOrder]>;
+      def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+          [TypePair[1], PointerType<VolatileType<TypePair[0]>, GenericAS>, TypePair[2], MemoryOrder, MemoryScope]>;
     }
   }
+
+  def : Builtin<"atomic_flag_clear",
+      [Void, PointerType<VolatileType<AtomicFlag>, GenericAS>]>;
+  def : Builtin<"atomic_flag_clear_explicit",
+      [Void, PointerType<VolatileType<AtomicFlag>, GenericAS>, MemoryOrder]>;
+  def : Builtin<"atomic_flag_clear_explicit",
+      [Void, PointerType<VolatileType<AtomicFlag>, GenericAS>, MemoryOrder, MemoryScope]>;
+
+  def : Builtin<"atomic_flag_test_and_set",
+      [Bool, PointerType<VolatileType<AtomicFlag>, GenericAS>]>;
+  def : Builtin<"atomic_flag_test_and_set_explicit",
+      [Bool, PointerType<VolatileType<AtomicFlag>, GenericAS>, MemoryOrder]>;
+  def : Builtin<"atomic_flag_test_and_set_explicit",
+      [Bool, PointerType<VolatileType<AtomicFlag>, GenericAS>, MemoryOrder, MemoryScope]>;
 }
 
 //--------------------------------------------------------------------
@@ -1304,7 +1362,8 @@
 def : Builtin<"create_user_event", [ClkEvent]>;
 def : Builtin<"is_valid_event", [Bool, ClkEvent]>;
 def : Builtin<"set_user_event_status", [Void, ClkEvent, Int]>;
-// TODO: capture_event_profiling_info
+def : Builtin<"capture_event_profiling_info",
+    [Void, ClkEvent, ClkProfilingInfo, PointerType<Void, GlobalAS>]>;
 
 // --- Table 35 ---
 def : Builtin<"get_default_queue", [Queue]>;
@@ -1502,7 +1561,10 @@
 }
 
 // --- Table 28.2.2 ---
-// TODO: sub_group_barrier
+let Extension = FuncExtKhrSubgroups in {
+  def : Builtin<"sub_group_barrier", [Void, MemFenceFlags], Attr.Convergent>;
+  def : Builtin<"sub_group_barrier", [Void, MemFenceFlags, MemoryScope], Attr.Convergent>;
+}
 
 // --- Table 28.2.4 ---
 let Extension = FuncExtKhrSubgroups in {
diff --git a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
--- a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
+++ b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
@@ -45,6 +45,9 @@
 typedef uint uint4 __attribute__((ext_vector_type(4)));
 typedef long long2 __attribute__((ext_vector_type(2)));
 
+typedef int clk_profiling_info;
+#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1
+
 typedef uint cl_mem_fence_flags;
 #define CLK_GLOBAL_MEM_FENCE   0x02
 
@@ -79,6 +82,15 @@
 }
 #endif
 
+#if defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200
+void test_typedef_args(clk_event_t evt, volatile atomic_flag *flg, global unsigned long long *values) {
+  capture_event_profiling_info(evt, CLK_PROFILING_COMMAND_EXEC_TIME, values);
+
+  atomic_flag_clear(flg);
+  bool result = atomic_flag_test_and_set(flg);
+}
+#endif
+
 kernel void basic_conversion() {
   double d;
   float f;
@@ -167,6 +179,11 @@
 // expected-error@-2{{implicit declaration of function 'get_sub_group_size' is invalid in OpenCL}}
 // expected-error@-3{{implicit conversion changes signedness}}
 #endif
+
+// Only test when the base header is included, because we need the enum declarations.
+#if !defined(NO_HEADER) && (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
+  sub_group_barrier(CLK_GLOBAL_MEM_FENCE, memory_scope_device);
+#endif
 }
 
 kernel void extended_subgroup(global uint4 *out, global int *scalar, global char2 *c2) {