Index: clang/lib/Headers/opencl-c-base.h =================================================================== --- clang/lib/Headers/opencl-c-base.h +++ clang/lib/Headers/opencl-c-base.h @@ -25,6 +25,25 @@ #define cl_khr_integer_dot_product 1 #define __opencl_c_integer_dot_product_input_4x8bit 1 #define __opencl_c_integer_dot_product_input_4x8bit_packed 1 +#define cl_ext_float_atomics 1 +#ifdef cl_khr_fp16 +#define __opencl_c_ext_fp16_global_atomic_load_store 1 +#define __opencl_c_ext_fp16_local_atomic_load_store 1 +#define __opencl_c_ext_fp16_global_atomic_add 1 +#define __opencl_c_ext_fp16_local_atomic_add 1 +#define __opencl_c_ext_fp16_global_atomic_min_max 1 +#define __opencl_c_ext_fp16_local_atomic_min_max 1 +#endif +#ifdef __opencl_c_fp64 +#define __opencl_c_ext_fp64_global_atomic_add 1 +#define __opencl_c_ext_fp64_local_atomic_add 1 +#define __opencl_c_ext_fp64_global_atomic_min_max 1 +#define __opencl_c_ext_fp64_local_atomic_min_max 1 +#endif +#define __opencl_c_ext_fp32_global_atomic_add 1 +#define __opencl_c_ext_fp32_local_atomic_add 1 +#define __opencl_c_ext_fp32_global_atomic_min_max 1 +#define __opencl_c_ext_fp32_local_atomic_min_max 1 #endif // defined(__SPIR__) #endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) Index: clang/lib/Headers/opencl-c.h =================================================================== --- clang/lib/Headers/opencl-c.h +++ clang/lib/Headers/opencl-c.h @@ -13682,6 +13682,217 @@ #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) #endif //__OPENCL_C_VERSION__ >= CL_VERSION_3_0 +// The functionality added by cl_ext_float_atomics extension +#if __OPENCL_C_VERSION__ >= CL_VERSION_3_0 +#if defined(cl_ext_float_atomics) + +#if defined(__opencl_c_ext_fp32_global_atomic_min_max) +float __ovld atomic_fetch_min(volatile __global atomic_float *object, + float operand); +float __ovld atomic_fetch_max(volatile __global atomic_float *object, + float operand); +float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) + +#if defined(__opencl_c_ext_fp32_local_atomic_min_max) +float __ovld atomic_fetch_min(volatile __local atomic_float *object, + float operand); +float __ovld atomic_fetch_max(volatile __local atomic_float *object, + float operand); +float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp32_global_atomic_min_max) || \ + defined(__opencl_c_ext_fp32_local_atomic_min_max) +float __ovld atomic_fetch_min(volatile atomic_float *object, float operand); +float __ovld atomic_fetch_max(volatile atomic_float *object, float operand); +float __ovld atomic_fetch_min_explicit(volatile atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_min_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) || \ + defined(__opencl_c_ext_fp32_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp64_global_atomic_min_max) +double __ovld atomic_fetch_min(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_max(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) + +#if defined(__opencl_c_ext_fp64_local_atomic_min_max) +double __ovld atomic_fetch_min(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_max(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp64_global_atomic_min_max) || \ + defined(__opencl_c_ext_fp64_local_atomic_min_max) +double __ovld atomic_fetch_min(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_max(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) || \ + defined(__opencl_c_ext_fp64_local_atomic_min_max) + +#if defined(__opencl_c_ext_fp32_global_atomic_add) +float __ovld atomic_fetch_add(volatile __global atomic_float *object, + float operand); +float __ovld atomic_fetch_sub(volatile __global atomic_float *object, + float operand); +float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_global_atomic_add) + +#if defined(__opencl_c_ext_fp32_local_atomic_add) +float __ovld atomic_fetch_add(volatile __local atomic_float *object, + float operand); +float __ovld atomic_fetch_sub(volatile __local atomic_float *object, + float operand); +float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_local_atomic_add) + +#if defined(__opencl_c_ext_fp32_global_atomic_add) || \ + defined(__opencl_c_ext_fp32_local_atomic_add) +float __ovld atomic_fetch_add(volatile atomic_float *object, float operand); +float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand); +float __ovld atomic_fetch_add_explicit(volatile atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, + float operand, memory_order order); +float __ovld atomic_fetch_add_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, + float operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp32_global_atomic_add) || \ + defined(__opencl_c_ext_fp32_local_atomic_add) + +#if defined(__opencl_c_ext_fp64_global_atomic_add) +double __ovld atomic_fetch_add(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_sub(volatile __global atomic_double *object, + double operand); +double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_global_atomic_add) + +#if defined(__opencl_c_ext_fp64_local_atomic_add) +double __ovld atomic_fetch_add(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_sub(volatile __local atomic_double *object, + double operand); +double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_local_atomic_add) + +#if defined(__opencl_c_ext_fp64_global_atomic_add) || \ + defined(__opencl_c_ext_fp64_local_atomic_add) +double __ovld atomic_fetch_add(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand); +double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, + double operand, memory_order order); +double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, + double operand, memory_order order, + memory_scope scope); +#endif // defined(__opencl_c_ext_fp64_global_atomic_add) || \ + defined(__opencl_c_ext_fp64_local_atomic_add) + +#endif // cl_ext_float_atomics +#endif //__OPENCL_C_VERSION__ >= CL_VERSION_3_0 + // atomic_store() #if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device) Index: clang/lib/Sema/OpenCLBuiltins.td =================================================================== --- clang/lib/Sema/OpenCLBuiltins.td +++ clang/lib/Sema/OpenCLBuiltins.td @@ -27,6 +27,7 @@ def CL11 : Version<110>; def CL12 : Version<120>; def CL20 : Version<200>; +def CL30 : Version<300>; // Address spaces // Pointer types need to be assigned an address space. @@ -1102,6 +1103,58 @@ [Bool, PointerType, GenericAS>, MemoryOrder, MemoryScope]>; } +// The functionality added by cl_ext_float_atomics extension +let MinVersion = CL30 in { + foreach TypePair = + [[AtomicFloat, Float, Float], [AtomicDouble, Double, Double]] in { + foreach ModOp = ["add", "sub"] in { + foreach AddressSpaceiKind = [GenericAS, GlobalAS, LocalAS] in { + def: + Builtin<"atomic_fetch_" #ModOp, [ + TypePair[1], + PointerType, AddressSpaceiKind>, TypePair[2] + ]>; + def: + Builtin<"atomic_fetch_" #ModOp #"_explicit", [ + TypePair[1], + PointerType, AddressSpaceiKind>, + TypePair[2], MemoryOrder + ]>; + def: + Builtin<"atomic_fetch_" #ModOp #"_explicit", [ + TypePair[1], + PointerType, AddressSpaceiKind>, + TypePair[2], MemoryOrder, MemoryScope + ]>; + } + } + } + foreach TypePair = + [[AtomicFloat, Float, Float], [AtomicDouble, Double, Double]] in { + foreach ModOp = ["max", "min"] in { + foreach AddressSpaceiKind = [GenericAS, GlobalAS, LocalAS] in { + def: + Builtin<"atomic_fetch_" #ModOp, [ + TypePair[1], + PointerType, AddressSpaceiKind>, TypePair[2] + ]>; + def: + Builtin<"atomic_fetch_" #ModOp #"_explicit", [ + TypePair[1], + PointerType, AddressSpaceiKind>, + TypePair[2], MemoryOrder + ]>; + def: + Builtin<"atomic_fetch_" #ModOp #"_explicit", [ + TypePair[1], + PointerType, AddressSpaceiKind>, + TypePair[2], MemoryOrder, MemoryScope + ]>; + } + } + } +} + //-------------------------------------------------------------------- // OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions // --- Table 19 --- Index: clang/test/Headers/opencl-c-header.cl =================================================================== --- clang/test/Headers/opencl-c-header.cl +++ clang/test/Headers/opencl-c-header.cl @@ -135,6 +135,36 @@ #if __opencl_c_integer_dot_product_input_4x8bit_packed != 1 #error "Incorrectly defined __opencl_c_integer_dot_product_input_4x8bit_packed" #endif +#if __opencl_c_ext_fp16_global_atomic_load_store != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_load_store" +#endif +#if __opencl_c_ext_fp16_local_atomic_load_store != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_load_store" +#endif +#if __opencl_c_ext_fp16_global_atomic_add != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_add" +#endif +#if __opencl_c_ext_fp32_global_atomic_add != 1 +#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_add" +#endif +#if __opencl_c_ext_fp16_local_atomic_add != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_add" +#endif +#if __opencl_c_ext_fp32_local_atomic_add != 1 +#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_add" +#endif +#if __opencl_c_ext_fp16_global_atomic_min_max != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_min_max" +#endif +#if __opencl_c_ext_fp32_global_atomic_min_max != 1 +#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_min_max" +#endif +#if __opencl_c_ext_fp16_local_atomic_min_max != 1 +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_min_max" +#endif +#if __opencl_c_ext_fp32_local_atomic_min_max != 1 +#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_min_max" +#endif #else @@ -171,6 +201,48 @@ #ifdef __opencl_c_integer_dot_product_input_4x8bit_packed #error "Incorrect __opencl_c_integer_dot_product_input_4x8bit_packed define" #endif +#ifdef __opencl_c_ext_fp16_global_atomic_load_store +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_load_store defined" +#endif +#ifdef __opencl_c_ext_fp16_local_atomic_load_store +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_load_store defined" +#endif +#ifdef __opencl_c_ext_fp16_global_atomic_add +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp32_global_atomic_add +#error "Incorrectly __opencl_c_ext_fp32_global_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp64_global_atomic_add +#error "Incorrectly __opencl_c_ext_fp64_global_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp16_local_atomic_add +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp32_local_atomic_add +#error "Incorrectly __opencl_c_ext_fp32_local_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp64_local_atomic_add +#error "Incorrectly __opencl_c_ext_fp64_local_atomic_add defined" +#endif +#ifdef __opencl_c_ext_fp16_global_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_min_max defined" +#endif +#ifdef __opencl_c_ext_fp32_global_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp32_global_atomic_min_max defined" +#endif +#ifdef __opencl_c_ext_fp64_global_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp64_global_atomic_min_max defined" +#endif +#ifdef __opencl_c_ext_fp16_local_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_min_max defined" +#endif +#ifdef __opencl_c_ext_fp32_local_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp32_local_atomic_min_max defined" +#endif +#ifdef __opencl_c_ext_fp64_local_atomic_min_max +#error "Incorrectly __opencl_c_ext_fp64_local_atomic_min_max defined" +#endif #endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) Index: clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl =================================================================== --- clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl +++ clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl @@ -7,6 +7,8 @@ // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++ -fdeclare-opencl-builtins -DNO_HEADER // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++ -fdeclare-opencl-builtins -finclude-default-header // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header -cl-ext=-cl_khr_fp64 -DNO_FP64 +// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL3.0 -fdeclare-opencl-builtins -DNO_HEADER +// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL3.0 -fdeclare-opencl-builtins -finclude-default-header // Test the -fdeclare-opencl-builtins option. This is not a completeness // test, so it should not test for all builtins defined by OpenCL. Instead @@ -122,6 +124,46 @@ } #endif +#if !defined(NO_HEADER) && __OPENCL_C_VERSION__ == 300 +// Check added atomic_fetch_ functions by cl_ext_float_atomics +// extension can be called +void test_atomic_fetch_with_address_space(volatile __generic atomic_float *a_float, + volatile __generic atomic_double *a_double, + volatile __local atomic_float *a_float_local, + volatile __local atomic_double *a_double_local, + volatile __global atomic_float *a_float_global, + volatile __global atomic_double *a_double_global) { + float f1, resf1; + double d1, resd1; + + resf1 = atomic_fetch_min(a_float, f1); + resf1 = atomic_fetch_max(a_float, f1); + resf1 = atomic_fetch_add(a_float, f1); + resf1 = atomic_fetch_sub(a_float, f1); + resf1 = atomic_fetch_min_explicit(a_float_local, f1, memory_order_seq_cst); + resf1 = atomic_fetch_max_explicit(a_float_local, f1, memory_order_seq_cst); + resf1 = atomic_fetch_add_explicit(a_float_local, f1, memory_order_seq_cst); + resf1 = atomic_fetch_sub_explicit(a_float_local, f1, memory_order_seq_cst); + resf1 = atomic_fetch_min_explicit(a_float_global, f1, memory_order_seq_cst, memory_scope_work_group); + resf1 = atomic_fetch_max_explicit(a_float_global, f1, memory_order_seq_cst, memory_scope_work_group); + resf1 = atomic_fetch_add_explicit(a_float_global, f1, memory_order_seq_cst, memory_scope_work_group); + resf1 = atomic_fetch_sub_explicit(a_float_global, f1, memory_order_seq_cst, memory_scope_work_group); + + resd1 = atomic_fetch_min(a_double, d1); + resd1 = atomic_fetch_max(a_double, d1); + resd1 = atomic_fetch_add(a_double, d1); + resd1 = atomic_fetch_sub(a_double, d1); + resd1 = atomic_fetch_min_explicit(a_double_local, d1, memory_order_seq_cst); + resd1 = atomic_fetch_max_explicit(a_double_local, d1, memory_order_seq_cst); + resd1 = atomic_fetch_add_explicit(a_double_local, d1, memory_order_seq_cst); + resd1 = atomic_fetch_sub_explicit(a_double_local, d1, memory_order_seq_cst); + resd1 = atomic_fetch_min_explicit(a_double_global, d1, memory_order_seq_cst, memory_scope_work_group); + resd1 = atomic_fetch_max_explicit(a_double_global, d1, memory_order_seq_cst, memory_scope_work_group); + resd1 = atomic_fetch_add_explicit(a_double_global, d1, memory_order_seq_cst, memory_scope_work_group); + resd1 = atomic_fetch_sub_explicit(a_double_global, d1, memory_order_seq_cst, memory_scope_work_group); +} +#endif // !defined(NO_HEADER) && __OPENCL_C_VERSION__ == 300 + // Test old atomic overloaded with generic address space in C++ for OpenCL. #if __OPENCL_C_VERSION__ >= 200 void test_legacy_atomics_cpp(__generic volatile unsigned int *a) {