Define __nodup as __attribute__((noduplicate)) in opencl-c.h and add it to the
declarations of barrier, work_group_barrier, the work_group_* functions,
sub_group_barrier and the sub_group_* functions touched by the hunks below.

Index: lib/Headers/opencl-c.h
===================================================================
--- lib/Headers/opencl-c.h
+++ lib/Headers/opencl-c.h
@@ -17,6 +17,7 @@
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
 #define __ovld __attribute__((overloadable))
+#define __nodup __attribute__((noduplicate))
 
 // Optimizations
 #define __purefn __attribute__((pure))
@@ -13822,7 +13823,7 @@
  * image objects and then want to read the updated data.
  */
 
-void __ovld barrier(cl_mem_fence_flags flags);
+void __ovld __nodup barrier(cl_mem_fence_flags flags);
 
 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
@@ -13835,8 +13836,8 @@
   memory_scope_sub_group
 } memory_scope;
 
-void __ovld work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
-void __ovld work_group_barrier(cl_mem_fence_flags flags);
+void __ovld __nodup work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
+void __ovld __nodup work_group_barrier(cl_mem_fence_flags flags);
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
 // OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions
@@ -16559,101 +16560,101 @@
 // OpenCL v2.0 s6.13.15 - Work-group Functions
 
 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
-int __ovld work_group_all(int predicate);
-int __ovld work_group_any(int predicate);
+int __ovld __nodup work_group_all(int predicate);
+int __ovld __nodup work_group_any(int predicate);
 
 #ifdef cl_khr_fp16
-half __ovld work_group_broadcast(half a, size_t local_id);
-half __ovld work_group_broadcast(half a, size_t x, size_t y);
-half __ovld work_group_broadcast(half a, size_t x, size_t y, size_t z);
+half __ovld __nodup work_group_broadcast(half a, size_t local_id);
+half __ovld __nodup work_group_broadcast(half a, size_t x, size_t y);
+half __ovld __nodup work_group_broadcast(half a, size_t x, size_t y, size_t z);
 #endif
-int __ovld work_group_broadcast(int a, size_t local_id);
-int __ovld work_group_broadcast(int a, size_t x, size_t y);
-int __ovld work_group_broadcast(int a, size_t x, size_t y, size_t z);
-uint __ovld work_group_broadcast(uint a, size_t local_id);
-uint __ovld work_group_broadcast(uint a, size_t x, size_t y);
-uint __ovld work_group_broadcast(uint a, size_t x, size_t y, size_t z);
-long __ovld work_group_broadcast(long a, size_t local_id);
-long __ovld work_group_broadcast(long a, size_t x, size_t y);
-long __ovld work_group_broadcast(long a, size_t x, size_t y, size_t z);
-ulong __ovld work_group_broadcast(ulong a, size_t local_id);
-ulong __ovld work_group_broadcast(ulong a, size_t x, size_t y);
-ulong __ovld work_group_broadcast(ulong a, size_t x, size_t y, size_t z);
-float __ovld work_group_broadcast(float a, size_t local_id);
-float __ovld work_group_broadcast(float a, size_t x, size_t y);
-float __ovld work_group_broadcast(float a, size_t x, size_t y, size_t z);
+int __ovld __nodup work_group_broadcast(int a, size_t local_id);
+int __ovld __nodup work_group_broadcast(int a, size_t x, size_t y);
+int __ovld __nodup work_group_broadcast(int a, size_t x, size_t y, size_t z);
+uint __ovld __nodup work_group_broadcast(uint a, size_t local_id);
+uint __ovld __nodup work_group_broadcast(uint a, size_t x, size_t y);
+uint __ovld __nodup work_group_broadcast(uint a, size_t x, size_t y, size_t z);
+long __ovld __nodup work_group_broadcast(long a, size_t local_id);
+long __ovld __nodup work_group_broadcast(long a, size_t x, size_t y);
+long __ovld __nodup work_group_broadcast(long a, size_t x, size_t y, size_t z);
+ulong __ovld __nodup work_group_broadcast(ulong a, size_t local_id);
+ulong __ovld __nodup work_group_broadcast(ulong a, size_t x, size_t y);
+ulong __ovld __nodup work_group_broadcast(ulong a, size_t x, size_t y, size_t z);
+float __ovld __nodup work_group_broadcast(float a, size_t local_id);
+float __ovld __nodup work_group_broadcast(float a, size_t x, size_t y);
+float __ovld __nodup work_group_broadcast(float a, size_t x, size_t y, size_t z);
 #ifdef cl_khr_fp64
-double __ovld work_group_broadcast(double a, size_t local_id);
-double __ovld work_group_broadcast(double a, size_t x, size_t y);
-double __ovld work_group_broadcast(double a, size_t x, size_t y, size_t z);
+double __ovld __nodup work_group_broadcast(double a, size_t local_id);
+double __ovld __nodup work_group_broadcast(double a, size_t x, size_t y);
+double __ovld __nodup work_group_broadcast(double a, size_t x, size_t y, size_t z);
 #endif //cl_khr_fp64
 
 #ifdef cl_khr_fp16
-half __ovld work_group_reduce_add(half x);
-half __ovld work_group_reduce_min(half x);
-half __ovld work_group_reduce_max(half x);
-half __ovld work_group_scan_exclusive_add(half x);
-half __ovld work_group_scan_exclusive_min(half x);
-half __ovld work_group_scan_exclusive_max(half x);
-half __ovld work_group_scan_inclusive_add(half x);
-half __ovld work_group_scan_inclusive_min(half x);
-half __ovld work_group_scan_inclusive_max(half x);
+half __ovld __nodup work_group_reduce_add(half x);
+half __ovld __nodup work_group_reduce_min(half x);
+half __ovld __nodup work_group_reduce_max(half x);
+half __ovld __nodup work_group_scan_exclusive_add(half x);
+half __ovld __nodup work_group_scan_exclusive_min(half x);
+half __ovld __nodup work_group_scan_exclusive_max(half x);
+half __ovld __nodup work_group_scan_inclusive_add(half x);
+half __ovld __nodup work_group_scan_inclusive_min(half x);
+half __ovld __nodup work_group_scan_inclusive_max(half x);
 #endif
-int __ovld work_group_reduce_add(int x);
-int __ovld work_group_reduce_min(int x);
-int __ovld work_group_reduce_max(int x);
-int __ovld work_group_scan_exclusive_add(int x);
-int __ovld work_group_scan_exclusive_min(int x);
-int __ovld work_group_scan_exclusive_max(int x);
-int __ovld work_group_scan_inclusive_add(int x);
-int __ovld work_group_scan_inclusive_min(int x);
-int __ovld work_group_scan_inclusive_max(int x);
-uint __ovld work_group_reduce_add(uint x);
-uint __ovld work_group_reduce_min(uint x);
-uint __ovld work_group_reduce_max(uint x);
-uint __ovld work_group_scan_exclusive_add(uint x);
-uint __ovld work_group_scan_exclusive_min(uint x);
-uint __ovld work_group_scan_exclusive_max(uint x);
-uint __ovld work_group_scan_inclusive_add(uint x);
-uint __ovld work_group_scan_inclusive_min(uint x);
-uint __ovld work_group_scan_inclusive_max(uint x);
-long __ovld work_group_reduce_add(long x);
-long __ovld work_group_reduce_min(long x);
-long __ovld work_group_reduce_max(long x);
-long __ovld work_group_scan_exclusive_add(long x);
-long __ovld work_group_scan_exclusive_min(long x);
-long __ovld work_group_scan_exclusive_max(long x);
-long __ovld work_group_scan_inclusive_add(long x);
-long __ovld work_group_scan_inclusive_min(long x);
-long __ovld work_group_scan_inclusive_max(long x);
-ulong __ovld work_group_reduce_add(ulong x);
-ulong __ovld work_group_reduce_min(ulong x);
-ulong __ovld work_group_reduce_max(ulong x);
-ulong __ovld work_group_scan_exclusive_add(ulong x);
-ulong __ovld work_group_scan_exclusive_min(ulong x);
-ulong __ovld work_group_scan_exclusive_max(ulong x);
-ulong __ovld work_group_scan_inclusive_add(ulong x);
-ulong __ovld work_group_scan_inclusive_min(ulong x);
-ulong __ovld work_group_scan_inclusive_max(ulong x);
-float __ovld work_group_reduce_add(float x);
-float __ovld work_group_reduce_min(float x);
-float __ovld work_group_reduce_max(float x);
-float __ovld work_group_scan_exclusive_add(float x);
-float __ovld work_group_scan_exclusive_min(float x);
-float __ovld work_group_scan_exclusive_max(float x);
-float __ovld work_group_scan_inclusive_add(float x);
-float __ovld work_group_scan_inclusive_min(float x);
-float __ovld work_group_scan_inclusive_max(float x);
+int __ovld __nodup work_group_reduce_add(int x);
+int __ovld __nodup work_group_reduce_min(int x);
+int __ovld __nodup work_group_reduce_max(int x);
+int __ovld __nodup work_group_scan_exclusive_add(int x);
+int __ovld __nodup work_group_scan_exclusive_min(int x);
+int __ovld __nodup work_group_scan_exclusive_max(int x);
+int __ovld __nodup work_group_scan_inclusive_add(int x);
+int __ovld __nodup work_group_scan_inclusive_min(int x);
+int __ovld __nodup work_group_scan_inclusive_max(int x);
+uint __ovld __nodup work_group_reduce_add(uint x);
+uint __ovld __nodup work_group_reduce_min(uint x);
+uint __ovld __nodup work_group_reduce_max(uint x);
+uint __ovld __nodup work_group_scan_exclusive_add(uint x);
+uint __ovld __nodup work_group_scan_exclusive_min(uint x);
+uint __ovld __nodup work_group_scan_exclusive_max(uint x);
+uint __ovld __nodup work_group_scan_inclusive_add(uint x);
+uint __ovld __nodup work_group_scan_inclusive_min(uint x);
+uint __ovld __nodup work_group_scan_inclusive_max(uint x);
+long __ovld __nodup work_group_reduce_add(long x);
+long __ovld __nodup work_group_reduce_min(long x);
+long __ovld __nodup work_group_reduce_max(long x);
+long __ovld __nodup work_group_scan_exclusive_add(long x);
+long __ovld __nodup work_group_scan_exclusive_min(long x);
+long __ovld __nodup work_group_scan_exclusive_max(long x);
+long __ovld __nodup work_group_scan_inclusive_add(long x);
+long __ovld __nodup work_group_scan_inclusive_min(long x);
+long __ovld __nodup work_group_scan_inclusive_max(long x);
+ulong __ovld __nodup work_group_reduce_add(ulong x);
+ulong __ovld __nodup work_group_reduce_min(ulong x);
+ulong __ovld __nodup work_group_reduce_max(ulong x);
+ulong __ovld __nodup work_group_scan_exclusive_add(ulong x);
+ulong __ovld __nodup work_group_scan_exclusive_min(ulong x);
+ulong __ovld __nodup work_group_scan_exclusive_max(ulong x);
+ulong __ovld __nodup work_group_scan_inclusive_add(ulong x);
+ulong __ovld __nodup work_group_scan_inclusive_min(ulong x);
+ulong __ovld __nodup work_group_scan_inclusive_max(ulong x);
+float __ovld __nodup work_group_reduce_add(float x);
+float __ovld __nodup work_group_reduce_min(float x);
+float __ovld __nodup work_group_reduce_max(float x);
+float __ovld __nodup work_group_scan_exclusive_add(float x);
+float __ovld __nodup work_group_scan_exclusive_min(float x);
+float __ovld __nodup work_group_scan_exclusive_max(float x);
+float __ovld __nodup work_group_scan_inclusive_add(float x);
+float __ovld __nodup work_group_scan_inclusive_min(float x);
+float __ovld __nodup work_group_scan_inclusive_max(float x);
 #ifdef cl_khr_fp64
-double __ovld work_group_reduce_add(double x);
-double __ovld work_group_reduce_min(double x);
-double __ovld work_group_reduce_max(double x);
-double __ovld work_group_scan_exclusive_add(double x);
-double __ovld work_group_scan_exclusive_min(double x);
-double __ovld work_group_scan_exclusive_max(double x);
-double __ovld work_group_scan_inclusive_add(double x);
-double __ovld work_group_scan_inclusive_min(double x);
-double __ovld work_group_scan_inclusive_max(double x);
+double __ovld __nodup work_group_reduce_add(double x);
+double __ovld __nodup work_group_reduce_min(double x);
+double __ovld __nodup work_group_reduce_max(double x);
+double __ovld __nodup work_group_scan_exclusive_add(double x);
+double __ovld __nodup work_group_scan_exclusive_min(double x);
+double __ovld __nodup work_group_scan_exclusive_max(double x);
+double __ovld __nodup work_group_scan_inclusive_add(double x);
+double __ovld __nodup work_group_scan_inclusive_min(double x);
+double __ovld __nodup work_group_scan_inclusive_max(double x);
 #endif //cl_khr_fp64
 
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
@@ -16753,92 +16754,92 @@
 uint __ovld get_sub_group_id(void);
 uint __ovld get_sub_group_local_id(void);
 
-void __ovld sub_group_barrier(cl_mem_fence_flags flags);
+void __ovld __nodup sub_group_barrier(cl_mem_fence_flags flags);
 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
-void __ovld sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
+void __ovld __nodup sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
 
-int __ovld sub_group_all(int predicate);
-int __ovld sub_group_any(int predicate);
-
-int __ovld sub_group_broadcast(int x, uint sub_group_local_id);
-uint __ovld sub_group_broadcast(uint x, uint sub_group_local_id);
-long __ovld sub_group_broadcast(long x, uint sub_group_local_id);
-ulong __ovld sub_group_broadcast(ulong x, uint sub_group_local_id);
-float __ovld sub_group_broadcast(float x, uint sub_group_local_id);
-
-int __ovld sub_group_reduce_add(int x);
-uint __ovld sub_group_reduce_add(uint x);
-long __ovld sub_group_reduce_add(long x);
-ulong __ovld sub_group_reduce_add(ulong x);
-float __ovld sub_group_reduce_add(float x);
-int __ovld sub_group_reduce_min(int x);
-uint __ovld sub_group_reduce_min(uint x);
-long __ovld sub_group_reduce_min(long x);
-ulong __ovld sub_group_reduce_min(ulong x);
-float __ovld sub_group_reduce_min(float x);
-int __ovld sub_group_reduce_max(int x);
-uint __ovld sub_group_reduce_max(uint x);
-long __ovld sub_group_reduce_max(long x);
-ulong __ovld sub_group_reduce_max(ulong x);
-float __ovld sub_group_reduce_max(float x);
-
-int __ovld sub_group_scan_exclusive_add(int x);
-uint __ovld sub_group_scan_exclusive_add(uint x);
-long __ovld sub_group_scan_exclusive_add(long x);
-ulong __ovld sub_group_scan_exclusive_add(ulong x);
-float __ovld sub_group_scan_exclusive_add(float x);
-int __ovld sub_group_scan_exclusive_min(int x);
-uint __ovld sub_group_scan_exclusive_min(uint x);
-long __ovld sub_group_scan_exclusive_min(long x);
-ulong __ovld sub_group_scan_exclusive_min(ulong x);
-float __ovld sub_group_scan_exclusive_min(float x);
-int __ovld sub_group_scan_exclusive_max(int x);
-uint __ovld sub_group_scan_exclusive_max(uint x);
-long __ovld sub_group_scan_exclusive_max(long x);
-ulong __ovld sub_group_scan_exclusive_max(ulong x);
-float __ovld sub_group_scan_exclusive_max(float x);
-
-int __ovld sub_group_scan_inclusive_add(int x);
-uint __ovld sub_group_scan_inclusive_add(uint x);
-long __ovld sub_group_scan_inclusive_add(long x);
-ulong __ovld sub_group_scan_inclusive_add(ulong x);
-float __ovld sub_group_scan_inclusive_add(float x);
-int __ovld sub_group_scan_inclusive_min(int x);
-uint __ovld sub_group_scan_inclusive_min(uint x);
-long __ovld sub_group_scan_inclusive_min(long x);
-ulong __ovld sub_group_scan_inclusive_min(ulong x);
-float __ovld sub_group_scan_inclusive_min(float x);
-int __ovld sub_group_scan_inclusive_max(int x);
-uint __ovld sub_group_scan_inclusive_max(uint x);
-long __ovld sub_group_scan_inclusive_max(long x);
-ulong __ovld sub_group_scan_inclusive_max(ulong x);
-float __ovld sub_group_scan_inclusive_max(float x);
+int __ovld __nodup sub_group_all(int predicate);
+int __ovld __nodup sub_group_any(int predicate);
+
+int __ovld __nodup sub_group_broadcast(int x, uint sub_group_local_id);
+uint __ovld __nodup sub_group_broadcast(uint x, uint sub_group_local_id);
+long __ovld __nodup sub_group_broadcast(long x, uint sub_group_local_id);
+ulong __ovld __nodup sub_group_broadcast(ulong x, uint sub_group_local_id);
+float __ovld __nodup sub_group_broadcast(float x, uint sub_group_local_id);
+
+int __ovld __nodup sub_group_reduce_add(int x);
+uint __ovld __nodup sub_group_reduce_add(uint x);
+long __ovld __nodup sub_group_reduce_add(long x);
+ulong __ovld __nodup sub_group_reduce_add(ulong x);
+float __ovld __nodup sub_group_reduce_add(float x);
+int __ovld __nodup sub_group_reduce_min(int x);
+uint __ovld __nodup sub_group_reduce_min(uint x);
+long __ovld __nodup sub_group_reduce_min(long x);
+ulong __ovld __nodup sub_group_reduce_min(ulong x);
+float __ovld __nodup sub_group_reduce_min(float x);
+int __ovld __nodup sub_group_reduce_max(int x);
+uint __ovld __nodup sub_group_reduce_max(uint x);
+long __ovld __nodup sub_group_reduce_max(long x);
+ulong __ovld __nodup sub_group_reduce_max(ulong x);
+float __ovld __nodup sub_group_reduce_max(float x);
+
+int __ovld __nodup sub_group_scan_exclusive_add(int x);
+uint __ovld __nodup sub_group_scan_exclusive_add(uint x);
+long __ovld __nodup sub_group_scan_exclusive_add(long x);
+ulong __ovld __nodup sub_group_scan_exclusive_add(ulong x);
+float __ovld __nodup sub_group_scan_exclusive_add(float x);
+int __ovld __nodup sub_group_scan_exclusive_min(int x);
+uint __ovld __nodup sub_group_scan_exclusive_min(uint x);
+long __ovld __nodup sub_group_scan_exclusive_min(long x);
+ulong __ovld __nodup sub_group_scan_exclusive_min(ulong x);
+float __ovld __nodup sub_group_scan_exclusive_min(float x);
+int __ovld __nodup sub_group_scan_exclusive_max(int x);
+uint __ovld __nodup sub_group_scan_exclusive_max(uint x);
+long __ovld __nodup sub_group_scan_exclusive_max(long x);
+ulong __ovld __nodup sub_group_scan_exclusive_max(ulong x);
+float __ovld __nodup sub_group_scan_exclusive_max(float x);
+
+int __ovld __nodup sub_group_scan_inclusive_add(int x);
+uint __ovld __nodup sub_group_scan_inclusive_add(uint x);
+long __ovld __nodup sub_group_scan_inclusive_add(long x);
+ulong __ovld __nodup sub_group_scan_inclusive_add(ulong x);
+float __ovld __nodup sub_group_scan_inclusive_add(float x);
+int __ovld __nodup sub_group_scan_inclusive_min(int x);
+uint __ovld __nodup sub_group_scan_inclusive_min(uint x);
+long __ovld __nodup sub_group_scan_inclusive_min(long x);
+ulong __ovld __nodup sub_group_scan_inclusive_min(ulong x);
+float __ovld __nodup sub_group_scan_inclusive_min(float x);
+int __ovld __nodup sub_group_scan_inclusive_max(int x);
+uint __ovld __nodup sub_group_scan_inclusive_max(uint x);
+long __ovld __nodup sub_group_scan_inclusive_max(long x);
+ulong __ovld __nodup sub_group_scan_inclusive_max(ulong x);
+float __ovld __nodup sub_group_scan_inclusive_max(float x);
 
 #ifdef cl_khr_fp16
-half __ovld sub_group_broadcast(half x, uint sub_group_local_id);
-half __ovld sub_group_reduce_add(half x);
-half __ovld sub_group_reduce_min(half x);
-half __ovld sub_group_reduce_max(half x);
-half __ovld sub_group_scan_exclusive_add(half x);
-half __ovld sub_group_scan_exclusive_min(half x);
-half __ovld sub_group_scan_exclusive_max(half x);
-half __ovld sub_group_scan_inclusive_add(half x);
-half __ovld sub_group_scan_inclusive_min(half x);
-half __ovld sub_group_scan_inclusive_max(half x);
+half __ovld __nodup sub_group_broadcast(half x, uint sub_group_local_id);
+half __ovld __nodup sub_group_reduce_add(half x);
+half __ovld __nodup sub_group_reduce_min(half x);
+half __ovld __nodup sub_group_reduce_max(half x);
+half __ovld __nodup sub_group_scan_exclusive_add(half x);
+half __ovld __nodup sub_group_scan_exclusive_min(half x);
+half __ovld __nodup sub_group_scan_exclusive_max(half x);
+half __ovld __nodup sub_group_scan_inclusive_add(half x);
+half __ovld __nodup sub_group_scan_inclusive_min(half x);
+half __ovld __nodup sub_group_scan_inclusive_max(half x);
 #endif //cl_khr_fp16
 
 #ifdef cl_khr_fp64
-double __ovld sub_group_broadcast(double x, uint sub_group_local_id);
-double __ovld sub_group_reduce_add(double x);
-double __ovld sub_group_reduce_min(double x);
-double __ovld sub_group_reduce_max(double x);
-double __ovld sub_group_scan_exclusive_add(double x);
-double __ovld sub_group_scan_exclusive_min(double x);
-double __ovld sub_group_scan_exclusive_max(double x);
-double __ovld sub_group_scan_inclusive_add(double x);
-double __ovld sub_group_scan_inclusive_min(double x);
-double __ovld sub_group_scan_inclusive_max(double x);
+double __ovld __nodup sub_group_broadcast(double x, uint sub_group_local_id);
+double __ovld __nodup sub_group_reduce_add(double x);
+double __ovld __nodup sub_group_reduce_min(double x);
+double __ovld __nodup sub_group_reduce_max(double x);
+double __ovld __nodup sub_group_scan_exclusive_add(double x);
+double __ovld __nodup sub_group_scan_exclusive_min(double x);
+double __ovld __nodup sub_group_scan_exclusive_max(double x);
+double __ovld __nodup sub_group_scan_inclusive_add(double x);
+double __ovld __nodup sub_group_scan_inclusive_min(double x);
+double __ovld __nodup sub_group_scan_inclusive_max(double x);
 #endif //cl_khr_fp64
 
 #endif //cl_khr_subgroups cl_intel_subgroups