Index: lib/Headers/opencl-c.h =================================================================== --- lib/Headers/opencl-c.h +++ lib/Headers/opencl-c.h @@ -11540,7 +11540,7 @@ * * vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)). * - * The address computed as (p + (offset * n)) must be + * The address computed as (p + (offset * n)) must be * 8-bit aligned if gentype is char, uchar; * 16-bit aligned if gentype is short, ushort, half; * 32-bit aligned if gentype is int, uint, float; @@ -12093,6 +12093,7 @@ * The read address computed as (p + offset) * must be 16-bit aligned. */ +#ifdef cl_khr_fp16 float __ovld vload_half(size_t offset, const __constant half *p); #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 float __ovld vload_half(size_t offset, const half *p); @@ -12101,6 +12102,7 @@ float __ovld vload_half(size_t offset, const __local half *p); float __ovld vload_half(size_t offset, const __private half *p); #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //cl_khr_fp16 /** * Read sizeof (halfn) bytes of data from address @@ -12110,6 +12112,7 @@ * value is returned. The read address computed * as (p + (offset * n)) must be 16-bit aligned. */ +#ifdef cl_khr_fp16 float2 __ovld vload_half2(size_t offset, const __constant half *p); float3 __ovld vload_half3(size_t offset, const __constant half *p); float4 __ovld vload_half4(size_t offset, const __constant half *p); @@ -12138,6 +12141,7 @@ float8 __ovld vload_half8(size_t offset, const __private half *p); float16 __ovld vload_half16(size_t offset, const __private half *p); #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //cl_khr_fp16 /** * The float value given by data is first @@ -12150,6 +12154,7 @@ * The default current rounding mode is round to * nearest even. */ +#ifdef cl_khr_fp16 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 void __ovld vstore_half(float data, size_t offset, half *p); void __ovld vstore_half_rte(float data, size_t offset, half *p); @@ -12197,6 +12202,7 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); #endif //cl_khr_fp64 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //cl_khr_fp16 /** * The floatn value given by data is converted to @@ -12209,6 +12215,7 @@ * The default current rounding mode is round to * nearest even. */ +#ifdef cl_khr_fp16 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 void __ovld vstore_half2(float2 data, size_t offset, half *p); void __ovld vstore_half3(float3 data, size_t offset, half *p); @@ -12416,6 +12423,7 @@ void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p); #endif //cl_khr_fp64 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //cl_khr_fp16 /** * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) @@ -12430,6 +12438,7 @@ * The address computed as (p + (offset * 4)) * must be aligned to sizeof (half) * 4 bytes. */ +#ifdef cl_khr_fp16 float __ovld vloada_half(size_t offset, const __constant half *p); float2 __ovld vloada_half2(size_t offset, const __constant half *p); float3 __ovld vloada_half3(size_t offset, const __constant half *p); @@ -12463,6 +12472,7 @@ float8 __ovld vloada_half8(size_t offset, const __private half *p); float16 __ovld vloada_half16(size_t offset, const __private half *p); #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //cl_khr_fp16 /** * The floatn value given by data is converted to @@ -12480,6 +12490,7 @@ * mode. The default current rounding mode is * round to nearest even. */ +#ifdef cl_khr_fp16 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 void __ovld vstorea_half(float data, size_t offset, half *p); void __ovld vstorea_half2(float2 data, size_t offset, half *p); @@ -12766,6 +12777,7 @@ void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p); #endif //cl_khr_fp64 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //cl_khr_fp16 // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions @@ -12888,7 +12900,7 @@ cl_mem_fence_flags __ovld get_fence(const void *ptr); cl_mem_fence_flags __ovld get_fence(void *ptr); -/** +/** * Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions * and checked in Sema since they should be declared as * addr gentype* to_addr (gentype*); @@ -13773,7 +13785,7 @@ // add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t. // or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. -#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand); uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); @@ -14571,7 +14583,7 @@ * only. The filter_mode specified in sampler * must be set to CLK_FILTER_NEAREST; otherwise * the values returned are undefined. - + * The read_image{f|i|ui} calls that take * integer coordinates must use a sampler with * normalized coordinates set to