Index: lib/Headers/opencl-c.h
===================================================================
--- lib/Headers/opencl-c.h
+++ lib/Headers/opencl-c.h
@@ -11540,7 +11540,7 @@
  *
  * vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)).
  *
- * The address computed as (p + (offset * n)) must be 
+ * The address computed as (p + (offset * n)) must be
  * 8-bit aligned if gentype is char, uchar;
  * 16-bit aligned if gentype is short, ushort, half;
  * 32-bit aligned if gentype is int, uint, float;
@@ -12093,6 +12093,7 @@
  * The read address computed as (p + offset)
  * must be 16-bit aligned.
  */
+#ifdef cl_khr_fp16
 float __ovld vload_half(size_t offset, const __constant half *p);
 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 float __ovld vload_half(size_t offset, const half *p);
@@ -12101,6 +12102,7 @@
 float __ovld vload_half(size_t offset, const __local half *p);
 float __ovld vload_half(size_t offset, const __private half *p);
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#endif //cl_khr_fp16
 
 /**
  * Read sizeof (halfn) bytes of data from address
@@ -12110,6 +12112,7 @@
  * value is returned. The read address computed
  * as (p + (offset * n)) must be 16-bit aligned.
  */
+#ifdef cl_khr_fp16
 float2 __ovld vload_half2(size_t offset, const __constant half *p);
 float3 __ovld vload_half3(size_t offset, const __constant half *p);
 float4 __ovld vload_half4(size_t offset, const __constant half *p);
@@ -12138,6 +12141,7 @@
 float8 __ovld vload_half8(size_t offset, const __private half *p);
 float16 __ovld vload_half16(size_t offset, const __private half *p);
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#endif //cl_khr_fp16
 
 /**
  * The float value given by data is first
@@ -12150,6 +12154,7 @@
  * The default current rounding mode is round to
  * nearest even.
  */
+#ifdef cl_khr_fp16
 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 void __ovld vstore_half(float data, size_t offset, half *p);
 void __ovld vstore_half_rte(float data, size_t offset, half *p);
@@ -12197,6 +12202,7 @@
 void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);
 #endif //cl_khr_fp64
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#endif //cl_khr_fp16
 
 /**
  * The floatn value given by data is converted to
@@ -12209,6 +12215,7 @@
  * The default current rounding mode is round to
  * nearest even.
  */
+#ifdef cl_khr_fp16
 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 void __ovld vstore_half2(float2 data, size_t offset, half *p);
 void __ovld vstore_half3(float3 data, size_t offset, half *p);
@@ -12416,6 +12423,7 @@
 void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);
 #endif //cl_khr_fp64
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#endif //cl_khr_fp16
 
 /**
  * For n = 1, 2, 4, 8 and 16 read sizeof (halfn)
@@ -12430,6 +12438,7 @@
  * The address computed as (p + (offset * 4))
  * must be aligned to sizeof (half) * 4 bytes.
  */
+#ifdef cl_khr_fp16
 float __ovld vloada_half(size_t offset, const __constant half *p);
 float2 __ovld vloada_half2(size_t offset, const __constant half *p);
 float3 __ovld vloada_half3(size_t offset, const __constant half *p);
@@ -12463,6 +12472,7 @@
 float8 __ovld vloada_half8(size_t offset, const __private half *p);
 float16 __ovld vloada_half16(size_t offset, const __private half *p);
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#endif //cl_khr_fp16
 
 /**
  * The floatn value given by data is converted to
@@ -12480,6 +12490,7 @@
  * mode. The default current rounding mode is
  * round to nearest even.
  */
+#ifdef cl_khr_fp16
 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
 void __ovld vstorea_half(float data, size_t offset, half *p);
 void __ovld vstorea_half2(float2 data, size_t offset, half *p);
@@ -12766,6 +12777,7 @@
 void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p);
 #endif //cl_khr_fp64
 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
+#endif //cl_khr_fp16
 
 // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions
 
@@ -12888,7 +12900,7 @@
 cl_mem_fence_flags __ovld get_fence(const void *ptr);
 cl_mem_fence_flags __ovld get_fence(void *ptr);
 
-/** 
+/**
  * Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions
  * and checked in Sema since they should be declared as
  *   addr gentype* to_addr (gentype*);
@@ -13773,7 +13785,7 @@
 // add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t.
 // or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.
 
-#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) 
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
 uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand);
 uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
 uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
@@ -14571,7 +14583,7 @@
  * only. The filter_mode specified in sampler
  * must be set to CLK_FILTER_NEAREST; otherwise
  * the values returned are undefined.
- 
+ *
  * The read_image{f|i|ui} calls that take
  * integer coordinates must use a sampler with
  * normalized coordinates set to