# Patch summary: adds the cl_khr_extended_bit_ops OpenCL extension to Clang's
# OpenCL headers, its TableGen builtin list, and the header regression test.
#
# NOTE(review): this patch text was re-split from a whitespace-collapsed copy.
# Blank context lines and indentation were placed using the hunk line counts,
# and runs of spaces inside context lines may have been lost -- confirm against
# the target files (or apply with whitespace-insensitive matching) before
# relying on it.
#
# opencl-c-base.h: define the cl_khr_extended_bit_ops macro next to the other
# extension macros that precede "#endif // defined(__SPIR__)".
Index: clang/lib/Headers/opencl-c-base.h
===================================================================
--- clang/lib/Headers/opencl-c-base.h
+++ clang/lib/Headers/opencl-c-base.h
@@ -21,6 +21,7 @@
 #define cl_khr_subgroup_shuffle 1
 #define cl_khr_subgroup_shuffle_relative 1
 #define cl_khr_subgroup_clustered_reduce 1
+#define cl_khr_extended_bit_ops 1
 #endif // defined(__SPIR__)
 #endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
 
# opencl-c.h: declare bitfield_insert, bitfield_extract_signed,
# bitfield_extract_unsigned and bit_reverse for char/short/int/long, their
# unsigned counterparts, and the 2/3/4/8/16-element vectors of each, all
# guarded by "#if defined(cl_khr_extended_bit_ops)".
# bitfield_extract_signed always returns the signed counterpart of its first
# argument's type and bitfield_extract_unsigned the unsigned counterpart,
# whichever signedness the argument has.
Index: clang/lib/Headers/opencl-c.h
===================================================================
--- clang/lib/Headers/opencl-c.h
+++ clang/lib/Headers/opencl-c.h
@@ -16051,6 +16051,206 @@
 
 #endif // cl_khr_subgroup_clustered_reduce
 
+#if defined(cl_khr_extended_bit_ops)
+char __ovld __cnfn bitfield_insert(char, char, uint, uint);
+uchar __ovld __cnfn bitfield_insert(uchar, uchar, uint, uint);
+short __ovld __cnfn bitfield_insert(short, short, uint, uint);
+ushort __ovld __cnfn bitfield_insert(ushort, ushort, uint, uint);
+int __ovld __cnfn bitfield_insert(int, int, uint, uint);
+uint __ovld __cnfn bitfield_insert(uint, uint, uint, uint);
+long __ovld __cnfn bitfield_insert(long, long, uint, uint);
+ulong __ovld __cnfn bitfield_insert(ulong, ulong, uint, uint);
+char2 __ovld __cnfn bitfield_insert(char2, char2, uint, uint);
+uchar2 __ovld __cnfn bitfield_insert(uchar2, uchar2, uint, uint);
+short2 __ovld __cnfn bitfield_insert(short2, short2, uint, uint);
+ushort2 __ovld __cnfn bitfield_insert(ushort2, ushort2, uint, uint);
+int2 __ovld __cnfn bitfield_insert(int2, int2, uint, uint);
+uint2 __ovld __cnfn bitfield_insert(uint2, uint2, uint, uint);
+long2 __ovld __cnfn bitfield_insert(long2, long2, uint, uint);
+ulong2 __ovld __cnfn bitfield_insert(ulong2, ulong2, uint, uint);
+char3 __ovld __cnfn bitfield_insert(char3, char3, uint, uint);
+uchar3 __ovld __cnfn bitfield_insert(uchar3, uchar3, uint, uint);
+short3 __ovld __cnfn bitfield_insert(short3, short3, uint, uint);
+ushort3 __ovld __cnfn bitfield_insert(ushort3, ushort3, uint, uint);
+int3 __ovld __cnfn bitfield_insert(int3, int3, uint, uint);
+uint3 __ovld __cnfn bitfield_insert(uint3, uint3, uint, uint);
+long3 __ovld __cnfn bitfield_insert(long3, long3, uint, uint);
+ulong3 __ovld __cnfn bitfield_insert(ulong3, ulong3, uint, uint);
+char4 __ovld __cnfn bitfield_insert(char4, char4, uint, uint);
+uchar4 __ovld __cnfn bitfield_insert(uchar4, uchar4, uint, uint);
+short4 __ovld __cnfn bitfield_insert(short4, short4, uint, uint);
+ushort4 __ovld __cnfn bitfield_insert(ushort4, ushort4, uint, uint);
+int4 __ovld __cnfn bitfield_insert(int4, int4, uint, uint);
+uint4 __ovld __cnfn bitfield_insert(uint4, uint4, uint, uint);
+long4 __ovld __cnfn bitfield_insert(long4, long4, uint, uint);
+ulong4 __ovld __cnfn bitfield_insert(ulong4, ulong4, uint, uint);
+char8 __ovld __cnfn bitfield_insert(char8, char8, uint, uint);
+uchar8 __ovld __cnfn bitfield_insert(uchar8, uchar8, uint, uint);
+short8 __ovld __cnfn bitfield_insert(short8, short8, uint, uint);
+ushort8 __ovld __cnfn bitfield_insert(ushort8, ushort8, uint, uint);
+int8 __ovld __cnfn bitfield_insert(int8, int8, uint, uint);
+uint8 __ovld __cnfn bitfield_insert(uint8, uint8, uint, uint);
+long8 __ovld __cnfn bitfield_insert(long8, long8, uint, uint);
+ulong8 __ovld __cnfn bitfield_insert(ulong8, ulong8, uint, uint);
+char16 __ovld __cnfn bitfield_insert(char16, char16, uint, uint);
+uchar16 __ovld __cnfn bitfield_insert(uchar16, uchar16, uint, uint);
+short16 __ovld __cnfn bitfield_insert(short16, short16, uint, uint);
+ushort16 __ovld __cnfn bitfield_insert(ushort16, ushort16, uint, uint);
+int16 __ovld __cnfn bitfield_insert(int16, int16, uint, uint);
+uint16 __ovld __cnfn bitfield_insert(uint16, uint16, uint, uint);
+long16 __ovld __cnfn bitfield_insert(long16, long16, uint, uint);
+ulong16 __ovld __cnfn bitfield_insert(ulong16, ulong16, uint, uint);
+
+char __ovld __cnfn bitfield_extract_signed(char, uint, uint);
+short __ovld __cnfn bitfield_extract_signed(short, uint, uint);
+int __ovld __cnfn bitfield_extract_signed(int, uint, uint);
+long __ovld __cnfn bitfield_extract_signed(long, uint, uint);
+char2 __ovld __cnfn bitfield_extract_signed(char2, uint, uint);
+short2 __ovld __cnfn bitfield_extract_signed(short2, uint, uint);
+int2 __ovld __cnfn bitfield_extract_signed(int2, uint, uint);
+long2 __ovld __cnfn bitfield_extract_signed(long2, uint, uint);
+char3 __ovld __cnfn bitfield_extract_signed(char3, uint, uint);
+short3 __ovld __cnfn bitfield_extract_signed(short3, uint, uint);
+int3 __ovld __cnfn bitfield_extract_signed(int3, uint, uint);
+long3 __ovld __cnfn bitfield_extract_signed(long3, uint, uint);
+char4 __ovld __cnfn bitfield_extract_signed(char4, uint, uint);
+short4 __ovld __cnfn bitfield_extract_signed(short4, uint, uint);
+int4 __ovld __cnfn bitfield_extract_signed(int4, uint, uint);
+long4 __ovld __cnfn bitfield_extract_signed(long4, uint, uint);
+char8 __ovld __cnfn bitfield_extract_signed(char8, uint, uint);
+short8 __ovld __cnfn bitfield_extract_signed(short8, uint, uint);
+int8 __ovld __cnfn bitfield_extract_signed(int8, uint, uint);
+long8 __ovld __cnfn bitfield_extract_signed(long8, uint, uint);
+char16 __ovld __cnfn bitfield_extract_signed(char16, uint, uint);
+short16 __ovld __cnfn bitfield_extract_signed(short16, uint, uint);
+int16 __ovld __cnfn bitfield_extract_signed(int16, uint, uint);
+long16 __ovld __cnfn bitfield_extract_signed(long16, uint, uint);
+
+char __ovld __cnfn bitfield_extract_signed(uchar, uint, uint);
+short __ovld __cnfn bitfield_extract_signed(ushort, uint, uint);
+int __ovld __cnfn bitfield_extract_signed(uint, uint, uint);
+long __ovld __cnfn bitfield_extract_signed(ulong, uint, uint);
+char2 __ovld __cnfn bitfield_extract_signed(uchar2, uint, uint);
+short2 __ovld __cnfn bitfield_extract_signed(ushort2, uint, uint);
+int2 __ovld __cnfn bitfield_extract_signed(uint2, uint, uint);
+long2 __ovld __cnfn bitfield_extract_signed(ulong2, uint, uint);
+char3 __ovld __cnfn bitfield_extract_signed(uchar3, uint, uint);
+short3 __ovld __cnfn bitfield_extract_signed(ushort3, uint, uint);
+int3 __ovld __cnfn bitfield_extract_signed(uint3, uint, uint);
+long3 __ovld __cnfn bitfield_extract_signed(ulong3, uint, uint);
+char4 __ovld __cnfn bitfield_extract_signed(uchar4, uint, uint);
+short4 __ovld __cnfn bitfield_extract_signed(ushort4, uint, uint);
+int4 __ovld __cnfn bitfield_extract_signed(uint4, uint, uint);
+long4 __ovld __cnfn bitfield_extract_signed(ulong4, uint, uint);
+char8 __ovld __cnfn bitfield_extract_signed(uchar8, uint, uint);
+short8 __ovld __cnfn bitfield_extract_signed(ushort8, uint, uint);
+int8 __ovld __cnfn bitfield_extract_signed(uint8, uint, uint);
+long8 __ovld __cnfn bitfield_extract_signed(ulong8, uint, uint);
+char16 __ovld __cnfn bitfield_extract_signed(uchar16, uint, uint);
+short16 __ovld __cnfn bitfield_extract_signed(ushort16, uint, uint);
+int16 __ovld __cnfn bitfield_extract_signed(uint16, uint, uint);
+long16 __ovld __cnfn bitfield_extract_signed(ulong16, uint, uint);
+
+uchar __ovld __cnfn bitfield_extract_unsigned(char, uint, uint);
+ushort __ovld __cnfn bitfield_extract_unsigned(short, uint, uint);
+uint __ovld __cnfn bitfield_extract_unsigned(int, uint, uint);
+ulong __ovld __cnfn bitfield_extract_unsigned(long, uint, uint);
+uchar2 __ovld __cnfn bitfield_extract_unsigned(char2, uint, uint);
+ushort2 __ovld __cnfn bitfield_extract_unsigned(short2, uint, uint);
+uint2 __ovld __cnfn bitfield_extract_unsigned(int2, uint, uint);
+ulong2 __ovld __cnfn bitfield_extract_unsigned(long2, uint, uint);
+uchar3 __ovld __cnfn bitfield_extract_unsigned(char3, uint, uint);
+ushort3 __ovld __cnfn bitfield_extract_unsigned(short3, uint, uint);
+uint3 __ovld __cnfn bitfield_extract_unsigned(int3, uint, uint);
+ulong3 __ovld __cnfn bitfield_extract_unsigned(long3, uint, uint);
+uchar4 __ovld __cnfn bitfield_extract_unsigned(char4, uint, uint);
+ushort4 __ovld __cnfn bitfield_extract_unsigned(short4, uint, uint);
+uint4 __ovld __cnfn bitfield_extract_unsigned(int4, uint, uint);
+ulong4 __ovld __cnfn bitfield_extract_unsigned(long4, uint, uint);
+uchar8 __ovld __cnfn bitfield_extract_unsigned(char8, uint, uint);
+ushort8 __ovld __cnfn bitfield_extract_unsigned(short8, uint, uint);
+uint8 __ovld __cnfn bitfield_extract_unsigned(int8, uint, uint);
+ulong8 __ovld __cnfn bitfield_extract_unsigned(long8, uint, uint);
+uchar16 __ovld __cnfn bitfield_extract_unsigned(char16, uint, uint);
+ushort16 __ovld __cnfn bitfield_extract_unsigned(short16, uint, uint);
+uint16 __ovld __cnfn bitfield_extract_unsigned(int16, uint, uint);
+ulong16 __ovld __cnfn bitfield_extract_unsigned(long16, uint, uint);
+
+uchar __ovld __cnfn bitfield_extract_unsigned(uchar, uint, uint);
+ushort __ovld __cnfn bitfield_extract_unsigned(ushort, uint, uint);
+uint __ovld __cnfn bitfield_extract_unsigned(uint, uint, uint);
+ulong __ovld __cnfn bitfield_extract_unsigned(ulong, uint, uint);
+uchar2 __ovld __cnfn bitfield_extract_unsigned(uchar2, uint, uint);
+ushort2 __ovld __cnfn bitfield_extract_unsigned(ushort2, uint, uint);
+uint2 __ovld __cnfn bitfield_extract_unsigned(uint2, uint, uint);
+ulong2 __ovld __cnfn bitfield_extract_unsigned(ulong2, uint, uint);
+uchar3 __ovld __cnfn bitfield_extract_unsigned(uchar3, uint, uint);
+ushort3 __ovld __cnfn bitfield_extract_unsigned(ushort3, uint, uint);
+uint3 __ovld __cnfn bitfield_extract_unsigned(uint3, uint, uint);
+ulong3 __ovld __cnfn bitfield_extract_unsigned(ulong3, uint, uint);
+uchar4 __ovld __cnfn bitfield_extract_unsigned(uchar4, uint, uint);
+ushort4 __ovld __cnfn bitfield_extract_unsigned(ushort4, uint, uint);
+uint4 __ovld __cnfn bitfield_extract_unsigned(uint4, uint, uint);
+ulong4 __ovld __cnfn bitfield_extract_unsigned(ulong4, uint, uint);
+uchar8 __ovld __cnfn bitfield_extract_unsigned(uchar8, uint, uint);
+ushort8 __ovld __cnfn bitfield_extract_unsigned(ushort8, uint, uint);
+uint8 __ovld __cnfn bitfield_extract_unsigned(uint8, uint, uint);
+ulong8 __ovld __cnfn bitfield_extract_unsigned(ulong8, uint, uint);
+uchar16 __ovld __cnfn bitfield_extract_unsigned(uchar16, uint, uint);
+ushort16 __ovld __cnfn bitfield_extract_unsigned(ushort16, uint, uint);
+uint16 __ovld __cnfn bitfield_extract_unsigned(uint16, uint, uint);
+ulong16 __ovld __cnfn bitfield_extract_unsigned(ulong16, uint, uint);
+
+char __ovld __cnfn bit_reverse(char);
+uchar __ovld __cnfn bit_reverse(uchar);
+short __ovld __cnfn bit_reverse(short);
+ushort __ovld __cnfn bit_reverse(ushort);
+int __ovld __cnfn bit_reverse(int);
+uint __ovld __cnfn bit_reverse(uint);
+long __ovld __cnfn bit_reverse(long);
+ulong __ovld __cnfn bit_reverse(ulong);
+char2 __ovld __cnfn bit_reverse(char2);
+uchar2 __ovld __cnfn bit_reverse(uchar2);
+short2 __ovld __cnfn bit_reverse(short2);
+ushort2 __ovld __cnfn bit_reverse(ushort2);
+int2 __ovld __cnfn bit_reverse(int2);
+uint2 __ovld __cnfn bit_reverse(uint2);
+long2 __ovld __cnfn bit_reverse(long2);
+ulong2 __ovld __cnfn bit_reverse(ulong2);
+char3 __ovld __cnfn bit_reverse(char3);
+uchar3 __ovld __cnfn bit_reverse(uchar3);
+short3 __ovld __cnfn bit_reverse(short3);
+ushort3 __ovld __cnfn bit_reverse(ushort3);
+int3 __ovld __cnfn bit_reverse(int3);
+uint3 __ovld __cnfn bit_reverse(uint3);
+long3 __ovld __cnfn bit_reverse(long3);
+ulong3 __ovld __cnfn bit_reverse(ulong3);
+char4 __ovld __cnfn bit_reverse(char4);
+uchar4 __ovld __cnfn bit_reverse(uchar4);
+short4 __ovld __cnfn bit_reverse(short4);
+ushort4 __ovld __cnfn bit_reverse(ushort4);
+int4 __ovld __cnfn bit_reverse(int4);
+uint4 __ovld __cnfn bit_reverse(uint4);
+long4 __ovld __cnfn bit_reverse(long4);
+ulong4 __ovld __cnfn bit_reverse(ulong4);
+char8 __ovld __cnfn bit_reverse(char8);
+uchar8 __ovld __cnfn bit_reverse(uchar8);
+short8 __ovld __cnfn bit_reverse(short8);
+ushort8 __ovld __cnfn bit_reverse(ushort8);
+int8 __ovld __cnfn bit_reverse(int8);
+uint8 __ovld __cnfn bit_reverse(uint8);
+long8 __ovld __cnfn bit_reverse(long8);
+ulong8 __ovld __cnfn bit_reverse(ulong8);
+char16 __ovld __cnfn bit_reverse(char16);
+uchar16 __ovld __cnfn bit_reverse(uchar16);
+short16 __ovld __cnfn bit_reverse(short16);
+ushort16 __ovld __cnfn bit_reverse(ushort16);
+int16 __ovld __cnfn bit_reverse(int16);
+uint16 __ovld __cnfn bit_reverse(uint16);
+long16 __ovld __cnfn bit_reverse(long16);
+ulong16 __ovld __cnfn bit_reverse(ulong16);
+#endif // cl_khr_extended_bit_ops
+
 #if defined(cl_intel_subgroups)
 // Intel-Specific Sub Group Functions
 float __ovld __conv intel_sub_group_shuffle( float x, uint c );
# OpenCLBuiltins.td: register the extension name as a FunctionExtension and
# add the same four builtins as generic-type entries, each marked Attr.Const.
# In every Builtin signature list the first element is the return type.
Index: clang/lib/Sema/OpenCLBuiltins.td
===================================================================
--- clang/lib/Sema/OpenCLBuiltins.td
+++ clang/lib/Sema/OpenCLBuiltins.td
@@ -72,6 +72,7 @@
 def FuncExtKhrSubgroupShuffle : FunctionExtension<"cl_khr_subgroup_shuffle">;
 def FuncExtKhrSubgroupShuffleRelative : FunctionExtension<"cl_khr_subgroup_shuffle_relative">;
 def FuncExtKhrSubgroupClusteredReduce : FunctionExtension<"cl_khr_subgroup_clustered_reduce">;
+def FuncExtKhrExtendedBitOps : FunctionExtension<"cl_khr_extended_bit_ops">;
 def FuncExtKhrGlobalInt32BaseAtomics : FunctionExtension<"cl_khr_global_int32_base_atomics">;
 def FuncExtKhrGlobalInt32ExtendedAtomics : FunctionExtension<"cl_khr_global_int32_extended_atomics">;
 def FuncExtKhrLocalInt32BaseAtomics : FunctionExtension<"cl_khr_local_int32_base_atomics">;
@@ -1738,6 +1739,16 @@
   }
 }
 
+// Section 40.3.1 - cl_khr_extended_bit_ops
+let Extension = FuncExtKhrExtendedBitOps in {
+  def : Builtin<"bitfield_insert", [AIGenTypeN, AIGenTypeN, AIGenTypeN, UInt, UInt], Attr.Const>;
+  def : Builtin<"bitfield_extract_signed", [SGenTypeN, SGenTypeN, UInt, UInt], Attr.Const>;
+  def : Builtin<"bitfield_extract_signed", [SGenTypeN, UGenTypeN, UInt, UInt], Attr.Const>;
+  def : Builtin<"bitfield_extract_unsigned", [UGenTypeN, SGenTypeN, UInt, UInt], Attr.Const>;
+  def : Builtin<"bitfield_extract_unsigned", [UGenTypeN, UGenTypeN, UInt, UInt], Attr.Const>;
+  def : Builtin<"bit_reverse", [AIGenTypeN, AIGenTypeN], Attr.Const>;
+}
+
 //--------------------------------------------------------------------
 // Arm extensions.
 let Extension = ArmIntegerDotProductInt8 in {
# opencl-c-header.cl: require cl_khr_extended_bit_ops to equal 1 in the branch
# before "#else" and to be undefined in the branch after it, mirroring the
# existing cl_khr_subgroup_clustered_reduce checks.
Index: clang/test/Headers/opencl-c-header.cl
===================================================================
--- clang/test/Headers/opencl-c-header.cl
+++ clang/test/Headers/opencl-c-header.cl
@@ -123,6 +123,9 @@
 #if cl_khr_subgroup_clustered_reduce != 1
 #error "Incorrectly defined cl_khr_subgroup_clustered_reduce"
 #endif
+#if cl_khr_extended_bit_ops != 1
+#error "Incorrectly defined cl_khr_extended_bit_ops"
+#endif
 
 #else
 
@@ -147,6 +150,9 @@
 #ifdef cl_khr_subgroup_clustered_reduce
 #error "Incorrect cl_khr_subgroup_clustered_reduce define"
 #endif
+#ifdef cl_khr_extended_bit_ops
+#error "Incorrect cl_khr_extended_bit_ops define"
+#endif
 
 #endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
 