Index: cfe/trunk/include/clang/Basic/Attr.td =================================================================== --- cfe/trunk/include/clang/Basic/Attr.td +++ cfe/trunk/include/clang/Basic/Attr.td @@ -1026,6 +1026,12 @@ let Documentation = [NoDuplicateDocs]; } +def Convergent : InheritableAttr { + let Spellings = [GNU<"convergent">, CXX11<"clang", "convergent">]; + let Subjects = SubjectList<[Function]>; + let Documentation = [ConvergentDocs]; +} + def NoInline : InheritableAttr { let Spellings = [GCC<"noinline">, Declspec<"noinline">]; let Subjects = SubjectList<[Function]>; Index: cfe/trunk/include/clang/Basic/AttrDocs.td =================================================================== --- cfe/trunk/include/clang/Basic/AttrDocs.td +++ cfe/trunk/include/clang/Basic/AttrDocs.td @@ -606,6 +606,33 @@ }]; } +def ConvergentDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``convergent`` attribute can be placed on a function declaration. It is +translated into the LLVM ``convergent`` attribute, which indicates that the call +instructions of a function with this attribute cannot be made control-dependent +on any additional values. + +In languages designed for the SPMD/SIMT programming model, e.g. OpenCL or CUDA, +the call instructions of a function with this attribute must be executed by +all work items or threads in a work group or sub group. + +This attribute is different from ``noduplicate`` because it allows duplicating +function calls if it can be proved that the duplicated function calls are +not made control-dependent on any additional values, e.g., unrolling a loop +executed by all work items. + +Sample usage: +.. code-block:: c + + void convfunc(void) __attribute__((convergent)); + // Setting it as a C++11 attribute is also valid in a C++ program. 
+ // void convfunc(void) [[clang::convergent]]; + + }]; +} + def NoSplitStackDocs : Documentation { let Category = DocCatFunction; let Content = [{ Index: cfe/trunk/lib/CodeGen/CGCall.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGCall.cpp +++ cfe/trunk/lib/CodeGen/CGCall.cpp @@ -1648,6 +1648,8 @@ FuncAttrs.addAttribute(llvm::Attribute::NoReturn); if (TargetDecl->hasAttr<NoDuplicateAttr>()) FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate); + if (TargetDecl->hasAttr<ConvergentAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::Convergent); if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { AddAttributesFromFunctionProtoType( Index: cfe/trunk/lib/Headers/opencl-c.h =================================================================== --- cfe/trunk/lib/Headers/opencl-c.h +++ cfe/trunk/lib/Headers/opencl-c.h @@ -17,6 +17,7 @@ #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 #define __ovld __attribute__((overloadable)) +#define __conv __attribute__((convergent)) // Optimizations #define __purefn __attribute__((pure)) @@ -13822,7 +13823,7 @@ * image objects and then want to read the updated data. 
*/ -void __ovld barrier(cl_mem_fence_flags flags); +void __ovld __conv barrier(cl_mem_fence_flags flags); #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 @@ -13835,8 +13836,8 @@ memory_scope_sub_group } memory_scope; -void __ovld work_group_barrier(cl_mem_fence_flags flags, memory_scope scope); -void __ovld work_group_barrier(cl_mem_fence_flags flags); +void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope); +void __ovld __conv work_group_barrier(cl_mem_fence_flags flags); #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 // OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions @@ -16568,101 +16569,101 @@ // OpenCL v2.0 s6.13.15 - Work-group Functions #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -int __ovld work_group_all(int predicate); -int __ovld work_group_any(int predicate); +int __ovld __conv work_group_all(int predicate); +int __ovld __conv work_group_any(int predicate); #ifdef cl_khr_fp16 -half __ovld work_group_broadcast(half a, size_t local_id); -half __ovld work_group_broadcast(half a, size_t x, size_t y); -half __ovld work_group_broadcast(half a, size_t x, size_t y, size_t z); +half __ovld __conv work_group_broadcast(half a, size_t local_id); +half __ovld __conv work_group_broadcast(half a, size_t x, size_t y); +half __ovld __conv work_group_broadcast(half a, size_t x, size_t y, size_t z); #endif -int __ovld work_group_broadcast(int a, size_t local_id); -int __ovld work_group_broadcast(int a, size_t x, size_t y); -int __ovld work_group_broadcast(int a, size_t x, size_t y, size_t z); -uint __ovld work_group_broadcast(uint a, size_t local_id); -uint __ovld work_group_broadcast(uint a, size_t x, size_t y); -uint __ovld work_group_broadcast(uint a, size_t x, size_t y, size_t z); -long __ovld work_group_broadcast(long a, size_t local_id); -long __ovld work_group_broadcast(long a, size_t x, size_t y); -long __ovld work_group_broadcast(long a, size_t x, size_t y, size_t z); -ulong __ovld work_group_broadcast(ulong a, size_t 
local_id); -ulong __ovld work_group_broadcast(ulong a, size_t x, size_t y); -ulong __ovld work_group_broadcast(ulong a, size_t x, size_t y, size_t z); -float __ovld work_group_broadcast(float a, size_t local_id); -float __ovld work_group_broadcast(float a, size_t x, size_t y); -float __ovld work_group_broadcast(float a, size_t x, size_t y, size_t z); -#ifdef cl_khr_fp64 -double __ovld work_group_broadcast(double a, size_t local_id); -double __ovld work_group_broadcast(double a, size_t x, size_t y); -double __ovld work_group_broadcast(double a, size_t x, size_t y, size_t z); +int __ovld __conv work_group_broadcast(int a, size_t local_id); +int __ovld __conv work_group_broadcast(int a, size_t x, size_t y); +int __ovld __conv work_group_broadcast(int a, size_t x, size_t y, size_t z); +uint __ovld __conv work_group_broadcast(uint a, size_t local_id); +uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y); +uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y, size_t z); +long __ovld __conv work_group_broadcast(long a, size_t local_id); +long __ovld __conv work_group_broadcast(long a, size_t x, size_t y); +long __ovld __conv work_group_broadcast(long a, size_t x, size_t y, size_t z); +ulong __ovld __conv work_group_broadcast(ulong a, size_t local_id); +ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y); +ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z); +float __ovld __conv work_group_broadcast(float a, size_t local_id); +float __ovld __conv work_group_broadcast(float a, size_t x, size_t y); +float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z); +#ifdef cl_khr_fp64 +double __ovld __conv work_group_broadcast(double a, size_t local_id); +double __ovld __conv work_group_broadcast(double a, size_t x, size_t y); +double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 -half __ovld 
work_group_reduce_add(half x); -half __ovld work_group_reduce_min(half x); -half __ovld work_group_reduce_max(half x); -half __ovld work_group_scan_exclusive_add(half x); -half __ovld work_group_scan_exclusive_min(half x); -half __ovld work_group_scan_exclusive_max(half x); -half __ovld work_group_scan_inclusive_add(half x); -half __ovld work_group_scan_inclusive_min(half x); -half __ovld work_group_scan_inclusive_max(half x); +half __ovld __conv work_group_reduce_add(half x); +half __ovld __conv work_group_reduce_min(half x); +half __ovld __conv work_group_reduce_max(half x); +half __ovld __conv work_group_scan_exclusive_add(half x); +half __ovld __conv work_group_scan_exclusive_min(half x); +half __ovld __conv work_group_scan_exclusive_max(half x); +half __ovld __conv work_group_scan_inclusive_add(half x); +half __ovld __conv work_group_scan_inclusive_min(half x); +half __ovld __conv work_group_scan_inclusive_max(half x); #endif -int __ovld work_group_reduce_add(int x); -int __ovld work_group_reduce_min(int x); -int __ovld work_group_reduce_max(int x); -int __ovld work_group_scan_exclusive_add(int x); -int __ovld work_group_scan_exclusive_min(int x); -int __ovld work_group_scan_exclusive_max(int x); -int __ovld work_group_scan_inclusive_add(int x); -int __ovld work_group_scan_inclusive_min(int x); -int __ovld work_group_scan_inclusive_max(int x); -uint __ovld work_group_reduce_add(uint x); -uint __ovld work_group_reduce_min(uint x); -uint __ovld work_group_reduce_max(uint x); -uint __ovld work_group_scan_exclusive_add(uint x); -uint __ovld work_group_scan_exclusive_min(uint x); -uint __ovld work_group_scan_exclusive_max(uint x); -uint __ovld work_group_scan_inclusive_add(uint x); -uint __ovld work_group_scan_inclusive_min(uint x); -uint __ovld work_group_scan_inclusive_max(uint x); -long __ovld work_group_reduce_add(long x); -long __ovld work_group_reduce_min(long x); -long __ovld work_group_reduce_max(long x); -long __ovld work_group_scan_exclusive_add(long x); 
-long __ovld work_group_scan_exclusive_min(long x); -long __ovld work_group_scan_exclusive_max(long x); -long __ovld work_group_scan_inclusive_add(long x); -long __ovld work_group_scan_inclusive_min(long x); -long __ovld work_group_scan_inclusive_max(long x); -ulong __ovld work_group_reduce_add(ulong x); -ulong __ovld work_group_reduce_min(ulong x); -ulong __ovld work_group_reduce_max(ulong x); -ulong __ovld work_group_scan_exclusive_add(ulong x); -ulong __ovld work_group_scan_exclusive_min(ulong x); -ulong __ovld work_group_scan_exclusive_max(ulong x); -ulong __ovld work_group_scan_inclusive_add(ulong x); -ulong __ovld work_group_scan_inclusive_min(ulong x); -ulong __ovld work_group_scan_inclusive_max(ulong x); -float __ovld work_group_reduce_add(float x); -float __ovld work_group_reduce_min(float x); -float __ovld work_group_reduce_max(float x); -float __ovld work_group_scan_exclusive_add(float x); -float __ovld work_group_scan_exclusive_min(float x); -float __ovld work_group_scan_exclusive_max(float x); -float __ovld work_group_scan_inclusive_add(float x); -float __ovld work_group_scan_inclusive_min(float x); -float __ovld work_group_scan_inclusive_max(float x); -#ifdef cl_khr_fp64 -double __ovld work_group_reduce_add(double x); -double __ovld work_group_reduce_min(double x); -double __ovld work_group_reduce_max(double x); -double __ovld work_group_scan_exclusive_add(double x); -double __ovld work_group_scan_exclusive_min(double x); -double __ovld work_group_scan_exclusive_max(double x); -double __ovld work_group_scan_inclusive_add(double x); -double __ovld work_group_scan_inclusive_min(double x); -double __ovld work_group_scan_inclusive_max(double x); +int __ovld __conv work_group_reduce_add(int x); +int __ovld __conv work_group_reduce_min(int x); +int __ovld __conv work_group_reduce_max(int x); +int __ovld __conv work_group_scan_exclusive_add(int x); +int __ovld __conv work_group_scan_exclusive_min(int x); +int __ovld __conv work_group_scan_exclusive_max(int 
x); +int __ovld __conv work_group_scan_inclusive_add(int x); +int __ovld __conv work_group_scan_inclusive_min(int x); +int __ovld __conv work_group_scan_inclusive_max(int x); +uint __ovld __conv work_group_reduce_add(uint x); +uint __ovld __conv work_group_reduce_min(uint x); +uint __ovld __conv work_group_reduce_max(uint x); +uint __ovld __conv work_group_scan_exclusive_add(uint x); +uint __ovld __conv work_group_scan_exclusive_min(uint x); +uint __ovld __conv work_group_scan_exclusive_max(uint x); +uint __ovld __conv work_group_scan_inclusive_add(uint x); +uint __ovld __conv work_group_scan_inclusive_min(uint x); +uint __ovld __conv work_group_scan_inclusive_max(uint x); +long __ovld __conv work_group_reduce_add(long x); +long __ovld __conv work_group_reduce_min(long x); +long __ovld __conv work_group_reduce_max(long x); +long __ovld __conv work_group_scan_exclusive_add(long x); +long __ovld __conv work_group_scan_exclusive_min(long x); +long __ovld __conv work_group_scan_exclusive_max(long x); +long __ovld __conv work_group_scan_inclusive_add(long x); +long __ovld __conv work_group_scan_inclusive_min(long x); +long __ovld __conv work_group_scan_inclusive_max(long x); +ulong __ovld __conv work_group_reduce_add(ulong x); +ulong __ovld __conv work_group_reduce_min(ulong x); +ulong __ovld __conv work_group_reduce_max(ulong x); +ulong __ovld __conv work_group_scan_exclusive_add(ulong x); +ulong __ovld __conv work_group_scan_exclusive_min(ulong x); +ulong __ovld __conv work_group_scan_exclusive_max(ulong x); +ulong __ovld __conv work_group_scan_inclusive_add(ulong x); +ulong __ovld __conv work_group_scan_inclusive_min(ulong x); +ulong __ovld __conv work_group_scan_inclusive_max(ulong x); +float __ovld __conv work_group_reduce_add(float x); +float __ovld __conv work_group_reduce_min(float x); +float __ovld __conv work_group_reduce_max(float x); +float __ovld __conv work_group_scan_exclusive_add(float x); +float __ovld __conv work_group_scan_exclusive_min(float x); 
+float __ovld __conv work_group_scan_exclusive_max(float x); +float __ovld __conv work_group_scan_inclusive_add(float x); +float __ovld __conv work_group_scan_inclusive_min(float x); +float __ovld __conv work_group_scan_inclusive_max(float x); +#ifdef cl_khr_fp64 +double __ovld __conv work_group_reduce_add(double x); +double __ovld __conv work_group_reduce_min(double x); +double __ovld __conv work_group_reduce_max(double x); +double __ovld __conv work_group_scan_exclusive_add(double x); +double __ovld __conv work_group_scan_exclusive_min(double x); +double __ovld __conv work_group_scan_exclusive_max(double x); +double __ovld __conv work_group_scan_inclusive_add(double x); +double __ovld __conv work_group_scan_inclusive_min(double x); +double __ovld __conv work_group_scan_inclusive_max(double x); #endif //cl_khr_fp64 #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 @@ -16762,92 +16763,92 @@ uint __ovld get_sub_group_id(void); uint __ovld get_sub_group_local_id(void); -void __ovld sub_group_barrier(cl_mem_fence_flags flags); +void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags); #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -void __ovld sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope); +void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope); #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 -int __ovld sub_group_all(int predicate); -int __ovld sub_group_any(int predicate); +int __ovld __conv sub_group_all(int predicate); +int __ovld __conv sub_group_any(int predicate); -int __ovld sub_group_broadcast(int x, uint sub_group_local_id); -uint __ovld sub_group_broadcast(uint x, uint sub_group_local_id); -long __ovld sub_group_broadcast(long x, uint sub_group_local_id); -ulong __ovld sub_group_broadcast(ulong x, uint sub_group_local_id); -float __ovld sub_group_broadcast(float x, uint sub_group_local_id); - -int __ovld sub_group_reduce_add(int x); -uint __ovld sub_group_reduce_add(uint x); -long __ovld 
sub_group_reduce_add(long x); -ulong __ovld sub_group_reduce_add(ulong x); -float __ovld sub_group_reduce_add(float x); -int __ovld sub_group_reduce_min(int x); -uint __ovld sub_group_reduce_min(uint x); -long __ovld sub_group_reduce_min(long x); -ulong __ovld sub_group_reduce_min(ulong x); -float __ovld sub_group_reduce_min(float x); -int __ovld sub_group_reduce_max(int x); -uint __ovld sub_group_reduce_max(uint x); -long __ovld sub_group_reduce_max(long x); -ulong __ovld sub_group_reduce_max(ulong x); -float __ovld sub_group_reduce_max(float x); - -int __ovld sub_group_scan_exclusive_add(int x); -uint __ovld sub_group_scan_exclusive_add(uint x); -long __ovld sub_group_scan_exclusive_add(long x); -ulong __ovld sub_group_scan_exclusive_add(ulong x); -float __ovld sub_group_scan_exclusive_add(float x); -int __ovld sub_group_scan_exclusive_min(int x); -uint __ovld sub_group_scan_exclusive_min(uint x); -long __ovld sub_group_scan_exclusive_min(long x); -ulong __ovld sub_group_scan_exclusive_min(ulong x); -float __ovld sub_group_scan_exclusive_min(float x); -int __ovld sub_group_scan_exclusive_max(int x); -uint __ovld sub_group_scan_exclusive_max(uint x); -long __ovld sub_group_scan_exclusive_max(long x); -ulong __ovld sub_group_scan_exclusive_max(ulong x); -float __ovld sub_group_scan_exclusive_max(float x); - -int __ovld sub_group_scan_inclusive_add(int x); -uint __ovld sub_group_scan_inclusive_add(uint x); -long __ovld sub_group_scan_inclusive_add(long x); -ulong __ovld sub_group_scan_inclusive_add(ulong x); -float __ovld sub_group_scan_inclusive_add(float x); -int __ovld sub_group_scan_inclusive_min(int x); -uint __ovld sub_group_scan_inclusive_min(uint x); -long __ovld sub_group_scan_inclusive_min(long x); -ulong __ovld sub_group_scan_inclusive_min(ulong x); -float __ovld sub_group_scan_inclusive_min(float x); -int __ovld sub_group_scan_inclusive_max(int x); -uint __ovld sub_group_scan_inclusive_max(uint x); -long __ovld sub_group_scan_inclusive_max(long x); 
-ulong __ovld sub_group_scan_inclusive_max(ulong x); -float __ovld sub_group_scan_inclusive_max(float x); - -#ifdef cl_khr_fp16 -half __ovld sub_group_broadcast(half x, uint sub_group_local_id); -half __ovld sub_group_reduce_add(half x); -half __ovld sub_group_reduce_min(half x); -half __ovld sub_group_reduce_max(half x); -half __ovld sub_group_scan_exclusive_add(half x); -half __ovld sub_group_scan_exclusive_min(half x); -half __ovld sub_group_scan_exclusive_max(half x); -half __ovld sub_group_scan_inclusive_add(half x); -half __ovld sub_group_scan_inclusive_min(half x); -half __ovld sub_group_scan_inclusive_max(half x); -#endif //cl_khr_fp16 - -#ifdef cl_khr_fp64 -double __ovld sub_group_broadcast(double x, uint sub_group_local_id); -double __ovld sub_group_reduce_add(double x); -double __ovld sub_group_reduce_min(double x); -double __ovld sub_group_reduce_max(double x); -double __ovld sub_group_scan_exclusive_add(double x); -double __ovld sub_group_scan_exclusive_min(double x); -double __ovld sub_group_scan_exclusive_max(double x); -double __ovld sub_group_scan_inclusive_add(double x); -double __ovld sub_group_scan_inclusive_min(double x); -double __ovld sub_group_scan_inclusive_max(double x); +int __ovld __conv sub_group_broadcast(int x, uint sub_group_local_id); +uint __ovld __conv sub_group_broadcast(uint x, uint sub_group_local_id); +long __ovld __conv sub_group_broadcast(long x, uint sub_group_local_id); +ulong __ovld __conv sub_group_broadcast(ulong x, uint sub_group_local_id); +float __ovld __conv sub_group_broadcast(float x, uint sub_group_local_id); + +int __ovld __conv sub_group_reduce_add(int x); +uint __ovld __conv sub_group_reduce_add(uint x); +long __ovld __conv sub_group_reduce_add(long x); +ulong __ovld __conv sub_group_reduce_add(ulong x); +float __ovld __conv sub_group_reduce_add(float x); +int __ovld __conv sub_group_reduce_min(int x); +uint __ovld __conv sub_group_reduce_min(uint x); +long __ovld __conv sub_group_reduce_min(long x); +ulong 
__ovld __conv sub_group_reduce_min(ulong x); +float __ovld __conv sub_group_reduce_min(float x); +int __ovld __conv sub_group_reduce_max(int x); +uint __ovld __conv sub_group_reduce_max(uint x); +long __ovld __conv sub_group_reduce_max(long x); +ulong __ovld __conv sub_group_reduce_max(ulong x); +float __ovld __conv sub_group_reduce_max(float x); + +int __ovld __conv sub_group_scan_exclusive_add(int x); +uint __ovld __conv sub_group_scan_exclusive_add(uint x); +long __ovld __conv sub_group_scan_exclusive_add(long x); +ulong __ovld __conv sub_group_scan_exclusive_add(ulong x); +float __ovld __conv sub_group_scan_exclusive_add(float x); +int __ovld __conv sub_group_scan_exclusive_min(int x); +uint __ovld __conv sub_group_scan_exclusive_min(uint x); +long __ovld __conv sub_group_scan_exclusive_min(long x); +ulong __ovld __conv sub_group_scan_exclusive_min(ulong x); +float __ovld __conv sub_group_scan_exclusive_min(float x); +int __ovld __conv sub_group_scan_exclusive_max(int x); +uint __ovld __conv sub_group_scan_exclusive_max(uint x); +long __ovld __conv sub_group_scan_exclusive_max(long x); +ulong __ovld __conv sub_group_scan_exclusive_max(ulong x); +float __ovld __conv sub_group_scan_exclusive_max(float x); + +int __ovld __conv sub_group_scan_inclusive_add(int x); +uint __ovld __conv sub_group_scan_inclusive_add(uint x); +long __ovld __conv sub_group_scan_inclusive_add(long x); +ulong __ovld __conv sub_group_scan_inclusive_add(ulong x); +float __ovld __conv sub_group_scan_inclusive_add(float x); +int __ovld __conv sub_group_scan_inclusive_min(int x); +uint __ovld __conv sub_group_scan_inclusive_min(uint x); +long __ovld __conv sub_group_scan_inclusive_min(long x); +ulong __ovld __conv sub_group_scan_inclusive_min(ulong x); +float __ovld __conv sub_group_scan_inclusive_min(float x); +int __ovld __conv sub_group_scan_inclusive_max(int x); +uint __ovld __conv sub_group_scan_inclusive_max(uint x); +long __ovld __conv sub_group_scan_inclusive_max(long x); +ulong __ovld 
__conv sub_group_scan_inclusive_max(ulong x); +float __ovld __conv sub_group_scan_inclusive_max(float x); + +#ifdef cl_khr_fp16 +half __ovld __conv sub_group_broadcast(half x, uint sub_group_local_id); +half __ovld __conv sub_group_reduce_add(half x); +half __ovld __conv sub_group_reduce_min(half x); +half __ovld __conv sub_group_reduce_max(half x); +half __ovld __conv sub_group_scan_exclusive_add(half x); +half __ovld __conv sub_group_scan_exclusive_min(half x); +half __ovld __conv sub_group_scan_exclusive_max(half x); +half __ovld __conv sub_group_scan_inclusive_add(half x); +half __ovld __conv sub_group_scan_inclusive_min(half x); +half __ovld __conv sub_group_scan_inclusive_max(half x); +#endif //cl_khr_fp16 + +#ifdef cl_khr_fp64 +double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id); +double __ovld __conv sub_group_reduce_add(double x); +double __ovld __conv sub_group_reduce_min(double x); +double __ovld __conv sub_group_reduce_max(double x); +double __ovld __conv sub_group_scan_exclusive_add(double x); +double __ovld __conv sub_group_scan_exclusive_min(double x); +double __ovld __conv sub_group_scan_exclusive_max(double x); +double __ovld __conv sub_group_scan_inclusive_add(double x); +double __ovld __conv sub_group_scan_inclusive_min(double x); +double __ovld __conv sub_group_scan_inclusive_max(double x); #endif //cl_khr_fp64 #endif //cl_khr_subgroups cl_intel_subgroups Index: cfe/trunk/lib/Sema/SemaDeclAttr.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaDeclAttr.cpp +++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp @@ -5857,6 +5857,9 @@ case AttributeList::AT_NoDuplicate: handleSimpleAttribute<NoDuplicateAttr>(S, D, Attr); break; + case AttributeList::AT_Convergent: + handleSimpleAttribute<ConvergentAttr>(S, D, Attr); + break; case AttributeList::AT_NoInline: handleSimpleAttribute<NoInlineAttr>(S, D, Attr); break; Index: cfe/trunk/test/CodeGenOpenCL/convergent.cl =================================================================== --- 
cfe/trunk/test/CodeGenOpenCL/convergent.cl +++ cfe/trunk/test/CodeGenOpenCL/convergent.cl @@ -0,0 +1,118 @@ +// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | FileCheck %s + +void convfun(void) __attribute__((convergent)); +void non_convfun(void); +void nodupfun(void) __attribute__((noduplicate)); + +void f(void); +void g(void); + +// Test two if's are merged and non_convfun duplicated. +// The LLVM IR is equivalent to: +// if (a) { +// f(); +// non_convfun(); +// g(); +// } else { +// non_convfun(); +// } +// +// CHECK: define spir_func void @test_merge_if(i32 %[[a:.+]]) +// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0 +// CHECK: br i1 %[[tobool]], label %[[if_end3_critedge:.+]], label %[[if_then:.+]] +// CHECK: [[if_then]]: +// CHECK: tail call spir_func void @f() +// CHECK: tail call spir_func void @non_convfun() +// CHECK: tail call spir_func void @g() +// CHECK: br label %[[if_end3:.+]] +// CHECK: [[if_end3_critedge]]: +// CHECK: tail call spir_func void @non_convfun() +// CHECK: br label %[[if_end3]] +// CHECK: [[if_end3]]: +// CHECK-LABEL: ret void + +void test_merge_if(int a) { + if (a) { + f(); + } + non_convfun(); + if (a) { + g(); + } +} + +// CHECK-DAG: declare spir_func void @f() +// CHECK-DAG: declare spir_func void @non_convfun() +// CHECK-DAG: declare spir_func void @g() + +// Test two if's are not merged. 
+// CHECK: define spir_func void @test_no_merge_if(i32 %[[a:.+]]) +// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0 +// CHECK: br i1 %[[tobool]], label %[[if_end:.+]], label %[[if_then:.+]] +// CHECK: [[if_then]]: +// CHECK: tail call spir_func void @f() +// CHECK-NOT: call spir_func void @convfun() +// CHECK-NOT: call spir_func void @g() +// CHECK: br label %[[if_end]] +// CHECK: [[if_end]]: +// CHECK: %[[tobool_pr:.+]] = phi i1 [ true, %[[if_then]] ], [ false, %{{.+}} ] +// CHECK: tail call spir_func void @convfun() #[[attr5:.+]] +// CHECK: br i1 %[[tobool_pr]], label %[[if_then2:.+]], label %[[if_end3:.+]] +// CHECK: [[if_then2]]: +// CHECK: tail call spir_func void @g() +// CHECK: br label %[[if_end3:.+]] +// CHECK: [[if_end3]]: +// CHECK-LABEL: ret void + +void test_no_merge_if(int a) { + if (a) { + f(); + } + convfun(); + if(a) { + g(); + } +} + +// CHECK: declare spir_func void @convfun(){{[^#]*}} #[[attr2:[0-9]+]] + +// Test loop is unrolled for convergent function. +// CHECK-LABEL: define spir_func void @test_unroll() +// CHECK: tail call spir_func void @convfun() #[[attr5:[0-9]+]] +// CHECK: tail call spir_func void @convfun() #[[attr5]] +// CHECK: tail call spir_func void @convfun() #[[attr5]] +// CHECK: tail call spir_func void @convfun() #[[attr5]] +// CHECK: tail call spir_func void @convfun() #[[attr5]] +// CHECK: tail call spir_func void @convfun() #[[attr5]] +// CHECK: tail call spir_func void @convfun() #[[attr5]] +// CHECK: tail call spir_func void @convfun() #[[attr5]] +// CHECK: tail call spir_func void @convfun() #[[attr5]] +// CHECK: tail call spir_func void @convfun() #[[attr5]] +// CHECK-LABEL: ret void + +void test_unroll() { + for (int i = 0; i < 10; i++) + convfun(); +} + +// Test loop is not unrolled for noduplicate function. 
+// CHECK-LABEL: define spir_func void @test_not_unroll() +// CHECK: br label %[[for_body:.+]] +// CHECK: [[for_cond_cleanup:.+]]: +// CHECK: ret void +// CHECK: [[for_body]]: +// CHECK: tail call spir_func void @nodupfun() #[[attr6:[0-9]+]] +// CHECK-NOT: call spir_func void @nodupfun() +// CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] + +void test_not_unroll() { + for (int i = 0; i < 10; i++) + nodupfun(); +} + +// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]] + +// CHECK-DAG: attributes #[[attr2]] = { {{[^}]*}}convergent{{[^}]*}} } +// CHECK-DAG: attributes #[[attr3]] = { {{[^}]*}}noduplicate{{[^}]*}} } +// CHECK-DAG: attributes #[[attr5]] = { {{[^}]*}}convergent{{[^}]*}} } +// CHECK-DAG: attributes #[[attr6]] = { {{[^}]*}}noduplicate{{[^}]*}} } Index: cfe/trunk/test/SemaOpenCL/convergent.cl =================================================================== --- cfe/trunk/test/SemaOpenCL/convergent.cl +++ cfe/trunk/test/SemaOpenCL/convergent.cl @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple spir-unknown-unknown -fsyntax-only -verify %s + +void f1(void) __attribute__((convergent)); + +void f2(void) __attribute__((convergent(1))); // expected-error {{'convergent' attribute takes no arguments}} + +void f3(int a __attribute__((convergent))); // expected-warning {{'convergent' attribute only applies to functions}} + +void f4(void) { + int var1 __attribute__((convergent)); // expected-warning {{'convergent' attribute only applies to functions}} +} +