Index: cfe/trunk/include/clang/Driver/Options.td =================================================================== --- cfe/trunk/include/clang/Driver/Options.td +++ cfe/trunk/include/clang/Driver/Options.td @@ -518,6 +518,8 @@ HelpText<"OpenCL only. Allow denormals to be flushed to zero.">; def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group, Flags<[CC1Option]>, HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">; +def cl_uniform_work_group_size : Flag<["-"], "cl-uniform-work-group-size">, Group, Flags<[CC1Option]>, + HelpText<"OpenCL only. Defines that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel">; def client__name : JoinedOrSeparate<["-"], "client_name">; def combine : Flag<["-", "--"], "combine">, Flags<[DriverOption, Unsupported]>; def compatibility__version : JoinedOrSeparate<["-"], "compatibility_version">; Index: cfe/trunk/include/clang/Frontend/CodeGenOptions.def =================================================================== --- cfe/trunk/include/clang/Frontend/CodeGenOptions.def +++ cfe/trunk/include/clang/Frontend/CodeGenOptions.def @@ -128,6 +128,7 @@ CODEGENOPT(NoNaNsFPMath , 1, 0) ///< Assume FP arguments, results not NaN. CODEGENOPT(FlushDenorm , 1, 0) ///< Allow FP denorm numbers to be flushed to zero CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt +CODEGENOPT(UniformWGSize , 1, 0) ///< -cl-uniform-work-group-size CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss. /// \brief Method of Objective-C dispatch to use. ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy) Index: cfe/trunk/lib/CodeGen/CGCall.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGCall.cpp +++ cfe/trunk/lib/CodeGen/CGCall.cpp @@ -1870,6 +1870,21 @@ } } + if (TargetDecl && TargetDecl->hasAttr()) { + if (getLangOpts().OpenCLVersion <= 120) { + // OpenCL v1.2 Work groups are always uniform + FuncAttrs.addAttribute("uniform-work-group-size", "true"); + } else { + // OpenCL v2.0 Work groups may be whether uniform or not. + // '-cl-uniform-work-group-size' compile option gets a hint + // to the compiler that the global work-size be a multiple of + // the work-group size specified to clEnqueueNDRangeKernel + // (i.e. work groups are uniform). + FuncAttrs.addAttribute("uniform-work-group-size", + llvm::toStringRef(CodeGenOpts.UniformWGSize)); + } + } + if (!AttrOnCallSite) { bool DisableTailCalls = CodeGenOpts.DisableTailCalls || Index: cfe/trunk/lib/Driver/ToolChains/Clang.cpp =================================================================== --- cfe/trunk/lib/Driver/ToolChains/Clang.cpp +++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp @@ -2379,6 +2379,7 @@ options::OPT_cl_no_signed_zeros, options::OPT_cl_denorms_are_zero, options::OPT_cl_fp32_correctly_rounded_divide_sqrt, + options::OPT_cl_uniform_work_group_size }; if (Arg *A = Args.getLastArg(options::OPT_cl_std_EQ)) { Index: cfe/trunk/lib/Frontend/CompilerInvocation.cpp =================================================================== --- cfe/trunk/lib/Frontend/CompilerInvocation.cpp +++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp @@ -659,6 +659,8 @@ Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero); Opts.CorrectlyRoundedDivSqrt = Args.hasArg(OPT_cl_fp32_correctly_rounded_divide_sqrt); + Opts.UniformWGSize = + Args.hasArg(OPT_cl_uniform_work_group_size); Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ); Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math); Opts.NoTrappingMath = Args.hasArg(OPT_fno_trapping_math); Index: cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl =================================================================== --- cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -425,7 +425,7 @@ return s; } -// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #0 { +// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #1 { // CHECK: ret i32 0 transparent_u func_transparent_union_ret() { Index: cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl =================================================================== --- cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl +++ cfe/trunk/test/CodeGenOpenCL/cl-uniform-wg-size.cl @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM + +kernel void ker() {}; +// CHECK: define{{.*}}@ker() #0 + +void foo() {}; +// CHECK: define{{.*}}@foo() #1 + +// CHECK-LABEL: attributes #0 +// CHECK-UNIFORM: "uniform-work-group-size"="true" +// CHECK-NONUNIFORM: "uniform-work-group-size"="false" + +// CHECK-LABEL: attributes #1 +// CHECK-NOT: uniform-work-group-size Index: cfe/trunk/test/CodeGenOpenCL/convergent.cl =================================================================== --- cfe/trunk/test/CodeGenOpenCL/convergent.cl +++ cfe/trunk/test/CodeGenOpenCL/convergent.cl @@ -127,7 +127,7 @@ // CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]] // CHECK-LABEL: @assume_convergent_asm -// CHECK: tail call void asm sideeffect "s_barrier", ""() #4 +// CHECK: tail call void asm sideeffect "s_barrier", ""() #5 kernel void assume_convergent_asm() { __asm__ volatile("s_barrier"); @@ -138,4 +138,5 @@ // CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} } // CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} } // CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} } -// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} } +// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} } +// CHECK: attributes #6 = { {{[^}]*}}convergent noduplicate{{[^}]*}} } Index: cfe/trunk/test/Driver/opencl.cl =================================================================== --- cfe/trunk/test/Driver/opencl.cl +++ cfe/trunk/test/Driver/opencl.cl @@ -13,6 +13,7 @@ // RUN: %clang -S -### -cl-no-signed-zeros %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SIGNED-ZEROS %s // RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 | FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s // RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 | FileCheck --check-prefix=CHECK-ROUND-DIV %s +// RUN: %clang -S -### -cl-uniform-work-group-size %s 2>&1 | FileCheck --check-prefix=CHECK-UNIFORM-WG %s // RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s // RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s @@ -31,6 +32,7 @@ // CHECK-NO-SIGNED-ZEROS: "-cc1" {{.*}} "-cl-no-signed-zeros" // CHECK-DENORMS-ARE-ZERO: "-cc1" {{.*}} "-cl-denorms-are-zero" // CHECK-ROUND-DIV: "-cc1" {{.*}} "-cl-fp32-correctly-rounded-divide-sqrt" +// CHECK-UNIFORM-WG: "-cc1" {{.*}} "-cl-uniform-work-group-size" // CHECK-C99: error: invalid value 'c99' in '-cl-std=c99' // CHECK-INVALID: error: invalid value 'invalid' in '-cl-std=invalid'