Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -741,6 +741,13 @@
   let Documentation = [OpenCLUnrollHintDocs];
 }
 
+def OpenCLIntelReqdSubGroupSize: InheritableAttr {
+  let Spellings = [GNU<"intel_reqd_sub_group_size">];
+  let Args = [UnsignedArgument<"SubGroupSize">];
+  let Subjects = SubjectList<[Function], ErrorDiag>;
+  let Documentation = [OpenCLIntelReqdSubGroupSizeDocs];
+}
+
 // This attribute is both a type attribute, and a declaration attribute (for
 // parameter variables).
 def OpenCLAccess : Attr {
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -2137,6 +2137,21 @@
   }];
 }
 
+def OpenCLIntelReqdSubGroupSizeDocs : Documentation {
+  let Category = DocCatStmt;
+  let Heading = "__attribute__((intel_reqd_sub_group_size))";
+  let Content = [{
+The optional attribute intel_reqd_sub_group_size can be used to indicate that
+the kernel must be compiled and executed with the specified subgroup size. When
+this attribute is present, get_max_sub_group_size() is guaranteed to return the
+specified integer value. This is important for the correctness of many subgroup
+algorithms, and in some cases may be used by the compiler to generate more optimal
+code. See `cl_intel_required_subgroup_size
+<https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt>`_
+for details.
+  }];
+}
+
 def OpenCLAccessDocs : Documentation {
   let Category = DocCatStmt;
   let Heading = "__read_only, __write_only, __read_write (read_only, write_only, read_write)";
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -652,6 +652,7 @@
                     llvm::MDNode::get(Context, argNames));
 }
 
+/// Emit OpenCL Kernel arg metadata and the kernel attribute metadata.
 void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
                                                llvm::Function *Fn)
 {
@@ -692,6 +693,14 @@
         llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
     Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs));
   }
+
+  if (const OpenCLIntelReqdSubGroupSizeAttr *A =
+          FD->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
+    llvm::Metadata *attrMDArgs[] = {
+        llvm::ConstantAsMetadata::get(Builder.getInt32(A->getSubGroupSize()))};
+    Fn->setMetadata("intel_reqd_sub_group_size",
+                    llvm::MDNode::get(Context, attrMDArgs));
+  }
 }
 
 /// Determine whether the function F ends with a return stmt.
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -2779,6 +2779,30 @@
                                        Attr.getAttributeSpellingListIndex()));
 }
 
+// Handles intel_reqd_sub_group_size. Diagnoses a zero sub-group size as an
+// error, warns when D already carries the attribute with a different size, and
+// then (even after that warning) attaches a new attribute to D.
+static void handleSubGroupSize(Sema &S, Decl *D, const AttributeList &Attr) {
+  uint32_t SGSize;
+  const Expr *E = Attr.getArgAsExpr(0);
+  if (!checkUInt32Argument(S, Attr, E, SGSize))
+    return;
+  if (SGSize == 0) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
+        << Attr.getName() << E->getSourceRange();
+    return;
+  }
+
+  OpenCLIntelReqdSubGroupSizeAttr *Existing =
+      D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>();
+  if (Existing && Existing->getSubGroupSize() != SGSize)
+    S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << Attr.getName();
+
+  D->addAttr(::new (S.Context) OpenCLIntelReqdSubGroupSizeAttr(
+      Attr.getRange(), S.Context, SGSize,
+      Attr.getAttributeSpellingListIndex()));
+}
+
 static void handleVecTypeHint(Sema &S, Decl *D, const AttributeList &Attr) {
   if (!Attr.hasParsedType()) {
     S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
@@ -5998,6 +6022,9 @@
   case AttributeList::AT_ReqdWorkGroupSize:
     handleWorkGroupSize<ReqdWorkGroupSizeAttr>(S, D, Attr);
     break;
+  case AttributeList::AT_OpenCLIntelReqdSubGroupSize:
+    handleSubGroupSize(S, D, Attr);
+    break;
   case AttributeList::AT_VecTypeHint:
     handleVecTypeHint(S, D, Attr);
     break;
@@ -6356,6 +6383,9 @@
       Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
         << A << ExpectedKernelFunction;
       D->setInvalidDecl();
+    } else if (Attr *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
+      Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
+      D->setInvalidDecl();
     }
   }
 }
Index: test/CodeGenOpenCL/kernel-attributes.cl
===================================================================
--- test/CodeGenOpenCL/kernel-attributes.cl
+++ test/CodeGenOpenCL/kernel-attributes.cl
@@ -8,7 +8,11 @@
 kernel __attribute__((vec_type_hint(uint4))) __attribute__((work_group_size_hint(8,16,32))) void kernel2(int a) {}
 // CHECK: define void @kernel2(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD3:[0-9]+]] !work_group_size_hint ![[MD4:[0-9]+]]
 
+kernel __attribute__((intel_reqd_sub_group_size(8))) void kernel3(int a) {}
+// CHECK: define void @kernel3(i32 {{[^%]*}}%a) {{[^{]+}} !intel_reqd_sub_group_size ![[MD5:[0-9]+]]
+
 // CHECK: [[MD1]] = !{i32 undef, i32 1}
 // CHECK: [[MD2]] = !{i32 1, i32 2, i32 4}
 // CHECK: [[MD3]] = !{<4 x i32> undef, i32 0}
 // CHECK: [[MD4]] = !{i32 8, i32 16, i32 32}
+// CHECK: [[MD5]] = !{i32 8}
Index: test/SemaOpenCL/invalid-kernel-attrs.cl
===================================================================
--- test/SemaOpenCL/invalid-kernel-attrs.cl
+++ test/SemaOpenCL/invalid-kernel-attrs.cl
@@ -33,3 +33,5 @@
 kernel __attribute__((reqd_work_group_size(1,2,0))) void kernel11(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
 kernel __attribute__((reqd_work_group_size(1,0,2))) void kernel12(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
 kernel __attribute__((reqd_work_group_size(0,1,2))) void kernel13(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
+__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to a kernel}}
+kernel __attribute__((intel_reqd_sub_group_size(0))) void kernel15(){} // expected-error {{'intel_reqd_sub_group_size' attribute must be greater than 0}}