Index: cfe/trunk/include/clang/Basic/Attr.td =================================================================== --- cfe/trunk/include/clang/Basic/Attr.td +++ cfe/trunk/include/clang/Basic/Attr.td @@ -864,6 +864,13 @@ let Documentation = [OpenCLUnrollHintDocs]; } +def OpenCLIntelReqdSubGroupSize: InheritableAttr { + let Spellings = [GNU<"intel_reqd_sub_group_size">]; + let Args = [UnsignedArgument<"SubGroupSize">]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [OpenCLIntelReqdSubGroupSizeDocs]; +} + // This attribute is both a type attribute, and a declaration attribute (for // parameter variables). def OpenCLAccess : Attr { Index: cfe/trunk/include/clang/Basic/AttrDocs.td =================================================================== --- cfe/trunk/include/clang/Basic/AttrDocs.td +++ cfe/trunk/include/clang/Basic/AttrDocs.td @@ -2216,6 +2216,21 @@ }]; } +def OpenCLIntelReqdSubGroupSizeDocs : Documentation { + let Category = DocCatStmt; + let Heading = "__attribute__((intel_reqd_sub_group_size))"; + let Content = [{ +The optional attribute intel_reqd_sub_group_size can be used to indicate that +the kernel must be compiled and executed with the specified subgroup size. When +this attribute is present, get_max_sub_group_size() is guaranteed to return the +specified integer value. This is important for the correctness of many subgroup +algorithms, and in some cases may be used by the compiler to generate more optimal +code. See `cl_intel_required_subgroup_size +` +for details. + }]; +} + def OpenCLAccessDocs : Documentation { let Category = DocCatStmt; let Heading = "__read_only, __write_only, __read_write (read_only, write_only, read_write)"; Index: cfe/trunk/lib/CodeGen/CodeGenFunction.h =================================================================== --- cfe/trunk/lib/CodeGen/CodeGenFunction.h +++ cfe/trunk/lib/CodeGen/CodeGenFunction.h @@ -1413,16 +1413,8 @@ /// True if we need emit the life-time markers. const bool ShouldEmitLifetimeMarkers; - /// Add a kernel metadata node to the named metadata node 'opencl.kernels'. - /// In the kernel metadata node, reference the kernel function and metadata - /// nodes for its optional attribute qualifiers (OpenCL 1.1 6.7.2): - /// - A node for the vec_type_hint() qualifier contains string - /// "vec_type_hint", an undefined value of the data type, - /// and a Boolean that is true if the is integer and signed. - /// - A node for the work_group_size_hint(X,Y,Z) qualifier contains string - /// "work_group_size_hint", and three 32-bit integers X, Y and Z. - /// - A node for the reqd_work_group_size(X,Y,Z) qualifier contains string - /// "reqd_work_group_size", and three 32-bit integers X, Y and Z. + /// Add OpenCL kernel arg metadata and the kernel attribute meatadata to + /// the function metadata. void EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn); Index: cfe/trunk/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CodeGenFunction.cpp +++ cfe/trunk/lib/CodeGen/CodeGenFunction.cpp @@ -658,34 +658,42 @@ GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext()); if (const VecTypeHintAttr *A = FD->getAttr()) { - QualType hintQTy = A->getTypeHint(); - const ExtVectorType *hintEltQTy = hintQTy->getAs(); - bool isSignedInteger = - hintQTy->isSignedIntegerType() || - (hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType()); - llvm::Metadata *attrMDArgs[] = { + QualType HintQTy = A->getTypeHint(); + const ExtVectorType *HintEltQTy = HintQTy->getAs(); + bool IsSignedInteger = + HintQTy->isSignedIntegerType() || + (HintEltQTy && HintEltQTy->getElementType()->isSignedIntegerType()); + llvm::Metadata *AttrMDArgs[] = { llvm::ConstantAsMetadata::get(llvm::UndefValue::get( CGM.getTypes().ConvertType(A->getTypeHint()))), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( llvm::IntegerType::get(Context, 32), - llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))}; - Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, attrMDArgs)); + llvm::APInt(32, (uint64_t)(IsSignedInteger ? 1 : 0))))}; + Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, AttrMDArgs)); } if (const WorkGroupSizeHintAttr *A = FD->getAttr()) { - llvm::Metadata *attrMDArgs[] = { + llvm::Metadata *AttrMDArgs[] = { llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs)); } if (const ReqdWorkGroupSizeAttr *A = FD->getAttr()) { - llvm::Metadata *attrMDArgs[] = { + llvm::Metadata *AttrMDArgs[] = { llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs)); + } + + if (const OpenCLIntelReqdSubGroupSizeAttr *A = + FD->getAttr()) { + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(Builder.getInt32(A->getSubGroupSize()))}; + Fn->setMetadata("intel_reqd_sub_group_size", + llvm::MDNode::get(Context, AttrMDArgs)); } } Index: cfe/trunk/lib/Sema/SemaDeclAttr.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaDeclAttr.cpp +++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp @@ -2891,6 +2891,28 @@ Attr.getAttributeSpellingListIndex())); } +// Handles intel_reqd_sub_group_size. +static void handleSubGroupSize(Sema &S, Decl *D, const AttributeList &Attr) { + uint32_t SGSize; + const Expr *E = Attr.getArgAsExpr(0); + if (!checkUInt32Argument(S, Attr, E, SGSize)) + return; + if (SGSize == 0) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero) + << Attr.getName() << E->getSourceRange(); + return; + } + + OpenCLIntelReqdSubGroupSizeAttr *Existing = + D->getAttr(); + if (Existing && Existing->getSubGroupSize() != SGSize) + S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << Attr.getName(); + + D->addAttr(::new (S.Context) OpenCLIntelReqdSubGroupSizeAttr( + Attr.getRange(), S.Context, SGSize, + Attr.getAttributeSpellingListIndex())); +} + static void handleVecTypeHint(Sema &S, Decl *D, const AttributeList &Attr) { if (!Attr.hasParsedType()) { S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) @@ -6157,6 +6179,9 @@ case AttributeList::AT_ReqdWorkGroupSize: handleWorkGroupSize(S, D, Attr); break; + case AttributeList::AT_OpenCLIntelReqdSubGroupSize: + handleSubGroupSize(S, D, Attr); + break; case AttributeList::AT_VecTypeHint: handleVecTypeHint(S, D, Attr); break; @@ -6521,6 +6546,9 @@ Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) << A << ExpectedKernelFunction; D->setInvalidDecl(); + } else if (Attr *A = D->getAttr()) { + Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A; + D->setInvalidDecl(); } } } Index: cfe/trunk/test/CodeGenOpenCL/kernel-attributes.cl =================================================================== --- cfe/trunk/test/CodeGenOpenCL/kernel-attributes.cl +++ cfe/trunk/test/CodeGenOpenCL/kernel-attributes.cl @@ -8,7 +8,11 @@ kernel __attribute__((vec_type_hint(uint4))) __attribute__((work_group_size_hint(8,16,32))) void kernel2(int a) {} // CHECK: define void @kernel2(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD3:[0-9]+]] !work_group_size_hint ![[MD4:[0-9]+]] +kernel __attribute__((intel_reqd_sub_group_size(8))) void kernel3(int a) {} +// CHECK: define void @kernel3(i32 {{[^%]*}}%a) {{[^{]+}} !intel_reqd_sub_group_size ![[MD5:[0-9]+]] + // CHECK: [[MD1]] = !{i32 undef, i32 1} // CHECK: [[MD2]] = !{i32 1, i32 2, i32 4} // CHECK: [[MD3]] = !{<4 x i32> undef, i32 0} // CHECK: [[MD4]] = !{i32 8, i32 16, i32 32} +// CHECK: [[MD5]] = !{i32 8} Index: cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test =================================================================== --- cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test +++ cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test @@ -2,7 +2,7 @@ // The number of supported attributes should never go down! -// CHECK: #pragma clang attribute supports 59 attributes: +// CHECK: #pragma clang attribute supports 60 attributes: // CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function) // CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function) @@ -42,6 +42,7 @@ // CHECK-NEXT: ObjCRuntimeName (SubjectMatchRule_objc_interface, SubjectMatchRule_objc_protocol) // CHECK-NEXT: ObjCRuntimeVisible (SubjectMatchRule_objc_interface) // CHECK-NEXT: ObjCSubclassingRestricted (SubjectMatchRule_objc_interface) +// CHECK-NEXT: OpenCLIntelReqdSubGroupSize (SubjectMatchRule_function) // CHECK-NEXT: OpenCLNoSVM (SubjectMatchRule_variable) // CHECK-NEXT: OptimizeNone (SubjectMatchRule_function, SubjectMatchRule_objc_method) // CHECK-NEXT: Overloadable (SubjectMatchRule_function) Index: cfe/trunk/test/SemaOpenCL/invalid-kernel-attrs.cl =================================================================== --- cfe/trunk/test/SemaOpenCL/invalid-kernel-attrs.cl +++ cfe/trunk/test/SemaOpenCL/invalid-kernel-attrs.cl @@ -33,3 +33,7 @@ kernel __attribute__((reqd_work_group_size(1,2,0))) void kernel11(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}} kernel __attribute__((reqd_work_group_size(1,0,2))) void kernel12(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}} kernel __attribute__((reqd_work_group_size(0,1,2))) void kernel13(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}} + +__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to a kernel}} +kernel __attribute__((intel_reqd_sub_group_size(0))) void kernel15(){} // expected-error {{'intel_reqd_sub_group_size' attribute must be greater than 0}} +kernel __attribute__((intel_reqd_sub_group_size(8))) __attribute__((intel_reqd_sub_group_size(16))) void kernel16() {} //expected-warning{{attribute 'intel_reqd_sub_group_size' is already applied with different parameters}}