Index: clang/include/clang/Basic/Attr.td
===================================================================
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -1191,6 +1191,20 @@
   let Documentation = [SYCLKernelDocs];
 }
 
+def SYCLSpecialClass : InheritableAttr {
+  let Spellings = [Clang<"sycl_special_class">];
+  let Subjects = SubjectList<[CXXRecord]>;
+  let LangOpts = [SYCL];
+  let Documentation = [SYCLSpecialClassDocs];
+}
+
+def SYCLDevice : InheritableAttr {
+  let Spellings = [GNU<"sycl_device">];
+  let Subjects = SubjectList<[Function]>;
+  let LangOpts = [SYCL];
+  let Documentation = [SYCLDeviceDocs];
+}
+
 def C11NoReturn : InheritableAttr {
   let Spellings = [Keyword<"_Noreturn">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
Index: clang/include/clang/Basic/AttrDocs.td
===================================================================
--- clang/include/clang/Basic/AttrDocs.td
+++ clang/include/clang/Basic/AttrDocs.td
@@ -405,6 +405,26 @@
   }];
 }
 
+def SYCLSpecialClassDocs : Documentation {
+  let Category = DocCatStmt;
+  let Content = [{
+The ``__attribute__((sycl_special_class))`` attribute is used in SYCL
+headers to mark a class or struct that needs additional handling when it is
+passed from host to device. Note that this attribute is used internally by the
+SYCL implementation and is not intended to be used by external users.
+It is applied to the ``accessor``, ``sampler``, and ``stream`` classes.
+Types carrying this attribute are excluded from device-copyability checks and
+other type-legalization steps.
+
+.. code-block:: c++
+
+  class __attribute__((sycl_special_class)) accessor {
+  private:
+    void __init() {}
+  };
+  }];
+}
+
 def C11NoReturnDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
@@ -742,6 +762,17 @@
   }];
 }
 
+def SYCLDeviceDocs : Documentation {
+  let Category = DocCatFunction;
+  let Heading = "sycl_device";
+  let Content = [{
+This attribute can only be applied to functions. It indicates that the
+function must be treated as a device function and must be emitted even if it
+has no direct uses from other device functions. Functions called from a
+``sycl_device`` function implicitly inherit this attribute.
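+
+A minimal illustrative example (the function below is a placeholder, not part
+of any SYCL header):
+
+.. code-block:: c++
+
+  // Emitted for the device even though no device function calls it directly.
+  __attribute__((sycl_device)) void vec_add(int *a, int *b, int *c, int i) {
+    c[i] = a[i] + b[i]; // Functions called from here are treated as device
+  }                     // functions too, since callees inherit the attribute.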
+ }]; +} + def DiagnoseIfDocs : Documentation { let Category = DocCatFunction; let Content = [{ Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11410,6 +11410,28 @@ "probability argument to __builtin_expect_with_probability is outside the " "range [0.0, 1.0]">; +// SYCL-specific diagnostics +def err_sycl_attribute_internal_function + : Error<"%0 attribute cannot be applied to a " + "static function or function in an anonymous namespace">; +def err_sycl_restrict : Error< + "SYCL kernel cannot " +"%select{use a non-const global variable" + "|use rtti" + "|use a non-const static data variable" + "|call a virtual function" + "|use exceptions" + "|call a recursive function" + "|call through a function pointer" + "|allocate storage" + "|use inline assembly" + "|call a dllimport function" + "|call a variadic function" + "|call an undefined function without SYCL_EXTERNAL attribute" + "|use a const static or global variable that is neither zero-initialized " + "nor constant-initialized" + "}0">; + // TCB warnings def err_tcb_conflicting_attributes : Error< "attributes '%0(\"%2\")' and '%1(\"%2\")' are mutually exclusive">; Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -13080,6 +13080,9 @@ ConstructorDestructor, BuiltinFunction }; + + void checkSYCLDeviceVarDecl(VarDecl *Var); + /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current /// context is "used as device code". /// @@ -13115,6 +13118,20 @@ /// Adds Callee to DeviceCallGraph if we don't know if its caller will be /// codegen'ed yet. bool checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee); + +private: + /// Contains generated OpenCL kernel functions for SYCL. + SmallVector SYCLKernels; + +public: + void addSYCLKernel(Decl *D) { SYCLKernels.push_back(D); } + /// Access to SYCL kernels. + //const SmallVectorImpl &getSYCLKernels() { return SYCLKernels; } + const ArrayRef getSYCLKernels() { return SYCLKernels; } + + /// Constructs an OpenCL kernel using the KernelCaller function and adds it to + /// the SYCL device code. + void constructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC); }; /// RAII object that enters a new expression evaluation context. Index: clang/lib/AST/ASTContext.cpp =================================================================== --- clang/lib/AST/ASTContext.cpp +++ clang/lib/AST/ASTContext.cpp @@ -11065,6 +11065,10 @@ if (D->hasAttr()) return false; + // If SYCL, only kernels are required. + if (LangOpts.SYCLIsDevice && !D->hasAttr()) + return false; + // Aliases and used decls are required. if (D->hasAttr() || D->hasAttr()) return true; Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -2960,6 +2960,12 @@ } } + if (LangOpts.SYCLIsDevice && Global->hasAttr() && + MustBeEmitted(Global)) { + addDeferredDeclToEmit(GD); + return; + } + // Ignore declarations, they will be emitted on their first use. if (const auto *FD = dyn_cast(Global)) { // Forward declarations are emitted lazily on first use. 
@@ -3011,6 +3017,13 @@ } } + // clang::ParseAST ensures that we emit the SYCL device functions at the end, + // so anything that is a device (or indirectly called) will be handled later. + if (LangOpts.SYCLIsDevice && MustBeEmitted(Global)) { + addDeferredDeclToEmit(GD); + return; + } + // Defer code generation to first use when possible, e.g. if this is an inline // function. If the global must always be emitted, do it eagerly if possible // to benefit from cache locality. Index: clang/lib/Parse/ParseAST.cpp =================================================================== --- clang/lib/Parse/ParseAST.cpp +++ clang/lib/Parse/ParseAST.cpp @@ -168,6 +168,10 @@ for (Decl *D : S.WeakTopLevelDecls()) Consumer->HandleTopLevelDecl(DeclGroupRef(D)); + if (S.getLangOpts().SYCLIsDevice) + for (Decl *D : S.getSYCLKernels()) + Consumer->HandleTopLevelDecl(DeclGroupRef(D)); + Consumer->HandleTranslationUnit(S.getASTContext()); // Finalize the template instantiation observer chain. Index: clang/lib/Sema/SemaDecl.cpp =================================================================== --- clang/lib/Sema/SemaDecl.cpp +++ clang/lib/Sema/SemaDecl.cpp @@ -13048,6 +13048,9 @@ } } + if (getLangOpts().SYCLIsDevice) + checkSYCLDeviceVarDecl(var); + // In Objective-C, don't allow jumps past the implicit initialization of a // local retaining variable. if (getLangOpts().ObjC && Index: clang/lib/Sema/SemaDeclAttr.cpp =================================================================== --- clang/lib/Sema/SemaDeclAttr.cpp +++ clang/lib/Sema/SemaDeclAttr.cpp @@ -4565,6 +4565,16 @@ D->addAttr(Optnone); } +static void handleSYCLDeviceAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + auto *FD = cast(D); + if (!FD->isExternallyVisible()) { + S.Diag(AL.getLoc(), diag::err_sycl_attribute_internal_function) << AL; + return; + } + + handleSimpleAttribute(S, D, AL); +} + static void handleConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) { const auto *VD = cast(D); if (VD->hasLocalStorage()) { @@ -8054,6 +8064,12 @@ case ParsedAttr::AT_SYCLKernel: handleSYCLKernelAttr(S, D, AL); break; + case ParsedAttr::AT_SYCLSpecialClass: + handleSimpleAttribute(S, D, AL); + break; + case ParsedAttr::AT_SYCLDevice: + handleSYCLDeviceAttr(S, D, AL); + break; case ParsedAttr::AT_Format: handleFormatAttr(S, D, AL); break; Index: clang/lib/Sema/SemaSYCL.cpp =================================================================== --- clang/lib/Sema/SemaSYCL.cpp +++ clang/lib/Sema/SemaSYCL.cpp @@ -8,11 +8,1510 @@ // This implements Semantic Analysis for SYCL constructs. //===----------------------------------------------------------------------===// +#include "TreeTransform.h" +#include "clang/AST/AST.h" #include "clang/AST/Mangle.h" +#include "clang/AST/QualTypeNames.h" +#include "clang/AST/RecordLayout.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/TemplateArgumentVisitor.h" +#include "clang/AST/TypeVisitor.h" +#include "clang/Sema/Initialization.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" +#include "llvm/Support/raw_ostream.h" + +#include + using namespace clang; +using namespace std::placeholders; + +using ParamDesc = std::tuple; + +const static std::string InitMethodName = "__init"; +const static std::string FinalizeMethodName = "__finalize"; + +// anonymous namespace so these don't get linkage. +namespace { + +/// Various utilities. +class Util { +public: + using DeclContextDesc = std::pair; + + /// Checks whether given clang type is a full specialization of a SYCL + /// special class. 
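+  /// (That is, a type whose declaration carries the 'sycl_special_class'
+  /// attribute, e.g. the SYCL 'accessor', 'sampler' and 'stream' classes.)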
+ static bool isSyclSpecialType(QualType Ty); + + /// Checks whether given clang type is a full specialization of the SYCL + /// half class. + static bool isSyclHalfType(const QualType Ty); + + /// Checks whether given clang type is a standard SYCL API class with given + /// name. + /// \param Ty the clang type being checked + /// \param Name the class name checked against + /// \param Tmpl whether the class is template instantiation or simple record + static bool isSyclType(const QualType &Ty, StringRef Name, bool Tmpl = false); + + // Checks declaration context hierarchy. + /// \param DC the context of the item to be checked. + /// \param Scopes the declaration scopes leading from the item context to the + /// translation unit (excluding the latter) + static bool matchContext(const DeclContext *DC, + ArrayRef Scopes); + + /// Checks whether given clang type is declared in the given hierarchy of + /// declaration contexts. + /// \param Ty the clang type being checked + /// \param Scopes the declaration scopes leading from the type to the + /// translation unit (excluding the latter) + static bool matchQualifiedTypeName(const QualType &Ty, + ArrayRef Scopes); +}; + +class KernelBodyTransform final : public TreeTransform { +public: + KernelBodyTransform(std::pair &MPair, + Sema &S) + : TreeTransform(S), MappingPair(MPair), SemaRef(S) {} + bool AlwaysRebuild() { return true; } + + ExprResult TransformDeclRefExpr(DeclRefExpr *DRE) { + auto *Ref = dyn_cast(DRE->getDecl()); + if (Ref && Ref == MappingPair.first) { + DeclaratorDecl *NewDecl = MappingPair.second; + return DeclRefExpr::Create( + SemaRef.getASTContext(), DRE->getQualifierLoc(), + DRE->getTemplateKeywordLoc(), NewDecl, false, DRE->getNameInfo(), + NewDecl->getType(), DRE->getValueKind()); + } + return DRE; + } + +private: + std::pair MappingPair; + Sema &SemaRef; +}; + +/// Return method by name +static CXXMethodDecl *getMethodByName(const CXXRecordDecl *CRD, + StringRef MethodName) { + CXXMethodDecl *Method; + auto It = llvm::find_if(CRD->methods(), + [MethodName](const CXXMethodDecl *Method) { + return Method->getNameAsString() == MethodName; + }); + Method = (It != CRD->methods().end()) ? *It : nullptr; + return Method; +} + +const CXXRecordDecl *getKernelObjectType(FunctionDecl *Caller) { + assert(Caller->getNumParams() > 0 && "Insufficient kernel parameters"); + + QualType KernelParamTy = Caller->getParamDecl(0)->getType(); + // In SYCL 2020 kernels are now passed by reference. + if (KernelParamTy->isReferenceType()) + return KernelParamTy->getPointeeCXXRecordDecl(); + + // SYCL 1.2.1 + return KernelParamTy->getAsCXXRecordDecl(); +} + +/// Creates a kernel parameter descriptor +/// \param Src field declaration to construct name from +/// \param Ty the desired parameter type +/// \return the constructed descriptor +ParamDesc makeParamDesc(const FieldDecl *Src, QualType Ty) { + ASTContext &Ctx = Src->getASTContext(); + std::string Name = (Twine("_arg_") + Src->getName()).str(); + return std::make_tuple(Ty, &Ctx.Idents.get(Name), + Ctx.getTrivialTypeSourceInfo(Ty)); +} + +ParamDesc makeParamDesc(ASTContext &Ctx, const CXXBaseSpecifier &Src, + QualType Ty) { + // TODO: There is no name for the base available, but duplicate names are + // seemingly already possible, so we'll give them all the same name for now. + // This only happens with the accessor types. 
+ std::string Name = "_arg__base"; + return std::make_tuple(Ty, &Ctx.Idents.get(Name), + Ctx.getTrivialTypeSourceInfo(Ty)); +} + +// The first template argument to the kernel caller function is used to identify +// the kernel itself. +QualType calculateKernelNameType(ASTContext &Ctx, + FunctionDecl *KernelCallerFunc) { + const TemplateArgumentList *TAL = + KernelCallerFunc->getTemplateSpecializationArgs(); + assert(TAL && "No template argument info"); + return TAL->get(0).getAsType().getCanonicalType(); +} + +// Gets a name for the OpenCL kernel function, calculated from the first +// template argument of the kernel caller function. +std::string constructKernelName(Sema &S, FunctionDecl *KernelCallerFunc, + MangleContext &MC) { + QualType KernelNameType = + calculateKernelNameType(S.getASTContext(), KernelCallerFunc); + + SmallString<256> Result; + llvm::raw_svector_ostream Out(Result); + + MC.mangleTypeName(KernelNameType, Out); + + return std::string(Out.str()); +} + +template struct bind_param { using type = T; }; + +template <> struct bind_param { + using Type = const CXXBaseSpecifier &; +}; + +template <> struct bind_param { using type = FieldDecl *; }; + +template <> struct bind_param { using type = FieldDecl *; }; + +template using bind_param_t = typename bind_param::type; + +class KernelObjVisitor { + Sema &SemaRef; + + template + void VisitUnionImpl(const CXXRecordDecl *Owner, ParentTy &Parent, + const CXXRecordDecl *Wrapper, HandlerTys &... Handlers) { + (void)std::initializer_list{ + (Handlers.enterUnion(Owner, Parent), 0)...}; + VisitRecordHelper(Wrapper, Wrapper->fields(), Handlers...); + (void)std::initializer_list{ + (Handlers.leaveUnion(Owner, Parent), 0)...}; + } + + // These enable handler execution only when previous Handlers succeed. + template + bool handleField(FieldDecl *FD, QualType FDTy, Tn &&... tn) { + bool result = true; + (void)std::initializer_list{(result = result && tn(FD, FDTy), 0)...}; + return result; + } + template + bool handleField(const CXXBaseSpecifier &BD, QualType BDTy, Tn &&... tn) { + bool result = true; + std::initializer_list{(result = result && tn(BD, BDTy), 0)...}; + return result; + } + +// This definition using std::bind is necessary because of a gcc 7.x bug. +#define KF_FOR_EACH(FUNC, Item, Qt) \ + handleField( \ + Item, Qt, \ + std::bind(static_cast::*)( \ + bind_param_t, QualType)>( \ + &std::decay_t::FUNC), \ + std::ref(Handlers), _1, _2)...) + + // The following simpler definition works with gcc 8.x and later. + //#define KF_FOR_EACH(FUNC) \ +// handleField(Field, FieldTy, ([&](FieldDecl *FD, QualType FDTy) { \ +// return Handlers.f(FD, FDTy); \ +// })...) + + // Parent contains the FieldDecl or CXXBaseSpecifier that was used to enter + // the Wrapper structure that we're currently visiting. Owner is the parent + // type (which doesn't exist in cases where it is a FieldDecl in the + // 'root'), and Wrapper is the current struct being unwrapped. + template + void visitComplexRecord(const CXXRecordDecl *Owner, ParentTy &Parent, + const CXXRecordDecl *Wrapper, QualType RecordTy, + HandlerTys &... 
Handlers) { + (void)std::initializer_list{ + (Handlers.enterStruct(Owner, Parent, RecordTy), 0)...}; + VisitRecordHelper(Wrapper, Wrapper->bases(), Handlers...); + VisitRecordHelper(Wrapper, Wrapper->fields(), Handlers...); + (void)std::initializer_list{ + (Handlers.leaveStruct(Owner, Parent, RecordTy), 0)...}; + } + + template + void visitSimpleRecord(const CXXRecordDecl *Owner, ParentTy &Parent, + const CXXRecordDecl *Wrapper, QualType RecordTy, + HandlerTys &... Handlers) { + (void)std::initializer_list{ + (Handlers.handleNonDecompStruct(Owner, Parent, RecordTy), 0)...}; + } + + template + void visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent, + const CXXRecordDecl *Wrapper, QualType RecordTy, + HandlerTys &... Handlers); + + template + void VisitUnion(const CXXRecordDecl *Owner, ParentTy &Parent, + const CXXRecordDecl *Wrapper, HandlerTys &... Handlers); + + template + void VisitRecordHelper(const CXXRecordDecl *Owner, + clang::CXXRecordDecl::base_class_const_range Range, + HandlerTys &... Handlers) { + for (const auto &Base : Range) { + QualType BaseTy = Base.getType(); + // Handle accessor class as base + if (Util::isSyclSpecialType(BaseTy)) { + (void)std::initializer_list{ + (Handlers.handleSyclSpecialType(Owner, Base, BaseTy), 0)...}; + } else + // For all other bases, visit the record + visitRecord(Owner, Base, BaseTy->getAsCXXRecordDecl(), BaseTy, + Handlers...); + } + } + + template + void VisitRecordHelper(const CXXRecordDecl *Owner, + RecordDecl::field_range Range, + HandlerTys &... Handlers) { + VisitRecordFields(Owner, Handlers...); + } + + // FIXME: Can this be refactored/handled some other way? + template + void visitStreamRecord(const CXXRecordDecl *Owner, ParentTy &Parent, + CXXRecordDecl *Wrapper, QualType RecordTy, + HandlerTys &... Handlers) { + (void)std::initializer_list{ + (Handlers.enterStream(Owner, Parent, RecordTy), 0)...}; + for (const auto &Field : Wrapper->fields()) { + QualType FieldTy = Field->getType(); + // Required to initialize accessors inside streams. + if (Util::isSyclAccessorType(FieldTy)) + KF_FOR_EACH(handleSyclAccessorType, Field, FieldTy); + } + (void)std::initializer_list{ + (Handlers.leaveStream(Owner, Parent, RecordTy), 0)...}; + } + + template + void visitArrayElementImpl(const CXXRecordDecl *Owner, FieldDecl *ArrayField, + QualType ElementTy, uint64_t Index, + HandlerTys &... Handlers) { + (void)std::initializer_list{ + (Handlers.nextElement(ElementTy, Index), 0)...}; + visitField(Owner, ArrayField, ElementTy, Handlers...); + } + + template + void visitFirstArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField, + QualType ElementTy, HandlerTys &... Handlers) { + visitArrayElementImpl(Owner, ArrayField, ElementTy, 0, Handlers...); + } + + template + void visitNthArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField, + QualType ElementTy, uint64_t Index, + HandlerTys &... Handlers); + + template + void visitSimpleArray(const CXXRecordDecl *Owner, FieldDecl *Field, + QualType ArrayTy, HandlerTys &... Handlers) { + (void)std::initializer_list{ + (Handlers.handleSimpleArrayType(Field, ArrayTy), 0)...}; + } + + template + void visitComplexArray(const CXXRecordDecl *Owner, FieldDecl *Field, + QualType ArrayTy, HandlerTys &... Handlers) { + // Array workflow is: + // handleArrayType + // enterArray + // nextElement + // VisitField (same as before, note that The FieldDecl is the of array + // itself, not the element) + // ... repeat per element, opt-out for duplicates. 
+ // leaveArray + + if (!KF_FOR_EACH(handleArrayType, Field, ArrayTy)) + return; + + const ConstantArrayType *CAT = + SemaRef.getASTContext().getAsConstantArrayType(ArrayTy); + assert(CAT && "Should only be called on constant-size array."); + QualType ET = CAT->getElementType(); + uint64_t ElemCount = CAT->getSize().getZExtValue(); + assert(ElemCount > 0 && "SYCL prohibits 0 sized arrays"); + + (void)std::initializer_list{ + (Handlers.enterArray(Field, ArrayTy, ET), 0)...}; + + visitFirstArrayElement(Owner, Field, ET, Handlers...); + for (uint64_t Index = 1; Index < ElemCount; ++Index) + visitNthArrayElement(Owner, Field, ET, Index, Handlers...); + + (void)std::initializer_list{ + (Handlers.leaveArray(Field, ArrayTy, ET), 0)...}; + } + + template + void visitArray(const CXXRecordDecl *Owner, FieldDecl *Field, + QualType ArrayTy, HandlerTys &... Handlers); + + template + void visitField(const CXXRecordDecl *Owner, FieldDecl *Field, + QualType FieldTy, HandlerTys &... Handlers) { + if (Util::isSyclSpecialType(FieldTy)) + KF_FOR_EACH(handleSyclSpecialType, Field, FieldTy); + else if (Util::isSyclHalfType(FieldTy)) + KF_FOR_EACH(handleSyclHalfType, Field, FieldTy); + else if (FieldTy->isStructureOrClassType()) { + if (KF_FOR_EACH(handleStructType, Field, FieldTy)) { + CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl(); + visitRecord(Owner, Field, RD, FieldTy, Handlers...); + } + } else if (FieldTy->isUnionType()) { + if (KF_FOR_EACH(handleUnionType, Field, FieldTy)) { + CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl(); + VisitUnion(Owner, Field, RD, Handlers...); + } + } else if (FieldTy->isReferenceType()) + KF_FOR_EACH(handleReferenceType, Field, FieldTy); + else if (FieldTy->isPointerType()) + KF_FOR_EACH(handlePointerType, Field, FieldTy); + else if (FieldTy->isArrayType()) + visitArray(Owner, Field, FieldTy, Handlers...); + else if (FieldTy->isScalarType() || FieldTy->isVectorType()) + KF_FOR_EACH(handleScalarType, Field, FieldTy); + else + KF_FOR_EACH(handleOtherType, Field, FieldTy); + } + +public: + KernelObjVisitor(Sema &S) : SemaRef(S) {} + + template + void VisitRecordBases(const CXXRecordDecl *KernelFunctor, + HandlerTys &... Handlers) { + VisitRecordHelper(KernelFunctor, KernelFunctor->bases(), Handlers...); + } + + // A visitor function that dispatches to functions as defined in + // SyclKernelFieldHandler for the purposes of kernel generation. + template + void VisitRecordFields(const CXXRecordDecl *Owner, HandlerTys &... Handlers) { + for (const auto Field : Owner->fields()) + visitField(Owner, Field, Field->getType(), Handlers...); + } +#undef KF_FOR_EACH +}; +// A base type that the SYCL OpenCL Kernel construction task uses to implement +// individual tasks. +class SyclKernelFieldHandlerBase { +public: + static constexpr const bool VisitUnionBody = false; + static constexpr const bool VisitNthArrayElement = true; + // Opt-in based on whether we should visit inside simple containers (structs, + // arrays). All of the 'check' types should likely be true, the int-header, + // and kernel decl creation types should not. + static constexpr const bool VisitInsideSimpleContainers = true; + // Mark these virtual so that we can use override in the implementer classes, + // despite virtual dispatch never being used. + + // SYCL special class can be a base class or a field decl, so both must be + // handled. 
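+  // For example (illustrative only): in
+  //   struct MyKernel : sycl::accessor<...> { sycl::accessor<...> Acc; ... };
+  // the base class goes through the CXXBaseSpecifier overload and the data
+  // member goes through the FieldDecl overload.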
+ virtual bool handleSyclSpecialType(const CXXRecordDecl *, + const CXXBaseSpecifier &, QualType) { + return true; + } + virtual bool handleSyclSpecialType(FieldDecl *, QualType) { return true; } + + + virtual bool handleSyclSpecConstantType(FieldDecl *, QualType) { + return true; + } + + virtual bool handleSyclHalfType(const CXXRecordDecl *, + const CXXBaseSpecifier &, QualType) { + return true; + } + virtual bool handleSyclHalfType(FieldDecl *, QualType) { return true; } + virtual bool handleStructType(FieldDecl *, QualType) { return true; } + virtual bool handleUnionType(FieldDecl *, QualType) { return true; } + virtual bool handleReferenceType(FieldDecl *, QualType) { return true; } + virtual bool handlePointerType(FieldDecl *, QualType) { return true; } + virtual bool handleArrayType(FieldDecl *, QualType) { return true; } + virtual bool handleScalarType(FieldDecl *, QualType) { return true; } + // Most handlers shouldn't be handling this, just the field checker. + virtual bool handleOtherType(FieldDecl *, QualType) { return true; } + + // Handle a simple struct that doesn't need to be decomposed, only called on + // handlers with VisitInsideSimpleContainers as false. Replaces + // handleStructType, enterStruct, leaveStruct, and visiting of sub-elements. + virtual bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *, + QualType) { + return true; + } + virtual bool handleNonDecompStruct(const CXXRecordDecl *, + const CXXBaseSpecifier &, QualType) { + return true; + } + + // Instead of handleArrayType, enterArray, leaveArray, and nextElement (plus + // descending down the elements), this function gets called in the event of an + // array containing simple elements (even in the case of an MD array). + virtual bool handleSimpleArrayType(FieldDecl *, QualType) { return true; } + + // The following are only used for keeping track of where we are in the base + // class/field graph. Int Headers use this to calculate offset, most others + // don't have a need for these. + + virtual bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) { + return true; + } + virtual bool leaveStruct(const CXXRecordDecl *, FieldDecl *, QualType) { + return true; + } + virtual bool enterStream(const CXXRecordDecl *, FieldDecl *, QualType) { + return true; + } + virtual bool leaveStream(const CXXRecordDecl *, FieldDecl *, QualType) { + return true; + } + virtual bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &, + QualType) { + return true; + } + virtual bool leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &, + QualType) { + return true; + } + virtual bool enterUnion(const CXXRecordDecl *, FieldDecl *) { return true; } + virtual bool leaveUnion(const CXXRecordDecl *, FieldDecl *) { return true; } + + // The following are used for stepping through array elements. + virtual bool enterArray(FieldDecl *, QualType ArrayTy, QualType ElementTy) { + return true; + } + virtual bool leaveArray(FieldDecl *, QualType ArrayTy, QualType ElementTy) { + return true; + } + + virtual bool nextElement(QualType, uint64_t) { return true; } + + virtual ~SyclKernelFieldHandlerBase() = default; +}; + +// A class to act as the direct base for all the SYCL OpenCL Kernel construction +// tasks that contains a reference to Sema (and potentially any other +// universally required data). 
+class SyclKernelFieldHandler : public SyclKernelFieldHandlerBase { +protected: + Sema &SemaRef; + SyclKernelFieldHandler(Sema &S) : SemaRef(S) {} +}; + +// A class to represent the 'do nothing' case for filtering purposes. +class SyclEmptyHandler final : public SyclKernelFieldHandlerBase {}; +SyclEmptyHandler GlobalEmptyHandler; + +template struct HandlerFilter; +template struct HandlerFilter { + H &Handler; + HandlerFilter(H &Handler) : Handler(Handler) {} +}; +template struct HandlerFilter { + SyclEmptyHandler &Handler = GlobalEmptyHandler; + HandlerFilter(H &Handler) {} +}; + +template struct AnyTrue; + +template struct AnyTrue { static constexpr bool Value = B; }; + +template struct AnyTrue { + static constexpr bool Value = B || AnyTrue::Value; +}; + +template struct AllTrue; + +template struct AllTrue { static constexpr bool Value = B; }; + +template struct AllTrue { + static constexpr bool Value = B && AllTrue::Value; +}; + +template +void KernelObjVisitor::VisitUnion(const CXXRecordDecl *Owner, ParentTy &Parent, + const CXXRecordDecl *Wrapper, + Handlers &... handlers) { + // Don't continue descending if none of the handlers 'care'. This could be 'if + // constexpr' starting in C++17. Until then, we have to count on the + // optimizer to realize "if (false)" is a dead branch. + if (AnyTrue::Value) + VisitUnionImpl( + Owner, Parent, Wrapper, + HandlerFilter(handlers).Handler...); +} + +template +void KernelObjVisitor::visitNthArrayElement(const CXXRecordDecl *Owner, + FieldDecl *ArrayField, + QualType ElementTy, uint64_t Index, + Handlers &... handlers) { + // Don't continue descending if none of the handlers 'care'. This could be 'if + // constexpr' starting in C++17. Until then, we have to count on the + // optimizer to realize "if (false)" is a dead branch. + if (AnyTrue::Value) + visitArrayElementImpl( + Owner, ArrayField, ElementTy, Index, + HandlerFilter(handlers) + .Handler...); +} + +template +void KernelObjVisitor::visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent, + const CXXRecordDecl *Wrapper, + QualType RecordTy, + HandlerTys &... Handlers) { + RecordDecl *RD = RecordTy->getAsRecordDecl(); + assert(RD && "should not be null."); + // "Simple" Containers are those that do NOT need to be decomposed, + // "Complex" containers are those that DO. In the case where the container + // does NOT need to be decomposed, we can call VisitSimpleRecord on the + // handlers that have opted-out of VisitInsideSimpleContainers. The 'if' + // makes sure we only do that if at least 1 has opted out. + if (!AllTrue::Value) + visitSimpleRecord( + Owner, Parent, Wrapper, RecordTy, + HandlerFilter( + Handlers) + .Handler...); + + // Even though this is a 'simple' container, some handlers (via + // VisitInsideSimpleContainers = true) need to treat it as if it needs + // decomposing, so we call VisitComplexRecord iif at least one has. + if (AnyTrue::Value) + visitComplexRecord( + Owner, Parent, Wrapper, RecordTy, + HandlerFilter( + Handlers) + .Handler...); +} + +template +void KernelObjVisitor::visitArray(const CXXRecordDecl *Owner, FieldDecl *Field, + QualType ArrayTy, HandlerTys &... Handlers) { + if (!AllTrue::Value) + visitSimpleArray( + Owner, Field, ArrayTy, + HandlerFilter( + Handlers) + .Handler...); + + if (AnyTrue::Value) + visitComplexArray( + Owner, Field, ArrayTy, + HandlerFilter( + Handlers) + .Handler...); +} + +// A type to Create and own the FunctionDecl for the kernel. 
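+// For example (illustrative only): a kernel functor with the fields
+//   cl::sycl::accessor<int, 1, ...> Acc;
+//   int N;
+// results in an OpenCL kernel declaration roughly of the form
+//   __kernel void <mangled kernel name>(<the parameters of Acc's __init
+//                                        method, each named _arg_Acc>,
+//                                       int _arg_N);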
+class SyclKernelDeclCreator : public SyclKernelFieldHandler { + FunctionDecl *KernelDecl; + llvm::SmallVector Params; + Sema::ContextRAII FuncContext; + // Holds the last handled field's first parameter. This doesn't store an + // iterator as push_back invalidates iterators. + size_t LastParamIndex = 0; + // Keeps track of whether we are currently handling fields inside a struct. + int StructDepth = 0; + + void addParam(const FieldDecl *FD, QualType FieldTy) { + ParamDesc newParamDesc = makeParamDesc(FD, FieldTy); + addParam(newParamDesc, FieldTy); + } + + void addParam(const CXXBaseSpecifier &BS, QualType FieldTy) { + ParamDesc newParamDesc = + makeParamDesc(SemaRef.getASTContext(), BS, FieldTy); + addParam(newParamDesc, FieldTy); + } + + void addParam(ParamDesc newParamDesc, QualType FieldTy) { + // Create a new ParmVarDecl based on the new info. + ASTContext &Ctx = SemaRef.getASTContext(); + auto *NewParam = ParmVarDecl::Create( + Ctx, KernelDecl, SourceLocation(), SourceLocation(), + std::get<1>(newParamDesc), std::get<0>(newParamDesc), + std::get<2>(newParamDesc), SC_None, /*DefArg*/ nullptr); + NewParam->setScopeInfo(0, Params.size()); + NewParam->setIsUsed(); + + LastParamIndex = Params.size(); + Params.push_back(NewParam); + } + + // All special SYCL objects must have __init method. We extract types for + // kernel parameters from __init method parameters. We will use __init method + // and kernel parameters which we build here to initialize special objects in + // the kernel body. + bool handleSpecialType(FieldDecl *FD, QualType FieldTy, + bool isAccessorType = false) { + const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); + assert(RecordDecl && "The type must be a RecordDecl"); + CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName); + assert(InitMethod && "The type must have the __init method"); + + // Don't do -1 here because we count on this to be the first parameter added + // (if any). + size_t ParamIndex = Params.size(); + for (const ParmVarDecl *Param : InitMethod->parameters()) { + QualType ParamTy = Param->getType(); + addParam(FD, ParamTy.getCanonicalType()); + } + LastParamIndex = ParamIndex; + return true; + } + + static void setKernelImplicitAttrs(ASTContext &Context, FunctionDecl *FD, + StringRef Name) { + // Set implicit attributes. + FD->addAttr(OpenCLKernelAttr::CreateImplicit(Context)); + FD->addAttr(AsmLabelAttr::CreateImplicit(Context, Name)); + FD->addAttr(ArtificialAttr::CreateImplicit(Context)); + } + + static FunctionDecl *createKernelDecl(ASTContext &Ctx, StringRef Name, + SourceLocation Loc, bool IsInline) { + // Create this with no prototype, and we can fix this up after we've seen + // all the params. + FunctionProtoType::ExtProtoInfo Info(CC_OpenCLKernel); + QualType FuncType = Ctx.getFunctionType(Ctx.VoidTy, {}, Info); + + FunctionDecl *FD = FunctionDecl::Create( + Ctx, Ctx.getTranslationUnitDecl(), Loc, Loc, &Ctx.Idents.get(Name), + FuncType, Ctx.getTrivialTypeSourceInfo(Ctx.VoidTy), SC_None); + FD->setImplicitlyInline(IsInline); + setKernelImplicitAttrs(Ctx, FD, Name); + + // Add kernel to translation unit to see it in AST-dump. 
+ Ctx.getTranslationUnitDecl()->addDecl(FD); + return FD; + } + +public: + static constexpr const bool VisitInsideSimpleContainers = false; + SyclKernelDeclCreator(Sema &S, StringRef Name, SourceLocation Loc, + bool IsInline) + : SyclKernelFieldHandler(S), + KernelDecl(createKernelDecl(S.getASTContext(), Name, Loc, IsInline)), + FuncContext(SemaRef, KernelDecl) {} + + ~SyclKernelDeclCreator() { + ASTContext &Ctx = SemaRef.getASTContext(); + FunctionProtoType::ExtProtoInfo Info(CC_OpenCLKernel); + + SmallVector ArgTys; + std::transform(std::begin(Params), std::end(Params), + std::back_inserter(ArgTys), + [](const ParmVarDecl *PVD) { return PVD->getType(); }); + + QualType FuncType = Ctx.getFunctionType(Ctx.VoidTy, ArgTys, Info); + KernelDecl->setType(FuncType); + KernelDecl->setParams(Params); + + SemaRef.addSYCLKernel(KernelDecl); + } + + bool enterStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final { + return enterStruct(RD, FD, Ty); + } + + bool leaveStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final { + return leaveStruct(RD, FD, Ty); + } + + bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) final { + ++StructDepth; + return true; + } + + bool leaveStruct(const CXXRecordDecl *, FieldDecl *, QualType) final { + --StructDepth; + return true; + } + + bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &BS, + QualType FieldTy) final { + ++StructDepth; + return true; + } + + bool leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &BS, + QualType FieldTy) final { + --StructDepth; + return true; + } + + bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, + QualType FieldTy) final { + const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); + assert(RecordDecl && "The type must be a RecordDecl"); + CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName); + assert(InitMethod && "The type must have the __init method"); + + // Don't do -1 here because we count on this to be the first parameter added + // (if any). + size_t ParamIndex = Params.size(); + for (const ParmVarDecl *Param : InitMethod->parameters()) { + QualType ParamTy = Param->getType(); + addParam(BS, ParamTy.getCanonicalType()); + } + LastParamIndex = ParamIndex; + return true; + } + + bool handleSyclSpecialType(FieldDecl *FD, QualType FieldTy) final { + return handleSpecialType(FD, FieldTy); + } + + RecordDecl *wrapField(FieldDecl *Field, QualType FieldTy) { + RecordDecl *WrapperClass = + SemaRef.getASTContext().buildImplicitRecord("__wrapper_class"); + WrapperClass->startDefinition(); + Field = FieldDecl::Create( + SemaRef.getASTContext(), WrapperClass, SourceLocation(), + SourceLocation(), /*Id=*/nullptr, FieldTy, + SemaRef.getASTContext().getTrivialTypeSourceInfo(FieldTy, + SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + WrapperClass->addDecl(Field); + WrapperClass->completeDefinition(); + return WrapperClass; + }; + + bool handlePointerType(FieldDecl *FD, QualType FieldTy) final { + // USM allows to use raw pointers instead of buffers/accessors, but these + // pointers point to the specially allocated memory. For pointer fields we + // add a kernel argument with the same type as field but global address + // space, because OpenCL requires it. 
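+    // For example (illustrative): a field 'int *Ptr' becomes a kernel
+    // parameter of type '__global int *'; if the field lives inside a
+    // decomposed struct, that pointer is additionally wrapped in the
+    // generated '__wrapper_class' (see the StructDepth check below).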
+ QualType PointeeTy = FieldTy->getPointeeType(); + Qualifiers Quals = PointeeTy.getQualifiers(); + auto AS = Quals.getAddressSpace(); + // Leave global_device and global_host address spaces as is to help FPGA + // device in memory allocations + if (AS != LangAS::opencl_global_device && AS != LangAS::opencl_global_host) + Quals.setAddressSpace(LangAS::opencl_global); + PointeeTy = SemaRef.getASTContext().getQualifiedType( + PointeeTy.getUnqualifiedType(), Quals); + QualType ModTy = SemaRef.getASTContext().getPointerType(PointeeTy); + // When the kernel is generated, struct type kernel arguments are + // decomposed; i.e. the parameters of the kernel are the fields of the + // struct, and not the struct itself. This causes an error in the backend + // when the struct field is a pointer, since non-USM pointers cannot be + // passed directly. To work around this issue, all pointers inside the + // struct are wrapped in a generated '__wrapper_class'. + if (StructDepth) { + RecordDecl *WrappedPointer = wrapField(FD, ModTy); + ModTy = SemaRef.getASTContext().getRecordType(WrappedPointer); + } + + addParam(FD, ModTy); + return true; + } + + bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final { + // Arrays are always wrapped in a struct since they cannot be passed + // directly. + RecordDecl *WrappedArray = wrapField(FD, FieldTy); + QualType ModTy = SemaRef.getASTContext().getRecordType(WrappedArray); + addParam(FD, ModTy); + return true; + } + + bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { + addParam(FD, FieldTy); + return true; + } + + bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, + QualType Ty) final { + addParam(FD, Ty); + return true; + } + + bool handleNonDecompStruct(const CXXRecordDecl *Base, + const CXXBaseSpecifier &BS, QualType Ty) final { + addParam(BS, Ty); + return true; + } + + bool handleUnionType(FieldDecl *FD, QualType FieldTy) final { + return handleScalarType(FD, FieldTy); + } + + bool handleSyclHalfType(FieldDecl *FD, QualType FieldTy) final { + addParam(FD, FieldTy); + return true; + } + + void setBody(CompoundStmt *KB) { KernelDecl->setBody(KB); } + + FunctionDecl *getKernelDecl() { return KernelDecl; } + + llvm::ArrayRef getParamVarDeclsForCurrentField() { + return ArrayRef(std::begin(Params) + LastParamIndex, + std::end(Params)); + } + using SyclKernelFieldHandler::handleSyclHalfType; +}; + +class SyclKernelBodyCreator : public SyclKernelFieldHandler { + SyclKernelDeclCreator &DeclCreator; + llvm::SmallVector BodyStmts; + llvm::SmallVector CollectionInitExprs; + llvm::SmallVector FinalizeStmts; + // This collection contains the information required to add/remove information + // about arrays as we enter them. The InitializedEntity component is + // necessary for initializing child members. uin64_t is the index of the + // current element being worked on, which is updated every time we visit + // nextElement. + llvm::SmallVector, 8> ArrayInfos; + VarDecl *KernelObjClone; + InitializedEntity VarEntity; + const CXXRecordDecl *KernelObj; + llvm::SmallVector MemberExprBases; + FunctionDecl *KernelCallerFunc; + SourceLocation KernelCallerSrcLoc; // KernelCallerFunc source location. + // Contains a count of how many containers we're in. This is used by the + // pointer-struct-wrapping code to ensure that we don't try to wrap + // non-top-level pointers. + uint64_t StructDepth = 0; + + // Using the statements/init expressions that we've created, this generates + // the kernel body compound stmt. 
CompoundStmt needs to know its number of + // statements in advance to allocate it, so we cannot do this as we go along. + CompoundStmt *createKernelBody() { + assert(CollectionInitExprs.size() == 1 && + "Should have been popped down to just the first one"); + KernelObjClone->setInit(CollectionInitExprs.back()); + Stmt *FunctionBody = KernelCallerFunc->getBody(); + + ParmVarDecl *KernelObjParam = *(KernelCallerFunc->param_begin()); + + // DeclRefExpr with valid source location but with decl which is not marked + // as used is invalid. + KernelObjClone->setIsUsed(); + std::pair MappingPair = + std::make_pair(KernelObjParam, KernelObjClone); + + // Push the Kernel function scope to ensure the scope isn't empty + SemaRef.PushFunctionScope(); + KernelBodyTransform KBT(MappingPair, SemaRef); + Stmt *NewBody = KBT.TransformStmt(FunctionBody).get(); + BodyStmts.push_back(NewBody); + + BodyStmts.insert(BodyStmts.end(), FinalizeStmts.begin(), + FinalizeStmts.end()); + return CompoundStmt::Create(SemaRef.getASTContext(), BodyStmts, {}, {}); + } + + // Creates a DeclRefExpr to the ParmVar that represents the current field. + Expr *createParamReferenceExpr() { + ParmVarDecl *KernelParameter = + DeclCreator.getParamVarDeclsForCurrentField()[0]; + + QualType ParamType = KernelParameter->getOriginalType(); + Expr *DRE = SemaRef.BuildDeclRefExpr(KernelParameter, ParamType, VK_LValue, + KernelCallerSrcLoc); + return DRE; + } + + // Creates a DeclRefExpr to the ParmVar that represents the current pointer + // field. + Expr *createPointerParamReferenceExpr(QualType PointerTy, bool Wrapped) { + ParmVarDecl *KernelParameter = + DeclCreator.getParamVarDeclsForCurrentField()[0]; + + QualType ParamType = KernelParameter->getOriginalType(); + Expr *DRE = SemaRef.BuildDeclRefExpr(KernelParameter, ParamType, VK_LValue, + KernelCallerSrcLoc); + + // Struct Type kernel arguments are decomposed. The pointer fields are + // then wrapped inside a compiler generated struct. Therefore when + // generating the initializers, we have to 'unwrap' the pointer. + if (Wrapped) { + CXXRecordDecl *WrapperStruct = ParamType->getAsCXXRecordDecl(); + // Pointer field wrapped inside __wrapper_class + FieldDecl *Pointer = *(WrapperStruct->field_begin()); + DRE = buildMemberExpr(DRE, Pointer); + ParamType = Pointer->getType(); + } + + DRE = ImplicitCastExpr::Create(SemaRef.Context, ParamType, + CK_LValueToRValue, DRE, /*BasePath=*/nullptr, + VK_PRValue, FPOptionsOverride()); + + if (PointerTy->getPointeeType().getAddressSpace() != + ParamType->getPointeeType().getAddressSpace()) + DRE = ImplicitCastExpr::Create(SemaRef.Context, PointerTy, + CK_AddressSpaceConversion, DRE, nullptr, + VK_PRValue, FPOptionsOverride()); + + return DRE; + } + + Expr *createSimpleArrayParamReferenceExpr(QualType ArrayTy) { + ParmVarDecl *KernelParameter = + DeclCreator.getParamVarDeclsForCurrentField()[0]; + QualType ParamType = KernelParameter->getOriginalType(); + Expr *DRE = SemaRef.BuildDeclRefExpr(KernelParameter, ParamType, VK_LValue, + KernelCallerSrcLoc); + + // Unwrap the array. + CXXRecordDecl *WrapperStruct = ParamType->getAsCXXRecordDecl(); + FieldDecl *ArrayField = *(WrapperStruct->field_begin()); + return buildMemberExpr(DRE, ArrayField); + } + + // Returns 'true' if the thing we're visiting (Based on the FD/QualType pair) + // is an element of an array. This will determine whether we do + // MemberExprBases in some cases or not, AND determines how we initialize + // values. 
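+  // For example (illustrative): for a field 'int A[4]' the FieldDecl's type is
+  // 'int[4]' while each visited element has type 'int'; that type mismatch is
+  // what identifies an array element here.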
+ bool isArrayElement(const FieldDecl *FD, QualType Ty) const { + return !SemaRef.getASTContext().hasSameType(FD->getType(), Ty); + } + + // Creates an initialized entity for a field/item. In the case where this is a + // field, returns a normal member initializer, if we're in a sub-array of a MD + // array, returns an element initializer. + InitializedEntity getFieldEntity(FieldDecl *FD, QualType Ty) { + if (isArrayElement(FD, Ty)) + return InitializedEntity::InitializeElement(SemaRef.getASTContext(), + ArrayInfos.back().second, + ArrayInfos.back().first); + return InitializedEntity::InitializeMember(FD, &VarEntity); + } + + void addFieldInit(FieldDecl *FD, QualType Ty, MultiExprArg ParamRef) { + InitializationKind InitKind = + InitializationKind::CreateCopy(KernelCallerSrcLoc, KernelCallerSrcLoc); + addFieldInit(FD, Ty, ParamRef, InitKind); + } + + void addFieldInit(FieldDecl *FD, QualType Ty, MultiExprArg ParamRef, + InitializationKind InitKind) { + addFieldInit(FD, Ty, ParamRef, InitKind, getFieldEntity(FD, Ty)); + } + + void addFieldInit(FieldDecl *FD, QualType Ty, MultiExprArg ParamRef, + InitializationKind InitKind, InitializedEntity Entity) { + InitializationSequence InitSeq(SemaRef, Entity, InitKind, ParamRef); + ExprResult Init = InitSeq.Perform(SemaRef, Entity, InitKind, ParamRef); + + InitListExpr *ParentILE = CollectionInitExprs.back(); + ParentILE->updateInit(SemaRef.getASTContext(), ParentILE->getNumInits(), + Init.get()); + } + + void addBaseInit(const CXXBaseSpecifier &BS, QualType Ty, + InitializationKind InitKind) { + InitializedEntity Entity = InitializedEntity::InitializeBase( + SemaRef.Context, &BS, /*IsInheritedVirtualBase*/ false, &VarEntity); + InitializationSequence InitSeq(SemaRef, Entity, InitKind, None); + ExprResult Init = InitSeq.Perform(SemaRef, Entity, InitKind, None); + + InitListExpr *ParentILE = CollectionInitExprs.back(); + ParentILE->updateInit(SemaRef.getASTContext(), ParentILE->getNumInits(), + Init.get()); + } + + void addSimpleBaseInit(const CXXBaseSpecifier &BS, QualType Ty) { + InitializationKind InitKind = + InitializationKind::CreateCopy(KernelCallerSrcLoc, KernelCallerSrcLoc); + + InitializedEntity Entity = InitializedEntity::InitializeBase( + SemaRef.Context, &BS, /*IsInheritedVirtualBase*/ false, &VarEntity); + + Expr *ParamRef = createParamReferenceExpr(); + InitializationSequence InitSeq(SemaRef, Entity, InitKind, ParamRef); + ExprResult Init = InitSeq.Perform(SemaRef, Entity, InitKind, ParamRef); + + InitListExpr *ParentILE = CollectionInitExprs.back(); + ParentILE->updateInit(SemaRef.getASTContext(), ParentILE->getNumInits(), + Init.get()); + } + + // Adds an initializer that handles a simple initialization of a field. 
+ void addSimpleFieldInit(FieldDecl *FD, QualType Ty) { + Expr *ParamRef = createParamReferenceExpr(); + addFieldInit(FD, Ty, ParamRef); + } + + MemberExpr *buildMemberExpr(Expr *Base, ValueDecl *Member) { + DeclAccessPair MemberDAP = DeclAccessPair::make(Member, AS_none); + MemberExpr *Result = SemaRef.BuildMemberExpr( + Base, /*IsArrow */ false, KernelCallerSrcLoc, NestedNameSpecifierLoc(), + KernelCallerSrcLoc, Member, MemberDAP, + /*HadMultipleCandidates*/ false, + DeclarationNameInfo(Member->getDeclName(), KernelCallerSrcLoc), + Member->getType(), VK_LValue, OK_Ordinary); + return Result; + } + + void addFieldMemberExpr(FieldDecl *FD, QualType Ty) { + if (!isArrayElement(FD, Ty)) + MemberExprBases.push_back(buildMemberExpr(MemberExprBases.back(), FD)); + } + + void removeFieldMemberExpr(const FieldDecl *FD, QualType Ty) { + if (!isArrayElement(FD, Ty)) + MemberExprBases.pop_back(); + } + + void createSpecialMethodCall(const CXXRecordDecl *RD, StringRef MethodName, + SmallVectorImpl &AddTo) { + CXXMethodDecl *Method = getMethodByName(RD, MethodName); + if (!Method) + return; + + unsigned NumParams = Method->getNumParams(); + llvm::SmallVector ParamDREs(NumParams); + llvm::ArrayRef KernelParameters = + DeclCreator.getParamVarDeclsForCurrentField(); + for (size_t I = 0; I < NumParams; ++I) { + QualType ParamType = KernelParameters[I]->getOriginalType(); + ParamDREs[I] = SemaRef.BuildDeclRefExpr(KernelParameters[I], ParamType, + VK_LValue, KernelCallerSrcLoc); + } + + MemberExpr *MethodME = buildMemberExpr(MemberExprBases.back(), Method); + + QualType ResultTy = Method->getReturnType(); + ExprValueKind VK = Expr::getValueKindForType(ResultTy); + ResultTy = ResultTy.getNonLValueExprType(SemaRef.Context); + llvm::SmallVector ParamStmts; + const auto *Proto = cast(Method->getType()); + SemaRef.GatherArgumentsForCall(KernelCallerSrcLoc, Method, Proto, 0, + ParamDREs, ParamStmts); + // [kernel_obj or wrapper object].accessor.__init(_ValueType*, + // range, range, id) + AddTo.push_back(CXXMemberCallExpr::Create( + SemaRef.Context, MethodME, ParamStmts, ResultTy, VK, KernelCallerSrcLoc, + FPOptionsOverride())); + } + + // Creates an empty InitListExpr of the correct number of child-inits + // of this to append into. + void addCollectionInitListExpr(const CXXRecordDecl *RD) { + const ASTRecordLayout &Info = + SemaRef.getASTContext().getASTRecordLayout(RD); + uint64_t NumInitExprs = Info.getFieldCount() + RD->getNumBases(); + addCollectionInitListExpr(QualType(RD->getTypeForDecl(), 0), NumInitExprs); + } + + InitListExpr *createInitListExpr(const CXXRecordDecl *RD) { + const ASTRecordLayout &Info = + SemaRef.getASTContext().getASTRecordLayout(RD); + uint64_t NumInitExprs = Info.getFieldCount() + RD->getNumBases(); + return createInitListExpr(QualType(RD->getTypeForDecl(), 0), NumInitExprs); + } + + InitListExpr *createInitListExpr(QualType InitTy, uint64_t NumChildInits) { + InitListExpr *ILE = new (SemaRef.getASTContext()) InitListExpr( + SemaRef.getASTContext(), KernelCallerSrcLoc, {}, KernelCallerSrcLoc); + ILE->reserveInits(SemaRef.getASTContext(), NumChildInits); + ILE->setType(InitTy); + + return ILE; + } + + // Create an empty InitListExpr of the type/size for the rest of the visitor + // to append into. 
+ void addCollectionInitListExpr(QualType InitTy, uint64_t NumChildInits) { + + InitListExpr *ILE = createInitListExpr(InitTy, NumChildInits); + InitListExpr *ParentILE = CollectionInitExprs.back(); + ParentILE->updateInit(SemaRef.getASTContext(), ParentILE->getNumInits(), + ILE); + + CollectionInitExprs.push_back(ILE); + } + + // FIXME Avoid creation of kernel obj clone. + // See https://github.com/intel/llvm/issues/1544 for details. + static VarDecl *createKernelObjClone(ASTContext &Ctx, DeclContext *DC, + const CXXRecordDecl *KernelObj) { + TypeSourceInfo *TSInfo = + KernelObj->isLambda() ? KernelObj->getLambdaTypeInfo() : nullptr; + VarDecl *VD = VarDecl::Create( + Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), + KernelObj->getIdentifier(), QualType(KernelObj->getTypeForDecl(), 0), + TSInfo, SC_None); + + return VD; + } + + const std::string &getInitMethodName() const { return InitMethodName; } + + // Default inits the type, then calls the init-method in the body. + bool handleSpecialType(FieldDecl *FD, QualType Ty) { + addFieldInit(FD, Ty, None, + InitializationKind::CreateDefault(KernelCallerSrcLoc)); + + addFieldMemberExpr(FD, Ty); + + const auto *RecordDecl = Ty->getAsCXXRecordDecl(); + createSpecialMethodCall(RecordDecl, getInitMethodName(), BodyStmts); + // A finalize-method is expected for special type such as stream. + CXXMethodDecl *FinalizeMethod = + getMethodByName(RecordDecl, FinalizeMethodName); + if (FinalizeMethod) + createSpecialMethodCall(RecordDecl, FinalizeMethodName, FinalizeStmts); + + removeFieldMemberExpr(FD, Ty); + + return true; + } + + bool handleSpecialType(const CXXBaseSpecifier &BS, QualType Ty) { + const auto *RecordDecl = Ty->getAsCXXRecordDecl(); + addBaseInit(BS, Ty, InitializationKind::CreateDefault(KernelCallerSrcLoc)); + createSpecialMethodCall(RecordDecl, getInitMethodName(), BodyStmts); + return true; + } + +public: + static constexpr const bool VisitInsideSimpleContainers = false; + SyclKernelBodyCreator(Sema &S, SyclKernelDeclCreator &DC, + const CXXRecordDecl *KernelObj, + FunctionDecl *KernelCallerFunc) + : SyclKernelFieldHandler(S), DeclCreator(DC), + KernelObjClone(createKernelObjClone(S.getASTContext(), + DC.getKernelDecl(), KernelObj)), + VarEntity(InitializedEntity::InitializeVariable(KernelObjClone)), + KernelObj(KernelObj), KernelCallerFunc(KernelCallerFunc), + KernelCallerSrcLoc(KernelCallerFunc->getLocation()) { + CollectionInitExprs.push_back(createInitListExpr(KernelObj)); + + Stmt *DS = new (S.Context) DeclStmt(DeclGroupRef(KernelObjClone), + KernelCallerSrcLoc, KernelCallerSrcLoc); + BodyStmts.push_back(DS); + DeclRefExpr *KernelObjCloneRef = DeclRefExpr::Create( + S.Context, NestedNameSpecifierLoc(), KernelCallerSrcLoc, KernelObjClone, + false, DeclarationNameInfo(), QualType(KernelObj->getTypeForDecl(), 0), + VK_LValue); + MemberExprBases.push_back(KernelObjCloneRef); + } + + ~SyclKernelBodyCreator() { + CompoundStmt *KernelBody = createKernelBody(); + DeclCreator.setBody(KernelBody); + } + + bool handleSyclSpecialType(FieldDecl *FD, QualType Ty) final { + return handleSpecialType(FD, Ty); + } + + bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS, + QualType Ty) final { + return handleSpecialType(BS, Ty); + } + + bool handleSyclSpecConstantType(FieldDecl *FD, QualType Ty) final { + return handleSpecialType(FD, Ty); + } + + bool handleSyclHalfType(FieldDecl *FD, QualType Ty) final { + addSimpleFieldInit(FD, Ty); + return true; + } + + bool handlePointerType(FieldDecl *FD, QualType FieldTy) 
final { + Expr *PointerRef = + createPointerParamReferenceExpr(FieldTy, StructDepth != 0); + addFieldInit(FD, FieldTy, PointerRef); + return true; + } + + bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final { + Expr *ArrayRef = createSimpleArrayParamReferenceExpr(FieldTy); + InitializationKind InitKind = InitializationKind::CreateDirect({}, {}, {}); + + InitializedEntity Entity = + InitializedEntity::InitializeMember(FD, &VarEntity, /*Implicit*/ true); + + addFieldInit(FD, FieldTy, ArrayRef, InitKind, Entity); + return true; + } + + bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, + QualType Ty) final { + addSimpleFieldInit(FD, Ty); + return true; + } + + bool handleNonDecompStruct(const CXXRecordDecl *Base, + const CXXBaseSpecifier &BS, QualType Ty) final { + addSimpleBaseInit(BS, Ty); + return true; + } + + bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { + addSimpleFieldInit(FD, FieldTy); + return true; + } + + bool handleUnionType(FieldDecl *FD, QualType FieldTy) final { + addSimpleFieldInit(FD, FieldTy); + return true; + } + + bool enterStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final { + ++StructDepth; + // Add a dummy init expression to catch the accessor initializers. + const auto *StreamDecl = Ty->getAsCXXRecordDecl(); + CollectionInitExprs.push_back(createInitListExpr(StreamDecl)); + + addFieldMemberExpr(FD, Ty); + return true; + } + + bool leaveStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final { + --StructDepth; + // Stream requires that its 'init' calls happen after its accessors init + // calls, so add them here instead. + const auto *StreamDecl = Ty->getAsCXXRecordDecl(); + + createSpecialMethodCall(StreamDecl, getInitMethodName(), BodyStmts); + createSpecialMethodCall(StreamDecl, FinalizeMethodName, FinalizeStmts); + + removeFieldMemberExpr(FD, Ty); + + CollectionInitExprs.pop_back(); + return true; + } + + bool enterStruct(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final { + ++StructDepth; + addCollectionInitListExpr(Ty->getAsCXXRecordDecl()); + + addFieldMemberExpr(FD, Ty); + return true; + } + + bool leaveStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { + --StructDepth; + CollectionInitExprs.pop_back(); + + removeFieldMemberExpr(FD, Ty); + return true; + } + + bool enterStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS, + QualType) final { + ++StructDepth; + + CXXCastPath BasePath; + QualType DerivedTy(RD->getTypeForDecl(), 0); + QualType BaseTy = BS.getType(); + SemaRef.CheckDerivedToBaseConversion(DerivedTy, BaseTy, KernelCallerSrcLoc, + SourceRange(), &BasePath, + /*IgnoreBaseAccess*/ true); + auto *Cast = ImplicitCastExpr::Create( + SemaRef.Context, BaseTy, CK_DerivedToBase, MemberExprBases.back(), + /* CXXCastPath=*/&BasePath, VK_LValue, FPOptionsOverride()); + MemberExprBases.push_back(Cast); + + addCollectionInitListExpr(BaseTy->getAsCXXRecordDecl()); + return true; + } + + bool leaveStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS, + QualType) final { + --StructDepth; + MemberExprBases.pop_back(); + CollectionInitExprs.pop_back(); + return true; + } + + bool enterArray(FieldDecl *FD, QualType ArrayType, + QualType ElementType) final { + uint64_t ArraySize = SemaRef.getASTContext() + .getAsConstantArrayType(ArrayType) + ->getSize() + .getZExtValue(); + addCollectionInitListExpr(ArrayType, ArraySize); + ArrayInfos.emplace_back(getFieldEntity(FD, ArrayType), 0); + + // If this is the top-level array, we need to make a MemberExpr in addition + // 
to an array subscript. + addFieldMemberExpr(FD, ArrayType); + return true; + } + + bool nextElement(QualType, uint64_t Index) final { + ArrayInfos.back().second = Index; + + // Pop off the last member expr base. + if (Index != 0) + MemberExprBases.pop_back(); + + QualType SizeT = SemaRef.getASTContext().getSizeType(); + + llvm::APInt IndexVal{ + static_cast(SemaRef.getASTContext().getTypeSize(SizeT)), + Index, SizeT->isSignedIntegerType()}; + + auto *IndexLiteral = IntegerLiteral::Create( + SemaRef.getASTContext(), IndexVal, SizeT, KernelCallerSrcLoc); + + ExprResult IndexExpr = SemaRef.CreateBuiltinArraySubscriptExpr( + MemberExprBases.back(), KernelCallerSrcLoc, IndexLiteral, + KernelCallerSrcLoc); + + assert(!IndexExpr.isInvalid()); + MemberExprBases.push_back(IndexExpr.get()); + return true; + } + + bool leaveArray(FieldDecl *FD, QualType ArrayType, + QualType ElementType) final { + CollectionInitExprs.pop_back(); + ArrayInfos.pop_back(); + + assert( + !SemaRef.getASTContext().getAsConstantArrayType(ArrayType)->getSize() == + 0 && + "Constant arrays must have at least 1 element"); + // Remove the IndexExpr. + MemberExprBases.pop_back(); + + // Remove the field access expr as well. + removeFieldMemberExpr(FD, ArrayType); + return true; + } + + using SyclKernelFieldHandler::handleSyclHalfType; +}; + +} // anonymous namespace + +static bool isZeroSizedArray(QualType Ty) { + if (const auto *CATy = dyn_cast(Ty)) + return CATy->getSize() == 0; + return false; +} + +static void checkSYCLType(Sema &S, QualType Ty, SourceRange Loc, + llvm::DenseSet Visited, + SourceRange UsedAtLoc = SourceRange()) { + // Not all variable types are supported inside SYCL kernels, + // for example the quad type __float128 will cause errors in the + // SPIR-V translation phase. + // Here we check any potentially unsupported declaration and issue + // a deferred diagnostic, which will be emitted iff the declaration + // is discovered to reside in kernel code. + // The optional UsedAtLoc param is used when the SYCL usage is at a + // different location than the variable declaration and we need to + // inform the user of both, e.g. struct member usage vs declaration. + + bool Emitting = false; + + //--- check types --- + + // zero length arrays + if (isZeroSizedArray(Ty)) { + S.SYCLDiagIfDeviceCode(Loc.getBegin(), diag::err_typecheck_zero_array_size); + Emitting = true; + } + + // variable length arrays + if (Ty->isVariableArrayType()) { + S.SYCLDiagIfDeviceCode(Loc.getBegin(), diag::err_vla_unsupported); + Emitting = true; + } + + // Sub-reference array or pointer, then proceed with that type. + while (Ty->isAnyPointerType() || Ty->isArrayType()) + Ty = QualType{Ty->getPointeeOrArrayElementType(), 0}; + + if (Emitting && UsedAtLoc.isValid()) + S.SYCLDiagIfDeviceCode(UsedAtLoc.getBegin(), diag::note_used_here); + + //--- now recurse --- + // Pointers complicate recursion. Add this type to Visited. + // If already there, bail out. 
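+  // (e.g. 'struct Node { Node *Next; };' would otherwise recurse forever).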
+  if (!Visited.insert(Ty).second)
+    return;
+
+  if (const auto *ATy = dyn_cast<AttributedType>(Ty))
+    return checkSYCLType(S, ATy->getModifiedType(), Loc, Visited);
+
+  if (const auto *RD = Ty->getAsRecordDecl()) {
+    for (const auto &Field : RD->fields())
+      checkSYCLType(S, Field->getType(), Field->getSourceRange(), Visited, Loc);
+  } else if (const auto *FPTy = dyn_cast<FunctionProtoType>(Ty)) {
+    for (const auto &ParamTy : FPTy->param_types())
+      checkSYCLType(S, ParamTy, Loc, Visited);
+    checkSYCLType(S, FPTy->getReturnType(), Loc, Visited);
+  }
+}
+
+void Sema::checkSYCLDeviceVarDecl(VarDecl *Var) {
+  assert(getLangOpts().SYCLIsDevice &&
+         "Should only be called during SYCL compilation");
+  QualType Ty = Var->getType();
+  SourceRange Loc = Var->getLocation();
+  llvm::DenseSet<QualType> Visited;
+
+  checkSYCLType(*this, Ty, Loc, Visited);
+}
+
+// Generates the OpenCL kernel using KernelCallerFunc (the kernel caller
+// function) defined in SYCL headers.
+// The generated OpenCL kernel contains the body of the kernel caller function,
+// receives OpenCL-like parameters and additionally does some manipulation to
+// initialize captured lambda/functor fields with these parameters.
+// The SYCL runtime marks the kernel caller function with the sycl_kernel
+// attribute. To be able to generate an OpenCL kernel from KernelCallerFunc,
+// we impose the following requirements on the function which the SYCL runtime
+// can mark with the sycl_kernel attribute:
+//   - It must be a template function with at least two template parameters:
+//     the first parameter must represent the "unique kernel name" and the
+//     second parameter must be the function object type.
+//   - It must have only one function parameter - the function object.
+//
+// Example of a kernel caller function:
+//   template <typename KernelName, typename KernelType>
+//   __attribute__((sycl_kernel)) void kernel_caller_function(KernelType
+//                                                            KernelFuncObj) {
+//     KernelFuncObj();
+//   }
+//
+void Sema::constructOpenCLKernel(FunctionDecl *KernelCallerFunc,
+                                 MangleContext &MC) {
+  // The first argument to the KernelCallerFunc is the lambda object.
+  const CXXRecordDecl *KernelObj = getKernelObjectType(KernelCallerFunc);
+  assert(KernelObj && "invalid kernel caller");
+
+  // Do not visit an invalid kernel object.
+  if (KernelObj->isInvalidDecl())
+    return;
+
+  std::string KernelName = constructKernelName(*this, KernelCallerFunc, MC);
+  SyclKernelDeclCreator kernel_decl(*this, KernelName, KernelObj->getLocation(),
+                                    KernelCallerFunc->isInlined());
+  SyclKernelBodyCreator kernel_body(*this, kernel_decl, KernelObj,
+                                    KernelCallerFunc);
+
+  KernelObjVisitor Visitor{*this};
+  Visitor.VisitRecordBases(KernelObj, kernel_decl, kernel_body);
+  Visitor.VisitRecordFields(KernelObj, kernel_decl, kernel_body);
+}
 
 // -----------------------------------------------------------------------------
 // SYCL device specific diagnostics implementation
@@ -38,13 +1537,91 @@ "Should only be called during SYCL compilation");
   assert(Callee && "Callee may not be null.");
 
-  // Errors in an unevaluated context don't need to be generated,
+  // Errors in unevaluated context don't need to be generated,
   // so we can safely skip them.
   if (isUnevaluatedContext() || isConstantEvaluated())
     return true;
 
+  FunctionDecl *Caller = dyn_cast<FunctionDecl>(getCurLexicalContext());
+
+  if (!Caller)
+    return true;
+
   SemaDiagnosticBuilder::Kind DiagKind = SemaDiagnosticBuilder::K_Nop;
 
   return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
          DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
 }
+
+// -----------------------------------------------------------------------------
+// Utility class methods
+// -----------------------------------------------------------------------------
+bool Util::isSyclSpecialType(const QualType Ty) {
+  const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl();
+  if (!RecTy)
+    return false;
+  return RecTy->hasAttr<SYCLSpecialClassAttr>();
+}
+
+bool Util::isSyclHalfType(const QualType Ty) {
+  StringRef Name = "half";
+  std::array<Util::DeclContextDesc, 5> Scopes = {
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"},
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"},
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "detail"},
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "half_impl"},
+      Util::DeclContextDesc{Decl::Kind::CXXRecord, Name}};
+  return matchQualifiedTypeName(Ty, Scopes);
+}
+
+bool Util::isSyclType(const QualType &Ty, StringRef Name, bool Tmpl) {
+  Decl::Kind ClassDeclKind =
+      Tmpl ? Decl::Kind::ClassTemplateSpecialization : Decl::Kind::CXXRecord;
+  std::array<Util::DeclContextDesc, 3> Scopes = {
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"},
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"},
+      Util::DeclContextDesc{ClassDeclKind, Name}};
+  return matchQualifiedTypeName(Ty, Scopes);
+}
+
+bool Util::matchContext(const DeclContext *Ctx,
+                        ArrayRef<Util::DeclContextDesc> Scopes) {
+  // The idea: check the declaration context chain starting from the item
+  // itself. At each step check that the context is of the expected kind
+  // (namespace or record) and has the expected name.
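+  // For example, for cl::sycl::detail::half_impl::half the caller passes
+  // Scopes = {{Namespace, "cl"}, {Namespace, "sycl"}, {Namespace, "detail"},
+  // {Namespace, "half_impl"}, {CXXRecord, "half"}}; we then walk the chain
+  // half -> half_impl -> detail -> sycl -> cl in reverse order of Scopes and
+  // finally require the outermost parent to be the translation unit.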
+  StringRef Name = "";
+
+  for (const auto &Scope : llvm::reverse(Scopes)) {
+    clang::Decl::Kind DK = Ctx->getDeclKind();
+    if (DK != Scope.first)
+      return false;
+
+    switch (DK) {
+    case clang::Decl::Kind::ClassTemplateSpecialization:
+      // ClassTemplateSpecializationDecl inherits from CXXRecordDecl.
+    case clang::Decl::Kind::CXXRecord:
+      Name = cast<CXXRecordDecl>(Ctx)->getName();
+      break;
+    case clang::Decl::Kind::Namespace:
+      Name = cast<NamespaceDecl>(Ctx)->getName();
+      break;
+    default:
+      llvm_unreachable("matchContext: decl kind not supported");
+    }
+    if (Name != Scope.second)
+      return false;
+    Ctx = Ctx->getParent();
+  }
+  return Ctx->isTranslationUnit();
+}
+
+bool Util::matchQualifiedTypeName(const QualType &Ty,
+                                  ArrayRef<Util::DeclContextDesc> Scopes) {
+  const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl();
+
+  if (!RecTy)
+    return false; // only classes/structs supported
+  const auto *Ctx = cast<DeclContext>(RecTy);
+  return Util::matchContext(Ctx, Scopes);
+}
+
Index: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
===================================================================
--- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -18,6 +18,7 @@
 #include "clang/AST/DependentDiagnostic.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/Mangle.h"
 #include "clang/AST/PrettyDeclStackTrace.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/SourceManager.h"
@@ -6266,9 +6267,25 @@
   return D;
 }
 
+static void processFunctionInstantiation(Sema &S,
+                                         SourceLocation PointOfInstantiation,
+                                         FunctionDecl *FD,
+                                         bool DefinitionRequired,
+                                         MangleContext &MC) {
+  S.InstantiateFunctionDefinition(/*FIXME:*/ PointOfInstantiation, FD, true,
+                                  DefinitionRequired, true);
+  if (!FD->isDefined())
+    return;
+  if (FD->hasAttr<SYCLKernelAttr>())
+    S.constructOpenCLKernel(FD, MC);
+  FD->setInstantiationIsPending(false);
+}
+
 /// Performs template instantiation for all implicit template
 /// instantiations we have seen until this point.
 void Sema::PerformPendingInstantiations(bool LocalOnly) {
+  std::unique_ptr<MangleContext> MangleCtx(
+      getASTContext().createMangleContext());
   std::deque<PendingImplicitInstantiation> delayedPCHInstantiations;
   while (!PendingLocalImplicitInstantiations.empty() ||
          (!LocalOnly && !PendingInstantiations.empty())) {
@@ -6286,20 +6303,16 @@
     if (FunctionDecl *Function = dyn_cast<FunctionDecl>(Inst.first)) {
       bool DefinitionRequired = Function->getTemplateSpecializationKind() ==
                                 TSK_ExplicitInstantiationDefinition;
-      if (Function->isMultiVersion()) {
+      if (Function->isMultiVersion())
         getASTContext().forEachMultiversionedFunctionVersion(
-            Function, [this, Inst, DefinitionRequired](FunctionDecl *CurFD) {
-              InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, CurFD, true,
-                                            DefinitionRequired, true);
-              if (CurFD->isDefined())
-                CurFD->setInstantiationIsPending(false);
+            Function, [this, Inst, DefinitionRequired,
+                       &MangleCtx](FunctionDecl *CurFD) {
+              processFunctionInstantiation(*this, Inst.second, CurFD,
+                                           DefinitionRequired, *MangleCtx);
             });
-      } else {
-        InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, Function, true,
-                                      DefinitionRequired, true);
-        if (Function->isDefined())
-          Function->setInstantiationIsPending(false);
-      }
+      else
+        processFunctionInstantiation(*this, Inst.second, Function,
+                                     DefinitionRequired, *MangleCtx);
 
       // Definition of a PCH-ed template declaration may be available only in the TU.
       if (!LocalOnly && LangOpts.PCHInstantiateTemplates && TUKind == TU_Prefix &&
           Function->instantiationIsPending())
Index: clang/lib/Sema/SemaType.cpp
===================================================================
--- clang/lib/Sema/SemaType.cpp
+++ clang/lib/Sema/SemaType.cpp
@@ -2535,7 +2535,8 @@
     }
   }
 
-  if (T->isVariableArrayType() && !Context.getTargetInfo().isVLASupported()) {
+  if (T->isVariableArrayType() && !Context.getTargetInfo().isVLASupported() &&
+      !getLangOpts().SYCLIsDevice) {
     // CUDA device code and some other targets don't support VLAs.
     targetDiag(Loc, (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
                         ? diag::err_cuda_vla
Index: clang/test/CodeGenSYCL/Inputs/sycl.hpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenSYCL/Inputs/sycl.hpp
@@ -0,0 +1,86 @@
+#pragma once
+
+namespace cl {
+namespace sycl {
+namespace access {
+
+enum class target {
+  global_buffer = 2014,
+  constant_buffer,
+  local,
+  image,
+  host_buffer,
+  host_image,
+  image_array
+};
+
+enum class mode {
+  read = 1024,
+  write,
+  read_write,
+  discard_write,
+  discard_read_write,
+  atomic
+};
+
+enum class placeholder {
+  false_t,
+  true_t
+};
+
+enum class address_space : int {
+  private_space = 0,
+  global_space,
+  constant_space,
+  local_space
+};
+} // namespace access
+
+template <int dim>
+struct id {
+  template <typename... T>
+  id(T... args) {} // fake constructor
+private:
+  // Some fake field added to see using of id arguments in the
+  // kernel wrapper
+  int Data;
+};
+
+template <int dim>
+struct range {
+  template <typename... T>
+  range(T... args) {} // fake constructor
+private:
+  // Some fake field added to see using of range arguments in the
+  // kernel wrapper
+  int Data;
+};
+
+template <int dim>
+struct _ImplT {
+  range<dim> AccessRange;
+  range<dim> MemRange;
+  id<dim> Offset;
+};
+
+template <typename dataT, int dimensions, access::mode accessmode,
+          access::target accessTarget = access::target::global_buffer,
+          access::placeholder isPlaceholder = access::placeholder::false_t>
+class __attribute__((sycl_special_class)) accessor {
+
+public:
+  void use(void) const {}
+  template <typename... T>
+  void use(T... args) {}
+  template <typename... T>
+  void use(T... args) const {}
+  _ImplT<dimensions> impl;
+
+private:
+  void __init(__attribute__((opencl_global)) dataT *Ptr,
+              range<dimensions> AccessRange,
+              range<dimensions> MemRange, id<dimensions> Offset) {}
+};
+
+} // namespace sycl
+} // namespace cl
Index: clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s
+
+// This test checks that the compiler generates a correct kernel wrapper for
+// the basic case.
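+//
+// Roughly speaking, the wrapper that constructOpenCLKernel() builds for this
+// test behaves like the sketch below (this is only an illustration of the
+// generated AST, not code the compiler literally emits; the names "wrapper"
+// and "Lambda" are placeholders):
+//
+//   spir_kernel void wrapper(__global int *Ptr, range<1> AccessRange,
+//                            range<1> MemRange, id<1> Offset) {
+//     Lambda L;                                          // captured object
+//     L.accessorA.__init(Ptr, AccessRange, MemRange, Offset);
+//     L();                                               // original kernel body
+//   }
+//
+// The CHECK lines below verify the wrapper's parameter list, the accessor
+// __init call, and the call to the lambda's operator().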
+ +#include "Inputs/sycl.hpp" + +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +int main() { + cl::sycl::accessor accessorA; + kernel( + [=]() { + accessorA.use(); + }); + return 0; +} + +// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_function +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE:%[a-zA-Z0-9_]+_1]], +// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE:%[a-zA-Z0-9_]+_2]], +// CHECK-SAME: %"struct.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET:%[a-zA-Z0-9_]+]]) +// Check alloca for pointer argument +// CHECK: [[MEM_ARG]].addr = alloca i32 addrspace(1)* +// Check lambda object alloca +// CHECK: [[ANONALLOCA:%[0-9]+]] = alloca %class.anon +// CHECK: [[ARANGEA:%agg.tmp]] = alloca %"struct.cl::sycl::range" +// CHECK: [[MRANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range" +// CHECK: [[OIDA:%agg.tmp.*]] = alloca %"struct.cl::sycl::id" +// CHECK: [[ANON:%[0-9]+]] = addrspacecast %class.anon* [[ANONALLOCA]] to %class.anon addrspace(4)* +// CHECK: [[ARANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[ARANGEA]] to %"struct.cl::sycl::range" addrspace(4)* +// CHECK: [[MRANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[MRANGEA]] to %"struct.cl::sycl::range" addrspace(4)* +// CHECK: [[OIDT:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::id"* [[OIDA]] to %"struct.cl::sycl::id" addrspace(4)* +// +// Check store of kernel pointer argument to alloca +// CHECK: store i32 addrspace(1)* [[MEM_ARG]], i32 addrspace(1)* addrspace(4)* [[MEM_ARG]].addr.ascast, align 8 + +// Check for default constructor of accessor +// CHECK: call spir_func {{.*}}accessor + +// Check accessor GEP +// CHECK: [[ACCESSOR:%[a-zA-Z0-9_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* [[ANON]], i32 0, i32 0 + +// Check load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[MEM_ARG]].addr.ascast + +// Check accessor __init method call +// CHECK: [[ARANGE:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range" addrspace(4)* [[ARANGET]] to %"struct.cl::sycl::range"* +// CHECK: [[MRANGE:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range" addrspace(4)* [[MRANGET]] to %"struct.cl::sycl::range"* +// CHECK: [[OID:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::id" addrspace(4)* [[OIDT]] to %"struct.cl::sycl::id"* +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.cl::sycl::accessor" addrspace(4)* {{[^,]*}} [[ACCESSOR]], i32 addrspace(1)* [[MEM_LOAD]], %"struct.cl::sycl::range"* byval({{.*}}) align 4 [[ARANGE]], %"struct.cl::sycl::range"* byval({{.*}}) align 4 [[MRANGE]], %"struct.cl::sycl::id"* byval({{.*}}) align 4 [[OID]]) + +// Check lambda "()" operator call +// CHECK: call spir_func void @{{.*}}(%class.anon addrspace(4)* {{[^,]*}}) Index: clang/test/CodeGenSYCL/device-functions.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenSYCL/device-functions.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s + +// This test checks that the compiler emits functions called from a SYCL kernel +// and doesn't emit functions not called from any SYCL kernel + +template +T bar(T arg); + +void foo() { + int a = 1 + 1 + bar(1); +} + +template +T bar(T arg) { + return arg; +} + +template 
+__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { + kernelFunc(); +} + +// Make sure that definitions for the types not used in SYCL kernels are not +// emitted +// CHECK-NOT: %struct.A +// CHECK-NOT: @a = {{.*}} %struct.A +struct A { + int x = 10; +} a; + +int main() { + a.x = 8; + kernel_single_task([]() { foo(); }); + return 0; +} + +// baz is not called from the SYCL kernel, so it must not be emitted +// CHECK-NOT: define {{.*}} @{{.*}}baz +void baz() {} + +// CHECK-LABEL: define dso_local spir_kernel void @{{.*}}test_kernel +// CHECK-LABEL: define internal spir_func void @_ZZ4mainENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %this) +// CHECK-LABEL: define {{.*}}spir_func void @_Z3foov() +// CHECK-LABEL: define linkonce_odr spir_func i32 @_Z3barIiET_S0_(i32 %arg) Index: clang/test/CodeGenSYCL/unique_stable_name.cpp =================================================================== --- clang/test/CodeGenSYCL/unique_stable_name.cpp +++ clang/test/CodeGenSYCL/unique_stable_name.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-pc -fsycl-is-host -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s // CHECK: @[[LAMBDA_KERNEL3:[^\w]+]] = private unnamed_addr constant [[LAMBDA_K3_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ4mainEUlPZ4mainEUlvE_E_\00" // CHECK: @[[INT1:[^\w]+]] = private unnamed_addr constant [[INT_SIZE:\[[0-9]+ x i8\]]] c"_ZTSi\00" // CHECK: @[[STRING:[^\w]+]] = private unnamed_addr constant [[STRING_SIZE:\[[0-9]+ x i8\]]] c"_ZTSAppL_ZZ4mainE1jE_i\00", @@ -65,95 +65,105 @@ kernelFunc(); } +template +void unnamed_kernel_single_task(KernelType kernelFunc) { + kernel_single_task(kernelFunc); +} + +template +void not_kernel_single_task(KernelType kernelFunc) { + kernelFunc(); +} + int main() { - kernel_single_task(func); - // CHECK: call spir_func void @_Z18kernel_single_taskIZ4mainE7kernel2PFPKcvEEvT0_(i8 addrspace(4)* ()* @_Z4funcI4DerpEDTu33__builtin_sycl_unique_stable_nameDtsrT_3strEEEv) + not_kernel_single_task(func); + // CHECK: call void @_Z22not_kernel_single_taskIZ4mainE7kernel2PFPKcvEEvT0_(i8* ()* @_Z4funcI4DerpEDTu33__builtin_sycl_unique_stable_nameDtsrT_3strEEEv) auto l1 = []() { return 1; }; auto l2 = [](decltype(l1) *l = nullptr) { return 2; }; - kernel_single_task(l2); + kernel_single_task(l2); puts(__builtin_sycl_unique_stable_name(decltype(l2))); - // CHECK: call spir_func void @_Z18kernel_single_taskIZ4mainE7kernel3Z4mainEUlPZ4mainEUlvE_E_EvT0_ - // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[LAMBDA_K3_SIZE]], [[LAMBDA_K3_SIZE]]* @[[LAMBDA_KERNEL3]], i32 0, i32 0) to i8 addrspace(4)*)) + // CHECK: call void @_Z18kernel_single_taskIZ4mainEUlPZ4mainEUlvE_E_S2_EvT0_ + // CHECK: call void @puts(i8* getelementptr inbounds ([[LAMBDA_K3_SIZE]], [[LAMBDA_K3_SIZE]]* @[[LAMBDA_KERNEL3]], i32 0, i32 0)) constexpr const char str[] = "lalala"; static_assert(__builtin_strcmp(__builtin_sycl_unique_stable_name(decltype(str)), "_ZTSA7_Kc\0") == 0, "unexpected mangling"); int i = 0; puts(__builtin_sycl_unique_stable_name(decltype(i++))); - // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT1]], i32 0, i32 0) to i8 addrspace(4)*)) + // CHECK: call void @puts(i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT1]], i32 0, i32 0)) // FIXME: Ensure that j is incremented because VLAs 
are terrible. int j = 55; puts(__builtin_sycl_unique_stable_name(int[++j])); - // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[STRING_SIZE]], [[STRING_SIZE]]* @[[STRING]], i32 0, i32 0) to i8 addrspace(4)*)) + // CHECK: call void @puts(i8* getelementptr inbounds ([[STRING_SIZE]], [[STRING_SIZE]]* @[[STRING]], i32 0, i32 0)) - // CHECK: define internal spir_func void @_Z18kernel_single_taskIZ4mainE7kernel2PFPKcvEEvT0_ - // CHECK: declare spir_func i8 addrspace(4)* @_Z4funcI4DerpEDTu33__builtin_sycl_unique_stable_nameDtsrT_3strEEEv - // CHECK: define internal spir_func void @_Z18kernel_single_taskIZ4mainE7kernel3Z4mainEUlPZ4mainEUlvE_E_EvT0_ - // CHECK: define internal spir_func void @_Z18kernel_single_taskIZ4mainE6kernelZ4mainEUlvE0_EvT0_ + // CHECK: define internal void @_Z22not_kernel_single_taskIZ4mainE7kernel2PFPKcvEEvT0_ + // CHECK: declare i8* @_Z4funcI4DerpEDTu33__builtin_sycl_unique_stable_nameDtsrT_3strEEEv + // CHECK: define internal void @_Z18kernel_single_taskIZ4mainEUlPZ4mainEUlvE_E_S2_EvT0_ + // CHECK: define internal void @_Z18kernel_single_taskIZ4mainEUlvE0_S0_EvT0_ - kernel_single_task( + unnamed_kernel_single_task( []() { puts(__builtin_sycl_unique_stable_name(int)); - // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT2]], i32 0, i32 0) to i8 addrspace(4)*)) + // CHECK: call void @puts(i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT2]], i32 0, i32 0)) auto x = []() {}; puts(__builtin_sycl_unique_stable_name(decltype(x))); - // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[LAMBDA_X_SIZE]], [[LAMBDA_X_SIZE]]* @[[LAMBDA_X]], i32 0, i32 0) to i8 addrspace(4)*)) + // CHECK: call void @puts(i8* getelementptr inbounds ([[LAMBDA_X_SIZE]], [[LAMBDA_X_SIZE]]* @[[LAMBDA_X]], i32 0, i32 0)) DEF_IN_MACRO(); - // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]]* @[[MACRO_X]], i32 0, i32 0) to i8 addrspace(4)*)) - // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]]* @[[MACRO_Y]], i32 0, i32 0) to i8 addrspace(4)*)) + // CHECK: call void @puts(i8* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]]* @[[MACRO_X]], i32 0, i32 0)) + // CHECK: call void @puts(i8* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]]* @[[MACRO_Y]], i32 0, i32 0)) MACRO_CALLS_MACRO(); - // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]]* @[[MACRO_MACRO_X]], i32 0, i32 0) to i8 addrspace(4)*)) - // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]]* @[[MACRO_MACRO_Y]], i32 0, i32 0) to i8 addrspace(4)*)) + // CHECK: call void @puts(i8* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]]* @[[MACRO_MACRO_X]], i32 0, i32 0)) + // CHECK: call void @puts(i8* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]]* @[[MACRO_MACRO_Y]], i32 0, i32 0)) template_param(); - // CHECK: call spir_func void @_Z14template_paramIiEvv + // CHECK: call void @_Z14template_paramIiEvv template_param(); - // CHECK: call spir_func void @_Z14template_paramIZZ4mainENKUlvE0_clEvEUlvE_Evv + // CHECK: call void @_Z14template_paramIZZ4mainENKUlvE0_clEvEUlvE_Evv lambda_in_dependent_function(); - // CHECK: 
call spir_func void @_Z28lambda_in_dependent_functionIiEvv + // CHECK: call void @_Z28lambda_in_dependent_functionIiEvv lambda_in_dependent_function(); - // CHECK: call spir_func void @_Z28lambda_in_dependent_functionIZZ4mainENKUlvE0_clEvEUlvE_Evv + // CHECK: call void @_Z28lambda_in_dependent_functionIZZ4mainENKUlvE0_clEvEUlvE_Evv lambda_no_dep(3, 5.5); - // CHECK: call spir_func void @_Z13lambda_no_depIidEvT_T0_(i32 3, double 5.500000e+00) + // CHECK: call void @_Z13lambda_no_depIidEvT_T0_(i32 3, double 5.500000e+00) int a = 5; double b = 10.7; auto y = [](int a) { return a; }; auto z = [](double b) { return b; }; lambda_two_dep(); - // CHECK: call spir_func void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUliE_ZZ4mainENKS0_clEvEUldE_Evv + // CHECK: call void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUliE_ZZ4mainENKS0_clEvEUldE_Evv lambda_two_dep(); - // CHECK: call spir_func void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUldE_ZZ4mainENKS0_clEvEUliE_Evv + // CHECK: call void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUldE_ZZ4mainENKS0_clEvEUliE_Evv }); } -// CHECK: define linkonce_odr spir_func void @_Z14template_paramIiEvv -// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT3]], i32 0, i32 0) to i8 addrspace(4)*)) +// CHECK: define linkonce_odr void @_Z14template_paramIiEvv +// CHECK: call void @puts(i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT3]], i32 0, i32 0)) -// CHECK: define internal spir_func void @_Z14template_paramIZZ4mainENKUlvE0_clEvEUlvE_Evv -// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[LAMBDA_SIZE]], [[LAMBDA_SIZE]]* @[[LAMBDA]], i32 0, i32 0) to i8 addrspace(4)*)) +// CHECK: define internal void @_Z14template_paramIZZ4mainENKUlvE0_clEvEUlvE_Evv +// CHECK: call void @puts(i8* getelementptr inbounds ([[LAMBDA_SIZE]], [[LAMBDA_SIZE]]* @[[LAMBDA]], i32 0, i32 0)) -// CHECK: define linkonce_odr spir_func void @_Z28lambda_in_dependent_functionIiEvv -// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[DEP_INT_SIZE]], [[DEP_INT_SIZE]]* @[[LAMBDA_IN_DEP_INT]], i32 0, i32 0) to i8 addrspace(4)*)) +// CHECK: define linkonce_odr void @_Z28lambda_in_dependent_functionIiEvv +// CHECK: call void @puts(i8* getelementptr inbounds ([[DEP_INT_SIZE]], [[DEP_INT_SIZE]]* @[[LAMBDA_IN_DEP_INT]], i32 0, i32 0)) -// CHECK: define internal spir_func void @_Z28lambda_in_dependent_functionIZZ4mainENKUlvE0_clEvEUlvE_Evv -// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[DEP_LAMBDA_SIZE]], [[DEP_LAMBDA_SIZE]]* @[[LAMBDA_IN_DEP_X]], i32 0, i32 0) to i8 addrspace(4)*)) +// CHECK: define internal void @_Z28lambda_in_dependent_functionIZZ4mainENKUlvE0_clEvEUlvE_Evv +// CHECK: call void @puts(i8* getelementptr inbounds ([[DEP_LAMBDA_SIZE]], [[DEP_LAMBDA_SIZE]]* @[[LAMBDA_IN_DEP_X]], i32 0, i32 0)) -// CHECK: define linkonce_odr spir_func void @_Z13lambda_no_depIidEvT_T0_(i32 %a, double %b) -// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[NO_DEP_LAMBDA_SIZE]], [[NO_DEP_LAMBDA_SIZE]]* @[[LAMBDA_NO_DEP]], i32 0, i32 0) to i8 addrspace(4)*)) +// CHECK: define linkonce_odr void @_Z13lambda_no_depIidEvT_T0_(i32 %a, double %b) +// CHECK: call void @puts(i8* getelementptr inbounds ([[NO_DEP_LAMBDA_SIZE]], [[NO_DEP_LAMBDA_SIZE]]* @[[LAMBDA_NO_DEP]], i32 0, i32 0)) -// CHECK: define internal spir_func void 
@_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUliE_ZZ4mainENKS0_clEvEUldE_Evv -// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[DEP_LAMBDA1_SIZE]], [[DEP_LAMBDA1_SIZE]]* @[[LAMBDA_TWO_DEP]], i32 0, i32 0) to i8 addrspace(4)*)) +// CHECK: define internal void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUliE_ZZ4mainENKS0_clEvEUldE_Evv +// CHECK: call void @puts(i8* getelementptr inbounds ([[DEP_LAMBDA1_SIZE]], [[DEP_LAMBDA1_SIZE]]* @[[LAMBDA_TWO_DEP]], i32 0, i32 0)) -// CHECK: define internal spir_func void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUldE_ZZ4mainENKS0_clEvEUliE_Evv -// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[DEP_LAMBDA2_SIZE]], [[DEP_LAMBDA2_SIZE]]* @[[LAMBDA_TWO_DEP2]], i32 0, i32 0) to i8 addrspace(4)*)) +// CHECK: define internal void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUldE_ZZ4mainENKS0_clEvEUliE_Evv +// CHECK: call void @puts(i8* getelementptr inbounds ([[DEP_LAMBDA2_SIZE]], [[DEP_LAMBDA2_SIZE]]* @[[LAMBDA_TWO_DEP2]], i32 0, i32 0)) Index: clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp =================================================================== --- clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp +++ clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp @@ -1,6 +1,5 @@ -// RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s - +// RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s --check-prefixes=WIN,CHECK template __attribute__((sycl_kernel)) void kernel(Func F){ Index: clang/test/Misc/pragma-attribute-supported-attributes-list.test =================================================================== --- clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -154,6 +154,7 @@ // CHECK-NEXT: ReturnTypestate (SubjectMatchRule_function, SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: ReturnsNonNull (SubjectMatchRule_objc_method, SubjectMatchRule_function) // CHECK-NEXT: ReturnsTwice (SubjectMatchRule_function) +// CHECK-NEXT: SYCLSpecialClass (SubjectMatchRule_record) // CHECK-NEXT: ScopedLockable (SubjectMatchRule_record) // CHECK-NEXT: Section (SubjectMatchRule_function, SubjectMatchRule_variable_is_global, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property) // CHECK-NEXT: SetTypestate (SubjectMatchRule_function_is_member) Index: clang/test/SemaSYCL/Inputs/sycl.hpp =================================================================== --- /dev/null +++ clang/test/SemaSYCL/Inputs/sycl.hpp @@ -0,0 +1,86 @@ +namespace cl { +namespace sycl { +namespace access { + +enum class target { + global_buffer = 2014, + constant_buffer, + local, + image, + host_buffer, + host_image, + image_array +}; + +enum class mode { + read = 1024, + write, + read_write, + discard_write, + discard_read_write, + atomic +}; + +enum class placeholder { false_t, + true_t }; + +enum class address_space : int { + private_space = 0, + global_space, + 
constant_space, + local_space +}; +} // namespace access + + +template +struct range { +}; + +template +struct id { +}; + +template +struct _ImplT { + range AccessRange; + range MemRange; + id Offset; +}; + +template +struct DeviceValueType; + +template +struct DeviceValueType { + using type = __attribute__((opencl_global)) dataT; +}; + +template +struct DeviceValueType { + using type = __attribute__((opencl_global)) const dataT; +}; + +template +struct DeviceValueType { + using type = __attribute__((opencl_local)) dataT; +}; + +template +class __attribute__((sycl_special_class)) accessor { +public: + void use(void) const {} + void use(void *) const {} + _ImplT impl; + +private: + using PtrType = typename DeviceValueType::type *; + void __init(PtrType Ptr, range AccessRange, + range MemRange, id Offset) {} + friend class stream; +}; + +} // namespace sycl +} // namespace cl Index: clang/test/SemaSYCL/accessors-targets.cpp =================================================================== --- /dev/null +++ clang/test/SemaSYCL/accessors-targets.cpp @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s + +// This test checks that compiler generates correct kernel wrapper arguments for +// different accessors targets. + +#include "Inputs/sycl.hpp" + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +int main() { + + accessor + local_acc; + accessor + global_acc; + kernel( + [=]() { + local_acc.use(); + }); + kernel( + [=]() { + global_acc.use(); + }); +} +// CHECK: {{.*}}use_local{{.*}} 'void (__local int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' +// CHECK: {{.*}}use_global{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' Index: clang/test/SemaSYCL/basic-kernel-wrapper.cpp =================================================================== --- /dev/null +++ clang/test/SemaSYCL/basic-kernel-wrapper.cpp @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s + +// This test checks that compiler generates correct kernel wrapper for basic +// case. + +#include "Inputs/sycl.hpp" + +template +struct AccWrapper { Acc accessor; }; + +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +int main() { + cl::sycl::accessor acc; + kernel( + [=]() { + acc.use(); + }); +} + +// Check declaration of the kernel + +// CHECK: FunctionDecl {{.*}}kernel_wrapper{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' + +// Check parameters of the kernel + +// CHECK: ParmVarDecl {{.*}} used [[_arg_Mem:[0-9a-zA-Z_]+]] '__global int *' +// CHECK: ParmVarDecl {{.*}} used [[_arg_AccessRange:[0-9a-zA-Z_]+]] 'cl::sycl::range<1>' +// CHECK: ParmVarDecl {{.*}} used [[_arg_MemRange:[0-9a-zA-Z_]+]] 'cl::sycl::range<1>' +// CHECK: ParmVarDecl {{.*}} used [[_arg_Offset:[0-9a-zA-Z_]+]] 'cl::sycl::id<1>' + +// Check body of the kernel + +// Check lambda declaration inside the wrapper + +// CHECK: DeclStmt +// CHECK-NEXT: VarDecl {{.*}} used '(lambda at {{.*}}basic-kernel-wrapper.cpp{{.*}})' + +// Check accessor initialization + +// CHECK: CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: MemberExpr {{.*}} 'void ({{.*}}PtrType, range<1>, range<1>, id<1>)' lvalue .__init +// CHECK-NEXT: MemberExpr {{.*}} 'cl::sycl::accessor':'cl::sycl::accessor' lvalue . 
+// CHECK-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}basic-kernel-wrapper.cpp{{.*}})' lvalue Var + +// CHECK-NEXT: ImplicitCastExpr {{.*}} +// CHECK-NEXT: DeclRefExpr {{.*}} '__global int *' lvalue ParmVar {{.*}} '[[_arg_Mem]]' '__global int *' + +// CHECK-NEXT: CXXConstructExpr {{.*}} 'range<1>':'cl::sycl::range<1>' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'const cl::sycl::range<1>' lvalue +// CHECK-NEXT: DeclRefExpr {{.*}} 'cl::sycl::range<1>' lvalue ParmVar {{.*}} '[[_arg_AccessRange]]' 'cl::sycl::range<1>' + +// CHECK-NEXT: CXXConstructExpr {{.*}} 'range<1>':'cl::sycl::range<1>' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'const cl::sycl::range<1>' lvalue +// CHECK-NEXT: DeclRefExpr {{.*}} 'cl::sycl::range<1>' lvalue ParmVar {{.*}} '[[_arg_MemRange]]' 'cl::sycl::range<1>' + +// CHECK-NEXT: CXXConstructExpr {{.*}} 'id<1>':'cl::sycl::id<1>' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'const cl::sycl::id<1>' lvalue +// CHECK-NEXT: DeclRefExpr {{.*}} 'cl::sycl::id<1>' lvalue ParmVar {{.*}} '[[_arg_Offset]]' 'cl::sycl::id<1>' + +// Check that body of the kernel caller function is included into kernel + +// CHECK: CompoundStmt {{.*}} +// CHECK-NEXT: CXXOperatorCallExpr {{.*}} 'void' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)() const' +// CHECK-NEXT: DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}basic-kernel-wrapper.cpp{{.*}})' lvalue +// CHECK-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}basic-kernel-wrapper.cpp{{.*}})' lvalue Var + +// Check kernel wrapper attributes + +// CHECK: OpenCLKernelAttr {{.*}} Implicit +// CHECK: AsmLabelAttr {{.*}} Implicit "{{.*}}kernel_wrapper{{.*}}" +// CHECK: ArtificialAttr {{.*}} Implicit Index: clang/test/SemaSYCL/built-in-type-kernel-arg.cpp =================================================================== --- /dev/null +++ clang/test/SemaSYCL/built-in-type-kernel-arg.cpp @@ -0,0 +1,70 @@ +// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s + +// This test checks that compiler generates correct initialization for arguments +// that have struct or built-in type inside the OpenCL kernel + +#include "Inputs/sycl.hpp" + +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +struct test_struct { + int data; +}; + +void test(const int some_const) { + kernel( + [=]() { + int a = some_const; + }); +} + +int main() { + int data = 5; + test_struct s; + s.data = data; + kernel( + [=]() { + int kernel_data = data; + }); + kernel( + [=]() { + test_struct k_s; + k_s = s; + }); + const int some_const = 10; + test(some_const); + return 0; +} +// Check kernel parameters +// CHECK: FunctionDecl {{.*}}kernel_const{{.*}} 'void (const int)' +// CHECK: ParmVarDecl {{.*}} used _arg_ 'const int' + +// Check that lambda field of const built-in type is initialized +// CHECK: VarDecl {{.*}}'(lambda at {{.*}}built-in-type-kernel-arg.cpp{{.*}})' +// CHECK-NEXT: InitListExpr +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'const int' lvalue ParmVar {{.*}} '_arg_' 'const int' + +// Check kernel parameters +// CHECK: {{.*}}kernel_int{{.*}} 'void (int)' +// CHECK: ParmVarDecl {{.*}} used _arg_ 'int' + +// Check that lambda field of built-in type is initialized +// CHECK: VarDecl {{.*}}'(lambda at {{.*}}built-in-type-kernel-arg.cpp{{.*}})' +// CHECK-NEXT: InitListExpr +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_' 'int' + +// 
Check kernel parameters +// CHECK: {{.*}}kernel_struct{{.*}} 'void (test_struct)' +// CHECK: ParmVarDecl {{.*}} used _arg_ 'test_struct' + +// Check that lambda field of struct type is initialized +// CHECK: VarDecl {{.*}}'(lambda at {{.*}}built-in-type-kernel-arg.cpp{{.*}})' +// CHECK-NEXT: InitListExpr +// CHECK-NEXT: CXXConstructExpr {{.*}}'test_struct'{{.*}}void (const test_struct &) +// CHECK-NEXT: ImplicitCastExpr {{.*}}'const test_struct' lvalue +// CHECK-NEXT: DeclRefExpr {{.*}} 'test_struct' lvalue ParmVar {{.*}} '_arg_' 'test_struct' Index: clang/test/SemaSYCL/fake-accessors.cpp =================================================================== --- /dev/null +++ clang/test/SemaSYCL/fake-accessors.cpp @@ -0,0 +1,67 @@ +// RUN: %clang_cc1 -fsycl-is-device -Wno-unused-value -Wno-int-to-void-pointer-cast -ast-dump %s | FileCheck %s + +// This test checks that the compiler handles SYCL kenrel parameters of accessor +// type correctly (i.e. sends a pointer as a separate parameter) and doesn't +// apply this approach for user types even if they have the same name (e.g. +// `accessor` or `foo::cl::sycl::accessor`). We also verify that type aliasing +// via typedef or using doesn't confuse the compiler. + +#include "Inputs/sycl.hpp" + +namespace foo { +namespace cl { +namespace sycl { +class accessor { +public: + int field; +}; +} // namespace sycl +} // namespace cl +} // namespace foo + +class accessor { +public: + int field; +}; + +typedef cl::sycl::accessor + MyAccessorTD; + +using MyAccessorA = cl::sycl::accessor; + +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +int main() { + foo::cl::sycl::accessor acc = {1}; + accessor acc1 = {1}; + + cl::sycl::accessor accessorA; + cl::sycl::accessor accessorB; + cl::sycl::accessor accessorC; + kernel( + [=]() { + accessorA.use((void*)(acc.field + acc1.field)); + }); + kernel( + [=]() { + accessorB.use((void*)(acc.field + acc1.field)); + }); + kernel( + [=]() { + accessorC.use((void*)(acc.field + acc1.field)); + }); + kernel( + [=]() { + acc.field + acc1.field; + }); + return 0; +} +// CHECK: fake_accessors{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, foo::cl::sycl::accessor, accessor) +// CHECK: accessor_typedef{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, foo::cl::sycl::accessor, accessor) +// CHECK: accessor_alias{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, foo::cl::sycl::accessor, accessor) +// CHECK: user_types{{.*}} 'void (foo::cl::sycl::accessor, accessor) Index: clang/test/SemaSYCL/mangle-kernel.cpp =================================================================== --- /dev/null +++ clang/test/SemaSYCL/mangle-kernel.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -I %S/../Headers/Inputs/include/ -ast-dump %s | FileCheck %s --check-prefix=CHECK-64 +// RUN: %clang_cc1 -fsycl-is-device -triple spir-unknown-unknown-sycldevice -I %S/../Headers/Inputs/include/ -ast-dump %s | FileCheck %s --check-prefix=CHECK-32 +#include "Inputs/sycl.hpp" +#include + +// This test checks that SYCL kernel name mangling matches Itanium ABI + +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +template +class SimpleVadd; + +int main() { + kernel>( + [=](){}); + + kernel>( + [=](){}); + + kernel>( + [=](){}); + return 0; +} + +// CHECK: _ZTS10SimpleVaddIiE +// CHECK: _ZTS10SimpleVaddIdE +// 
CHECK-64: _ZTS10SimpleVaddImE +// CHECK-32: _ZTS10SimpleVaddIjE