Index: clang/include/clang/Basic/Attr.td
===================================================================
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -1191,6 +1191,20 @@
   let Documentation = [SYCLKernelDocs];
 }
 
+def SYCLSpecialClass: InheritableAttr {
+  let Spellings = [Clang<"sycl_special_class">];
+  let Subjects = SubjectList<[CXXRecord]>;
+  let LangOpts = [SYCL];
+  let Documentation = [SYCLSpecialClassDocs];
+}
+
+def SYCLDevice : InheritableAttr {
+  let Spellings = [GNU<"sycl_device">];
+  let Subjects = SubjectList<[Function]>;
+  let LangOpts = [SYCL];
+  let Documentation = [SYCLDeviceDocs];
+}
+
 def C11NoReturn : InheritableAttr {
   let Spellings = [Keyword<"_Noreturn">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
Index: clang/include/clang/Basic/AttrDocs.td
===================================================================
--- clang/include/clang/Basic/AttrDocs.td
+++ clang/include/clang/Basic/AttrDocs.td
@@ -405,6 +405,26 @@
   }];
 }
 
+def SYCLSpecialClassDocs : Documentation {
+  let Category = DocCatStmt;
+  let Content = [{
+The ``__attribute__((sycl_special_class))`` attribute is used in SYCL
+headers to indicate that a class or a struct needs additional handling when
+it is passed from host to device. Please note that this is an attribute that is
+used for internal implementation and not intended to be used by external users.
+It is used for ``accessor``, ``sampler`` , or ``stream`` classes.
+The types that own this attribute are excluded from device-copyable and other
+type-legalization steps.
+
+.. code-block:: c++
+
+  class __attribute__((sycl_special_class)) accessor {
+    private:
+      void __init() {}
+  };
+  }];
+}
+
 def C11NoReturnDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
@@ -742,6 +762,17 @@
   }];
 }
 
+def SYCLDeviceDocs : Documentation {
+  let Category = DocCatFunction;
+  let Heading = "sycl_device";
+  let Content = [{
+This attribute can only be applied to functions and indicates that the
+function must be treated as a device function and must be emitted even if it has
+no direct uses from other device functions. All ``sycl_device`` function callees
+implicitly inherit this attribute.
+  }];
+}
+
 def DiagnoseIfDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11410,6 +11410,28 @@
    "probability argument to __builtin_expect_with_probability is outside the "
    "range [0.0, 1.0]">;
 
+// SYCL-specific diagnostics
+def err_sycl_attribute_internal_function
+    : Error<"%0 attribute cannot be applied to a "
+            "static function or function in an anonymous namespace">;
+def err_sycl_restrict : Error<
+  "SYCL kernel cannot "
+"%select{use a non-const global variable"
+  "|use rtti"
+  "|use a non-const static data variable"
+  "|call a virtual function"
+  "|use exceptions"
+  "|call a recursive function"
+  "|call through a function pointer"
+  "|allocate storage"
+  "|use inline assembly"
+  "|call a dllimport function"
+  "|call a variadic function"
+  "|call an undefined function without SYCL_EXTERNAL attribute"
+  "|use a const static or global variable that is neither zero-initialized "
+  "nor constant-initialized"
+  "}0">;
+
 // TCB warnings
 def err_tcb_conflicting_attributes : Error<
   "attributes '%0(\"%2\")' and '%1(\"%2\")' are mutually exclusive">;
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -13080,6 +13080,9 @@
     ConstructorDestructor,
     BuiltinFunction
   };
+
+  void checkSYCLDeviceVarDecl(VarDecl *Var);
+
   /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
   /// context is "used as device code".
   ///
@@ -13115,6 +13118,20 @@
   /// Adds Callee to DeviceCallGraph if we don't know if its caller will be
   /// codegen'ed yet.
   bool checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee);
+
+private:
+  /// Contains generated OpenCL kernel functions for SYCL.
+  SmallVector<Decl *, 1> SYCLKernels;
+
+public:
+  void addSYCLKernel(Decl *D) { SYCLKernels.push_back(D); }
+  /// Access to SYCL kernels.
+  //const SmallVectorImpl<Decl *> &getSYCLKernels() { return SYCLKernels; }
+  const ArrayRef<Decl *> getSYCLKernels() { return SYCLKernels; }
+
+  /// Constructs an OpenCL kernel using the KernelCaller function and adds it to
+  /// the SYCL device code.
+  void constructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC);
 };
 
 /// RAII object that enters a new expression evaluation context.
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -11065,6 +11065,10 @@
   if (D->hasAttr<WeakRefAttr>())
     return false;
 
+  // If SYCL, only kernels are required.
+  if (LangOpts.SYCLIsDevice && !D->hasAttr<OpenCLKernelAttr>())
+    return false;
+
   // Aliases and used decls are required.
   if (D->hasAttr<AliasAttr>() || D->hasAttr<UsedAttr>())
     return true;
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -2960,6 +2960,12 @@
     }
   }
 
+  if (LangOpts.SYCLIsDevice && Global->hasAttr<OpenCLKernelAttr>() &&
+      MustBeEmitted(Global)) {
+    addDeferredDeclToEmit(GD);
+    return;
+  }
+
   // Ignore declarations, they will be emitted on their first use.
   if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
     // Forward declarations are emitted lazily on first use.
@@ -3011,6 +3017,13 @@
     }
   }
 
+  // clang::ParseAST ensures that we emit the SYCL device functions at the end,
+  // so anything that is a device (or indirectly called) will be handled later.
+  if (LangOpts.SYCLIsDevice && MustBeEmitted(Global)) {
+    addDeferredDeclToEmit(GD);
+    return;
+  }
+
   // Defer code generation to first use when possible, e.g. if this is an inline
   // function. If the global must always be emitted, do it eagerly if possible
   // to benefit from cache locality.
Index: clang/lib/Parse/ParseAST.cpp
===================================================================
--- clang/lib/Parse/ParseAST.cpp
+++ clang/lib/Parse/ParseAST.cpp
@@ -168,6 +168,10 @@
   for (Decl *D : S.WeakTopLevelDecls())
     Consumer->HandleTopLevelDecl(DeclGroupRef(D));
 
+  if (S.getLangOpts().SYCLIsDevice)
+    for (Decl *D : S.getSYCLKernels())
+      Consumer->HandleTopLevelDecl(DeclGroupRef(D));
+
   Consumer->HandleTranslationUnit(S.getASTContext());
 
   // Finalize the template instantiation observer chain.
Index: clang/lib/Sema/SemaDecl.cpp
===================================================================
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -13048,6 +13048,9 @@
     }
   }
 
+  if (getLangOpts().SYCLIsDevice)
+    checkSYCLDeviceVarDecl(var);
+
   // In Objective-C, don't allow jumps past the implicit initialization of a
   // local retaining variable.
   if (getLangOpts().ObjC &&
Index: clang/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -4565,6 +4565,16 @@
     D->addAttr(Optnone);
 }
 
+static void handleSYCLDeviceAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  auto *FD = cast<FunctionDecl>(D);
+  if (!FD->isExternallyVisible()) {
+    S.Diag(AL.getLoc(), diag::err_sycl_attribute_internal_function) << AL;
+    return;
+  }
+
+  handleSimpleAttribute<SYCLDeviceAttr>(S, D, AL);
+}
+
 static void handleConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   const auto *VD = cast<VarDecl>(D);
   if (VD->hasLocalStorage()) {
@@ -8054,6 +8064,12 @@
   case ParsedAttr::AT_SYCLKernel:
     handleSYCLKernelAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_SYCLSpecialClass:
+    handleSimpleAttribute<SYCLSpecialClassAttr>(S, D, AL);
+    break;
+  case ParsedAttr::AT_SYCLDevice:
+    handleSYCLDeviceAttr(S, D, AL);
+    break;
   case ParsedAttr::AT_Format:
     handleFormatAttr(S, D, AL);
     break;
Index: clang/lib/Sema/SemaSYCL.cpp
===================================================================
--- clang/lib/Sema/SemaSYCL.cpp
+++ clang/lib/Sema/SemaSYCL.cpp
@@ -8,11 +8,1510 @@
 // This implements Semantic Analysis for SYCL constructs.
 //===----------------------------------------------------------------------===//
 
+#include "TreeTransform.h"
+#include "clang/AST/AST.h"
 #include "clang/AST/Mangle.h"
+#include "clang/AST/QualTypeNames.h"
+#include "clang/AST/RecordLayout.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/TemplateArgumentVisitor.h"
+#include "clang/AST/TypeVisitor.h"
+#include "clang/Sema/Initialization.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/SemaDiagnostic.h"
 
+#include "llvm/Support/raw_ostream.h"
+
+#include <string>
+
 using namespace clang;
+using namespace std::placeholders;
+
+using ParamDesc = std::tuple<QualType, IdentifierInfo *, TypeSourceInfo *>;
+
+const static std::string InitMethodName = "__init";
+const static std::string FinalizeMethodName = "__finalize";
+
+// anonymous namespace so these don't get linkage.
+namespace {
+
+/// Various utilities.
+class Util {
+public:
+  using DeclContextDesc = std::pair<clang::Decl::Kind, StringRef>;
+
+  /// Checks whether given clang type is a full specialization of a SYCL
+  /// special class.
+  static bool isSyclSpecialType(QualType Ty);
+
+  /// Checks whether given clang type is a full specialization of the SYCL
+  /// half class.
+  static bool isSyclHalfType(const QualType Ty);
+
+  /// Checks whether given clang type is a standard SYCL API class with given
+  /// name.
+  /// \param Ty    the clang type being checked
+  /// \param Name  the class name checked against
+  /// \param Tmpl  whether the class is template instantiation or simple record
+  static bool isSyclType(const QualType &Ty, StringRef Name, bool Tmpl = false);
+
+  // Checks declaration context hierarchy.
+  /// \param DC     the context of the item to be checked.
+  /// \param Scopes the declaration scopes leading from the item context to the
+  ///               translation unit (excluding the latter)
+  static bool matchContext(const DeclContext *DC,
+                           ArrayRef<Util::DeclContextDesc> Scopes);
+
+  /// Checks whether given clang type is declared in the given hierarchy of
+  /// declaration contexts.
+  /// \param Ty         the clang type being checked
+  /// \param Scopes     the declaration scopes leading from the type to the
+  ///     translation unit (excluding the latter)
+  static bool matchQualifiedTypeName(const QualType &Ty,
+                                     ArrayRef<Util::DeclContextDesc> Scopes);
+};
+
+class KernelBodyTransform final : public TreeTransform<KernelBodyTransform> {
+public:
+  KernelBodyTransform(std::pair<DeclaratorDecl *, DeclaratorDecl *> &MPair,
+                      Sema &S)
+      : TreeTransform<KernelBodyTransform>(S), MappingPair(MPair), SemaRef(S) {}
+  bool AlwaysRebuild() { return true; }
+
+  ExprResult TransformDeclRefExpr(DeclRefExpr *DRE) {
+    auto *Ref = dyn_cast<DeclaratorDecl>(DRE->getDecl());
+    if (Ref && Ref == MappingPair.first) {
+      DeclaratorDecl *NewDecl = MappingPair.second;
+      return DeclRefExpr::Create(
+          SemaRef.getASTContext(), DRE->getQualifierLoc(),
+          DRE->getTemplateKeywordLoc(), NewDecl, false, DRE->getNameInfo(),
+          NewDecl->getType(), DRE->getValueKind());
+    }
+    return DRE;
+  }
+
+private:
+  std::pair<DeclaratorDecl *, DeclaratorDecl *> MappingPair;
+  Sema &SemaRef;
+};
+
+/// Return method by name
+static CXXMethodDecl *getMethodByName(const CXXRecordDecl *CRD,
+                                      StringRef MethodName) {
+  CXXMethodDecl *Method;
+  auto It = llvm::find_if(CRD->methods(),
+                         [MethodName](const CXXMethodDecl *Method) {
+                           return Method->getNameAsString() == MethodName;
+                         });
+  Method = (It != CRD->methods().end()) ? *It : nullptr;
+  return Method;
+}
+
+const CXXRecordDecl *getKernelObjectType(FunctionDecl *Caller) {
+  assert(Caller->getNumParams() > 0 && "Insufficient kernel parameters");
+
+  QualType KernelParamTy = Caller->getParamDecl(0)->getType();
+  // In SYCL 2020 kernels are now passed by reference.
+  if (KernelParamTy->isReferenceType())
+    return KernelParamTy->getPointeeCXXRecordDecl();
+
+  // SYCL 1.2.1
+  return KernelParamTy->getAsCXXRecordDecl();
+}
+
+/// Creates a kernel parameter descriptor
+/// \param Src  field declaration to construct name from
+/// \param Ty   the desired parameter type
+/// \return     the constructed descriptor
+ParamDesc makeParamDesc(const FieldDecl *Src, QualType Ty) {
+  ASTContext &Ctx = Src->getASTContext();
+  std::string Name = (Twine("_arg_") + Src->getName()).str();
+  return std::make_tuple(Ty, &Ctx.Idents.get(Name),
+                         Ctx.getTrivialTypeSourceInfo(Ty));
+}
+
+ParamDesc makeParamDesc(ASTContext &Ctx, const CXXBaseSpecifier &Src,
+                        QualType Ty) {
+  // TODO: There is no name for the base available, but duplicate names are
+  // seemingly already possible, so we'll give them all the same name for now.
+  // This only happens with the accessor types.
+  std::string Name = "_arg__base";
+  return std::make_tuple(Ty, &Ctx.Idents.get(Name),
+                         Ctx.getTrivialTypeSourceInfo(Ty));
+}
+
+// The first template argument to the kernel caller function is used to identify
+// the kernel itself.
+QualType calculateKernelNameType(ASTContext &Ctx,
+                                 FunctionDecl *KernelCallerFunc) {
+  const TemplateArgumentList *TAL =
+      KernelCallerFunc->getTemplateSpecializationArgs();
+  assert(TAL && "No template argument info");
+  return TAL->get(0).getAsType().getCanonicalType();
+}
+
+// Gets a name for the OpenCL kernel function, calculated from the first
+// template argument of the kernel caller function.
+std::string constructKernelName(Sema &S, FunctionDecl *KernelCallerFunc,
+                                MangleContext &MC) {
+  QualType KernelNameType =
+      calculateKernelNameType(S.getASTContext(), KernelCallerFunc);
+
+  SmallString<256> Result;
+  llvm::raw_svector_ostream Out(Result);
+
+  MC.mangleTypeName(KernelNameType, Out);
+
+  return std::string(Out.str());
+}
+
+template <typename T> struct bind_param { using type = T; };
+
+template <> struct bind_param<CXXBaseSpecifier &> {
+  using Type = const CXXBaseSpecifier &;
+};
+
+template <> struct bind_param<FieldDecl *&> { using type = FieldDecl *; };
+
+template <> struct bind_param<FieldDecl *const &> { using type = FieldDecl *; };
+
+template <typename T> using bind_param_t = typename bind_param<T>::type;
+
+class KernelObjVisitor {
+  Sema &SemaRef;
+
+  template <typename ParentTy, typename... HandlerTys>
+  void VisitUnionImpl(const CXXRecordDecl *Owner, ParentTy &Parent,
+                      const CXXRecordDecl *Wrapper, HandlerTys &... Handlers) {
+    (void)std::initializer_list<int>{
+        (Handlers.enterUnion(Owner, Parent), 0)...};
+    VisitRecordHelper(Wrapper, Wrapper->fields(), Handlers...);
+    (void)std::initializer_list<int>{
+        (Handlers.leaveUnion(Owner, Parent), 0)...};
+  }
+
+  // These enable handler execution only when previous Handlers succeed.
+  template <typename... Tn>
+  bool handleField(FieldDecl *FD, QualType FDTy, Tn &&... tn) {
+    bool result = true;
+    (void)std::initializer_list<int>{(result = result && tn(FD, FDTy), 0)...};
+    return result;
+  }
+  template <typename... Tn>
+  bool handleField(const CXXBaseSpecifier &BD, QualType BDTy, Tn &&... tn) {
+    bool result = true;
+    std::initializer_list<int>{(result = result && tn(BD, BDTy), 0)...};
+    return result;
+  }
+
+// This definition using std::bind is necessary because of a gcc 7.x bug.
+#define KF_FOR_EACH(FUNC, Item, Qt)                                            \
+  handleField(                                                                 \
+      Item, Qt,                                                                \
+      std::bind(static_cast<bool (std::decay_t<decltype(Handlers)>::*)(        \
+                    bind_param_t<decltype(Item)>, QualType)>(                  \
+                    &std::decay_t<decltype(Handlers)>::FUNC),                  \
+                std::ref(Handlers), _1, _2)...)
+
+  // The following simpler definition works with gcc 8.x and later.
+  //#define KF_FOR_EACH(FUNC) \
+//  handleField(Field, FieldTy, ([&](FieldDecl *FD, QualType FDTy) { \
+//                return Handlers.f(FD, FDTy); \
+//              })...)
+
+  // Parent contains the FieldDecl or CXXBaseSpecifier that was used to enter
+  // the Wrapper structure that we're currently visiting. Owner is the parent
+  // type (which doesn't exist in cases where it is a FieldDecl in the
+  // 'root'), and Wrapper is the current struct being unwrapped.
+  template <typename ParentTy, typename... HandlerTys>
+  void visitComplexRecord(const CXXRecordDecl *Owner, ParentTy &Parent,
+                          const CXXRecordDecl *Wrapper, QualType RecordTy,
+                          HandlerTys &... Handlers) {
+    (void)std::initializer_list<int>{
+        (Handlers.enterStruct(Owner, Parent, RecordTy), 0)...};
+    VisitRecordHelper(Wrapper, Wrapper->bases(), Handlers...);
+    VisitRecordHelper(Wrapper, Wrapper->fields(), Handlers...);
+    (void)std::initializer_list<int>{
+        (Handlers.leaveStruct(Owner, Parent, RecordTy), 0)...};
+  }
+
+  template <typename ParentTy, typename... HandlerTys>
+  void visitSimpleRecord(const CXXRecordDecl *Owner, ParentTy &Parent,
+                         const CXXRecordDecl *Wrapper, QualType RecordTy,
+                         HandlerTys &... Handlers) {
+    (void)std::initializer_list<int>{
+        (Handlers.handleNonDecompStruct(Owner, Parent, RecordTy), 0)...};
+  }
+
+  template <typename ParentTy, typename... HandlerTys>
+  void visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent,
+                   const CXXRecordDecl *Wrapper, QualType RecordTy,
+                   HandlerTys &... Handlers);
+
+  template <typename ParentTy, typename... HandlerTys>
+  void VisitUnion(const CXXRecordDecl *Owner, ParentTy &Parent,
+                  const CXXRecordDecl *Wrapper, HandlerTys &... Handlers);
+
+  template <typename... HandlerTys>
+  void VisitRecordHelper(const CXXRecordDecl *Owner,
+                         clang::CXXRecordDecl::base_class_const_range Range,
+                         HandlerTys &... Handlers) {
+    for (const auto &Base : Range) {
+      QualType BaseTy = Base.getType();
+      // Handle accessor class as base
+      if (Util::isSyclSpecialType(BaseTy)) {
+        (void)std::initializer_list<int>{
+            (Handlers.handleSyclSpecialType(Owner, Base, BaseTy), 0)...};
+      } else
+        // For all other bases, visit the record
+        visitRecord(Owner, Base, BaseTy->getAsCXXRecordDecl(), BaseTy,
+                    Handlers...);
+    }
+  }
+
+  template <typename... HandlerTys>
+  void VisitRecordHelper(const CXXRecordDecl *Owner,
+                         RecordDecl::field_range Range,
+                         HandlerTys &... Handlers) {
+    VisitRecordFields(Owner, Handlers...);
+  }
+
+  // FIXME: Can this be refactored/handled some other way?
+  template <typename ParentTy, typename... HandlerTys>
+  void visitStreamRecord(const CXXRecordDecl *Owner, ParentTy &Parent,
+                         CXXRecordDecl *Wrapper, QualType RecordTy,
+                         HandlerTys &... Handlers) {
+    (void)std::initializer_list<int>{
+        (Handlers.enterStream(Owner, Parent, RecordTy), 0)...};
+    for (const auto &Field : Wrapper->fields()) {
+      QualType FieldTy = Field->getType();
+      // Required to initialize accessors inside streams.
+      if (Util::isSyclAccessorType(FieldTy))
+        KF_FOR_EACH(handleSyclAccessorType, Field, FieldTy);
+    }
+    (void)std::initializer_list<int>{
+        (Handlers.leaveStream(Owner, Parent, RecordTy), 0)...};
+  }
+
+  template <typename... HandlerTys>
+  void visitArrayElementImpl(const CXXRecordDecl *Owner, FieldDecl *ArrayField,
+                             QualType ElementTy, uint64_t Index,
+                             HandlerTys &... Handlers) {
+    (void)std::initializer_list<int>{
+        (Handlers.nextElement(ElementTy, Index), 0)...};
+    visitField(Owner, ArrayField, ElementTy, Handlers...);
+  }
+
+  template <typename... HandlerTys>
+  void visitFirstArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField,
+                              QualType ElementTy, HandlerTys &... Handlers) {
+    visitArrayElementImpl(Owner, ArrayField, ElementTy, 0, Handlers...);
+  }
+
+  template <typename... HandlerTys>
+  void visitNthArrayElement(const CXXRecordDecl *Owner, FieldDecl *ArrayField,
+                            QualType ElementTy, uint64_t Index,
+                            HandlerTys &... Handlers);
+
+  template <typename... HandlerTys>
+  void visitSimpleArray(const CXXRecordDecl *Owner, FieldDecl *Field,
+                        QualType ArrayTy, HandlerTys &... Handlers) {
+    (void)std::initializer_list<int>{
+        (Handlers.handleSimpleArrayType(Field, ArrayTy), 0)...};
+  }
+
+  template <typename... HandlerTys>
+  void visitComplexArray(const CXXRecordDecl *Owner, FieldDecl *Field,
+                         QualType ArrayTy, HandlerTys &... Handlers) {
+    // Array workflow is:
+    // handleArrayType
+    // enterArray
+    // nextElement
+    // VisitField (same as before, note that The FieldDecl is the of array
+    // itself, not the element)
+    // ... repeat per element, opt-out for duplicates.
+    // leaveArray
+
+    if (!KF_FOR_EACH(handleArrayType, Field, ArrayTy))
+      return;
+
+    const ConstantArrayType *CAT =
+        SemaRef.getASTContext().getAsConstantArrayType(ArrayTy);
+    assert(CAT && "Should only be called on constant-size array.");
+    QualType ET = CAT->getElementType();
+    uint64_t ElemCount = CAT->getSize().getZExtValue();
+    assert(ElemCount > 0 && "SYCL prohibits 0 sized arrays");
+
+    (void)std::initializer_list<int>{
+        (Handlers.enterArray(Field, ArrayTy, ET), 0)...};
+
+    visitFirstArrayElement(Owner, Field, ET, Handlers...);
+    for (uint64_t Index = 1; Index < ElemCount; ++Index)
+      visitNthArrayElement(Owner, Field, ET, Index, Handlers...);
+
+    (void)std::initializer_list<int>{
+        (Handlers.leaveArray(Field, ArrayTy, ET), 0)...};
+  }
+
+  template <typename... HandlerTys>
+  void visitArray(const CXXRecordDecl *Owner, FieldDecl *Field,
+                  QualType ArrayTy, HandlerTys &... Handlers);
+
+  template <typename... HandlerTys>
+  void visitField(const CXXRecordDecl *Owner, FieldDecl *Field,
+                  QualType FieldTy, HandlerTys &... Handlers) {
+    if (Util::isSyclSpecialType(FieldTy))
+      KF_FOR_EACH(handleSyclSpecialType, Field, FieldTy);
+    else if (Util::isSyclHalfType(FieldTy))
+      KF_FOR_EACH(handleSyclHalfType, Field, FieldTy);
+    else if (FieldTy->isStructureOrClassType()) {
+      if (KF_FOR_EACH(handleStructType, Field, FieldTy)) {
+        CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl();
+        visitRecord(Owner, Field, RD, FieldTy, Handlers...);
+      }
+    } else if (FieldTy->isUnionType()) {
+      if (KF_FOR_EACH(handleUnionType, Field, FieldTy)) {
+        CXXRecordDecl *RD = FieldTy->getAsCXXRecordDecl();
+        VisitUnion(Owner, Field, RD, Handlers...);
+      }
+    } else if (FieldTy->isReferenceType())
+      KF_FOR_EACH(handleReferenceType, Field, FieldTy);
+    else if (FieldTy->isPointerType())
+      KF_FOR_EACH(handlePointerType, Field, FieldTy);
+    else if (FieldTy->isArrayType())
+      visitArray(Owner, Field, FieldTy, Handlers...);
+    else if (FieldTy->isScalarType() || FieldTy->isVectorType())
+      KF_FOR_EACH(handleScalarType, Field, FieldTy);
+    else
+      KF_FOR_EACH(handleOtherType, Field, FieldTy);
+  }
+
+public:
+  KernelObjVisitor(Sema &S) : SemaRef(S) {}
+
+  template <typename... HandlerTys>
+  void VisitRecordBases(const CXXRecordDecl *KernelFunctor,
+                        HandlerTys &... Handlers) {
+    VisitRecordHelper(KernelFunctor, KernelFunctor->bases(), Handlers...);
+  }
+
+  // A visitor function that dispatches to functions as defined in
+  // SyclKernelFieldHandler for the purposes of kernel generation.
+  template <typename... HandlerTys>
+  void VisitRecordFields(const CXXRecordDecl *Owner, HandlerTys &... Handlers) {
+    for (const auto Field : Owner->fields())
+      visitField(Owner, Field, Field->getType(), Handlers...);
+  }
+#undef KF_FOR_EACH
+};
+// A base type that the SYCL OpenCL Kernel construction task uses to implement
+// individual tasks.
+class SyclKernelFieldHandlerBase {
+public:
+  static constexpr const bool VisitUnionBody = false;
+  static constexpr const bool VisitNthArrayElement = true;
+  // Opt-in based on whether we should visit inside simple containers (structs,
+  // arrays). All of the 'check' types should likely be true, the int-header,
+  // and kernel decl creation types should not.
+  static constexpr const bool VisitInsideSimpleContainers = true;
+  // Mark these virtual so that we can use override in the implementer classes,
+  // despite virtual dispatch never being used.
+
+  // SYCL special class can be a base class or a field decl, so both must be
+  // handled.
+  virtual bool handleSyclSpecialType(const CXXRecordDecl *,
+                                     const CXXBaseSpecifier &, QualType) {
+    return true;
+  }
+  virtual bool handleSyclSpecialType(FieldDecl *, QualType) { return true; }
+
+
+  virtual bool handleSyclSpecConstantType(FieldDecl *, QualType) {
+    return true;
+  }
+
+  virtual bool handleSyclHalfType(const CXXRecordDecl *,
+                                  const CXXBaseSpecifier &, QualType) {
+    return true;
+  }
+  virtual bool handleSyclHalfType(FieldDecl *, QualType) { return true; }
+  virtual bool handleStructType(FieldDecl *, QualType) { return true; }
+  virtual bool handleUnionType(FieldDecl *, QualType) { return true; }
+  virtual bool handleReferenceType(FieldDecl *, QualType) { return true; }
+  virtual bool handlePointerType(FieldDecl *, QualType) { return true; }
+  virtual bool handleArrayType(FieldDecl *, QualType) { return true; }
+  virtual bool handleScalarType(FieldDecl *, QualType) { return true; }
+  // Most handlers shouldn't be handling this, just the field checker.
+  virtual bool handleOtherType(FieldDecl *, QualType) { return true; }
+
+  // Handle a simple struct that doesn't need to be decomposed, only called on
+  // handlers with VisitInsideSimpleContainers as false.  Replaces
+  // handleStructType, enterStruct, leaveStruct, and visiting of sub-elements.
+  virtual bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *,
+                                     QualType) {
+    return true;
+  }
+  virtual bool handleNonDecompStruct(const CXXRecordDecl *,
+                                     const CXXBaseSpecifier &, QualType) {
+    return true;
+  }
+
+  // Instead of handleArrayType, enterArray, leaveArray, and nextElement (plus
+  // descending down the elements), this function gets called in the event of an
+  // array containing simple elements (even in the case of an MD array).
+  virtual bool handleSimpleArrayType(FieldDecl *, QualType) { return true; }
+
+  // The following are only used for keeping track of where we are in the base
+  // class/field graph. Int Headers use this to calculate offset, most others
+  // don't have a need for these.
+
+  virtual bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) {
+    return true;
+  }
+  virtual bool leaveStruct(const CXXRecordDecl *, FieldDecl *, QualType) {
+    return true;
+  }
+  virtual bool enterStream(const CXXRecordDecl *, FieldDecl *, QualType) {
+    return true;
+  }
+  virtual bool leaveStream(const CXXRecordDecl *, FieldDecl *, QualType) {
+    return true;
+  }
+  virtual bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &,
+                           QualType) {
+    return true;
+  }
+  virtual bool leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &,
+                           QualType) {
+    return true;
+  }
+  virtual bool enterUnion(const CXXRecordDecl *, FieldDecl *) { return true; }
+  virtual bool leaveUnion(const CXXRecordDecl *, FieldDecl *) { return true; }
+
+  // The following are used for stepping through array elements.
+  virtual bool enterArray(FieldDecl *, QualType ArrayTy, QualType ElementTy) {
+    return true;
+  }
+  virtual bool leaveArray(FieldDecl *, QualType ArrayTy, QualType ElementTy) {
+    return true;
+  }
+
+  virtual bool nextElement(QualType, uint64_t) { return true; }
+
+  virtual ~SyclKernelFieldHandlerBase() = default;
+};
+
+// A class to act as the direct base for all the SYCL OpenCL Kernel construction
+// tasks that contains a reference to Sema (and potentially any other
+// universally required data).
+class SyclKernelFieldHandler : public SyclKernelFieldHandlerBase {
+protected:
+  Sema &SemaRef;
+  SyclKernelFieldHandler(Sema &S) : SemaRef(S) {}
+};
+
+// A class to represent the 'do nothing' case for filtering purposes.
+class SyclEmptyHandler final : public SyclKernelFieldHandlerBase {};
+SyclEmptyHandler GlobalEmptyHandler;
+
+template <bool Keep, typename H> struct HandlerFilter;
+template <typename H> struct HandlerFilter<true, H> {
+  H &Handler;
+  HandlerFilter(H &Handler) : Handler(Handler) {}
+};
+template <typename H> struct HandlerFilter<false, H> {
+  SyclEmptyHandler &Handler = GlobalEmptyHandler;
+  HandlerFilter(H &Handler) {}
+};
+
+template <bool B, bool... Rest> struct AnyTrue;
+
+template <bool B> struct AnyTrue<B> { static constexpr bool Value = B; };
+
+template <bool B, bool... Rest> struct AnyTrue {
+  static constexpr bool Value = B || AnyTrue<Rest...>::Value;
+};
+
+template <bool B, bool... Rest> struct AllTrue;
+
+template <bool B> struct AllTrue<B> { static constexpr bool Value = B; };
+
+template <bool B, bool... Rest> struct AllTrue {
+  static constexpr bool Value = B && AllTrue<Rest...>::Value;
+};
+
+template <typename ParentTy, typename... Handlers>
+void KernelObjVisitor::VisitUnion(const CXXRecordDecl *Owner, ParentTy &Parent,
+                                  const CXXRecordDecl *Wrapper,
+                                  Handlers &... handlers) {
+  // Don't continue descending if none of the handlers 'care'. This could be 'if
+  // constexpr' starting in C++17.  Until then, we have to count on the
+  // optimizer to realize "if (false)" is a dead branch.
+  if (AnyTrue<Handlers::VisitUnionBody...>::Value)
+    VisitUnionImpl(
+        Owner, Parent, Wrapper,
+        HandlerFilter<Handlers::VisitUnionBody, Handlers>(handlers).Handler...);
+}
+
+template <typename... Handlers>
+void KernelObjVisitor::visitNthArrayElement(const CXXRecordDecl *Owner,
+                                            FieldDecl *ArrayField,
+                                            QualType ElementTy, uint64_t Index,
+                                            Handlers &... handlers) {
+  // Don't continue descending if none of the handlers 'care'. This could be 'if
+  // constexpr' starting in C++17.  Until then, we have to count on the
+  // optimizer to realize "if (false)" is a dead branch.
+  if (AnyTrue<Handlers::VisitNthArrayElement...>::Value)
+    visitArrayElementImpl(
+        Owner, ArrayField, ElementTy, Index,
+        HandlerFilter<Handlers::VisitNthArrayElement, Handlers>(handlers)
+            .Handler...);
+}
+
+template <typename ParentTy, typename... HandlerTys>
+void KernelObjVisitor::visitRecord(const CXXRecordDecl *Owner, ParentTy &Parent,
+                                   const CXXRecordDecl *Wrapper,
+                                   QualType RecordTy,
+                                   HandlerTys &... Handlers) {
+  RecordDecl *RD = RecordTy->getAsRecordDecl();
+  assert(RD && "should not be null.");
+  // "Simple" Containers are those that do NOT need to be decomposed,
+  // "Complex" containers are those that DO. In the case where the container
+  // does NOT need to be decomposed, we can call VisitSimpleRecord on the
+  // handlers that have opted-out of VisitInsideSimpleContainers. The 'if'
+  // makes sure we only do that if at least 1 has opted out.
+  if (!AllTrue<HandlerTys::VisitInsideSimpleContainers...>::Value)
+    visitSimpleRecord(
+        Owner, Parent, Wrapper, RecordTy,
+        HandlerFilter<!HandlerTys::VisitInsideSimpleContainers, HandlerTys>(
+            Handlers)
+            .Handler...);
+
+  // Even though this is a 'simple' container, some handlers (via
+  // VisitInsideSimpleContainers = true) need to treat it as if it needs
+  // decomposing, so we call VisitComplexRecord iif at least one has.
+  if (AnyTrue<HandlerTys::VisitInsideSimpleContainers...>::Value)
+    visitComplexRecord(
+        Owner, Parent, Wrapper, RecordTy,
+        HandlerFilter<HandlerTys::VisitInsideSimpleContainers, HandlerTys>(
+            Handlers)
+            .Handler...);
+}
+
+template <typename... HandlerTys>
+void KernelObjVisitor::visitArray(const CXXRecordDecl *Owner, FieldDecl *Field,
+                                  QualType ArrayTy, HandlerTys &... Handlers) {
+  if (!AllTrue<HandlerTys::VisitInsideSimpleContainers...>::Value)
+    visitSimpleArray(
+        Owner, Field, ArrayTy,
+        HandlerFilter<!HandlerTys::VisitInsideSimpleContainers, HandlerTys>(
+            Handlers)
+            .Handler...);
+
+  if (AnyTrue<HandlerTys::VisitInsideSimpleContainers...>::Value)
+    visitComplexArray(
+        Owner, Field, ArrayTy,
+        HandlerFilter<HandlerTys::VisitInsideSimpleContainers, HandlerTys>(
+            Handlers)
+            .Handler...);
+}
+
+// A type to Create and own the FunctionDecl for the kernel.
+class SyclKernelDeclCreator : public SyclKernelFieldHandler {
+  FunctionDecl *KernelDecl;
+  llvm::SmallVector<ParmVarDecl *, 8> Params;
+  Sema::ContextRAII FuncContext;
+  // Holds the last handled field's first parameter. This doesn't store an
+  // iterator as push_back invalidates iterators.
+  size_t LastParamIndex = 0;
+  // Keeps track of whether we are currently handling fields inside a struct.
+  int StructDepth = 0;
+
+  void addParam(const FieldDecl *FD, QualType FieldTy) {
+    ParamDesc newParamDesc = makeParamDesc(FD, FieldTy);
+    addParam(newParamDesc, FieldTy);
+  }
+
+  void addParam(const CXXBaseSpecifier &BS, QualType FieldTy) {
+    ParamDesc newParamDesc =
+        makeParamDesc(SemaRef.getASTContext(), BS, FieldTy);
+    addParam(newParamDesc, FieldTy);
+  }
+
+  void addParam(ParamDesc newParamDesc, QualType FieldTy) {
+    // Create a new ParmVarDecl based on the new info.
+    ASTContext &Ctx = SemaRef.getASTContext();
+    auto *NewParam = ParmVarDecl::Create(
+        Ctx, KernelDecl, SourceLocation(), SourceLocation(),
+        std::get<1>(newParamDesc), std::get<0>(newParamDesc),
+        std::get<2>(newParamDesc), SC_None, /*DefArg*/ nullptr);
+    NewParam->setScopeInfo(0, Params.size());
+    NewParam->setIsUsed();
+
+    LastParamIndex = Params.size();
+    Params.push_back(NewParam);
+  }
+
+  // All special SYCL objects must have __init method. We extract types for
+  // kernel parameters from __init method parameters. We will use __init method
+  // and kernel parameters which we build here to initialize special objects in
+  // the kernel body.
+  bool handleSpecialType(FieldDecl *FD, QualType FieldTy,
+                         bool isAccessorType = false) {
+    const auto *RecordDecl = FieldTy->getAsCXXRecordDecl();
+    assert(RecordDecl && "The type must be a RecordDecl");
+    CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName);
+    assert(InitMethod && "The type must have the __init method");
+
+    // Don't do -1 here because we count on this to be the first parameter added
+    // (if any).
+    size_t ParamIndex = Params.size();
+    for (const ParmVarDecl *Param : InitMethod->parameters()) {
+      QualType ParamTy = Param->getType();
+      addParam(FD, ParamTy.getCanonicalType());
+    }
+    LastParamIndex = ParamIndex;
+    return true;
+  }
+
+  static void setKernelImplicitAttrs(ASTContext &Context, FunctionDecl *FD,
+                                     StringRef Name) {
+    // Set implicit attributes.
+    FD->addAttr(OpenCLKernelAttr::CreateImplicit(Context));
+    FD->addAttr(AsmLabelAttr::CreateImplicit(Context, Name));
+    FD->addAttr(ArtificialAttr::CreateImplicit(Context));
+  }
+
+  static FunctionDecl *createKernelDecl(ASTContext &Ctx, StringRef Name,
+                                        SourceLocation Loc, bool IsInline) {
+    // Create this with no prototype, and we can fix this up after we've seen
+    // all the params.
+    FunctionProtoType::ExtProtoInfo Info(CC_OpenCLKernel);
+    QualType FuncType = Ctx.getFunctionType(Ctx.VoidTy, {}, Info);
+
+    FunctionDecl *FD = FunctionDecl::Create(
+        Ctx, Ctx.getTranslationUnitDecl(), Loc, Loc, &Ctx.Idents.get(Name),
+        FuncType, Ctx.getTrivialTypeSourceInfo(Ctx.VoidTy), SC_None);
+    FD->setImplicitlyInline(IsInline);
+    setKernelImplicitAttrs(Ctx, FD, Name);
+
+    // Add kernel to translation unit to see it in AST-dump.
+    Ctx.getTranslationUnitDecl()->addDecl(FD);
+    return FD;
+  }
+
+public:
+  static constexpr const bool VisitInsideSimpleContainers = false;
+  SyclKernelDeclCreator(Sema &S, StringRef Name, SourceLocation Loc,
+                        bool IsInline)
+      : SyclKernelFieldHandler(S),
+        KernelDecl(createKernelDecl(S.getASTContext(), Name, Loc, IsInline)),
+        FuncContext(SemaRef, KernelDecl) {}
+
+  ~SyclKernelDeclCreator() {
+    ASTContext &Ctx = SemaRef.getASTContext();
+    FunctionProtoType::ExtProtoInfo Info(CC_OpenCLKernel);
+
+    SmallVector<QualType, 8> ArgTys;
+    std::transform(std::begin(Params), std::end(Params),
+                   std::back_inserter(ArgTys),
+                   [](const ParmVarDecl *PVD) { return PVD->getType(); });
+
+    QualType FuncType = Ctx.getFunctionType(Ctx.VoidTy, ArgTys, Info);
+    KernelDecl->setType(FuncType);
+    KernelDecl->setParams(Params);
+
+    SemaRef.addSYCLKernel(KernelDecl);
+  }
+
+  bool enterStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
+    return enterStruct(RD, FD, Ty);
+  }
+
+  bool leaveStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
+    return leaveStruct(RD, FD, Ty);
+  }
+
+  bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) final {
+    ++StructDepth;
+    return true;
+  }
+
+  bool leaveStruct(const CXXRecordDecl *, FieldDecl *, QualType) final {
+    --StructDepth;
+    return true;
+  }
+
+  bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &BS,
+                   QualType FieldTy) final {
+    ++StructDepth;
+    return true;
+  }
+
+  bool leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &BS,
+                   QualType FieldTy) final {
+    --StructDepth;
+    return true;
+  }
+
+  bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS,
+                             QualType FieldTy) final {
+    const auto *RecordDecl = FieldTy->getAsCXXRecordDecl();
+    assert(RecordDecl && "The type must be a RecordDecl");
+    CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName);
+    assert(InitMethod && "The type must have the __init method");
+
+    // Don't do -1 here because we count on this to be the first parameter added
+    // (if any).
+    size_t ParamIndex = Params.size();
+    for (const ParmVarDecl *Param : InitMethod->parameters()) {
+      QualType ParamTy = Param->getType();
+      addParam(BS, ParamTy.getCanonicalType());
+    }
+    LastParamIndex = ParamIndex;
+    return true;
+  }
+
+  bool handleSyclSpecialType(FieldDecl *FD, QualType FieldTy) final {
+    return handleSpecialType(FD, FieldTy);
+  }
+
+  RecordDecl *wrapField(FieldDecl *Field, QualType FieldTy) {
+    RecordDecl *WrapperClass =
+        SemaRef.getASTContext().buildImplicitRecord("__wrapper_class");
+    WrapperClass->startDefinition();
+    Field = FieldDecl::Create(
+        SemaRef.getASTContext(), WrapperClass, SourceLocation(),
+        SourceLocation(), /*Id=*/nullptr, FieldTy,
+        SemaRef.getASTContext().getTrivialTypeSourceInfo(FieldTy,
+                                                         SourceLocation()),
+        /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
+    Field->setAccess(AS_public);
+    WrapperClass->addDecl(Field);
+    WrapperClass->completeDefinition();
+    return WrapperClass;
+  };
+
+  bool handlePointerType(FieldDecl *FD, QualType FieldTy) final {
+    // USM allows to use raw pointers instead of buffers/accessors, but these
+    // pointers point to the specially allocated memory. For pointer fields we
+    // add a kernel argument with the same type as field but global address
+    // space, because OpenCL requires it.
+    QualType PointeeTy = FieldTy->getPointeeType();
+    Qualifiers Quals = PointeeTy.getQualifiers();
+    auto AS = Quals.getAddressSpace();
+    // Leave global_device and global_host address spaces as is to help FPGA
+    // device in memory allocations
+    if (AS != LangAS::opencl_global_device && AS != LangAS::opencl_global_host)
+      Quals.setAddressSpace(LangAS::opencl_global);
+    PointeeTy = SemaRef.getASTContext().getQualifiedType(
+        PointeeTy.getUnqualifiedType(), Quals);
+    QualType ModTy = SemaRef.getASTContext().getPointerType(PointeeTy);
+    // When the kernel is generated, struct type kernel arguments are
+    // decomposed; i.e. the parameters of the kernel are the fields of the
+    // struct, and not the struct itself. This causes an error in the backend
+    // when the struct field is a pointer, since non-USM pointers cannot be
+    // passed directly. To work around this issue, all pointers inside the
+    // struct are wrapped in a generated '__wrapper_class'.
+    if (StructDepth) {
+      RecordDecl *WrappedPointer = wrapField(FD, ModTy);
+      ModTy = SemaRef.getASTContext().getRecordType(WrappedPointer);
+    }
+
+    addParam(FD, ModTy);
+    return true;
+  }
+
+  bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final {
+    // Arrays are always wrapped in a struct since they cannot be passed
+    // directly.
+    RecordDecl *WrappedArray = wrapField(FD, FieldTy);
+    QualType ModTy = SemaRef.getASTContext().getRecordType(WrappedArray);
+    addParam(FD, ModTy);
+    return true;
+  }
+
+  bool handleScalarType(FieldDecl *FD, QualType FieldTy) final {
+    addParam(FD, FieldTy);
+    return true;
+  }
+
+  bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD,
+                             QualType Ty) final {
+    addParam(FD, Ty);
+    return true;
+  }
+
+  bool handleNonDecompStruct(const CXXRecordDecl *Base,
+                             const CXXBaseSpecifier &BS, QualType Ty) final {
+    addParam(BS, Ty);
+    return true;
+  }
+
+  bool handleUnionType(FieldDecl *FD, QualType FieldTy) final {
+    return handleScalarType(FD, FieldTy);
+  }
+
+  bool handleSyclHalfType(FieldDecl *FD, QualType FieldTy) final {
+    addParam(FD, FieldTy);
+    return true;
+  }
+
+  void setBody(CompoundStmt *KB) { KernelDecl->setBody(KB); }
+
+  FunctionDecl *getKernelDecl() { return KernelDecl; }
+
+  llvm::ArrayRef<ParmVarDecl *> getParamVarDeclsForCurrentField() {
+    return ArrayRef<ParmVarDecl *>(std::begin(Params) + LastParamIndex,
+                                   std::end(Params));
+  }
+  using SyclKernelFieldHandler::handleSyclHalfType;
+};
+
+class SyclKernelBodyCreator : public SyclKernelFieldHandler {
+  SyclKernelDeclCreator &DeclCreator;
+  llvm::SmallVector<Stmt *, 16> BodyStmts;
+  llvm::SmallVector<InitListExpr *, 16> CollectionInitExprs;
+  llvm::SmallVector<Stmt *, 16> FinalizeStmts;
+  // This collection contains the information required to add/remove information
+  // about arrays as we enter them.  The InitializedEntity component is
+  // necessary for initializing child members.  uin64_t is the index of the
+  // current element being worked on, which is updated every time we visit
+  // nextElement.
+  llvm::SmallVector<std::pair<InitializedEntity, uint64_t>, 8> ArrayInfos;
+  VarDecl *KernelObjClone;
+  InitializedEntity VarEntity;
+  const CXXRecordDecl *KernelObj;
+  llvm::SmallVector<Expr *, 16> MemberExprBases;
+  FunctionDecl *KernelCallerFunc;
+  SourceLocation KernelCallerSrcLoc; // KernelCallerFunc source location.
+  // Contains a count of how many containers we're in.  This is used by the
+  // pointer-struct-wrapping code to ensure that we don't try to wrap
+  // non-top-level pointers.
+  uint64_t StructDepth = 0;
+
+  // Using the statements/init expressions that we've created, this generates
+  // the kernel body compound stmt. CompoundStmt needs to know its number of
+  // statements in advance to allocate it, so we cannot do this as we go along.
+  CompoundStmt *createKernelBody() {
+    assert(CollectionInitExprs.size() == 1 &&
+           "Should have been popped down to just the first one");
+    KernelObjClone->setInit(CollectionInitExprs.back());
+    Stmt *FunctionBody = KernelCallerFunc->getBody();
+
+    ParmVarDecl *KernelObjParam = *(KernelCallerFunc->param_begin());
+
+    // DeclRefExpr with valid source location but with decl which is not marked
+    // as used is invalid.
+    KernelObjClone->setIsUsed();
+    std::pair<DeclaratorDecl *, DeclaratorDecl *> MappingPair =
+        std::make_pair(KernelObjParam, KernelObjClone);
+
+    // Push the Kernel function scope to ensure the scope isn't empty
+    SemaRef.PushFunctionScope();
+    KernelBodyTransform KBT(MappingPair, SemaRef);
+    Stmt *NewBody = KBT.TransformStmt(FunctionBody).get();
+    BodyStmts.push_back(NewBody);
+
+    BodyStmts.insert(BodyStmts.end(), FinalizeStmts.begin(),
+                     FinalizeStmts.end());
+    return CompoundStmt::Create(SemaRef.getASTContext(), BodyStmts, {}, {});
+  }
+
+  // Creates a DeclRefExpr to the ParmVar that represents the current field.
+  Expr *createParamReferenceExpr() {
+    ParmVarDecl *KernelParameter =
+        DeclCreator.getParamVarDeclsForCurrentField()[0];
+
+    QualType ParamType = KernelParameter->getOriginalType();
+    Expr *DRE = SemaRef.BuildDeclRefExpr(KernelParameter, ParamType, VK_LValue,
+                                         KernelCallerSrcLoc);
+    return DRE;
+  }
+
+  // Creates a DeclRefExpr to the ParmVar that represents the current pointer
+  // field.
+  Expr *createPointerParamReferenceExpr(QualType PointerTy, bool Wrapped) {
+    ParmVarDecl *KernelParameter =
+        DeclCreator.getParamVarDeclsForCurrentField()[0];
+
+    QualType ParamType = KernelParameter->getOriginalType();
+    Expr *DRE = SemaRef.BuildDeclRefExpr(KernelParameter, ParamType, VK_LValue,
+                                         KernelCallerSrcLoc);
+
+    // Struct Type kernel arguments are decomposed. The pointer fields are
+    // then wrapped inside a compiler generated struct. Therefore when
+    // generating the initializers, we have to 'unwrap' the pointer.
+    if (Wrapped) {
+      CXXRecordDecl *WrapperStruct = ParamType->getAsCXXRecordDecl();
+      // Pointer field wrapped inside __wrapper_class
+      FieldDecl *Pointer = *(WrapperStruct->field_begin());
+      DRE = buildMemberExpr(DRE, Pointer);
+      ParamType = Pointer->getType();
+    }
+
+    DRE = ImplicitCastExpr::Create(SemaRef.Context, ParamType,
+                                   CK_LValueToRValue, DRE, /*BasePath=*/nullptr,
+                                   VK_PRValue, FPOptionsOverride());
+
+    if (PointerTy->getPointeeType().getAddressSpace() !=
+        ParamType->getPointeeType().getAddressSpace())
+      DRE = ImplicitCastExpr::Create(SemaRef.Context, PointerTy,
+                                     CK_AddressSpaceConversion, DRE, nullptr,
+                                     VK_PRValue, FPOptionsOverride());
+
+    return DRE;
+  }
+
+  Expr *createSimpleArrayParamReferenceExpr(QualType ArrayTy) {
+    ParmVarDecl *KernelParameter =
+        DeclCreator.getParamVarDeclsForCurrentField()[0];
+    QualType ParamType = KernelParameter->getOriginalType();
+    Expr *DRE = SemaRef.BuildDeclRefExpr(KernelParameter, ParamType, VK_LValue,
+                                         KernelCallerSrcLoc);
+
+    // Unwrap the array.
+    CXXRecordDecl *WrapperStruct = ParamType->getAsCXXRecordDecl();
+    FieldDecl *ArrayField = *(WrapperStruct->field_begin());
+    return buildMemberExpr(DRE, ArrayField);
+  }
+
+  // Returns 'true' if the thing we're visiting (Based on the FD/QualType pair)
+  // is an element of an array.  This will determine whether we do
+  // MemberExprBases in some cases or not, AND determines how we initialize
+  // values.
+  bool isArrayElement(const FieldDecl *FD, QualType Ty) const {
+    return !SemaRef.getASTContext().hasSameType(FD->getType(), Ty);
+  }
+
+  // Creates an initialized entity for a field/item. In the case where this is a
+  // field, returns a normal member initializer, if we're in a sub-array of a MD
+  // array, returns an element initializer.
+  InitializedEntity getFieldEntity(FieldDecl *FD, QualType Ty) {
+    if (isArrayElement(FD, Ty))
+      return InitializedEntity::InitializeElement(SemaRef.getASTContext(),
+                                                  ArrayInfos.back().second,
+                                                  ArrayInfos.back().first);
+    return InitializedEntity::InitializeMember(FD, &VarEntity);
+  }
+
+  void addFieldInit(FieldDecl *FD, QualType Ty, MultiExprArg ParamRef) {
+    InitializationKind InitKind =
+        InitializationKind::CreateCopy(KernelCallerSrcLoc, KernelCallerSrcLoc);
+    addFieldInit(FD, Ty, ParamRef, InitKind);
+  }
+
+  void addFieldInit(FieldDecl *FD, QualType Ty, MultiExprArg ParamRef,
+                    InitializationKind InitKind) {
+    addFieldInit(FD, Ty, ParamRef, InitKind, getFieldEntity(FD, Ty));
+  }
+
+  void addFieldInit(FieldDecl *FD, QualType Ty, MultiExprArg ParamRef,
+                    InitializationKind InitKind, InitializedEntity Entity) {
+    InitializationSequence InitSeq(SemaRef, Entity, InitKind, ParamRef);
+    ExprResult Init = InitSeq.Perform(SemaRef, Entity, InitKind, ParamRef);
+
+    InitListExpr *ParentILE = CollectionInitExprs.back();
+    ParentILE->updateInit(SemaRef.getASTContext(), ParentILE->getNumInits(),
+                          Init.get());
+  }
+
+  void addBaseInit(const CXXBaseSpecifier &BS, QualType Ty,
+                   InitializationKind InitKind) {
+    InitializedEntity Entity = InitializedEntity::InitializeBase(
+        SemaRef.Context, &BS, /*IsInheritedVirtualBase*/ false, &VarEntity);
+    InitializationSequence InitSeq(SemaRef, Entity, InitKind, None);
+    ExprResult Init = InitSeq.Perform(SemaRef, Entity, InitKind, None);
+
+    InitListExpr *ParentILE = CollectionInitExprs.back();
+    ParentILE->updateInit(SemaRef.getASTContext(), ParentILE->getNumInits(),
+                          Init.get());
+  }
+
+  void addSimpleBaseInit(const CXXBaseSpecifier &BS, QualType Ty) {
+    InitializationKind InitKind =
+        InitializationKind::CreateCopy(KernelCallerSrcLoc, KernelCallerSrcLoc);
+
+    InitializedEntity Entity = InitializedEntity::InitializeBase(
+        SemaRef.Context, &BS, /*IsInheritedVirtualBase*/ false, &VarEntity);
+
+    Expr *ParamRef = createParamReferenceExpr();
+    InitializationSequence InitSeq(SemaRef, Entity, InitKind, ParamRef);
+    ExprResult Init = InitSeq.Perform(SemaRef, Entity, InitKind, ParamRef);
+
+    InitListExpr *ParentILE = CollectionInitExprs.back();
+    ParentILE->updateInit(SemaRef.getASTContext(), ParentILE->getNumInits(),
+                          Init.get());
+  }
+
+  // Adds an initializer that handles a simple initialization of a field.
+  void addSimpleFieldInit(FieldDecl *FD, QualType Ty) {
+    Expr *ParamRef = createParamReferenceExpr();
+    addFieldInit(FD, Ty, ParamRef);
+  }
+
+  MemberExpr *buildMemberExpr(Expr *Base, ValueDecl *Member) {
+    DeclAccessPair MemberDAP = DeclAccessPair::make(Member, AS_none);
+    MemberExpr *Result = SemaRef.BuildMemberExpr(
+        Base, /*IsArrow */ false, KernelCallerSrcLoc, NestedNameSpecifierLoc(),
+        KernelCallerSrcLoc, Member, MemberDAP,
+        /*HadMultipleCandidates*/ false,
+        DeclarationNameInfo(Member->getDeclName(), KernelCallerSrcLoc),
+        Member->getType(), VK_LValue, OK_Ordinary);
+    return Result;
+  }
+
+  void addFieldMemberExpr(FieldDecl *FD, QualType Ty) {
+    if (!isArrayElement(FD, Ty))
+      MemberExprBases.push_back(buildMemberExpr(MemberExprBases.back(), FD));
+  }
+
+  void removeFieldMemberExpr(const FieldDecl *FD, QualType Ty) {
+    if (!isArrayElement(FD, Ty))
+      MemberExprBases.pop_back();
+  }
+
+  void createSpecialMethodCall(const CXXRecordDecl *RD, StringRef MethodName,
+                               SmallVectorImpl<Stmt *> &AddTo) {
+    CXXMethodDecl *Method = getMethodByName(RD, MethodName);
+    if (!Method)
+      return;
+
+    unsigned NumParams = Method->getNumParams();
+    llvm::SmallVector<Expr *, 4> ParamDREs(NumParams);
+    llvm::ArrayRef<ParmVarDecl *> KernelParameters =
+        DeclCreator.getParamVarDeclsForCurrentField();
+    for (size_t I = 0; I < NumParams; ++I) {
+      QualType ParamType = KernelParameters[I]->getOriginalType();
+      ParamDREs[I] = SemaRef.BuildDeclRefExpr(KernelParameters[I], ParamType,
+                                              VK_LValue, KernelCallerSrcLoc);
+    }
+
+    MemberExpr *MethodME = buildMemberExpr(MemberExprBases.back(), Method);
+
+    QualType ResultTy = Method->getReturnType();
+    ExprValueKind VK = Expr::getValueKindForType(ResultTy);
+    ResultTy = ResultTy.getNonLValueExprType(SemaRef.Context);
+    llvm::SmallVector<Expr *, 4> ParamStmts;
+    const auto *Proto = cast<FunctionProtoType>(Method->getType());
+    SemaRef.GatherArgumentsForCall(KernelCallerSrcLoc, Method, Proto, 0,
+                                   ParamDREs, ParamStmts);
+    // [kernel_obj or wrapper object].accessor.__init(_ValueType*,
+    // range<int>, range<int>, id<int>)
+    AddTo.push_back(CXXMemberCallExpr::Create(
+        SemaRef.Context, MethodME, ParamStmts, ResultTy, VK, KernelCallerSrcLoc,
+        FPOptionsOverride()));
+  }
+
+  // Creates an empty InitListExpr of the correct number of child-inits
+  // of this to append into.
+  void addCollectionInitListExpr(const CXXRecordDecl *RD) {
+    const ASTRecordLayout &Info =
+        SemaRef.getASTContext().getASTRecordLayout(RD);
+    uint64_t NumInitExprs = Info.getFieldCount() + RD->getNumBases();
+    addCollectionInitListExpr(QualType(RD->getTypeForDecl(), 0), NumInitExprs);
+  }
+
+  InitListExpr *createInitListExpr(const CXXRecordDecl *RD) {
+    const ASTRecordLayout &Info =
+        SemaRef.getASTContext().getASTRecordLayout(RD);
+    uint64_t NumInitExprs = Info.getFieldCount() + RD->getNumBases();
+    return createInitListExpr(QualType(RD->getTypeForDecl(), 0), NumInitExprs);
+  }
+
+  InitListExpr *createInitListExpr(QualType InitTy, uint64_t NumChildInits) {
+    InitListExpr *ILE = new (SemaRef.getASTContext()) InitListExpr(
+        SemaRef.getASTContext(), KernelCallerSrcLoc, {}, KernelCallerSrcLoc);
+    ILE->reserveInits(SemaRef.getASTContext(), NumChildInits);
+    ILE->setType(InitTy);
+
+    return ILE;
+  }
+
+  // Create an empty InitListExpr of the type/size for the rest of the visitor
+  // to append into.
+  void addCollectionInitListExpr(QualType InitTy, uint64_t NumChildInits) {
+
+    InitListExpr *ILE = createInitListExpr(InitTy, NumChildInits);
+    InitListExpr *ParentILE = CollectionInitExprs.back();
+    ParentILE->updateInit(SemaRef.getASTContext(), ParentILE->getNumInits(),
+                          ILE);
+
+    CollectionInitExprs.push_back(ILE);
+  }
+
+  // FIXME Avoid creation of kernel obj clone.
+  // See https://github.com/intel/llvm/issues/1544 for details.
+  static VarDecl *createKernelObjClone(ASTContext &Ctx, DeclContext *DC,
+                                       const CXXRecordDecl *KernelObj) {
+    TypeSourceInfo *TSInfo =
+        KernelObj->isLambda() ? KernelObj->getLambdaTypeInfo() : nullptr;
+    VarDecl *VD = VarDecl::Create(
+        Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(),
+        KernelObj->getIdentifier(), QualType(KernelObj->getTypeForDecl(), 0),
+        TSInfo, SC_None);
+
+    return VD;
+  }
+
+  const std::string &getInitMethodName() const { return InitMethodName; }
+
+  // Default inits the type, then calls the init-method in the body.
+  bool handleSpecialType(FieldDecl *FD, QualType Ty) {
+    addFieldInit(FD, Ty, None,
+                 InitializationKind::CreateDefault(KernelCallerSrcLoc));
+
+    addFieldMemberExpr(FD, Ty);
+
+    const auto *RecordDecl = Ty->getAsCXXRecordDecl();
+    createSpecialMethodCall(RecordDecl, getInitMethodName(), BodyStmts);
+    // A finalize-method is expected for special type such as stream.
+    CXXMethodDecl *FinalizeMethod =
+        getMethodByName(RecordDecl, FinalizeMethodName);
+    if (FinalizeMethod)
+      createSpecialMethodCall(RecordDecl, FinalizeMethodName, FinalizeStmts);
+
+    removeFieldMemberExpr(FD, Ty);
+
+    return true;
+  }
+
+  bool handleSpecialType(const CXXBaseSpecifier &BS, QualType Ty) {
+    const auto *RecordDecl = Ty->getAsCXXRecordDecl();
+    addBaseInit(BS, Ty, InitializationKind::CreateDefault(KernelCallerSrcLoc));
+    createSpecialMethodCall(RecordDecl, getInitMethodName(), BodyStmts);
+    return true;
+  }
+
+public:
+  static constexpr const bool VisitInsideSimpleContainers = false;
+  SyclKernelBodyCreator(Sema &S, SyclKernelDeclCreator &DC,
+                        const CXXRecordDecl *KernelObj,
+                        FunctionDecl *KernelCallerFunc)
+      : SyclKernelFieldHandler(S), DeclCreator(DC),
+        KernelObjClone(createKernelObjClone(S.getASTContext(),
+                                            DC.getKernelDecl(), KernelObj)),
+        VarEntity(InitializedEntity::InitializeVariable(KernelObjClone)),
+        KernelObj(KernelObj), KernelCallerFunc(KernelCallerFunc),
+        KernelCallerSrcLoc(KernelCallerFunc->getLocation()) {
+    CollectionInitExprs.push_back(createInitListExpr(KernelObj));
+
+    Stmt *DS = new (S.Context) DeclStmt(DeclGroupRef(KernelObjClone),
+                                        KernelCallerSrcLoc, KernelCallerSrcLoc);
+    BodyStmts.push_back(DS);
+    DeclRefExpr *KernelObjCloneRef = DeclRefExpr::Create(
+        S.Context, NestedNameSpecifierLoc(), KernelCallerSrcLoc, KernelObjClone,
+        false, DeclarationNameInfo(), QualType(KernelObj->getTypeForDecl(), 0),
+        VK_LValue);
+    MemberExprBases.push_back(KernelObjCloneRef);
+  }
+
+  ~SyclKernelBodyCreator() {
+    CompoundStmt *KernelBody = createKernelBody();
+    DeclCreator.setBody(KernelBody);
+  }
+
+  bool handleSyclSpecialType(FieldDecl *FD, QualType Ty) final {
+    return handleSpecialType(FD, Ty);
+  }
+
+  bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &BS,
+                             QualType Ty) final {
+    return handleSpecialType(BS, Ty);
+  }
+
+  bool handleSyclSpecConstantType(FieldDecl *FD, QualType Ty) final {
+    return handleSpecialType(FD, Ty);
+  }
+
+  bool handleSyclHalfType(FieldDecl *FD, QualType Ty) final {
+    addSimpleFieldInit(FD, Ty);
+    return true;
+  }
+
+  bool handlePointerType(FieldDecl *FD, QualType FieldTy) final {
+    Expr *PointerRef =
+        createPointerParamReferenceExpr(FieldTy, StructDepth != 0);
+    addFieldInit(FD, FieldTy, PointerRef);
+    return true;
+  }
+
+  bool handleSimpleArrayType(FieldDecl *FD, QualType FieldTy) final {
+    Expr *ArrayRef = createSimpleArrayParamReferenceExpr(FieldTy);
+    InitializationKind InitKind = InitializationKind::CreateDirect({}, {}, {});
+
+    InitializedEntity Entity =
+        InitializedEntity::InitializeMember(FD, &VarEntity, /*Implicit*/ true);
+
+    addFieldInit(FD, FieldTy, ArrayRef, InitKind, Entity);
+    return true;
+  }
+
+  bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD,
+                             QualType Ty) final {
+    addSimpleFieldInit(FD, Ty);
+    return true;
+  }
+
+  bool handleNonDecompStruct(const CXXRecordDecl *Base,
+                             const CXXBaseSpecifier &BS, QualType Ty) final {
+    addSimpleBaseInit(BS, Ty);
+    return true;
+  }
+
+  bool handleScalarType(FieldDecl *FD, QualType FieldTy) final {
+    addSimpleFieldInit(FD, FieldTy);
+    return true;
+  }
+
+  bool handleUnionType(FieldDecl *FD, QualType FieldTy) final {
+    addSimpleFieldInit(FD, FieldTy);
+    return true;
+  }
+
+  bool enterStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
+    ++StructDepth;
+    // Add a dummy init expression to catch the accessor initializers.
+    const auto *StreamDecl = Ty->getAsCXXRecordDecl();
+    CollectionInitExprs.push_back(createInitListExpr(StreamDecl));
+
+    addFieldMemberExpr(FD, Ty);
+    return true;
+  }
+
+  bool leaveStream(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
+    --StructDepth;
+    // Stream requires that its 'init' calls happen after its accessors init
+    // calls, so add them here instead.
+    const auto *StreamDecl = Ty->getAsCXXRecordDecl();
+
+    createSpecialMethodCall(StreamDecl, getInitMethodName(), BodyStmts);
+    createSpecialMethodCall(StreamDecl, FinalizeMethodName, FinalizeStmts);
+
+    removeFieldMemberExpr(FD, Ty);
+
+    CollectionInitExprs.pop_back();
+    return true;
+  }
+
+  bool enterStruct(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final {
+    ++StructDepth;
+    addCollectionInitListExpr(Ty->getAsCXXRecordDecl());
+
+    addFieldMemberExpr(FD, Ty);
+    return true;
+  }
+
+  bool leaveStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final {
+    --StructDepth;
+    CollectionInitExprs.pop_back();
+
+    removeFieldMemberExpr(FD, Ty);
+    return true;
+  }
+
+  bool enterStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS,
+                   QualType) final {
+    ++StructDepth;
+
+    CXXCastPath BasePath;
+    QualType DerivedTy(RD->getTypeForDecl(), 0);
+    QualType BaseTy = BS.getType();
+    SemaRef.CheckDerivedToBaseConversion(DerivedTy, BaseTy, KernelCallerSrcLoc,
+                                         SourceRange(), &BasePath,
+                                         /*IgnoreBaseAccess*/ true);
+    auto *Cast = ImplicitCastExpr::Create(
+        SemaRef.Context, BaseTy, CK_DerivedToBase, MemberExprBases.back(),
+        /* CXXCastPath=*/&BasePath, VK_LValue, FPOptionsOverride());
+    MemberExprBases.push_back(Cast);
+
+    addCollectionInitListExpr(BaseTy->getAsCXXRecordDecl());
+    return true;
+  }
+
+  bool leaveStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS,
+                   QualType) final {
+    --StructDepth;
+    MemberExprBases.pop_back();
+    CollectionInitExprs.pop_back();
+    return true;
+  }
+
+  bool enterArray(FieldDecl *FD, QualType ArrayType,
+                  QualType ElementType) final {
+    uint64_t ArraySize = SemaRef.getASTContext()
+                             .getAsConstantArrayType(ArrayType)
+                             ->getSize()
+                             .getZExtValue();
+    addCollectionInitListExpr(ArrayType, ArraySize);
+    ArrayInfos.emplace_back(getFieldEntity(FD, ArrayType), 0);
+
+    // If this is the top-level array, we need to make a MemberExpr in addition
+    // to an array subscript.
+    addFieldMemberExpr(FD, ArrayType);
+    return true;
+  }
+
+  bool nextElement(QualType, uint64_t Index) final {
+    ArrayInfos.back().second = Index;
+
+    // Pop off the last member expr base.
+    if (Index != 0)
+      MemberExprBases.pop_back();
+
+    QualType SizeT = SemaRef.getASTContext().getSizeType();
+
+    llvm::APInt IndexVal{
+        static_cast<unsigned>(SemaRef.getASTContext().getTypeSize(SizeT)),
+        Index, SizeT->isSignedIntegerType()};
+
+    auto *IndexLiteral = IntegerLiteral::Create(
+        SemaRef.getASTContext(), IndexVal, SizeT, KernelCallerSrcLoc);
+
+    ExprResult IndexExpr = SemaRef.CreateBuiltinArraySubscriptExpr(
+        MemberExprBases.back(), KernelCallerSrcLoc, IndexLiteral,
+        KernelCallerSrcLoc);
+
+    assert(!IndexExpr.isInvalid());
+    MemberExprBases.push_back(IndexExpr.get());
+    return true;
+  }
+
+  bool leaveArray(FieldDecl *FD, QualType ArrayType,
+                  QualType ElementType) final {
+    CollectionInitExprs.pop_back();
+    ArrayInfos.pop_back();
+
+    assert(
+        !SemaRef.getASTContext().getAsConstantArrayType(ArrayType)->getSize() ==
+            0 &&
+        "Constant arrays must have at least 1 element");
+    // Remove the IndexExpr.
+    MemberExprBases.pop_back();
+
+    // Remove the field access expr as well.
+    removeFieldMemberExpr(FD, ArrayType);
+    return true;
+  }
+
+  using SyclKernelFieldHandler::handleSyclHalfType;
+};
+
+} // anonymous namespace
+
+static bool isZeroSizedArray(QualType Ty) {
+  if (const auto *CATy = dyn_cast<ConstantArrayType>(Ty))
+    return CATy->getSize() == 0;
+  return false;
+}
+
+static void checkSYCLType(Sema &S, QualType Ty, SourceRange Loc,
+                          llvm::DenseSet<QualType> Visited,
+                          SourceRange UsedAtLoc = SourceRange()) {
+  // Not all variable types are supported inside SYCL kernels,
+  // for example the quad type __float128 will cause errors in the
+  // SPIR-V translation phase.
+  // Here we check any potentially unsupported declaration and issue
+  // a deferred diagnostic, which will be emitted iff the declaration
+  // is discovered to reside in kernel code.
+  // The optional UsedAtLoc param is used when the SYCL usage is at a
+  // different location than the variable declaration and we need to
+  // inform the user of both, e.g. struct member usage vs declaration.
+
+  bool Emitting = false;
+
+  //--- check types ---
+
+  // zero length arrays
+  if (isZeroSizedArray(Ty)) {
+    S.SYCLDiagIfDeviceCode(Loc.getBegin(), diag::err_typecheck_zero_array_size);
+    Emitting = true;
+  }
+
+  // variable length arrays
+  if (Ty->isVariableArrayType()) {
+    S.SYCLDiagIfDeviceCode(Loc.getBegin(), diag::err_vla_unsupported);
+    Emitting = true;
+  }
+
+  // Sub-reference array or pointer, then proceed with that type.
+  while (Ty->isAnyPointerType() || Ty->isArrayType())
+    Ty = QualType{Ty->getPointeeOrArrayElementType(), 0};
+
+  if (Emitting && UsedAtLoc.isValid())
+    S.SYCLDiagIfDeviceCode(UsedAtLoc.getBegin(), diag::note_used_here);
+
+  //--- now recurse ---
+  // Pointers complicate recursion. Add this type to Visited.
+  // If already there, bail out.
+  if (!Visited.insert(Ty).second)
+    return;
+
+  if (const auto *ATy = dyn_cast<AttributedType>(Ty))
+    return checkSYCLType(S, ATy->getModifiedType(), Loc, Visited);
+
+  if (const auto *RD = Ty->getAsRecordDecl()) {
+    for (const auto &Field : RD->fields())
+      checkSYCLType(S, Field->getType(), Field->getSourceRange(), Visited, Loc);
+  } else if (const auto *FPTy = dyn_cast<FunctionProtoType>(Ty)) {
+    for (const auto &ParamTy : FPTy->param_types())
+      checkSYCLType(S, ParamTy, Loc, Visited);
+    checkSYCLType(S, FPTy->getReturnType(), Loc, Visited);
+  }
+}
+
+void Sema::checkSYCLDeviceVarDecl(VarDecl *Var) {
+  assert(getLangOpts().SYCLIsDevice &&
+         "Should only be called during SYCL compilation");
+  QualType Ty = Var->getType();
+  SourceRange Loc = Var->getLocation();
+  llvm::DenseSet<QualType> Visited;
+
+  checkSYCLType(*this, Ty, Loc, Visited);
+}
+
+// Generates the OpenCL kernel using KernelCallerFunc (kernel caller
+// function) defined is SYCL headers.
+// Generated OpenCL kernel contains the body of the kernel caller function,
+// receives OpenCL like parameters and additionally does some manipulation to
+// initialize captured lambda/functor fields with these parameters.
+// SYCL runtime marks kernel caller function with sycl_kernel attribute.
+// To be able to generate OpenCL kernel from KernelCallerFunc we put
+// the following requirements to the function which SYCL runtime can mark with
+// sycl_kernel attribute:
+//   - Must be template function with at least two template parameters.
+//     First parameter must represent "unique kernel name"
+//     Second parameter must be the function object type
+//   - Must have only one function parameter - function object.
+//
+// Example of kernel caller function:
+//   template <typename KernelName, typename KernelType/*, ...*/>
+//   __attribute__((sycl_kernel)) void kernel_caller_function(KernelType
+//                                                            KernelFuncObj) {
+//     KernelFuncObj();
+//   }
+//
+//
+void Sema::constructOpenCLKernel(FunctionDecl *KernelCallerFunc,
+                                 MangleContext &MC) {
+  // The first argument to the KernelCallerFunc is the lambda object.
+  const CXXRecordDecl *KernelObj = getKernelObjectType(KernelCallerFunc);
+  assert(KernelObj && "invalid kernel caller");
+
+  // Do not visit invalid kernel object.
+  if (KernelObj->isInvalidDecl())
+    return;
+
+  std::string KernelName = constructKernelName(*this, KernelCallerFunc, MC);
+  SyclKernelDeclCreator kernel_decl(*this, KernelName, KernelObj->getLocation(),
+                                    KernelCallerFunc->isInlined());
+  SyclKernelBodyCreator kernel_body(*this, kernel_decl, KernelObj,
+                                    KernelCallerFunc);
+
+  KernelObjVisitor Visitor{*this};
+  Visitor.VisitRecordBases(KernelObj, kernel_decl, kernel_body);
+  Visitor.VisitRecordFields(KernelObj, kernel_decl, kernel_body);
+}
 
 // -----------------------------------------------------------------------------
 // SYCL device specific diagnostics implementation
@@ -38,13 +1537,91 @@
          "Should only be called during SYCL compilation");
   assert(Callee && "Callee may not be null.");
 
-  // Errors in an unevaluated context don't need to be generated,
+  // Errors in unevaluated context don't need to be generated,
   // so we can safely skip them.
   if (isUnevaluatedContext() || isConstantEvaluated())
     return true;
 
+  FunctionDecl *Caller = dyn_cast<FunctionDecl>(getCurLexicalContext());
+
+  if (!Caller)
+    return true;
+
   SemaDiagnosticBuilder::Kind DiagKind = SemaDiagnosticBuilder::K_Nop;
 
   return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
          DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
 }
+
+// -----------------------------------------------------------------------------
+// Utility class methods
+// -----------------------------------------------------------------------------
+bool Util::isSyclSpecialType(const QualType Ty) {
+  const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl();
+  if (!RecTy)
+    return false;
+  return RecTy->hasAttr<SYCLSpecialClassAttr>();
+}
+
+bool Util::isSyclHalfType(const QualType Ty) {
+  const StringRef &Name = "half";
+  std::array<DeclContextDesc, 5> Scopes = {
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"},
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"},
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "detail"},
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "half_impl"},
+      Util::DeclContextDesc{Decl::Kind::CXXRecord, Name}};
+  return matchQualifiedTypeName(Ty, Scopes);
+}
+
+bool Util::isSyclType(const QualType &Ty, StringRef Name, bool Tmpl) {
+  Decl::Kind ClassDeclKind =
+      Tmpl ? Decl::Kind::ClassTemplateSpecialization : Decl::Kind::CXXRecord;
+  std::array<DeclContextDesc, 3> Scopes = {
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"},
+      Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"},
+      Util::DeclContextDesc{ClassDeclKind, Name}};
+  return matchQualifiedTypeName(Ty, Scopes);
+}
+
+bool Util::matchContext(const DeclContext *Ctx,
+                        ArrayRef<Util::DeclContextDesc> Scopes) {
+  // The idea: check the declaration context chain starting from the item
+  // itself. At each step check the context is of expected kind
+  // (namespace) and name.
+  StringRef Name = "";
+
+  for (const auto &Scope : llvm::reverse(Scopes)) {
+    clang::Decl::Kind DK = Ctx->getDeclKind();
+    if (DK != Scope.first)
+      return false;
+
+    switch (DK) {
+    case clang::Decl::Kind::ClassTemplateSpecialization:
+      // ClassTemplateSpecializationDecl inherits from CXXRecordDecl
+    case clang::Decl::Kind::CXXRecord:
+      Name = cast<CXXRecordDecl>(Ctx)->getName();
+      break;
+    case clang::Decl::Kind::Namespace:
+      Name = cast<NamespaceDecl>(Ctx)->getName();
+      break;
+    default:
+      llvm_unreachable("matchContext: decl kind not supported");
+    }
+    if (Name != Scope.second)
+      return false;
+    Ctx = Ctx->getParent();
+  }
+  return Ctx->isTranslationUnit();
+}
+
+bool Util::matchQualifiedTypeName(const QualType &Ty,
+                                  ArrayRef<Util::DeclContextDesc> Scopes) {
+  const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl();
+
+  if (!RecTy)
+    return false; // only classes/structs supported
+  const auto *Ctx = cast<DeclContext>(RecTy);
+  return Util::matchContext(Ctx, Scopes);
+}
+
Index: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
===================================================================
--- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -18,6 +18,7 @@
 #include "clang/AST/DependentDiagnostic.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/Mangle.h"
 #include "clang/AST/PrettyDeclStackTrace.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/SourceManager.h"
@@ -6266,9 +6267,25 @@
   return D;
 }
 
+static void processFunctionInstantiation(Sema &S,
+                                         SourceLocation PointOfInstantiation,
+                                         FunctionDecl *FD,
+                                         bool DefinitionRequired,
+                                         MangleContext &MC) {
+  S.InstantiateFunctionDefinition(/*FIXME:*/ PointOfInstantiation, FD, true,
+                                  DefinitionRequired, true);
+  if (!FD->isDefined())
+    return;
+  if (FD->hasAttr<SYCLKernelAttr>())
+    S.constructOpenCLKernel(FD, MC);
+  FD->setInstantiationIsPending(false);
+}
+
 /// Performs template instantiation for all implicit template
 /// instantiations we have seen until this point.
 void Sema::PerformPendingInstantiations(bool LocalOnly) {
+  std::unique_ptr<MangleContext> MangleCtx(
+      getASTContext().createMangleContext());
   std::deque<PendingImplicitInstantiation> delayedPCHInstantiations;
   while (!PendingLocalImplicitInstantiations.empty() ||
          (!LocalOnly && !PendingInstantiations.empty())) {
@@ -6286,20 +6303,16 @@
     if (FunctionDecl *Function = dyn_cast<FunctionDecl>(Inst.first)) {
       bool DefinitionRequired = Function->getTemplateSpecializationKind() ==
                                 TSK_ExplicitInstantiationDefinition;
-      if (Function->isMultiVersion()) {
+      if (Function->isMultiVersion())
         getASTContext().forEachMultiversionedFunctionVersion(
-            Function, [this, Inst, DefinitionRequired](FunctionDecl *CurFD) {
-              InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, CurFD, true,
-                                            DefinitionRequired, true);
-              if (CurFD->isDefined())
-                CurFD->setInstantiationIsPending(false);
+            Function, [this, Inst, DefinitionRequired,
+                       MangleCtx = move(MangleCtx)](FunctionDecl *CurFD) {
+              processFunctionInstantiation(*this, Inst.second, CurFD,
+                                           DefinitionRequired, *MangleCtx);
             });
-      } else {
-        InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, Function, true,
-                                      DefinitionRequired, true);
-        if (Function->isDefined())
-          Function->setInstantiationIsPending(false);
-      }
+      else
+        processFunctionInstantiation(*this, Inst.second, Function,
+                                     DefinitionRequired, *MangleCtx);
       // Definition of a PCH-ed template declaration may be available only in the TU.
       if (!LocalOnly && LangOpts.PCHInstantiateTemplates &&
           TUKind == TU_Prefix && Function->instantiationIsPending())
Index: clang/lib/Sema/SemaType.cpp
===================================================================
--- clang/lib/Sema/SemaType.cpp
+++ clang/lib/Sema/SemaType.cpp
@@ -2535,7 +2535,8 @@
     }
   }
 
-  if (T->isVariableArrayType() && !Context.getTargetInfo().isVLASupported()) {
+  if (T->isVariableArrayType() && !Context.getTargetInfo().isVLASupported() &&
+      !getLangOpts().SYCLIsDevice) {
     // CUDA device code and some other targets don't support VLAs.
     targetDiag(Loc, (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
                         ? diag::err_cuda_vla
Index: clang/test/CodeGenSYCL/Inputs/sycl.hpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenSYCL/Inputs/sycl.hpp
@@ -0,0 +1,86 @@
+#pragma once
+
+namespace cl {
+namespace sycl {
+namespace access {
+
+enum class target {
+  global_buffer = 2014,
+  constant_buffer,
+  local,
+  image,
+  host_buffer,
+  host_image,
+  image_array
+};
+
+enum class mode {
+  read = 1024,
+  write,
+  read_write,
+  discard_write,
+  discard_read_write,
+  atomic
+};
+
+enum class placeholder {
+  false_t,
+  true_t
+};
+
+enum class address_space : int {
+  private_space = 0,
+  global_space,
+  constant_space,
+  local_space
+};
+} // namespace access
+
+template <int dim>
+struct id {
+  template <typename... T>
+  id(T... args) {} // fake constructor
+private:
+  // Some fake field added to see using of id arguments in the
+  // kernel wrapper
+  int Data;
+};
+
+template <int dim>
+struct range {
+  template <typename... T>
+  range(T... args) {} // fake constructor
+private:
+  // Some fake field added to see using of range arguments in the
+  // kernel wrapper
+  int Data;
+};
+
+template <int dim>
+struct _ImplT {
+  range<dim> AccessRange;
+  range<dim> MemRange;
+  id<dim> Offset;
+};
+
+template <typename dataT, int dimensions, access::mode accessmode,
+          access::target accessTarget = access::target::global_buffer,
+          access::placeholder isPlaceholder = access::placeholder::false_t>
+class __attribute__((sycl_special_class)) accessor {
+
+public:
+  void use(void) const {}
+  template <typename... T>
+  void use(T... args) {}
+  template <typename... T>
+  void use(T... args) const {}
+  _ImplT<dimensions> impl;
+
+private:
+  void __init(__attribute__((opencl_global)) dataT *Ptr,
+              range<dimensions> AccessRange,
+              range<dimensions> MemRange, id<dimensions> Offset) {}
+};
+
+} // namespace sycl
+} // namespace cl
Index: clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s
+
+// This test checks that compiler generates correct kernel wrapper for basic
+// case.
+
+#include "Inputs/sycl.hpp"
+
+template <typename name, typename Func>
+__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
+  kernelFunc();
+}
+
+int main() {
+  cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write> accessorA;
+    kernel<class kernel_function>(
+      [=]() {
+        accessorA.use();
+      });
+  return 0;
+}
+
+// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_function
+// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG:%[a-zA-Z0-9_]+]],
+// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE:%[a-zA-Z0-9_]+_1]],
+// CHECK-SAME: %"struct.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE:%[a-zA-Z0-9_]+_2]],
+// CHECK-SAME: %"struct.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET:%[a-zA-Z0-9_]+]])
+// Check alloca for pointer argument
+// CHECK: [[MEM_ARG]].addr = alloca i32 addrspace(1)*
+// Check lambda object alloca
+// CHECK: [[ANONALLOCA:%[0-9]+]] = alloca %class.anon
+// CHECK: [[ARANGEA:%agg.tmp]] = alloca %"struct.cl::sycl::range"
+// CHECK: [[MRANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range"
+// CHECK: [[OIDA:%agg.tmp.*]] = alloca %"struct.cl::sycl::id"
+// CHECK: [[ANON:%[0-9]+]] = addrspacecast %class.anon* [[ANONALLOCA]] to %class.anon addrspace(4)*
+// CHECK: [[ARANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[ARANGEA]] to %"struct.cl::sycl::range" addrspace(4)*
+// CHECK: [[MRANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[MRANGEA]] to %"struct.cl::sycl::range" addrspace(4)*
+// CHECK: [[OIDT:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::id"* [[OIDA]] to %"struct.cl::sycl::id" addrspace(4)*
+//
+// Check store of kernel pointer argument to alloca
+// CHECK: store i32 addrspace(1)* [[MEM_ARG]], i32 addrspace(1)* addrspace(4)* [[MEM_ARG]].addr.ascast, align 8
+
+// Check for default constructor of accessor
+// CHECK: call spir_func {{.*}}accessor
+
+// Check accessor GEP
+// CHECK: [[ACCESSOR:%[a-zA-Z0-9_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* [[ANON]], i32 0, i32 0
+
+// Check load from kernel pointer argument alloca
+// CHECK: [[MEM_LOAD:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[MEM_ARG]].addr.ascast
+
+// Check accessor __init method call
+// CHECK: [[ARANGE:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range" addrspace(4)* [[ARANGET]] to %"struct.cl::sycl::range"*
+// CHECK: [[MRANGE:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range" addrspace(4)* [[MRANGET]] to %"struct.cl::sycl::range"*
+// CHECK: [[OID:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::id" addrspace(4)* [[OIDT]] to %"struct.cl::sycl::id"*
+// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.cl::sycl::accessor" addrspace(4)* {{[^,]*}} [[ACCESSOR]], i32 addrspace(1)* [[MEM_LOAD]], %"struct.cl::sycl::range"* byval({{.*}}) align 4 [[ARANGE]], %"struct.cl::sycl::range"* byval({{.*}}) align 4 [[MRANGE]], %"struct.cl::sycl::id"* byval({{.*}}) align 4 [[OID]])
+
+// Check lambda "()" operator call
+// CHECK: call spir_func void @{{.*}}(%class.anon addrspace(4)* {{[^,]*}})
Index: clang/test/CodeGenSYCL/device-functions.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenSYCL/device-functions.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
+
+// This test checks that the compiler emits functions called from a SYCL kernel
+// and doesn't emit functions not called from any SYCL kernel
+
+template <typename T>
+T bar(T arg);
+
+void foo() {
+  int a = 1 + 1 + bar(1);
+}
+
+template <typename T>
+T bar(T arg) {
+  return arg;
+}
+
+template <typename name, typename Func>
+__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) {
+  kernelFunc();
+}
+
+// Make sure that definitions for the types not used in SYCL kernels are not
+// emitted
+// CHECK-NOT: %struct.A
+// CHECK-NOT: @a = {{.*}} %struct.A
+struct A {
+  int x = 10;
+} a;
+
+int main() {
+  a.x = 8;
+  kernel_single_task<class test_kernel>([]() { foo(); });
+  return 0;
+}
+
+// baz is not called from the SYCL kernel, so it must not be emitted
+// CHECK-NOT: define {{.*}} @{{.*}}baz
+void baz() {}
+
+// CHECK-LABEL: define dso_local spir_kernel void @{{.*}}test_kernel
+// CHECK-LABEL: define internal spir_func void @_ZZ4mainENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %this)
+// CHECK-LABEL: define {{.*}}spir_func void @_Z3foov()
+// CHECK-LABEL: define linkonce_odr spir_func i32 @_Z3barIiET_S0_(i32 %arg)
Index: clang/test/CodeGenSYCL/unique_stable_name.cpp
===================================================================
--- clang/test/CodeGenSYCL/unique_stable_name.cpp
+++ clang/test/CodeGenSYCL/unique_stable_name.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-pc  -fsycl-is-host -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s
 // CHECK: @[[LAMBDA_KERNEL3:[^\w]+]] = private unnamed_addr constant [[LAMBDA_K3_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ4mainEUlPZ4mainEUlvE_E_\00"
 // CHECK: @[[INT1:[^\w]+]] = private unnamed_addr constant [[INT_SIZE:\[[0-9]+ x i8\]]] c"_ZTSi\00"
 // CHECK: @[[STRING:[^\w]+]] = private unnamed_addr constant [[STRING_SIZE:\[[0-9]+ x i8\]]] c"_ZTSAppL_ZZ4mainE1jE_i\00",
@@ -65,95 +65,105 @@
   kernelFunc();
 }
 
+template<typename KernelType>
+void unnamed_kernel_single_task(KernelType kernelFunc) {
+  kernel_single_task<KernelType>(kernelFunc);
+}
+
+template <typename KernelName, typename KernelType>
+void not_kernel_single_task(KernelType kernelFunc) {
+  kernelFunc();
+}
+
 int main() {
-  kernel_single_task<class kernel2>(func<Derp>);
-  // CHECK: call spir_func void @_Z18kernel_single_taskIZ4mainE7kernel2PFPKcvEEvT0_(i8 addrspace(4)* ()* @_Z4funcI4DerpEDTu33__builtin_sycl_unique_stable_nameDtsrT_3strEEEv)
+  not_kernel_single_task<class kernel2>(func<Derp>);
+  // CHECK: call void @_Z22not_kernel_single_taskIZ4mainE7kernel2PFPKcvEEvT0_(i8* ()* @_Z4funcI4DerpEDTu33__builtin_sycl_unique_stable_nameDtsrT_3strEEEv)
 
   auto l1 = []() { return 1; };
   auto l2 = [](decltype(l1) *l = nullptr) { return 2; };
-  kernel_single_task<class kernel3>(l2);
+  kernel_single_task<decltype(l2)>(l2);
   puts(__builtin_sycl_unique_stable_name(decltype(l2)));
-  // CHECK: call spir_func void @_Z18kernel_single_taskIZ4mainE7kernel3Z4mainEUlPZ4mainEUlvE_E_EvT0_
-  // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[LAMBDA_K3_SIZE]], [[LAMBDA_K3_SIZE]]* @[[LAMBDA_KERNEL3]], i32 0, i32 0) to i8 addrspace(4)*))
+  // CHECK: call void @_Z18kernel_single_taskIZ4mainEUlPZ4mainEUlvE_E_S2_EvT0_
+  // CHECK: call void @puts(i8* getelementptr inbounds ([[LAMBDA_K3_SIZE]], [[LAMBDA_K3_SIZE]]* @[[LAMBDA_KERNEL3]], i32 0, i32 0))
 
   constexpr const char str[] = "lalala";
   static_assert(__builtin_strcmp(__builtin_sycl_unique_stable_name(decltype(str)), "_ZTSA7_Kc\0") == 0, "unexpected mangling");
 
   int i = 0;
   puts(__builtin_sycl_unique_stable_name(decltype(i++)));
-  // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT1]], i32 0, i32 0) to i8 addrspace(4)*))
+  // CHECK: call void @puts(i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT1]], i32 0, i32 0))
 
   // FIXME: Ensure that j is incremented because VLAs are terrible.
   int j = 55;
   puts(__builtin_sycl_unique_stable_name(int[++j]));
-  // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[STRING_SIZE]], [[STRING_SIZE]]* @[[STRING]], i32 0, i32 0) to i8 addrspace(4)*))
+  // CHECK: call void @puts(i8* getelementptr inbounds ([[STRING_SIZE]], [[STRING_SIZE]]* @[[STRING]], i32 0, i32 0))
 
-  // CHECK: define internal spir_func void @_Z18kernel_single_taskIZ4mainE7kernel2PFPKcvEEvT0_
-  // CHECK: declare spir_func i8 addrspace(4)* @_Z4funcI4DerpEDTu33__builtin_sycl_unique_stable_nameDtsrT_3strEEEv
-  // CHECK: define internal spir_func void @_Z18kernel_single_taskIZ4mainE7kernel3Z4mainEUlPZ4mainEUlvE_E_EvT0_
-  // CHECK: define internal spir_func void @_Z18kernel_single_taskIZ4mainE6kernelZ4mainEUlvE0_EvT0_
+  // CHECK: define internal void @_Z22not_kernel_single_taskIZ4mainE7kernel2PFPKcvEEvT0_
+  // CHECK: declare i8* @_Z4funcI4DerpEDTu33__builtin_sycl_unique_stable_nameDtsrT_3strEEEv
+  // CHECK: define internal void @_Z18kernel_single_taskIZ4mainEUlPZ4mainEUlvE_E_S2_EvT0_
+  // CHECK: define internal void @_Z18kernel_single_taskIZ4mainEUlvE0_S0_EvT0_
 
-  kernel_single_task<class kernel>(
+  unnamed_kernel_single_task(
       []() {
         puts(__builtin_sycl_unique_stable_name(int));
-        // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT2]], i32 0, i32 0) to i8 addrspace(4)*))
+        // CHECK: call void @puts(i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT2]], i32 0, i32 0))
 
         auto x = []() {};
         puts(__builtin_sycl_unique_stable_name(decltype(x)));
-        // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[LAMBDA_X_SIZE]], [[LAMBDA_X_SIZE]]* @[[LAMBDA_X]], i32 0, i32 0) to i8 addrspace(4)*))
+        // CHECK: call void @puts(i8* getelementptr inbounds ([[LAMBDA_X_SIZE]], [[LAMBDA_X_SIZE]]* @[[LAMBDA_X]], i32 0, i32 0))
 
         DEF_IN_MACRO();
-        // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]]* @[[MACRO_X]], i32 0, i32 0) to i8 addrspace(4)*))
-        // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]]* @[[MACRO_Y]], i32 0, i32 0) to i8 addrspace(4)*))
+        // CHECK: call void @puts(i8* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]]* @[[MACRO_X]], i32 0, i32 0))
+        // CHECK: call void @puts(i8* getelementptr inbounds ([[MACRO_SIZE]], [[MACRO_SIZE]]* @[[MACRO_Y]], i32 0, i32 0))
 
         MACRO_CALLS_MACRO();
-        // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]]* @[[MACRO_MACRO_X]], i32 0, i32 0) to i8 addrspace(4)*))
-        // CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]]* @[[MACRO_MACRO_Y]], i32 0, i32 0) to i8 addrspace(4)*))
+        // CHECK: call void @puts(i8* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]]* @[[MACRO_MACRO_X]], i32 0, i32 0))
+        // CHECK: call void @puts(i8* getelementptr inbounds ([[MACRO_MACRO_SIZE]], [[MACRO_MACRO_SIZE]]* @[[MACRO_MACRO_Y]], i32 0, i32 0))
 
         template_param<int>();
-        // CHECK: call spir_func void @_Z14template_paramIiEvv
+        // CHECK: call void @_Z14template_paramIiEvv
 
         template_param<decltype(x)>();
-        // CHECK: call spir_func void @_Z14template_paramIZZ4mainENKUlvE0_clEvEUlvE_Evv
+        // CHECK: call void @_Z14template_paramIZZ4mainENKUlvE0_clEvEUlvE_Evv
 
         lambda_in_dependent_function<int>();
-        // CHECK: call spir_func void @_Z28lambda_in_dependent_functionIiEvv
+        // CHECK: call void @_Z28lambda_in_dependent_functionIiEvv
 
         lambda_in_dependent_function<decltype(x)>();
-        // CHECK: call spir_func void @_Z28lambda_in_dependent_functionIZZ4mainENKUlvE0_clEvEUlvE_Evv
+        // CHECK: call void @_Z28lambda_in_dependent_functionIZZ4mainENKUlvE0_clEvEUlvE_Evv
 
         lambda_no_dep<int, double>(3, 5.5);
-        // CHECK: call spir_func void @_Z13lambda_no_depIidEvT_T0_(i32 3, double 5.500000e+00)
+        // CHECK: call void @_Z13lambda_no_depIidEvT_T0_(i32 3, double 5.500000e+00)
 
         int a = 5;
         double b = 10.7;
         auto y = [](int a) { return a; };
         auto z = [](double b) { return b; };
         lambda_two_dep<decltype(y), decltype(z)>();
-        // CHECK: call spir_func void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUliE_ZZ4mainENKS0_clEvEUldE_Evv
+        // CHECK: call void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUliE_ZZ4mainENKS0_clEvEUldE_Evv
 
         lambda_two_dep<decltype(z), decltype(y)>();
-        // CHECK: call spir_func void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUldE_ZZ4mainENKS0_clEvEUliE_Evv
+        // CHECK: call void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUldE_ZZ4mainENKS0_clEvEUliE_Evv
       });
 }
 
-// CHECK: define linkonce_odr spir_func void @_Z14template_paramIiEvv
-// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT3]], i32 0, i32 0) to i8 addrspace(4)*))
+// CHECK: define linkonce_odr void @_Z14template_paramIiEvv
+// CHECK: call void @puts(i8* getelementptr inbounds ([[INT_SIZE]], [[INT_SIZE]]* @[[INT3]], i32 0, i32 0))
 
-// CHECK: define internal spir_func void @_Z14template_paramIZZ4mainENKUlvE0_clEvEUlvE_Evv
-// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[LAMBDA_SIZE]], [[LAMBDA_SIZE]]* @[[LAMBDA]], i32 0, i32 0) to i8 addrspace(4)*))
+// CHECK: define internal void @_Z14template_paramIZZ4mainENKUlvE0_clEvEUlvE_Evv
+// CHECK: call void @puts(i8* getelementptr inbounds ([[LAMBDA_SIZE]], [[LAMBDA_SIZE]]* @[[LAMBDA]], i32 0, i32 0))
 
-// CHECK: define linkonce_odr spir_func void @_Z28lambda_in_dependent_functionIiEvv
-// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[DEP_INT_SIZE]], [[DEP_INT_SIZE]]* @[[LAMBDA_IN_DEP_INT]], i32 0, i32 0) to i8 addrspace(4)*))
+// CHECK: define linkonce_odr void @_Z28lambda_in_dependent_functionIiEvv
+// CHECK: call void @puts(i8* getelementptr inbounds ([[DEP_INT_SIZE]], [[DEP_INT_SIZE]]* @[[LAMBDA_IN_DEP_INT]], i32 0, i32 0))
 
-// CHECK: define internal spir_func void @_Z28lambda_in_dependent_functionIZZ4mainENKUlvE0_clEvEUlvE_Evv
-// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[DEP_LAMBDA_SIZE]], [[DEP_LAMBDA_SIZE]]* @[[LAMBDA_IN_DEP_X]], i32 0, i32 0) to i8 addrspace(4)*))
+// CHECK: define internal void @_Z28lambda_in_dependent_functionIZZ4mainENKUlvE0_clEvEUlvE_Evv
+// CHECK: call void @puts(i8* getelementptr inbounds ([[DEP_LAMBDA_SIZE]], [[DEP_LAMBDA_SIZE]]* @[[LAMBDA_IN_DEP_X]], i32 0, i32 0))
 
-// CHECK: define linkonce_odr spir_func void @_Z13lambda_no_depIidEvT_T0_(i32 %a, double %b)
-// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[NO_DEP_LAMBDA_SIZE]], [[NO_DEP_LAMBDA_SIZE]]* @[[LAMBDA_NO_DEP]], i32 0, i32 0) to i8 addrspace(4)*))
+// CHECK: define linkonce_odr void @_Z13lambda_no_depIidEvT_T0_(i32 %a, double %b)
+// CHECK: call void @puts(i8* getelementptr inbounds ([[NO_DEP_LAMBDA_SIZE]], [[NO_DEP_LAMBDA_SIZE]]* @[[LAMBDA_NO_DEP]], i32 0, i32 0))
 
-// CHECK: define internal spir_func void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUliE_ZZ4mainENKS0_clEvEUldE_Evv
-// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[DEP_LAMBDA1_SIZE]], [[DEP_LAMBDA1_SIZE]]* @[[LAMBDA_TWO_DEP]], i32 0, i32 0) to i8 addrspace(4)*))
+// CHECK: define internal void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUliE_ZZ4mainENKS0_clEvEUldE_Evv
+// CHECK: call void @puts(i8* getelementptr inbounds ([[DEP_LAMBDA1_SIZE]], [[DEP_LAMBDA1_SIZE]]* @[[LAMBDA_TWO_DEP]], i32 0, i32 0))
 
-// CHECK: define internal spir_func void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUldE_ZZ4mainENKS0_clEvEUliE_Evv
-// CHECK: call spir_func void @puts(i8 addrspace(4)* addrspacecast (i8* getelementptr inbounds ([[DEP_LAMBDA2_SIZE]], [[DEP_LAMBDA2_SIZE]]* @[[LAMBDA_TWO_DEP2]], i32 0, i32 0) to i8 addrspace(4)*))
+// CHECK: define internal void @_Z14lambda_two_depIZZ4mainENKUlvE0_clEvEUldE_ZZ4mainENKS0_clEvEUliE_Evv
+// CHECK: call void @puts(i8* getelementptr inbounds ([[DEP_LAMBDA2_SIZE]], [[DEP_LAMBDA2_SIZE]]* @[[LAMBDA_TWO_DEP2]], i32 0, i32 0))
Index: clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp
===================================================================
--- clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp
+++ clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp
@@ -1,6 +1,5 @@
-// RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s
-
+// RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s --check-prefixes=WIN,CHECK
 
 template<typename KN, typename Func>
 __attribute__((sycl_kernel)) void kernel(Func F){
Index: clang/test/Misc/pragma-attribute-supported-attributes-list.test
===================================================================
--- clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -154,6 +154,7 @@
 // CHECK-NEXT: ReturnTypestate (SubjectMatchRule_function, SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: ReturnsNonNull (SubjectMatchRule_objc_method, SubjectMatchRule_function)
 // CHECK-NEXT: ReturnsTwice (SubjectMatchRule_function)
+// CHECK-NEXT: SYCLSpecialClass (SubjectMatchRule_record)
 // CHECK-NEXT: ScopedLockable (SubjectMatchRule_record)
 // CHECK-NEXT: Section (SubjectMatchRule_function, SubjectMatchRule_variable_is_global, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property)
 // CHECK-NEXT: SetTypestate (SubjectMatchRule_function_is_member)
Index: clang/test/SemaSYCL/Inputs/sycl.hpp
===================================================================
--- /dev/null
+++ clang/test/SemaSYCL/Inputs/sycl.hpp
@@ -0,0 +1,86 @@
+namespace cl {
+namespace sycl {
+namespace access {
+
+enum class target {
+  global_buffer = 2014,
+  constant_buffer,
+  local,
+  image,
+  host_buffer,
+  host_image,
+  image_array
+};
+
+enum class mode {
+  read = 1024,
+  write,
+  read_write,
+  discard_write,
+  discard_read_write,
+  atomic
+};
+
+enum class placeholder { false_t,
+                         true_t };
+
+enum class address_space : int {
+  private_space = 0,
+  global_space,
+  constant_space,
+  local_space
+};
+} // namespace access
+
+
+template <int dim>
+struct range {
+};
+
+template <int dim>
+struct id {
+};
+
+template <int dim>
+struct _ImplT {
+  range<dim> AccessRange;
+  range<dim> MemRange;
+  id<dim> Offset;
+};
+
+template <typename dataT, access::target accessTarget>
+struct DeviceValueType;
+
+template <typename dataT>
+struct DeviceValueType<dataT, access::target::global_buffer> {
+  using type = __attribute__((opencl_global)) dataT;
+};
+
+template <typename dataT>
+struct DeviceValueType<dataT, access::target::constant_buffer> {
+  using type = __attribute__((opencl_global)) const dataT;
+};
+
+template <typename dataT>
+struct DeviceValueType<dataT, access::target::local> {
+  using type = __attribute__((opencl_local)) dataT;
+};
+
+template <typename dataT, int dimensions, access::mode accessmode,
+          access::target accessTarget = access::target::global_buffer,
+          access::placeholder isPlaceholder = access::placeholder::false_t>
+class __attribute__((sycl_special_class)) accessor {
+public:
+  void use(void) const {}
+  void use(void *) const {}
+  _ImplT<dimensions> impl;
+
+private:
+  using PtrType = typename DeviceValueType<dataT, accessTarget>::type *;
+  void __init(PtrType Ptr, range<dimensions> AccessRange,
+              range<dimensions> MemRange, id<dimensions> Offset) {}
+  friend class stream;
+};
+
+} // namespace sycl
+} // namespace cl
Index: clang/test/SemaSYCL/accessors-targets.cpp
===================================================================
--- /dev/null
+++ clang/test/SemaSYCL/accessors-targets.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s
+
+// This test checks that compiler generates correct kernel wrapper arguments for
+// different accessors targets.
+
+#include "Inputs/sycl.hpp"
+
+using namespace cl::sycl;
+
+template <typename name, typename Func>
+__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
+  kernelFunc();
+}
+
+int main() {
+
+  accessor<int, 1, access::mode::read_write,
+           access::target::local>
+      local_acc;
+  accessor<int, 1, access::mode::read_write,
+           access::target::global_buffer>
+      global_acc;
+  kernel<class use_local>(
+      [=]() {
+        local_acc.use();
+      });
+  kernel<class use_global>(
+      [=]() {
+        global_acc.use();
+      });
+}
+// CHECK: {{.*}}use_local{{.*}} 'void (__local int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)'
+// CHECK: {{.*}}use_global{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)'
Index: clang/test/SemaSYCL/basic-kernel-wrapper.cpp
===================================================================
--- /dev/null
+++ clang/test/SemaSYCL/basic-kernel-wrapper.cpp
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s
+
+// This test checks that compiler generates correct kernel wrapper for basic
+// case.
+
+#include "Inputs/sycl.hpp"
+
+template <typename Acc>
+struct AccWrapper { Acc accessor; };
+
+template <typename name, typename Func>
+__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
+  kernelFunc();
+}
+
+int main() {
+  cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write> acc;
+  kernel<class kernel_wrapper>(
+      [=]() {
+        acc.use();
+      });
+}
+
+// Check declaration of the kernel
+
+// CHECK: FunctionDecl {{.*}}kernel_wrapper{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)'
+
+// Check parameters of the kernel
+
+// CHECK: ParmVarDecl {{.*}} used [[_arg_Mem:[0-9a-zA-Z_]+]] '__global int *'
+// CHECK: ParmVarDecl {{.*}} used [[_arg_AccessRange:[0-9a-zA-Z_]+]] 'cl::sycl::range<1>'
+// CHECK: ParmVarDecl {{.*}} used [[_arg_MemRange:[0-9a-zA-Z_]+]] 'cl::sycl::range<1>'
+// CHECK: ParmVarDecl {{.*}} used [[_arg_Offset:[0-9a-zA-Z_]+]] 'cl::sycl::id<1>'
+
+// Check body of the kernel
+
+// Check lambda declaration inside the wrapper
+
+// CHECK: DeclStmt
+// CHECK-NEXT: VarDecl {{.*}} used '(lambda at {{.*}}basic-kernel-wrapper.cpp{{.*}})'
+
+// Check accessor initialization
+
+// CHECK: CXXMemberCallExpr {{.*}} 'void'
+// CHECK-NEXT: MemberExpr {{.*}} 'void ({{.*}}PtrType, range<1>, range<1>, id<1>)' lvalue .__init
+// CHECK-NEXT: MemberExpr {{.*}} 'cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write>':'cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::global_buffer, cl::sycl::access::placeholder::false_t>' lvalue .
+// CHECK-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}basic-kernel-wrapper.cpp{{.*}})' lvalue Var
+
+// CHECK-NEXT: ImplicitCastExpr {{.*}} <LValueToRValue>
+// CHECK-NEXT: DeclRefExpr {{.*}} '__global int *' lvalue ParmVar {{.*}} '[[_arg_Mem]]' '__global int *'
+
+// CHECK-NEXT: CXXConstructExpr {{.*}} 'range<1>':'cl::sycl::range<1>'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'const cl::sycl::range<1>' lvalue <NoOp>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'cl::sycl::range<1>' lvalue ParmVar {{.*}} '[[_arg_AccessRange]]' 'cl::sycl::range<1>'
+
+// CHECK-NEXT: CXXConstructExpr {{.*}} 'range<1>':'cl::sycl::range<1>'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'const cl::sycl::range<1>' lvalue <NoOp>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'cl::sycl::range<1>' lvalue ParmVar {{.*}} '[[_arg_MemRange]]' 'cl::sycl::range<1>'
+
+// CHECK-NEXT: CXXConstructExpr {{.*}} 'id<1>':'cl::sycl::id<1>'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'const cl::sycl::id<1>' lvalue <NoOp>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'cl::sycl::id<1>' lvalue ParmVar {{.*}} '[[_arg_Offset]]' 'cl::sycl::id<1>'
+
+// Check that body of the kernel caller function is included into kernel
+
+// CHECK: CompoundStmt {{.*}}
+// CHECK-NEXT: CXXOperatorCallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)() const' <FunctionToPointerDecay>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}basic-kernel-wrapper.cpp{{.*}})' lvalue <NoOp>
+// CHECK-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}basic-kernel-wrapper.cpp{{.*}})' lvalue Var
+
+// Check kernel wrapper attributes
+
+// CHECK: OpenCLKernelAttr {{.*}} Implicit
+// CHECK: AsmLabelAttr {{.*}} Implicit "{{.*}}kernel_wrapper{{.*}}"
+// CHECK: ArtificialAttr {{.*}} Implicit
Index: clang/test/SemaSYCL/built-in-type-kernel-arg.cpp
===================================================================
--- /dev/null
+++ clang/test/SemaSYCL/built-in-type-kernel-arg.cpp
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -fsycl-is-device -ast-dump %s | FileCheck %s
+
+// This test checks that compiler generates correct initialization for arguments
+// that have struct or built-in type inside the OpenCL kernel
+
+#include "Inputs/sycl.hpp"
+
+template <typename name, typename Func>
+__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
+  kernelFunc();
+}
+
+struct test_struct {
+  int data;
+};
+
+void test(const int some_const) {
+  kernel<class kernel_const>(
+      [=]() {
+        int a = some_const;
+      });
+}
+
+int main() {
+  int data = 5;
+  test_struct s;
+  s.data = data;
+  kernel<class kernel_int>(
+      [=]() {
+        int kernel_data = data;
+      });
+  kernel<class kernel_struct>(
+      [=]() {
+        test_struct k_s;
+        k_s = s;
+      });
+  const int some_const = 10;
+  test(some_const);
+  return 0;
+}
+// Check kernel parameters
+// CHECK: FunctionDecl {{.*}}kernel_const{{.*}} 'void (const int)'
+// CHECK: ParmVarDecl {{.*}} used _arg_ 'const int'
+
+// Check that lambda field of const built-in type is initialized
+// CHECK: VarDecl {{.*}}'(lambda at {{.*}}built-in-type-kernel-arg.cpp{{.*}})'
+// CHECK-NEXT: InitListExpr
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'const int' lvalue ParmVar {{.*}} '_arg_' 'const int'
+
+// Check kernel parameters
+// CHECK: {{.*}}kernel_int{{.*}} 'void (int)'
+// CHECK: ParmVarDecl {{.*}} used _arg_ 'int'
+
+// Check that lambda field of built-in type is initialized
+// CHECK: VarDecl {{.*}}'(lambda at {{.*}}built-in-type-kernel-arg.cpp{{.*}})'
+// CHECK-NEXT: InitListExpr
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_' 'int'
+
+// Check kernel parameters
+// CHECK: {{.*}}kernel_struct{{.*}} 'void (test_struct)'
+// CHECK: ParmVarDecl {{.*}} used _arg_ 'test_struct'
+
+// Check that lambda field of struct type is initialized
+// CHECK: VarDecl {{.*}}'(lambda at {{.*}}built-in-type-kernel-arg.cpp{{.*}})'
+// CHECK-NEXT: InitListExpr
+// CHECK-NEXT: CXXConstructExpr {{.*}}'test_struct'{{.*}}void (const test_struct &)
+// CHECK-NEXT: ImplicitCastExpr {{.*}}'const test_struct' lvalue <NoOp>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'test_struct' lvalue ParmVar {{.*}} '_arg_' 'test_struct'
Index: clang/test/SemaSYCL/fake-accessors.cpp
===================================================================
--- /dev/null
+++ clang/test/SemaSYCL/fake-accessors.cpp
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 -fsycl-is-device -Wno-unused-value -Wno-int-to-void-pointer-cast -ast-dump %s | FileCheck %s
+
+// This test checks that the compiler handles SYCL kenrel parameters of accessor
+// type correctly (i.e. sends a pointer as a separate parameter) and doesn't
+// apply this approach for user types even if they have the same name (e.g. 
+// `accessor` or `foo::cl::sycl::accessor`). We also verify that type aliasing
+// via typedef or using doesn't confuse the compiler.
+
+#include "Inputs/sycl.hpp"
+
+namespace foo {
+namespace cl {
+namespace sycl {
+class accessor {
+public:
+  int field;
+};
+} // namespace sycl
+} // namespace cl
+} // namespace foo
+
+class accessor {
+public:
+  int field;
+};
+
+typedef cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write,
+                           cl::sycl::access::target::global_buffer>
+    MyAccessorTD;
+
+using MyAccessorA = cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write,
+                                       cl::sycl::access::target::global_buffer>;
+
+template <typename name, typename Func>
+__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
+  kernelFunc();
+}
+
+int main() {
+  foo::cl::sycl::accessor acc = {1};
+  accessor acc1 = {1};
+
+  cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write> accessorA;
+  cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write> accessorB;
+  cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write> accessorC;
+    kernel<class fake_accessors>(
+        [=]() {
+          accessorA.use((void*)(acc.field + acc1.field));
+        });
+    kernel<class accessor_typedef>(
+        [=]() {
+          accessorB.use((void*)(acc.field + acc1.field));
+        });
+    kernel<class accessor_alias>(
+        [=]() {
+          accessorC.use((void*)(acc.field + acc1.field));
+        });
+    kernel<class user_types>(
+        [=]() {
+          acc.field + acc1.field;
+        });
+  return 0;
+}
+// CHECK: fake_accessors{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, foo::cl::sycl::accessor, accessor)
+// CHECK: accessor_typedef{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, foo::cl::sycl::accessor, accessor)
+// CHECK: accessor_alias{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, foo::cl::sycl::accessor, accessor)
+// CHECK: user_types{{.*}} 'void (foo::cl::sycl::accessor, accessor)
Index: clang/test/SemaSYCL/mangle-kernel.cpp
===================================================================
--- /dev/null
+++ clang/test/SemaSYCL/mangle-kernel.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -I %S/../Headers/Inputs/include/ -ast-dump %s | FileCheck %s --check-prefix=CHECK-64
+// RUN: %clang_cc1 -fsycl-is-device -triple spir-unknown-unknown-sycldevice -I %S/../Headers/Inputs/include/ -ast-dump %s | FileCheck %s --check-prefix=CHECK-32
+#include "Inputs/sycl.hpp"
+#include <stdlib.h>
+
+// This test checks that SYCL kernel name mangling matches Itanium ABI
+
+template <typename name, typename Func>
+__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) {
+  kernelFunc();
+}
+
+template <typename T>
+class SimpleVadd;
+
+int main() {
+  kernel<class SimpleVadd<int>>(
+      [=](){});
+
+  kernel<class SimpleVadd<double>>(
+      [=](){});
+
+  kernel<class SimpleVadd<size_t>>(
+      [=](){});
+  return 0;
+}
+
+// CHECK: _ZTS10SimpleVaddIiE
+// CHECK: _ZTS10SimpleVaddIdE
+// CHECK-64: _ZTS10SimpleVaddImE
+// CHECK-32: _ZTS10SimpleVaddIjE