Index: include/clang-c/Index.h =================================================================== --- include/clang-c/Index.h +++ include/clang-c/Index.h @@ -326,7 +326,7 @@ * * \param tu the translation unit * - * \param file_name the name of the file. +* \param file_name the name of the file. * * \returns the file handle for the named file in the translation unit \p tu, * or a NULL file handle if the file was not a part of this translation unit. @@ -2309,7 +2309,11 @@ */ CXCursor_OMPDistributeParallelForDirective = 266, - CXCursor_LastStmt = CXCursor_OMPDistributeParallelForDirective, + /** \brief OpenMP distribute parallel for simd directive. + */ + CXCursor_OMPDistributeParallelForSimdDirective = 267, + + CXCursor_LastStmt = CXCursor_OMPDistributeParallelForSimdDirective, /** * \brief Cursor that represents the translation unit itself. Index: include/clang/AST/RecursiveASTVisitor.h =================================================================== --- include/clang/AST/RecursiveASTVisitor.h +++ include/clang/AST/RecursiveASTVisitor.h @@ -2518,6 +2518,9 @@ DEF_TRAVERSE_STMT(OMPDistributeParallelForDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPDistributeParallelForSimdDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + // OpenMP clauses. template bool RecursiveASTVisitor::TraverseOMPClause(OMPClause *C) { Index: include/clang/AST/StmtOpenMP.h =================================================================== --- include/clang/AST/StmtOpenMP.h +++ include/clang/AST/StmtOpenMP.h @@ -770,7 +770,8 @@ T->getStmtClass() == OMPTaskLoopSimdDirectiveClass || T->getStmtClass() == OMPDistributeDirectiveClass || T->getStmtClass() == OMPTargetParallelForDirectiveClass || - T->getStmtClass() == OMPDistributeParallelForDirectiveClass; + T->getStmtClass() == OMPDistributeParallelForDirectiveClass || + T->getStmtClass() == OMPDistributeParallelForSimdDirectiveClass; } }; @@ -2881,6 +2882,77 @@ return T->getStmtClass() == OMPDistributeParallelForDirectiveClass; } }; + +/// This represents '#pragma omp distribute parallel for simd' composite +/// directive. +/// +/// \code +/// #pragma omp distribute parallel for simd private(x) +/// \endcode +/// In this example directive '#pragma omp distribute parallel for simd' has +/// clause 'private' with the variables 'x' +/// +class OMPDistributeParallelForSimdDirective final : public OMPLoopDirective { + friend class ASTStmtReader; + + /// Build directive with the given start and end location. + /// + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending location of the directive. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + OMPDistributeParallelForSimdDirective(SourceLocation StartLoc, + SourceLocation EndLoc, + unsigned CollapsedNum, + unsigned NumClauses) + : OMPLoopDirective(this, OMPDistributeParallelForSimdDirectiveClass, + OMPD_distribute_parallel_for_simd, StartLoc, + EndLoc, CollapsedNum, NumClauses) {} + + /// Build an empty directive. + /// + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + explicit OMPDistributeParallelForSimdDirective(unsigned CollapsedNum, + unsigned NumClauses) + : OMPLoopDirective(this, OMPDistributeParallelForSimdDirectiveClass, + OMPD_distribute_parallel_for_simd, + SourceLocation(), SourceLocation(), CollapsedNum, + NumClauses) {} + +public: + /// Creates directive with a list of \a Clauses. 
+  ///
+  /// \param C AST context.
+  /// \param StartLoc Starting location of the directive kind.
+  /// \param EndLoc Ending Location of the directive.
+  /// \param CollapsedNum Number of collapsed loops.
+  /// \param Clauses List of clauses.
+  /// \param AssociatedStmt Statement, associated with the directive.
+  /// \param Exprs Helper expressions for CodeGen.
+  ///
+  static OMPDistributeParallelForSimdDirective *Create(
+      const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+      unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
+      Stmt *AssociatedStmt, const HelperExprs &Exprs);
+
+  /// Creates an empty directive with the place for \a NumClauses clauses.
+  ///
+  /// \param C AST context.
+  /// \param CollapsedNum Number of collapsed nested loops.
+  /// \param NumClauses Number of clauses.
+  ///
+  static OMPDistributeParallelForSimdDirective *CreateEmpty(
+      const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
+      EmptyShell);
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == OMPDistributeParallelForSimdDirectiveClass;
+  }
+};
+
 } // end namespace clang
 
 #endif
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -428,6 +428,22 @@
   let Documentation = [Undocumented];
 }
 
+def XRayInstrument : InheritableAttr {
+  let Spellings = [GNU<"xray_always_instrument">,
+                   CXX11<"clang", "xray_always_instrument">,
+                   GNU<"xray_never_instrument">,
+                   CXX11<"clang", "xray_never_instrument">];
+  let Subjects = SubjectList<[CXXMethod, ObjCMethod, Function], WarnDiag,
+                             "ExpectedFunctionOrMethod">;
+  let Accessors = [Accessor<"alwaysXRayInstrument",
+                     [GNU<"xray_always_instrument">,
+                      CXX11<"clang", "xray_always_instrument">]>,
+                   Accessor<"neverXRayInstrument",
+                     [GNU<"xray_never_instrument">,
+                      CXX11<"clang", "xray_never_instrument">]>];
+  let Documentation = [XRayDocs];
+}
+
 def TLSModel : InheritableAttr {
   let Spellings = [GCC<"tls_model">];
   let Subjects = SubjectList<[TLSVar], ErrorDiag, "ExpectedTLSVar">;
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -2450,3 +2450,14 @@
 .. _RenderScript: https://developer.android.com/guide/topics/renderscript/compute.html
   }];
 }
+
+def XRayDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+``__attribute__((xray_always_instrument))`` or ``[[clang::xray_always_instrument]]`` is used to mark member functions (in C++), methods (in Objective-C), and free functions (in C, C++, and Objective-C) to be instrumented with XRay. This will cause the function to always have space at the beginning and exit points to allow for runtime patching.
+
+Conversely, ``__attribute__((xray_never_instrument))`` or ``[[clang::xray_never_instrument]]`` will inhibit the insertion of these instrumentation points.
+
+If a function has neither of these attributes, it becomes subject to the XRay heuristics used to determine whether the function should be instrumented.
+  }];
+}
Index: include/clang/Basic/Builtins.h
===================================================================
--- include/clang/Basic/Builtins.h
+++ include/clang/Basic/Builtins.h
@@ -36,7 +36,7 @@
   CXX_LANG = 0x4,   // builtin for cplusplus only.
   OBJC_LANG = 0x8,  // builtin for objective-c and objective-c++
   MS_LANG = 0x10,   // builtin requires MS mode.
-  OCLC_LANG = 0x20, // builtin for OpenCL C only.
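// For reference, a minimal usage sketch of the XRay attributes documented in
// AttrDocs.td above (the function names here are hypothetical). Either the GNU
// spelling or the C++11 spelling registered in Attr.td can be used; a function
// carrying neither attribute is left to the XRay instrumentation heuristics.

[[clang::xray_always_instrument]] void always_traced() {}     // always gets entry/exit sleds
__attribute__((xray_never_instrument)) void never_traced() {} // never gets sleds
void heuristic_candidate() {}                                  // decided by the heuristics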
+ OCLC20_LANG = 0x20, // builtin for OpenCL C only. ALL_LANGUAGES = C_LANG | CXX_LANG | OBJC_LANG, // builtin for all languages. ALL_GNU_LANGUAGES = ALL_LANGUAGES | GNU_LANG, // builtin requires GNU mode. ALL_MS_LANGUAGES = ALL_LANGUAGES | MS_LANG // builtin requires MS mode. Index: include/clang/Basic/Builtins.def =================================================================== --- include/clang/Basic/Builtins.def +++ include/clang/Basic/Builtins.def @@ -1282,34 +1282,34 @@ // OpenCL v2.0 s6.13.16, s9.17.3.5 - Pipe functions. // We need the generic prototype, since the packet type could be anything. -LANGBUILTIN(read_pipe, "i.", "tn", OCLC_LANG) -LANGBUILTIN(write_pipe, "i.", "tn", OCLC_LANG) +LANGBUILTIN(read_pipe, "i.", "tn", OCLC20_LANG) +LANGBUILTIN(write_pipe, "i.", "tn", OCLC20_LANG) -LANGBUILTIN(reserve_read_pipe, "i.", "tn", OCLC_LANG) -LANGBUILTIN(reserve_write_pipe, "i.", "tn", OCLC_LANG) +LANGBUILTIN(reserve_read_pipe, "i.", "tn", OCLC20_LANG) +LANGBUILTIN(reserve_write_pipe, "i.", "tn", OCLC20_LANG) -LANGBUILTIN(commit_write_pipe, "v.", "tn", OCLC_LANG) -LANGBUILTIN(commit_read_pipe, "v.", "tn", OCLC_LANG) +LANGBUILTIN(commit_write_pipe, "v.", "tn", OCLC20_LANG) +LANGBUILTIN(commit_read_pipe, "v.", "tn", OCLC20_LANG) -LANGBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC_LANG) -LANGBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC_LANG) +LANGBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) +LANGBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) -LANGBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC_LANG) -LANGBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC_LANG) +LANGBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) +LANGBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) -LANGBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC_LANG) -LANGBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC_LANG) +LANGBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) +LANGBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) -LANGBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC_LANG) -LANGBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC_LANG) +LANGBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) +LANGBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) -LANGBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC_LANG) -LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC_LANG) +LANGBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC20_LANG) +LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC20_LANG) // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 
-LANGBUILTIN(to_global, "v*v*", "tn", OCLC_LANG) -LANGBUILTIN(to_local, "v*v*", "tn", OCLC_LANG) -LANGBUILTIN(to_private, "v*v*", "tn", OCLC_LANG) +LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG) +LANGBUILTIN(to_local, "v*v*", "tn", OCLC20_LANG) +LANGBUILTIN(to_private, "v*v*", "tn", OCLC20_LANG) #undef BUILTIN #undef LIBBUILTIN Index: include/clang/Basic/BuiltinsX86.def =================================================================== --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -1789,8 +1789,6 @@ TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512_mask, "V8dV8dV8LLiV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_vpermi2varps512_mask, "V16fV16fV16iV16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_vpermi2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermilpd512_mask, "V8dV8dIiV8dUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermilps512_mask, "V16fV16fIiV16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512_mask, "V8dV8dV8LLiV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_vpermilvarps512_mask, "V16fV16fV16iV16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_vpermt2vard512_maskz, "V16iV16iV16iV16iUs","","avx512f") @@ -2071,10 +2069,6 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask, "V2dV2dV2dV2dUcIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permdf512_mask, "V8dV8dIiV8dUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_permdi512_mask, "V8LLiV8LLiIiV8LLiUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_permdf256_mask, "V4dV4dIiV4dUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_permdi256_mask, "V4LLiV4LLiIiV4LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_permvarhi512_mask, "V32sV32sV32sV32sUi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_permvardf512_mask, "V8dV8dV8LLiV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_permvardi512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f") Index: include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- include/clang/Basic/DiagnosticSemaKinds.td +++ include/clang/Basic/DiagnosticSemaKinds.td @@ -7908,8 +7908,6 @@ def warn_opencl_attr_deprecated_ignored : Warning < "%0 attribute is deprecated and ignored in OpenCL version %1">, InGroup; -def err_opencl_builtin_requires_version : Error< - "%0 requires OpenCL version %1%select{| or above}2">; // OpenCL v2.0 s6.13.6 -- Builtin Pipe Functions def err_opencl_builtin_pipe_first_arg : Error< Index: include/clang/Basic/OpenMPKinds.def =================================================================== --- include/clang/Basic/OpenMPKinds.def +++ include/clang/Basic/OpenMPKinds.def @@ -129,6 +129,9 @@ #ifndef OPENMP_DISTRIBUTE_PARALLEL_FOR_CLAUSE #define OPENMP_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name) #endif +#ifndef OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE +#define OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name) +#endif // OpenMP directives. OPENMP_DIRECTIVE(threadprivate) @@ -170,6 +173,7 @@ OPENMP_DIRECTIVE_EXT(declare_target, "declare target") OPENMP_DIRECTIVE_EXT(end_declare_target, "end declare target") OPENMP_DIRECTIVE_EXT(distribute_parallel_for, "distribute parallel for") +OPENMP_DIRECTIVE_EXT(distribute_parallel_for_simd, "distribute parallel for simd") // OpenMP clauses. 
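// A minimal usage sketch of the new composite directive (function, loop, and
// variable names are hypothetical). The clauses shown come from the
// allowed-clause list registered below; a 'distribute' construct is normally
// closely nested inside a 'teams' region. CodeGen currently lowers the new
// directive through the inlined-directive path (see CGStmtOpenMP.cpp further
// down).

void saxpy(int n, float a, const float *x, float *y) {
#pragma omp target map(to : x[:n]) map(tofrom : y[:n])
#pragma omp teams
#pragma omp distribute parallel for simd simdlen(8) dist_schedule(static, 128)
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}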
OPENMP_CLAUSE(if, OMPIfClause) @@ -550,6 +554,25 @@ OPENMP_DISTRIBUTE_PARALLEL_FOR_CLAUSE(copyin) OPENMP_DISTRIBUTE_PARALLEL_FOR_CLAUSE(schedule) +// Clauses allowed for OpenMP directive 'distribute parallel for simd' +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(firstprivate) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(lastprivate) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(collapse) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(dist_schedule) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(if) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(num_threads) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(default) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(proc_bind) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(private) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(shared) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(reduction) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(copyin) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(schedule) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(linear) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(aligned) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(safelen) +OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(simdlen) + #undef OPENMP_TASKLOOP_SIMD_CLAUSE #undef OPENMP_TASKLOOP_CLAUSE #undef OPENMP_LINEAR_KIND @@ -589,3 +612,4 @@ #undef OPENMP_DEFAULTMAP_MODIFIER #undef OPENMP_TARGET_UPDATE_CLAUSE #undef OPENMP_DISTRIBUTE_PARALLEL_FOR_CLAUSE +#undef OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE Index: include/clang/Basic/StmtNodes.td =================================================================== --- include/clang/Basic/StmtNodes.td +++ include/clang/Basic/StmtNodes.td @@ -229,3 +229,4 @@ def OMPTaskLoopSimdDirective : DStmt; def OMPDistributeDirective : DStmt; def OMPDistributeParallelForDirective : DStmt; +def OMPDistributeParallelForSimdDirective : DStmt; Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -776,6 +776,21 @@ def fexec_charset_EQ : Joined<["-"], "fexec-charset=">, Group; def finstrument_functions : Flag<["-"], "finstrument-functions">, Group, Flags<[CC1Option]>, HelpText<"Generate calls to instrument function entry and exit">; + +def fxray_instrument : Flag<["-"], "fxray-instrument">, Group, + Flags<[CC1Option]>, + HelpText<"Generate XRay instrumentation sleds on function entry and exit">; +def fnoxray_instrument : Flag<["-"], "fno-xray-instrument">, Group, + Flags<[CC1Option]>; + +def fxray_instruction_threshold_EQ : + JoinedOrSeparate<["-"], "fxray-instruction-threshold=">, + Group, Flags<[CC1Option]>, + HelpText<"Sets the minimum function size to instrument with XRay">; +def fxray_instruction_threshold_ : + JoinedOrSeparate<["-"], "fxray-instruction-threshold">, + Group, Flags<[CC1Option]>; + def flat__namespace : Flag<["-"], "flat_namespace">; def flax_vector_conversions : Flag<["-"], "flax-vector-conversions">, Group; def flimited_precision_EQ : Joined<["-"], "flimited-precision=">, Group; Index: include/clang/Frontend/CodeGenOptions.def =================================================================== --- include/clang/Frontend/CodeGenOptions.def +++ include/clang/Frontend/CodeGenOptions.def @@ -74,6 +74,14 @@ CODEGENOPT(FunctionSections , 1, 0) ///< Set when -ffunction-sections is enabled. CODEGENOPT(InstrumentFunctions , 1, 0) ///< Set when -finstrument-functions is ///< enabled. + +CODEGENOPT(XRayInstrumentFunctions , 1, 0) ///< Set when -fxray-instrument is + ///< enabled. 
+ +///< Set the minimum number of instructions in a function to determine selective +///< XRay instrumentation. +VALUE_CODEGENOPT(XRayInstructionThreshold , 32, 200) + CODEGENOPT(InstrumentForProfiling , 1, 0) ///< Set when -pg is enabled. CODEGENOPT(LessPreciseFPMAD , 1, 0) ///< Enable less precise MAD instructions to ///< be generated. Index: include/clang/Sema/Sema.h =================================================================== --- include/clang/Sema/Sema.h +++ include/clang/Sema/Sema.h @@ -8206,6 +8206,12 @@ ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc, llvm::DenseMap &VarsWithImplicitDSA); + /// \brief Called on well-formed '\#pragma omp distribute parallel for simd' + /// after parsing of the associated statement. + StmtResult ActOnOpenMPDistributeParallelForSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, + llvm::DenseMap &VarsWithImplicitDSA); /// Checks correctness of linear modifiers. bool CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind, Index: include/clang/Serialization/ASTBitCodes.h =================================================================== --- include/clang/Serialization/ASTBitCodes.h +++ include/clang/Serialization/ASTBitCodes.h @@ -1476,6 +1476,7 @@ STMT_OMP_DISTRIBUTE_DIRECTIVE, STMT_OMP_TARGET_UPDATE_DIRECTIVE, STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE, + STMT_OMP_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE, EXPR_OMP_ARRAY_SECTION, // ARC Index: lib/AST/StmtOpenMP.cpp =================================================================== --- lib/AST/StmtOpenMP.cpp +++ lib/AST/StmtOpenMP.cpp @@ -1105,3 +1105,60 @@ numLoopChildren(CollapsedNum, OMPD_distribute_parallel_for)); return new (Mem) OMPDistributeParallelForDirective(CollapsedNum, NumClauses); } + +OMPDistributeParallelForSimdDirective * +OMPDistributeParallelForSimdDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, + const HelperExprs &Exprs) { + unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForSimdDirective), + llvm::alignOf()); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * Clauses.size() + + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, OMPD_distribute_parallel_for_simd)); + OMPDistributeParallelForSimdDirective *Dir = new (Mem) + OMPDistributeParallelForSimdDirective(StartLoc, EndLoc, CollapsedNum, + Clauses.size()); + Dir->setClauses(Clauses); + Dir->setAssociatedStmt(AssociatedStmt); + Dir->setIterationVariable(Exprs.IterationVarRef); + Dir->setLastIteration(Exprs.LastIteration); + Dir->setCalcLastIteration(Exprs.CalcLastIteration); + Dir->setPreCond(Exprs.PreCond); + Dir->setCond(Exprs.Cond); + Dir->setInit(Exprs.Init); + Dir->setInc(Exprs.Inc); + Dir->setIsLastIterVariable(Exprs.IL); + Dir->setLowerBoundVariable(Exprs.LB); + Dir->setUpperBoundVariable(Exprs.UB); + Dir->setStrideVariable(Exprs.ST); + Dir->setEnsureUpperBound(Exprs.EUB); + Dir->setNextLowerBound(Exprs.NLB); + Dir->setNextUpperBound(Exprs.NUB); + Dir->setNumIterations(Exprs.NumIterations); + Dir->setPrevLowerBoundVariable(Exprs.PrevLB); + Dir->setPrevUpperBoundVariable(Exprs.PrevUB); + Dir->setCounters(Exprs.Counters); + Dir->setPrivateCounters(Exprs.PrivateCounters); + Dir->setInits(Exprs.Inits); + Dir->setUpdates(Exprs.Updates); + Dir->setFinals(Exprs.Finals); + Dir->setPreInits(Exprs.PreInits); + return Dir; +} + +OMPDistributeParallelForSimdDirective * +OMPDistributeParallelForSimdDirective::CreateEmpty(const 
ASTContext &C, + unsigned NumClauses, + unsigned CollapsedNum, + EmptyShell) { + unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForSimdDirective), + llvm::alignOf()); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * NumClauses + + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, OMPD_distribute_parallel_for_simd)); + return new (Mem) + OMPDistributeParallelForSimdDirective(CollapsedNum, NumClauses); +} Index: lib/AST/StmtPrinter.cpp =================================================================== --- lib/AST/StmtPrinter.cpp +++ lib/AST/StmtPrinter.cpp @@ -1159,6 +1159,12 @@ PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPDistributeParallelForSimdDirective( + OMPDistributeParallelForSimdDirective *Node) { + Indent() << "#pragma omp distribute parallel for simd "; + PrintOMPExecutableDirective(Node); +} + //===----------------------------------------------------------------------===// // Expr printing methods. //===----------------------------------------------------------------------===// Index: lib/AST/StmtProfile.cpp =================================================================== --- lib/AST/StmtProfile.cpp +++ lib/AST/StmtProfile.cpp @@ -704,6 +704,11 @@ VisitOMPLoopDirective(S); } +void StmtProfiler::VisitOMPDistributeParallelForSimdDirective( + const OMPDistributeParallelForSimdDirective *S) { + VisitOMPLoopDirective(S); +} + void StmtProfiler::VisitExpr(const Expr *S) { VisitStmt(S); } Index: lib/Basic/Builtins.cpp =================================================================== --- lib/Basic/Builtins.cpp +++ lib/Basic/Builtins.cpp @@ -69,7 +69,8 @@ bool MSModeUnsupported = !LangOpts.MicrosoftExt && (BuiltinInfo.Langs & MS_LANG); bool ObjCUnsupported = !LangOpts.ObjC1 && BuiltinInfo.Langs == OBJC_LANG; - bool OclCUnsupported = !LangOpts.OpenCL && BuiltinInfo.Langs == OCLC_LANG; + bool OclCUnsupported = LangOpts.OpenCLVersion != 200 && + BuiltinInfo.Langs == OCLC20_LANG; return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported && !GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported; } Index: lib/Basic/OpenMPKinds.cpp =================================================================== --- lib/Basic/OpenMPKinds.cpp +++ lib/Basic/OpenMPKinds.cpp @@ -576,6 +576,16 @@ break; } break; + case OMPD_distribute_parallel_for_simd: + switch (CKind) { +#define OPENMP_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name) \ + case OMPC_##Name: \ + return true; +#include "clang/Basic/OpenMPKinds.def" + default: + break; + } + break; case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_unknown: @@ -598,7 +608,9 @@ DKind == OMPD_parallel_for || DKind == OMPD_parallel_for_simd || DKind == OMPD_taskloop || DKind == OMPD_taskloop_simd || DKind == OMPD_distribute || DKind == OMPD_target_parallel_for || - DKind == OMPD_distribute_parallel_for; // TODO add next directives. + DKind == OMPD_distribute_parallel_for || + DKind == OMPD_distribute_parallel_for_simd; + // TODO add next directives. } bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) { @@ -607,7 +619,9 @@ DKind == OMPD_single || DKind == OMPD_parallel_for || DKind == OMPD_parallel_for_simd || DKind == OMPD_parallel_sections || DKind == OMPD_target_parallel_for || - DKind == OMPD_distribute_parallel_for; // TODO add next directives. + DKind == OMPD_distribute_parallel_for || + DKind == OMPD_distribute_parallel_for_simd; + // TODO add next directives. 
} bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) { @@ -618,7 +632,8 @@ return DKind == OMPD_parallel || DKind == OMPD_parallel_for || DKind == OMPD_parallel_for_simd || DKind == OMPD_parallel_sections || DKind == OMPD_target_parallel || DKind == OMPD_target_parallel_for || - DKind == OMPD_distribute_parallel_for; + DKind == OMPD_distribute_parallel_for || + DKind == OMPD_distribute_parallel_for_simd; // TODO add next directives. } @@ -640,13 +655,15 @@ bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_simd || DKind == OMPD_for_simd || - DKind == OMPD_parallel_for_simd || - DKind == OMPD_taskloop_simd; // TODO add next directives. + DKind == OMPD_parallel_for_simd || DKind == OMPD_taskloop_simd || + DKind == OMPD_distribute_parallel_for_simd; + // TODO add next directives. } bool clang::isOpenMPDistributeDirective(OpenMPDirectiveKind Kind) { - return Kind == OMPD_distribute || - Kind == OMPD_distribute_parallel_for; // TODO add next directives. + return Kind == OMPD_distribute || Kind == OMPD_distribute_parallel_for || + Kind == OMPD_distribute_parallel_for_simd; + // TODO add next directives. } bool clang::isOpenMPPrivate(OpenMPClauseKind Kind) { @@ -664,5 +681,6 @@ } bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { - return Kind == OMPD_distribute_parallel_for; + return Kind == OMPD_distribute_parallel_for || + Kind == OMPD_distribute_parallel_for_simd; } Index: lib/Basic/Targets.cpp =================================================================== --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -6479,7 +6479,15 @@ CK_NIAGARA3, CK_NIAGARA4, CK_MYRIAD2_1, - CK_MYRIAD2_2 + CK_MYRIAD2_2, + CK_LEON2, + CK_LEON2_AT697E, + CK_LEON2_AT697F, + CK_LEON3, + CK_LEON3_UT699, + CK_LEON3_GR712RC, + CK_LEON4, + CK_LEON4_GR740 } CPU = CK_GENERIC; enum CPUGeneration { @@ -6500,6 +6508,14 @@ case CK_TSC701: case CK_MYRIAD2_1: case CK_MYRIAD2_2: + case CK_LEON2: + case CK_LEON2_AT697E: + case CK_LEON2_AT697F: + case CK_LEON3: + case CK_LEON3_UT699: + case CK_LEON3_GR712RC: + case CK_LEON4: + case CK_LEON4_GR740: return CG_V8; case CK_V9: case CK_ULTRASPARC: @@ -6533,6 +6549,14 @@ .Case("myriad2", CK_MYRIAD2_1) .Case("myriad2.1", CK_MYRIAD2_1) .Case("myriad2.2", CK_MYRIAD2_2) + .Case("leon2", CK_LEON2) + .Case("at697e", CK_LEON2_AT697E) + .Case("at697f", CK_LEON2_AT697F) + .Case("leon3", CK_LEON3) + .Case("ut699", CK_LEON3_UT699) + .Case("gr712rc", CK_LEON3_GR712RC) + .Case("leon4", CK_LEON4) + .Case("gr740", CK_LEON4_GR740) .Default(CK_GENERIC); } Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -2127,7 +2127,7 @@ return RValue::get( Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); } - // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe commit read and write + // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write // functions case Builtin::BIcommit_read_pipe: case Builtin::BIcommit_write_pipe: @@ -6512,7 +6512,7 @@ for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; for (unsigned i = NumElts; i != 8; ++i) - Indices[i] = NumElts; + Indices[i] = i % NumElts + NumElts; Cmp = CGF.Builder.CreateShuffleVector( Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); } Index: lib/CodeGen/CGStmt.cpp =================================================================== --- lib/CodeGen/CGStmt.cpp +++ lib/CodeGen/CGStmt.cpp @@ -284,6 +284,10 @@ 
EmitOMPDistributeParallelForDirective( cast(*S)); break; + case Stmt::OMPDistributeParallelForSimdDirectiveClass: + EmitOMPDistributeParallelForSimdDirective( + cast(*S)); + break; } } Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -1877,6 +1877,17 @@ }); } +void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( + const OMPDistributeParallelForSimdDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_distribute_parallel_for_simd, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.EmitStmt( + cast(S.getAssociatedStmt())->getCapturedStmt()); + }); +} /// \brief Emit a helper variable and return corresponding lvalue. static LValue EmitOMPHelperVar(CodeGenFunction &CGF, const DeclRefExpr *Helper) { Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -1521,6 +1521,10 @@ /// instrumented with __cyg_profile_func_* calls bool ShouldInstrumentFunction(); + /// ShouldXRayInstrument - Return true if the current function should be + /// instrumented with XRay nop sleds. + bool ShouldXRayInstrumentFunction() const; + /// EmitFunctionInstrumentation - Emit LLVM code to call the specified /// instrumentation function with the current function and the call site, if /// function instrumentation is enabled. @@ -2499,6 +2503,8 @@ void EmitOMPDistributeLoop(const OMPDistributeDirective &S); void EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S); + void EmitOMPDistributeParallelForSimdDirective( + const OMPDistributeParallelForSimdDirective &S); /// Emit outlined function for the target directive. static std::pairaddFnAttr(llvm::Attribute::SafeStack); + // Apply xray attributes to the function (as a string, for now) + if (ShouldXRayInstrumentFunction()) { + if (const auto *XRayAttr = D->getAttr()) { + if (XRayAttr->alwaysXRayInstrument()) { + Fn->addFnAttr("function-instrument", "xray-always"); + } + if (XRayAttr->neverXRayInstrument()) { + Fn->addFnAttr("function-instrument", "xray-never"); + } + } else { + Fn->addFnAttr( + "xray-instruction-threshold", + llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); + } + } + // Pass inline keyword to optimizer if it appears explicitly on any // declaration. 
Also, in the case of -fno-inline attach NoInline // attribute to all functions that are not marked AlwaysInline, or Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -96,6 +96,14 @@ .Case("niagara2", "-Av8plusb") .Case("niagara3", "-Av8plusd") .Case("niagara4", "-Av8plusd") + .Case("leon2", "-Av8") + .Case("at697e", "-Av8") + .Case("at697f", "-Av8") + .Case("leon3", "-Av8") + .Case("ut699", "-Av8") + .Case("gr712rc", "-Av8") + .Case("leon4", "-Av8") + .Case("gr740", "-Av8") .Default("-Av8"); } } @@ -3173,6 +3181,28 @@ return !StaticRuntimes.empty(); } +static bool addXRayRuntime(const ToolChain &TC, const ArgList &Args, + ArgStringList &CmdArgs) { + if (Args.hasArg(options::OPT_fxray_instrument, + options::OPT_fnoxray_instrument, false)) { + CmdArgs.push_back("-whole-archive"); + CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray", false)); + CmdArgs.push_back("-no-whole-archive"); + return true; + } + return false; +} + +static void linkXRayRuntimeDeps(const ToolChain &TC, ArgStringList &CmdArgs) { + CmdArgs.push_back("--no-as-needed"); + CmdArgs.push_back("-lpthread"); + CmdArgs.push_back("-lrt"); + CmdArgs.push_back("-lm"); + CmdArgs.push_back("-latomic"); + if (TC.getTriple().getOS() != llvm::Triple::FreeBSD) + CmdArgs.push_back("-ldl"); +} + static bool areOptimizationsEnabled(const ArgList &Args) { // Find the last -O arg and see if it is non-zero. if (Arg *A = Args.getLastArg(options::OPT_O_Group)) @@ -4574,6 +4604,16 @@ Args.AddAllArgs(CmdArgs, options::OPT_finstrument_functions); + if (Args.hasArg(options::OPT_fxray_instrument, + options::OPT_fnoxray_instrument, false)) { + CmdArgs.push_back("-fxray-instrument"); + if (Arg *A = Args.getLastArg(options::OPT_fxray_instruction_threshold_, + options::OPT_fxray_instruction_threshold_EQ)) { + CmdArgs.push_back("-fxray-instruction-threshold"); + CmdArgs.push_back(A->getValue()); + } + } + addPGOAndCoverageFlags(C, D, Output, Args, CmdArgs); // Add runtime flag for PS4 when PGO or Coverage are enabled. @@ -9373,6 +9413,7 @@ CmdArgs.push_back("--no-demangle"); bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); + bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs); // The profile runtime also needs access to system libraries. 
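// Sketch of how the driver and frontend pieces above fit together (function
// names are hypothetical). Compiling and linking with -fxray-instrument turns
// on sled insertion and pulls in the XRay runtime plus its link dependencies;
// -fxray-instruction-threshold=N (default 200, wired up in
// CompilerInvocation.cpp below) bounds how small a function may be and still
// be instrumented, while the attributes override that heuristic per function.

int tiny(int x) { return x + 1; }            // below the threshold: normally skipped
[[clang::xray_always_instrument]]
int tiny_but_traced(int x) { return x + 1; } // attribute forces sleds regardless of size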
getToolChain().addProfileRTLibs(Args, CmdArgs); @@ -9399,6 +9440,9 @@ if (NeedsSanitizerDeps) linkSanitizerRuntimeDeps(ToolChain, CmdArgs); + if (NeedsXRayDeps) + linkXRayRuntimeDeps(ToolChain, CmdArgs); + bool WantPthread = Args.hasArg(options::OPT_pthread) || Args.hasArg(options::OPT_pthreads); Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -686,6 +686,9 @@ } Opts.InstrumentFunctions = Args.hasArg(OPT_finstrument_functions); + Opts.XRayInstrumentFunctions = Args.hasArg(OPT_fxray_instrument); + Opts.XRayInstructionThreshold = + getLastArgIntValue(Args, OPT_fxray_instruction_threshold_, 200, Diags); Opts.InstrumentForProfiling = Args.hasArg(OPT_pg); Opts.EmitOpenCLArgMetadata = Args.hasArg(OPT_cl_kernel_arg_info); Opts.CompressDebugSections = Args.hasArg(OPT_compress_debug_sections); Index: lib/Headers/avx2intrin.h =================================================================== --- lib/Headers/avx2intrin.h +++ lib/Headers/avx2intrin.h @@ -497,40 +497,42 @@ #define _mm256_shuffle_epi32(a, imm) __extension__ ({ \ (__m256i)__builtin_shufflevector((__v8si)(__m256i)(a), \ - (__v8si)_mm256_setzero_si256(), \ - (imm) & 0x3, ((imm) & 0xc) >> 2, \ - ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ - 4 + (((imm) & 0x03) >> 0), \ - 4 + (((imm) & 0x0c) >> 2), \ - 4 + (((imm) & 0x30) >> 4), \ - 4 + (((imm) & 0xc0) >> 6)); }) + (__v8si)_mm256_undefined_si256(), \ + 0 + (((imm) >> 0) & 0x3), \ + 0 + (((imm) >> 2) & 0x3), \ + 0 + (((imm) >> 4) & 0x3), \ + 0 + (((imm) >> 6) & 0x3), \ + 4 + (((imm) >> 0) & 0x3), \ + 4 + (((imm) >> 2) & 0x3), \ + 4 + (((imm) >> 4) & 0x3), \ + 4 + (((imm) >> 6) & 0x3)); }) #define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \ - (__v16hi)_mm256_setzero_si256(), \ + (__v16hi)_mm256_undefined_si256(), \ 0, 1, 2, 3, \ - 4 + (((imm) & 0x03) >> 0), \ - 4 + (((imm) & 0x0c) >> 2), \ - 4 + (((imm) & 0x30) >> 4), \ - 4 + (((imm) & 0xc0) >> 6), \ + 4 + (((imm) >> 0) & 0x3), \ + 4 + (((imm) >> 2) & 0x3), \ + 4 + (((imm) >> 4) & 0x3), \ + 4 + (((imm) >> 6) & 0x3), \ 8, 9, 10, 11, \ - 12 + (((imm) & 0x03) >> 0), \ - 12 + (((imm) & 0x0c) >> 2), \ - 12 + (((imm) & 0x30) >> 4), \ - 12 + (((imm) & 0xc0) >> 6)); }) + 12 + (((imm) >> 0) & 0x3), \ + 12 + (((imm) >> 2) & 0x3), \ + 12 + (((imm) >> 4) & 0x3), \ + 12 + (((imm) >> 6) & 0x3)); }) #define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \ (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \ - (__v16hi)_mm256_setzero_si256(), \ - 0 + (((imm) & 0x03) >> 0), \ - 0 + (((imm) & 0x0c) >> 2), \ - 0 + (((imm) & 0x30) >> 4), \ - 0 + (((imm) & 0xc0) >> 6), \ + (__v16hi)_mm256_undefined_si256(), \ + 0 + (((imm) >> 0) & 0x3), \ + 0 + (((imm) >> 2) & 0x3), \ + 0 + (((imm) >> 4) & 0x3), \ + 0 + (((imm) >> 6) & 0x3), \ 4, 5, 6, 7, \ - 8 + (((imm) & 0x03) >> 0), \ - 8 + (((imm) & 0x0c) >> 2), \ - 8 + (((imm) & 0x30) >> 4), \ - 8 + (((imm) & 0xc0) >> 6), \ + 8 + (((imm) >> 0) & 0x3), \ + 8 + (((imm) >> 2) & 0x3), \ + 8 + (((imm) >> 4) & 0x3), \ + 8 + (((imm) >> 6) & 0x3), \ 12, 13, 14, 15); }) static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -940,9 +942,11 @@ #define _mm256_permute4x64_pd(V, M) __extension__ ({ \ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V), \ - (__v4df)_mm256_setzero_pd(), \ - (M) & 0x3, ((M) & 0xc) >> 2, \ - ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) + (__v4df)_mm256_undefined_pd(), \ + ((M) >> 0) & 0x3, \ + ((M) >> 2) 
& 0x3, \ + ((M) >> 4) & 0x3, \ + ((M) >> 6) & 0x3); }) static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) @@ -952,16 +956,18 @@ #define _mm256_permute4x64_epi64(V, M) __extension__ ({ \ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V), \ - (__v4di)_mm256_setzero_si256(), \ - (M) & 0x3, ((M) & 0xc) >> 2, \ - ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) + (__v4di)_mm256_undefined_si256(), \ + ((M) >> 0) & 0x3, \ + ((M) >> 2) & 0x3, \ + ((M) >> 4) & 0x3, \ + ((M) >> 6) & 0x3); }) #define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); }) #define _mm256_extracti128_si256(V, M) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v4di)(__m256i)(V), \ - (__v4di)_mm256_setzero_si256(), \ + (__v4di)_mm256_undefined_si256(), \ (((M) & 1) ? 2 : 0), \ (((M) & 1) ? 3 : 1) ); }) Index: lib/Headers/avx512bwintrin.h =================================================================== --- lib/Headers/avx512bwintrin.h +++ lib/Headers/avx512bwintrin.h @@ -1613,27 +1613,27 @@ #define _mm512_shufflehi_epi16(A, imm) __extension__ ({ \ (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \ - (__v32hi)_mm512_setzero_hi(), \ + (__v32hi)_mm512_undefined_epi32(), \ 0, 1, 2, 3, \ - 4 + (((imm) & 0x03) >> 0), \ - 4 + (((imm) & 0x0c) >> 2), \ - 4 + (((imm) & 0x30) >> 4), \ - 4 + (((imm) & 0xc0) >> 6), \ + 4 + (((imm) >> 0) & 0x3), \ + 4 + (((imm) >> 2) & 0x3), \ + 4 + (((imm) >> 4) & 0x3), \ + 4 + (((imm) >> 6) & 0x3), \ 8, 9, 10, 11, \ - 12 + (((imm) & 0x03) >> 0), \ - 12 + (((imm) & 0x0c) >> 2), \ - 12 + (((imm) & 0x30) >> 4), \ - 12 + (((imm) & 0xc0) >> 6), \ + 12 + (((imm) >> 0) & 0x3), \ + 12 + (((imm) >> 2) & 0x3), \ + 12 + (((imm) >> 4) & 0x3), \ + 12 + (((imm) >> 6) & 0x3), \ 16, 17, 18, 19, \ - 20 + (((imm) & 0x03) >> 0), \ - 20 + (((imm) & 0x0c) >> 2), \ - 20 + (((imm) & 0x30) >> 4), \ - 20 + (((imm) & 0xc0) >> 6), \ + 20 + (((imm) >> 0) & 0x3), \ + 20 + (((imm) >> 2) & 0x3), \ + 20 + (((imm) >> 4) & 0x3), \ + 20 + (((imm) >> 6) & 0x3), \ 24, 25, 26, 27, \ - 28 + (((imm) & 0x03) >> 0), \ - 28 + (((imm) & 0x0c) >> 2), \ - 28 + (((imm) & 0x30) >> 4), \ - 28 + (((imm) & 0xc0) >> 6)); }) + 28 + (((imm) >> 0) & 0x3), \ + 28 + (((imm) >> 2) & 0x3), \ + 28 + (((imm) >> 4) & 0x3), \ + 28 + (((imm) >> 6) & 0x3)); }) #define _mm512_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ @@ -1649,26 +1649,26 @@ #define _mm512_shufflelo_epi16(A, imm) __extension__ ({ \ (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \ - (__v32hi)_mm512_setzero_hi(), \ - 0 + (((imm) & 0x03) >> 0), \ - 0 + (((imm) & 0x0c) >> 2), \ - 0 + (((imm) & 0x30) >> 4), \ - 0 + (((imm) & 0xc0) >> 6), \ + (__v32hi)_mm512_undefined_epi32(), \ + 0 + (((imm) >> 0) & 0x3), \ + 0 + (((imm) >> 2) & 0x3), \ + 0 + (((imm) >> 4) & 0x3), \ + 0 + (((imm) >> 6) & 0x3), \ 4, 5, 6, 7, \ - 8 + (((imm) & 0x03) >> 0), \ - 8 + (((imm) & 0x0c) >> 2), \ - 8 + (((imm) & 0x30) >> 4), \ - 8 + (((imm) & 0xc0) >> 6), \ + 8 + (((imm) >> 0) & 0x3), \ + 8 + (((imm) >> 2) & 0x3), \ + 8 + (((imm) >> 4) & 0x3), \ + 8 + (((imm) >> 6) & 0x3), \ 12, 13, 14, 15, \ - 16 + (((imm) & 0x03) >> 0), \ - 16 + (((imm) & 0x0c) >> 2), \ - 16 + (((imm) & 0x30) >> 4), \ - 16 + (((imm) & 0xc0) >> 6), \ + 16 + (((imm) >> 0) & 0x3), \ + 16 + (((imm) >> 2) & 0x3), \ + 16 + (((imm) >> 4) & 0x3), \ + 16 + (((imm) >> 6) & 0x3), \ 20, 21, 22, 23, \ - 24 + (((imm) & 0x03) >> 0), \ - 24 + (((imm) & 0x0c) >> 2), \ - 24 + 
(((imm) & 0x30) >> 4), \ - 24 + (((imm) & 0xc0) >> 6), \ + 24 + (((imm) >> 0) & 0x3), \ + 24 + (((imm) >> 2) & 0x3), \ + 24 + (((imm) >> 4) & 0x3), \ + 24 + (((imm) >> 6) & 0x3), \ 28, 29, 30, 31); }) Index: lib/Headers/avx512fintrin.h =================================================================== --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -5950,6 +5950,7 @@ #define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \ (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) + static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, __mmask16 __U, __m512i __B) @@ -6540,34 +6541,56 @@ } #define _mm512_permute_pd(X, C) __extension__ ({ \ - (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1); }) + (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ + (__v8df)_mm512_undefined_pd(), \ + 0 + (((C) >> 0) & 0x1), \ + 0 + (((C) >> 1) & 0x1), \ + 2 + (((C) >> 2) & 0x1), \ + 2 + (((C) >> 3) & 0x1), \ + 4 + (((C) >> 4) & 0x1), \ + 4 + (((C) >> 5) & 0x1), \ + 6 + (((C) >> 6) & 0x1), \ + 6 + (((C) >> 7) & 0x1)); }) #define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \ - (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U)); }) + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permute_pd((X), (C)), \ + (__v8df)(__m512d)(W)); }) #define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \ - (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U)); }) + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permute_pd((X), (C)), \ + (__v8df)_mm512_setzero_pd()); }) #define _mm512_permute_ps(X, C) __extension__ ({ \ - (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \ - (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1); }) + (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \ + (__v16sf)_mm512_undefined_ps(), \ + 0 + (((C) >> 0) & 0x3), \ + 0 + (((C) >> 2) & 0x3), \ + 0 + (((C) >> 4) & 0x3), \ + 0 + (((C) >> 6) & 0x3), \ + 4 + (((C) >> 0) & 0x3), \ + 4 + (((C) >> 2) & 0x3), \ + 4 + (((C) >> 4) & 0x3), \ + 4 + (((C) >> 6) & 0x3), \ + 8 + (((C) >> 0) & 0x3), \ + 8 + (((C) >> 2) & 0x3), \ + 8 + (((C) >> 4) & 0x3), \ + 8 + (((C) >> 6) & 0x3), \ + 12 + (((C) >> 0) & 0x3), \ + 12 + (((C) >> 2) & 0x3), \ + 12 + (((C) >> 4) & 0x3), \ + 12 + (((C) >> 6) & 0x3)); }) #define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \ - (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \ - (__v16sf)(__m512)(W), \ - (__mmask16)(U)); }) + (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_permute_ps((X), (C)), \ + (__v16sf)(__m512)(W)); }) #define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \ - (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U)); }) + (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_permute_ps((X), (C)), \ + (__v16sf)_mm512_setzero_ps()); }) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_permutevar_pd (__m512d __A, __m512i __C) @@ -7144,23 +7167,27 @@ (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U)); }) -#define _mm512_shuffle_pd(M, V, imm) __extension__ ({ \ - (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \ - (__v8df)(__m512d)(V), (int)(imm), \ - (__v8df)_mm512_undefined_pd(), 
\ - (__mmask8)-1); }) - -#define _mm512_mask_shuffle_pd(W, U, M, V, imm) __extension__ ({ \ - (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \ - (__v8df)(__m512d)(V), (int)(imm), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U)); }) - -#define _mm512_maskz_shuffle_pd(U, M, V, imm) __extension__ ({ \ - (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \ - (__v8df)(__m512d)(V), (int)(imm), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U)); }) +#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \ + (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + 0 + (((M) >> 0) & 0x1), \ + 8 + (((M) >> 1) & 0x1), \ + 2 + (((M) >> 2) & 0x1), \ + 10 + (((M) >> 3) & 0x1), \ + 4 + (((M) >> 4) & 0x1), \ + 12 + (((M) >> 5) & 0x1), \ + 6 + (((M) >> 6) & 0x1), \ + 14 + (((M) >> 7) & 0x1)); }) + +#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ + (__v8df)(__m512d)(W)); }) + +#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ + (__v8df)_mm512_setzero_pd()); }) #define _mm512_shuffle_ps(M, V, imm) __extension__ ({ \ (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \ @@ -8656,35 +8683,49 @@ -(__v2df)(__m128d)(Y), \ (__mmask8)(U), (int)(R)); }) -#define _mm512_permutex_pd(X, M) __extension__ ({ \ - (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1); }) - -#define _mm512_mask_permutex_pd(W, U, X, M) __extension__ ({ \ - (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ - (__v8df)(__m512d)(W), \ - (__mmask8)(U)); }) - -#define _mm512_maskz_permutex_pd(U, X, M) __extension__ ({ \ - (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U)); }) - -#define _mm512_permutex_epi64(X, I) __extension__ ({ \ - (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ - (__v8di)_mm512_undefined_epi32(), \ - (__mmask8)-1); }) - -#define _mm512_mask_permutex_epi64(W, M, X, I) __extension__ ({ \ - (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(M)); }) - -#define _mm512_maskz_permutex_epi64(M, X, I) __extension__ ({ \ - (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(M)); }) +#define _mm512_permutex_pd(X, C) __extension__ ({ \ + (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ + (__v8df)_mm512_undefined_pd(), \ + 0 + (((C) >> 0) & 0x3), \ + 0 + (((C) >> 2) & 0x3), \ + 0 + (((C) >> 4) & 0x3), \ + 0 + (((C) >> 6) & 0x3), \ + 4 + (((C) >> 0) & 0x3), \ + 4 + (((C) >> 2) & 0x3), \ + 4 + (((C) >> 4) & 0x3), \ + 4 + (((C) >> 6) & 0x3)); }) + +#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permutex_pd((X), (C)), \ + (__v8df)(__m512d)(W)); }) + +#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \ + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_permutex_pd((X), (C)), \ + (__v8df)_mm512_setzero_pd()); }) + +#define _mm512_permutex_epi64(X, C) __extension__ ({ \ + (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \ + (__v8di)_mm512_undefined_epi32(), \ + 0 + (((C) >> 0) & 0x3), \ + 0 + (((C) >> 2) & 0x3), \ + 0 
+ (((C) >> 4) & 0x3), \ + 0 + (((C) >> 6) & 0x3), \ + 4 + (((C) >> 0) & 0x3), \ + 4 + (((C) >> 2) & 0x3), \ + 4 + (((C) >> 4) & 0x3), \ + 4 + (((C) >> 6) & 0x3)); }) + +#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_permutex_epi64((X), (C)), \ + (__v8di)(__m512i)(W)); }) + +#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \ + (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ + (__v8di)_mm512_permutex_epi64((X), (C)), \ + (__v8di)_mm512_setzero_si512()); }) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_permutexvar_pd (__m512i __X, __m512d __Y) @@ -9028,23 +9069,23 @@ #define _mm512_shuffle_epi32(A, I) __extension__ ({ \ (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ - (__v16si)_mm512_setzero_si512(), \ - 0 + (((I) & 0x03) >> 0), \ - 0 + (((I) & 0x0c) >> 2), \ - 0 + (((I) & 0x30) >> 4), \ - 0 + (((I) & 0xc0) >> 6), \ - 4 + (((I) & 0x03) >> 0), \ - 4 + (((I) & 0x0c) >> 2), \ - 4 + (((I) & 0x30) >> 4), \ - 4 + (((I) & 0xc0) >> 6), \ - 8 + (((I) & 0x03) >> 0), \ - 8 + (((I) & 0x0c) >> 2), \ - 8 + (((I) & 0x30) >> 4), \ - 8 + (((I) & 0xc0) >> 6), \ - 12 + (((I) & 0x03) >> 0), \ - 12 + (((I) & 0x0c) >> 2), \ - 12 + (((I) & 0x30) >> 4), \ - 12 + (((I) & 0xc0) >> 6)); }) + (__v16si)_mm512_undefined_epi32(), \ + 0 + (((I) >> 0) & 0x3), \ + 0 + (((I) >> 2) & 0x3), \ + 0 + (((I) >> 4) & 0x3), \ + 0 + (((I) >> 6) & 0x3), \ + 4 + (((I) >> 0) & 0x3), \ + 4 + (((I) >> 2) & 0x3), \ + 4 + (((I) >> 4) & 0x3), \ + 4 + (((I) >> 6) & 0x3), \ + 8 + (((I) >> 0) & 0x3), \ + 8 + (((I) >> 2) & 0x3), \ + 8 + (((I) >> 4) & 0x3), \ + 8 + (((I) >> 6) & 0x3), \ + 12 + (((I) >> 0) & 0x3), \ + 12 + (((I) >> 2) & 0x3), \ + 12 + (((I) >> 4) & 0x3), \ + 12 + (((I) >> 6) & 0x3)); }) #define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ Index: lib/Headers/avx512vlintrin.h =================================================================== --- lib/Headers/avx512vlintrin.h +++ lib/Headers/avx512vlintrin.h @@ -7374,51 +7374,45 @@ (__v4di)_mm256_setzero_si256(), \ (__mmask8)(U)); }) -#define _mm_mask_shuffle_pd(W, U, A, B, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_shufpd128_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(imm), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) +#define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ + (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm_shuffle_pd((A), (B), (M)), \ + (__v2df)(__m128d)(W)); }) -#define _mm_maskz_shuffle_pd(U, A, B, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_shufpd128_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(imm), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) +#define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ + (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm_shuffle_pd((A), (B), (M)), \ + (__v2df)_mm_setzero_pd()); }) -#define _mm256_mask_shuffle_pd(W, U, A, B, imm) __extension__ ({ \ - (__m256d)__builtin_ia32_shufpd256_mask((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(imm), \ - (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) +#define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ + (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ + (__v4df)(__m256d)(W)); }) -#define _mm256_maskz_shuffle_pd(U, A, B, imm) __extension__ ({ \ - (__m256d)__builtin_ia32_shufpd256_mask((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), 
(int)(imm), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) +#define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ + (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ + (__v4df)_mm256_setzero_pd()); }) -#define _mm_mask_shuffle_ps(W, U, A, B, imm) __extension__ ({ \ - (__m128)__builtin_ia32_shufps128_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(imm), \ - (__v4sf)(__m128)(W), (__mmask8)(U)); }) +#define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ + (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ + (__v4sf)(__m128)(W)); }) -#define _mm_maskz_shuffle_ps(U, A, B, imm) __extension__ ({ \ - (__m128)__builtin_ia32_shufps128_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(imm), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) +#define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ + (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ + (__v4sf)_mm_setzero_ps()); }) -#define _mm256_mask_shuffle_ps(W, U, A, B, imm) __extension__ ({ \ - (__m256)__builtin_ia32_shufps256_mask((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(imm), \ - (__v8sf)(__m256)(W), (__mmask8)(U)); }) +#define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ + (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ + (__v8sf)(__m256)(W)); }) -#define _mm256_maskz_shuffle_ps(U, A, B, imm) __extension__ ({ \ - (__m256)__builtin_ia32_shufps256_mask((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(imm), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)); }) +#define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ + (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ + (__v8sf)_mm256_setzero_ps()); }) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rsqrt14_pd (__m128d __A) @@ -8806,35 +8800,37 @@ (__v8si)(__m256i)(index), \ (__mmask8)(mask), (int)(scale)); }) -#define _mm256_mask_permutex_pd(W, U, X, imm) __extension__ ({ \ - (__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(imm), \ - (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) - -#define _mm256_maskz_permutex_pd(U, X, imm) __extension__ ({ \ - (__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(imm), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) +#define _mm256_permutex_pd(X, C) __extension__ ({ \ + (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \ + (__v4df)_mm256_undefined_pd(), \ + ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ + ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) -#define _mm256_permutex_pd(X, M) __extension__ ({ \ - (__m256d)__builtin_ia32_permdf256_mask((__v4df)(__m256d)(X), (int)(M), \ - (__v4df)_mm256_undefined_pd(), \ - (__mmask8)-1); }) - -#define _mm256_mask_permutex_epi64(W, M, X, I) __extension__ ({ \ - (__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \ - (__v4di)(__m256i)(W), \ - (__mmask8)(M)); }) +#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \ + (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm256_permutex_pd((X), (C)), \ + (__v4df)(__m256d)(W)); }) -#define _mm256_maskz_permutex_epi64(M, X, I) __extension__ ({ \ - (__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(M)); }) +#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \ + (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + 
(__v4df)_mm256_permutex_pd((X), (C)), \ + (__v4df)_mm256_setzero_pd()); }) -#define _mm256_permutex_epi64(X, I) __extension__ ({ \ - (__m256i)__builtin_ia32_permdi256_mask((__v4di)(__m256i)(X), (int)(I), \ - (__v4di)_mm256_undefined_si256(), \ - (__mmask8)-1); }) +#define _mm256_permutex_epi64(X, C) __extension__ ({ \ + (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \ + (__v4di)_mm256_undefined_si256(), \ + ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ + ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) + +#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_permutex_epi64((X), (C)), \ + (__v4di)(__m256i)(W)); }) + +#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \ + (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ + (__v4di)_mm256_permutex_epi64((X), (C)), \ + (__v4di)_mm256_setzero_si256()); }) static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutexvar_pd (__m256i __X, __m256d __Y) Index: lib/Headers/avxintrin.h =================================================================== --- lib/Headers/avxintrin.h +++ lib/Headers/avxintrin.h @@ -999,8 +999,8 @@ /// \returns A 128-bit vector of [2 x double] containing the copied values. #define _mm_permute_pd(A, C) __extension__ ({ \ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \ - (__v2df)_mm_setzero_pd(), \ - (C) & 0x1, ((C) & 0x2) >> 1); }) + (__v2df)_mm_undefined_pd(), \ + ((C) >> 0) & 0x1, ((C) >> 1) & 0x1); }) /// \brief Copies the values in a 256-bit vector of [4 x double] as /// specified by the immediate integer operand. @@ -1040,10 +1040,11 @@ /// \returns A 256-bit vector of [4 x double] containing the copied values. #define _mm256_permute_pd(A, C) __extension__ ({ \ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \ - (__v4df)_mm256_setzero_pd(), \ - (C) & 0x1, ((C) & 0x2) >> 1, \ - 2 + (((C) & 0x4) >> 2), \ - 2 + (((C) & 0x8) >> 3)); }) + (__v4df)_mm256_undefined_pd(), \ + 0 + (((C) >> 0) & 0x1), \ + 0 + (((C) >> 1) & 0x1), \ + 2 + (((C) >> 2) & 0x1), \ + 2 + (((C) >> 3) & 0x1)); }) /// \brief Copies the values in a 128-bit vector of [4 x float] as /// specified by the immediate integer operand. @@ -1099,9 +1100,9 @@ /// \returns A 128-bit vector of [4 x float] containing the copied values. #define _mm_permute_ps(A, C) __extension__ ({ \ (__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \ - (__v4sf)_mm_setzero_ps(), \ - (C) & 0x3, ((C) & 0xc) >> 2, \ - ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) + (__v4sf)_mm_undefined_ps(), \ + ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ + ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) /// \brief Copies the values in a 256-bit vector of [8 x float] as /// specified by the immediate integer operand. @@ -1193,13 +1194,15 @@ /// \returns A 256-bit vector of [8 x float] containing the copied values. 
#define _mm256_permute_ps(A, C) __extension__ ({ \ (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (C) & 0x3, ((C) & 0xc) >> 2, \ - ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \ - 4 + (((C) & 0x03) >> 0), \ - 4 + (((C) & 0x0c) >> 2), \ - 4 + (((C) & 0x30) >> 4), \ - 4 + (((C) & 0xc0) >> 6)); }) + (__v8sf)_mm256_undefined_ps(), \ + 0 + (((C) >> 0) & 0x3), \ + 0 + (((C) >> 2) & 0x3), \ + 0 + (((C) >> 4) & 0x3), \ + 0 + (((C) >> 6) & 0x3), \ + 4 + (((C) >> 0) & 0x3), \ + 4 + (((C) >> 2) & 0x3), \ + 4 + (((C) >> 4) & 0x3), \ + 4 + (((C) >> 6) & 0x3)); }) /// \brief Permutes 128-bit data values stored in two 256-bit vectors of /// [4 x double], as specified by the immediate integer operand. @@ -1538,16 +1541,16 @@ /// 11: Bits [127:96] and [255:224] are copied from the selected operand. /// \returns A 256-bit vector of [8 x float] containing the shuffled values. #define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \ - (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \ - (__v8sf)(__m256)(b), \ - (mask) & 0x3, \ - ((mask) & 0xc) >> 2, \ - (((mask) & 0x30) >> 4) + 8, \ - (((mask) & 0xc0) >> 6) + 8, \ - ((mask) & 0x3) + 4, \ - (((mask) & 0xc) >> 2) + 4, \ - (((mask) & 0x30) >> 4) + 12, \ - (((mask) & 0xc0) >> 6) + 12); }) + (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \ + (__v8sf)(__m256)(b), \ + 0 + (((mask) >> 0) & 0x3), \ + 0 + (((mask) >> 2) & 0x3), \ + 8 + (((mask) >> 4) & 0x3), \ + 8 + (((mask) >> 6) & 0x3), \ + 4 + (((mask) >> 0) & 0x3), \ + 4 + (((mask) >> 2) & 0x3), \ + 12 + (((mask) >> 4) & 0x3), \ + 12 + (((mask) >> 6) & 0x3)); }) /// \brief Selects four double-precision values from the 256-bit operands of /// [4 x double], as specified by the immediate value operand. The selected @@ -1591,12 +1594,12 @@ /// destination. /// \returns A 256-bit vector of [4 x double] containing the shuffled values. #define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \ - (__v4df)(__m256d)(b), \ - (mask) & 0x1, \ - (((mask) & 0x2) >> 1) + 4, \ - (((mask) & 0x4) >> 2) + 2, \ - (((mask) & 0x8) >> 3) + 6); }) + (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \ + (__v4df)(__m256d)(b), \ + 0 + (((mask) >> 0) & 0x1), \ + 4 + (((mask) >> 1) & 0x1), \ + 2 + (((mask) >> 2) & 0x1), \ + 6 + (((mask) >> 3) & 0x1)); }) /* Compare */ #define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ @@ -2814,7 +2817,7 @@ #define _mm256_extractf128_ps(V, M) __extension__ ({ \ (__m128)__builtin_shufflevector( \ (__v8sf)(__m256)(V), \ - (__v8sf)(_mm256_setzero_ps()), \ + (__v8sf)(_mm256_undefined_ps()), \ (((M) & 1) ? 4 : 0), \ (((M) & 1) ? 5 : 1), \ (((M) & 1) ? 6 : 2), \ @@ -2823,14 +2826,14 @@ #define _mm256_extractf128_pd(V, M) __extension__ ({ \ (__m128d)__builtin_shufflevector( \ (__v4df)(__m256d)(V), \ - (__v4df)(_mm256_setzero_pd()), \ + (__v4df)(_mm256_undefined_pd()), \ (((M) & 1) ? 2 : 0), \ (((M) & 1) ? 3 : 1) );}) #define _mm256_extractf128_si256(V, M) __extension__ ({ \ (__m128i)__builtin_shufflevector( \ (__v4di)(__m256i)(V), \ - (__v4di)(_mm256_setzero_si256()), \ + (__v4di)(_mm256_undefined_si256()), \ (((M) & 1) ? 2 : 0), \ (((M) & 1) ? 
3 : 1) );}) Index: lib/Headers/emmintrin.h =================================================================== --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -2300,25 +2300,25 @@ #define _mm_shuffle_epi32(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \ - (__v4si)_mm_setzero_si128(), \ - (imm) & 0x3, ((imm) & 0xc) >> 2, \ - ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); }) + (__v4si)_mm_undefined_si128(), \ + ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \ + ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3); }) #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ - (__v8hi)_mm_setzero_si128(), \ - (imm) & 0x3, ((imm) & 0xc) >> 2, \ - ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ + (__v8hi)_mm_undefined_si128(), \ + ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \ + ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3, \ 4, 5, 6, 7); }) #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ - (__v8hi)_mm_setzero_si128(), \ + (__v8hi)_mm_undefined_si128(), \ 0, 1, 2, 3, \ - 4 + (((imm) & 0x03) >> 0), \ - 4 + (((imm) & 0x0c) >> 2), \ - 4 + (((imm) & 0x30) >> 4), \ - 4 + (((imm) & 0xc0) >> 6)); }) + 4 + (((imm) >> 0) & 0x3), \ + 4 + (((imm) >> 2) & 0x3), \ + 4 + (((imm) >> 4) & 0x3), \ + 4 + (((imm) >> 6) & 0x3)); }) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b) @@ -2406,7 +2406,8 @@ #define _mm_shuffle_pd(a, b, i) __extension__ ({ \ (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ - (i) & 1, (((i) & 2) >> 1) + 2); }) + 0 + (((i) >> 0) & 0x1), \ + 2 + (((i) >> 1) & 0x1)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a) Index: lib/Headers/xmmintrin.h =================================================================== --- lib/Headers/xmmintrin.h +++ lib/Headers/xmmintrin.h @@ -2496,9 +2496,10 @@ /// \returns A 128-bit vector of [4 x float] containing the shuffled values. 
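// A short sketch (illustrative, not part of xmmintrin.h) of the two-source decoding used below: the low two fields of the mask select from the first operand (shufflevector indices 0..3) and the high two fields select from the second operand (indices 4..7). For example:
//
//   __m128 r = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 1, 0));  // mask 0xE4
//   // indices: 0 + ((0xE4 >> 0) & 0x3) = 0, 0 + ((0xE4 >> 2) & 0x3) = 1,
//   //          4 + ((0xE4 >> 4) & 0x3) = 6, 4 + ((0xE4 >> 6) & 0x3) = 7
//   // so r = { a[0], a[1], b[2], b[3] }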
#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \ (__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ - (mask) & 0x3, ((mask) & 0xc) >> 2, \ - (((mask) & 0x30) >> 4) + 4, \ - (((mask) & 0xc0) >> 6) + 4); }) + 0 + (((mask) >> 0) & 0x3), \ + 0 + (((mask) >> 2) & 0x3), \ + 4 + (((mask) >> 4) & 0x3), \ + 4 + (((mask) >> 6) & 0x3)); }) /// \brief Unpacks the high-order (index 2,3) values from two 128-bit vectors of /// [4 x float] and interleaves them into a 128-bit vector of [4 x Index: lib/Parse/ParseOpenMP.cpp =================================================================== --- lib/Parse/ParseOpenMP.cpp +++ lib/Parse/ParseOpenMP.cpp @@ -90,6 +90,8 @@ { OMPD_declare, OMPD_target, OMPD_declare_target }, { OMPD_distribute, OMPD_parallel, OMPD_distribute_parallel }, { OMPD_distribute_parallel, OMPD_for, OMPD_distribute_parallel_for }, + { OMPD_distribute_parallel_for, OMPD_simd, + OMPD_distribute_parallel_for_simd }, { OMPD_end, OMPD_declare, OMPD_end_declare }, { OMPD_end_declare, OMPD_target, OMPD_end_declare_target }, { OMPD_target, OMPD_data, OMPD_target_data }, @@ -734,6 +736,7 @@ case OMPD_end_declare_target: case OMPD_target_update: case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: Diag(Tok, diag::err_omp_unexpected_directive) << getOpenMPDirectiveName(DKind); break; @@ -765,7 +768,8 @@ /// 'taskgroup' | 'teams' | 'taskloop' | 'taskloop simd' | /// 'distribute' | 'target enter data' | 'target exit data' | /// 'target parallel' | 'target parallel for' | -/// 'target update' | 'distribute parallel for' {clause} +/// 'target update' | 'distribute parallel for' | +/// 'distribute parallel for simd' {clause} /// annot_pragma_openmp_end /// StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( @@ -870,7 +874,8 @@ case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_distribute: - case OMPD_distribute_parallel_for: { + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: { ConsumeToken(); // Parse directive name of the 'critical' directive if any. if (DKind == OMPD_critical) { Index: lib/Sema/SemaChecking.cpp =================================================================== --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -454,7 +454,7 @@ return false; } - +// \brief OpenCL v2.0 s6.13.9 - Address space qualifier functions. // \brief Performs semantic analysis for the to_global/local/private call. // \param S Reference to the semantic analyzer. // \param BuiltinID ID of the builtin function. @@ -462,13 +462,6 @@ // \return True if a semantic error has been found, false otherwise. static bool SemaOpenCLBuiltinToAddr(Sema &S, unsigned BuiltinID, CallExpr *Call) { - // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
- if (S.getLangOpts().OpenCLVersion < 200) { - S.Diag(Call->getLocStart(), diag::err_opencl_builtin_requires_version) - << Call->getDirectCallee() << "2.0" << 1 << Call->getSourceRange(); - return true; - } - if (Call->getNumArgs() != 1) { S.Diag(Call->getLocStart(), diag::err_opencl_builtin_to_addr_arg_num) << Call->getDirectCallee() << Call->getSourceRange(); @@ -801,6 +794,7 @@ TheCall->setType(Context.VoidPtrTy); break; + // OpenCL v2.0, s6.13.16 - Pipe functions case Builtin::BIread_pipe: case Builtin::BIwrite_pipe: // Since those two functions are declared with var args, we need a semantic @@ -1536,7 +1530,6 @@ case X86::BI__builtin_ia32_psrlwi512_mask: case X86::BI__builtin_ia32_psrlwi128_mask: case X86::BI__builtin_ia32_psrlwi256_mask: - case X86::BI__builtin_ia32_vpermilpd512_mask: case X86::BI__builtin_ia32_psradi128_mask: case X86::BI__builtin_ia32_psradi256_mask: case X86::BI__builtin_ia32_psradi512_mask: @@ -1549,10 +1542,6 @@ case X86::BI__builtin_ia32_psllqi128_mask: case X86::BI__builtin_ia32_psllqi256_mask: case X86::BI__builtin_ia32_psllqi512_mask: - case X86::BI__builtin_ia32_permdf512_mask: - case X86::BI__builtin_ia32_permdi512_mask: - case X86::BI__builtin_ia32_permdf256_mask: - case X86::BI__builtin_ia32_permdi256_mask: case X86::BI__builtin_ia32_fpclasspd128_mask: case X86::BI__builtin_ia32_fpclasspd256_mask: case X86::BI__builtin_ia32_fpclassps128_mask: @@ -1561,7 +1550,6 @@ case X86::BI__builtin_ia32_fpclasspd512_mask: case X86::BI__builtin_ia32_fpclasssd_mask: case X86::BI__builtin_ia32_fpclassss_mask: - case X86::BI__builtin_ia32_vpermilps512_mask: i = 1; l = 0; u = 255; break; case X86::BI__builtin_ia32_palignr: Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -5909,10 +5909,13 @@ case AttributeList::AT_TypeTagForDatatype: handleTypeTagForDatatypeAttr(S, D, Attr); break; - case AttributeList::AT_RenderScriptKernel: handleSimpleAttribute(S, D, Attr); break; + // XRay attributes. 
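// (A hypothetical declaration that would reach the AT_XRayInstrument case below, not taken from this patch:
//    __attribute__((xray_always_instrument)) void handle_request();
//  handleSimpleAttribute simply attaches the attribute here; the C++11 spelling is exercised by the CodeGen test added further down.)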
+ case AttributeList::AT_XRayInstrument: + handleSimpleAttribute(S, D, Attr); + break; } } Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -1814,6 +1814,7 @@ Params); break; } + case OMPD_distribute_parallel_for_simd: case OMPD_distribute_parallel_for: { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); QualType KmpInt32PtrTy = @@ -2038,6 +2039,8 @@ // | parallel | distribute | + | // | parallel | distribute | + | // | | parallel for | | + // | parallel | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | for | parallel | * | // | for | for | + | @@ -2076,6 +2079,8 @@ // | for | distribute | + | // | for | distribute | + | // | | parallel for | | + // | for | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | master | parallel | * | // | master | for | + | @@ -2114,6 +2119,8 @@ // | master | distribute | + | // | master | distribute | + | // | | parallel for | | + // | master | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | critical | parallel | * | // | critical | for | + | @@ -2151,6 +2158,8 @@ // | critical | distribute | + | // | critical | distribute | + | // | | parallel for | | + // | critical | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | simd | parallel | | // | simd | for | | @@ -2189,6 +2198,8 @@ // | simd | distribute | | // | simd | distribute | | // | | parallel for | | + // | simd | distribute | | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | for simd | parallel | | // | for simd | for | | @@ -2227,6 +2238,8 @@ // | for simd | distribute | | // | for simd | distribute | | // | | parallel for | | + // | for simd | distribute | | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | parallel for simd| parallel | | // | parallel for simd| for | | @@ -2265,6 +2278,8 @@ // | parallel for simd| distribute | | // | parallel for simd| distribute | | // | | parallel for | | + // | parallel for simd| distribute | | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | sections | parallel | * | // | sections | for | + | @@ -2303,6 +2318,8 @@ // | sections | distribute | + | // | sections | distribute | + | // | | parallel for | | + // | sections | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | section | parallel | * | // | section | for | + | @@ -2341,6 +2358,8 @@ // | section | distribute | + | // | section | distribute | + | // | | parallel for | | + // | section | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | single | parallel | * | // | single | for | + | @@ -2379,6 +2398,8 @@ // | single | distribute | + | // | single | distribute | + | // | | parallel for | | + // | single | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | parallel for | parallel | * | // | parallel for | for | + | @@ -2417,6 +2438,8 @@ // | parallel 
for | distribute | + | // | parallel for | distribute | + | // | | parallel for | | + // | parallel for | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | parallel sections| parallel | * | // | parallel sections| for | + | @@ -2455,6 +2478,8 @@ // | parallel sections| distribute | + | // | parallel sections| distribute | + | // | | parallel for | | + // | parallel sections| distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | task | parallel | * | // | task | for | + | @@ -2493,6 +2518,8 @@ // | task | distribute | + | // | task | distribute | + | // | | parallel for | | + // | task | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | ordered | parallel | * | // | ordered | for | + | @@ -2531,6 +2558,8 @@ // | ordered | distribute | + | // | ordered | distribute | + | // | | parallel for | | + // | ordered | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | atomic | parallel | | // | atomic | for | | @@ -2569,6 +2598,8 @@ // | atomic | distribute | | // | atomic | distribute | | // | | parallel for | | + // | atomic | distribute | | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | target | parallel | * | // | target | for | * | @@ -2607,6 +2638,8 @@ // | target | distribute | + | // | target | distribute | + | // | | parallel for | | + // | target | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | target parallel | parallel | * | // | target parallel | for | * | @@ -2645,6 +2678,8 @@ // | target parallel | distribute | | // | target parallel | distribute | | // | | parallel for | | + // | target parallel | distribute | | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | target parallel | parallel | * | // | for | | | @@ -2710,6 +2745,8 @@ // | for | | | // | parallel | distribute | | // | for | parallel for | | + // | parallel | distribute | | + // | for |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | teams | parallel | * | // | teams | for | + | @@ -2748,6 +2785,8 @@ // | teams | distribute | ! | // | teams | distribute | ! | // | | parallel for | | + // | teams | distribute | ! 
| + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | taskloop | parallel | * | // | taskloop | for | + | @@ -2785,6 +2824,8 @@ // | taskloop | distribute | + | // | taskloop | distribute | + | // | | parallel for | | + // | taskloop | distribute | + | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | taskloop simd | parallel | | // | taskloop simd | for | | @@ -2823,6 +2864,8 @@ // | taskloop simd | distribute | | // | taskloop simd | distribute | | // | | parallel for | | + // | taskloop simd | distribute | | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | distribute | parallel | * | // | distribute | for | * | @@ -2861,6 +2904,8 @@ // | distribute | distribute | | // | distribute | distribute | | // | | parallel for | | + // | distribute | distribute | | + // | |parallel for simd| | // +------------------+-----------------+------------------------------------+ // | distribute | parallel | * | // | parallel for | | | @@ -2927,6 +2972,75 @@ // | parallel for | | | // | distribute | distribute | | // | parallel for | parallel for | | + // | distribute | distribute | | + // | parallel for |parallel for simd| | + // +------------------+-----------------+------------------------------------+ + // | distribute | parallel | * | + // | parallel for simd| | | + // | distribute | for | * | + // | parallel for simd| | | + // | distribute | for simd | * | + // | parallel for simd| | | + // | distribute | master | * | + // | parallel for simd| | | + // | distribute | critical | * | + // | parallel for simd| | | + // | distribute | simd | * | + // | parallel for simd| | | + // | distribute | sections | * | + // | parallel for simd| | | + // | distribute | section | * | + // | parallel for simd| | | + // | distribute | single | * | + // | parallel for simd| | | + // | distribute | parallel for | * | + // | parallel for simd| | | + // | distribute |parallel for simd| * | + // | parallel for simd| | | + // | distribute |parallel sections| * | + // | parallel for simd| | | + // | distribute | task | * | + // | parallel for simd| | | + // | distribute | taskyield | * | + // | parallel for simd| | | + // | distribute | barrier | * | + // | parallel for simd| | | + // | distribute | taskwait | * | + // | parallel for simd| | | + // | distribute | taskgroup | * | + // | parallel for simd| | | + // | distribute | flush | * | + // | parallel for simd| | | + // | distribute | ordered | + | + // | parallel for simd| | | + // | distribute | atomic | * | + // | parallel for simd| | | + // | distribute | target | | + // | parallel for simd| | | + // | distribute | target parallel | | + // | parallel for simd| | | + // | distribute | target parallel | | + // | parallel for simd| for | | + // | distribute | target enter | | + // | parallel for simd| data | | + // | distribute | target exit | | + // | parallel for simd| data | | + // | distribute | teams | | + // | parallel for simd| | | + // | distribute | cancellation | + | + // | parallel for simd| point | | + // | distribute | cancel | + | + // | parallel for simd| | | + // | distribute | taskloop | * | + // | parallel for simd| | | + // | distribute | taskloop simd | * | + // | parallel for simd| | | + // | distribute | distribute | | + // | parallel for simd| | | + // | distribute | distribute | * | + // | parallel for simd| parallel for | | + // | distribute | distribute | * | 
+ // | parallel for simd|parallel for simd| | // +------------------+-----------------+------------------------------------+ if (Stack->getCurScope()) { auto ParentRegion = Stack->getParentDirective(); @@ -3417,6 +3531,11 @@ ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_parallel); break; + case OMPD_distribute_parallel_for_simd: + Res = ActOnOpenMPDistributeParallelForSimdDirective( + ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); + AllowedNameModifiers.push_back(OMPD_parallel); + break; case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_threadprivate: @@ -6929,6 +7048,39 @@ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } +StmtResult Sema::ActOnOpenMPDistributeParallelForSimdDirective( + ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, + llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA) { + if (!AStmt) + return StmtError(); + + CapturedStmt *CS = cast<CapturedStmt>(AStmt); + // 1.2.2 OpenMP Language Terminology + // Structured block - An executable statement with a single entry at the + // top and a single exit at the bottom. + // The point of exit cannot be a branch out of the structured block. + // longjmp() and throw() must not violate the entry/exit criteria. + CS->getCapturedDecl()->setNothrow(); + + OMPLoopDirective::HelperExprs B; + // In presence of clause 'collapse' with number of loops, it will + // define the nested loops number. + unsigned NestedLoopCount = CheckOpenMPLoop( + OMPD_distribute_parallel_for_simd, getCollapseNumberExpr(Clauses), + nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack, + VarsWithImplicitDSA, B); + if (NestedLoopCount == 0) + return StmtError(); + + assert((CurContext->isDependentContext() || B.builtAll()) && + "omp for loop exprs were not built"); + + getCurFunction()->setHasBranchProtectedScope(); + return OMPDistributeParallelForSimdDirective::Create( + Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); +} + OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, SourceLocation LParenLoc, Index: lib/Sema/TreeTransform.h =================================================================== --- lib/Sema/TreeTransform.h +++ lib/Sema/TreeTransform.h @@ -7548,6 +7548,18 @@ return Res; } +template <typename Derived> +StmtResult +TreeTransform<Derived>::TransformOMPDistributeParallelForSimdDirective( + OMPDistributeParallelForSimdDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock( + OMPD_distribute_parallel_for_simd, DirName, nullptr, D->getLocStart()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + //===----------------------------------------------------------------------===// // OpenMP clause transformation //===----------------------------------------------------------------------===// Index: lib/Serialization/ASTReaderStmt.cpp =================================================================== --- lib/Serialization/ASTReaderStmt.cpp +++ lib/Serialization/ASTReaderStmt.cpp @@ -2720,6 +2720,11 @@ VisitOMPLoopDirective(D); } +void ASTStmtReader::VisitOMPDistributeParallelForSimdDirective( + OMPDistributeParallelForSimdDirective *D) { + VisitOMPLoopDirective(D); +} + //===----------------------------------------------------------------------===// // ASTReader Implementation //===----------------------------------------------------------------------===//
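// A minimal source-level sketch of the composite directive that the Sema, TreeTransform and serialization hooks above now accept (clause choice here is illustrative; the tests added below use the same nesting):
//
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for simd private(x) aligned(p : 8)
//   for (int i = 0; i < N; ++i)
//     p[i] = x + i;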
@@ -3403,6 +3408,15 @@ break; } + case STMT_OMP_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE: { + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields]; + unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPDistributeParallelForSimdDirective::CreateEmpty(Context, NumClauses, + CollapsedNum, + Empty); + break; + } + case EXPR_CXX_OPERATOR_CALL: S = new (Context) CXXOperatorCallExpr(Context, Empty); break; Index: lib/Serialization/ASTWriterStmt.cpp =================================================================== --- lib/Serialization/ASTWriterStmt.cpp +++ lib/Serialization/ASTWriterStmt.cpp @@ -2436,6 +2436,12 @@ Code = serialization::STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE; } +void ASTStmtWriter::VisitOMPDistributeParallelForSimdDirective( + OMPDistributeParallelForSimdDirective *D) { + VisitOMPLoopDirective(D); + Code = serialization::STMT_OMP_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE; +} + //===----------------------------------------------------------------------===// // ASTWriter Implementation //===----------------------------------------------------------------------===// Index: lib/StaticAnalyzer/Core/ExprEngine.cpp =================================================================== --- lib/StaticAnalyzer/Core/ExprEngine.cpp +++ lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -844,6 +844,7 @@ case Stmt::OMPTaskLoopSimdDirectiveClass: case Stmt::OMPDistributeDirectiveClass: case Stmt::OMPDistributeParallelForDirectiveClass: + case Stmt::OMPDistributeParallelForSimdDirectiveClass: llvm_unreachable("Stmt should not be in analyzer evaluation loop"); case Stmt::ObjCSubscriptRefExprClass: Index: test/CodeGen/avx-builtins.c =================================================================== --- test/CodeGen/avx-builtins.c +++ test/CodeGen/avx-builtins.c @@ -346,19 +346,19 @@ __m128d test_mm256_extractf128_pd(__m256d A) { // CHECK-LABEL: test_mm256_extractf128_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> return _mm256_extractf128_pd(A, 1); } __m128 test_mm256_extractf128_ps(__m256 A) { // CHECK-LABEL: test_mm256_extractf128_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> return _mm256_extractf128_ps(A, 1); } __m128i test_mm256_extractf128_si256(__m256i A) { // CHECK-LABEL: test_mm256_extractf128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> return _mm256_extractf128_si256(A, 1); } @@ -647,32 +647,32 @@ __m128d test_mm_permute_pd(__m128d A) { // CHECK-LABEL: test_mm_permute_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> return _mm_permute_pd(A, 1); } __m256d test_mm256_permute_pd(__m256d A) { // CHECK-LABEL: test_mm256_permute_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> return _mm256_permute_pd(A, 5); } __m128 test_mm_permute_ps(__m128 A) { // CHECK-LABEL: test_mm_permute_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> return _mm_permute_ps(A, 0x1b); } // Test case for PR12401 __m128 test2_mm_permute_ps(__m128 a) { 
// CHECK-LABEL: test2_mm_permute_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> return _mm_permute_ps(a, 0xe6); } __m256 test_mm256_permute_ps(__m256 A) { // CHECK-LABEL: test_mm256_permute_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <8 x i32> return _mm256_permute_ps(A, 0x1b); } @@ -1177,7 +1177,7 @@ // CHECK-LABEL: test_mm256_storeu2_m128 // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}} _mm256_storeu2_m128(A, B, C); } @@ -1186,7 +1186,7 @@ // CHECK-LABEL: test_mm256_storeu2_m128d // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}} _mm256_storeu2_m128d(A, B, C); } @@ -1195,7 +1195,7 @@ // CHECK-LABEL: test_mm256_storeu2_m128i // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} _mm256_storeu2_m128i(A, B, C); } Index: test/CodeGen/avx2-builtins.c =================================================================== --- test/CodeGen/avx2-builtins.c +++ test/CodeGen/avx2-builtins.c @@ -370,20 +370,20 @@ __m128i test0_mm256_extracti128_si256_0(__m256i a) { // CHECK-LABEL: test0_mm256_extracti128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> return _mm256_extracti128_si256(a, 0); } __m128i test1_mm256_extracti128_si256_1(__m256i a) { // CHECK-LABEL: test1_mm256_extracti128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> return _mm256_extracti128_si256(a, 1); } // Immediate should be truncated to one bit. 
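// Only bit 0 of the immediate participates in the lane selection, so the call below with an immediate of 2 should behave exactly like an immediate of 0 and return the low 128 bits. A hypothetical equivalence check, not part of this test:
//   __m128i lo   = _mm256_extracti128_si256(a, 0);
//   __m128i same = _mm256_extracti128_si256(a, 2);  // bit 0 clear -> low half
//   // expect: _mm_movemask_epi8(_mm_cmpeq_epi8(lo, same)) == 0xFFFF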
__m128i test2_mm256_extracti128_si256(__m256i a) { // CHECK-LABEL: test2_mm256_extracti128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> return _mm256_extracti128_si256(a, 2); } @@ -891,13 +891,13 @@ __m256i test_mm256_permute4x64_epi64(__m256i a) { // CHECK-LABEL: test_mm256_permute4x64_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> return _mm256_permute4x64_epi64(a, 35); } __m256d test_mm256_permute4x64_pd(__m256d a) { // CHECK-LABEL: test_mm256_permute4x64_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> return _mm256_permute4x64_pd(a, 25); } Index: test/CodeGen/avx512f-builtins.c =================================================================== --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -3409,38 +3409,42 @@ __m512d test_mm512_permute_pd(__m512d __X) { // CHECK-LABEL: @test_mm512_permute_pd - // CHECK: @llvm.x86.avx512.mask.vpermil.pd.512 - return _mm512_permute_pd(__X, 2); + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> + return _mm512_permute_pd(__X, 2); } __m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) { // CHECK-LABEL: @test_mm512_mask_permute_pd - // CHECK: @llvm.x86.avx512.mask.vpermil.pd.512 - return _mm512_mask_permute_pd(__W, __U, __X, 2); + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_mask_permute_pd(__W, __U, __X, 2); } __m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) { // CHECK-LABEL: @test_mm512_maskz_permute_pd - // CHECK: @llvm.x86.avx512.mask.vpermil.pd.512 - return _mm512_maskz_permute_pd(__U, __X, 2); + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_maskz_permute_pd(__U, __X, 2); } __m512 test_mm512_permute_ps(__m512 __X) { // CHECK-LABEL: @test_mm512_permute_ps - // CHECK: @llvm.x86.avx512.mask.vpermil.ps.512 - return _mm512_permute_ps(__X, 2); + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> + return _mm512_permute_ps(__X, 2); } __m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) { // CHECK-LABEL: @test_mm512_mask_permute_ps - // CHECK: @llvm.x86.avx512.mask.vpermil.ps.512 - return _mm512_mask_permute_ps(__W, __U, __X, 2); + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_mask_permute_ps(__W, __U, __X, 2); } __m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) { // CHECK-LABEL: @test_mm512_maskz_permute_ps - // CHECK: @llvm.x86.avx512.mask.vpermil.ps.512 - return _mm512_maskz_permute_ps(__U, __X, 2); + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <16 x i32> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_maskz_permute_ps(__U, __X, 2); } __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) { @@ -4216,19 +4220,21 @@ __m512d test_mm512_shuffle_pd(__m512d __M, __m512d __V) { // CHECK-LABEL: @test_mm512_shuffle_pd - // CHECK: 
@llvm.x86.avx512.mask.shuf.pd.512 + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> return _mm512_shuffle_pd(__M, __V, 4); } __m512d test_mm512_mask_shuffle_pd(__m512d __W, __mmask8 __U, __m512d __M, __m512d __V) { // CHECK-LABEL: @test_mm512_mask_shuffle_pd - // CHECK: @llvm.x86.avx512.mask.shuf.pd.512 + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_shuffle_pd(__W, __U, __M, __V, 4); } __m512d test_mm512_maskz_shuffle_pd(__mmask8 __U, __m512d __M, __m512d __V) { // CHECK-LABEL: @test_mm512_maskz_shuffle_pd - // CHECK: @llvm.x86.avx512.mask.shuf.pd.512 + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_shuffle_pd(__U, __M, __V, 4); } @@ -5700,38 +5706,42 @@ __m512d test_mm512_permutex_pd(__m512d __X) { // CHECK-LABEL: @test_mm512_permutex_pd - // CHECK: @llvm.x86.avx512.mask.perm.df.512 - return _mm512_permutex_pd(__X, 0); + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> + return _mm512_permutex_pd(__X, 0); } __m512d test_mm512_mask_permutex_pd(__m512d __W, __mmask8 __U, __m512d __X) { // CHECK-LABEL: @test_mm512_mask_permutex_pd - // CHECK: @llvm.x86.avx512.mask.perm.df.512 - return _mm512_mask_permutex_pd(__W, __U, __X, 0); + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_mask_permutex_pd(__W, __U, __X, 0); } __m512d test_mm512_maskz_permutex_pd(__mmask8 __U, __m512d __X) { // CHECK-LABEL: @test_mm512_maskz_permutex_pd - // CHECK: @llvm.x86.avx512.mask.perm.df.512 - return _mm512_maskz_permutex_pd(__U, __X, 0); + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_maskz_permutex_pd(__U, __X, 0); } __m512i test_mm512_permutex_epi64(__m512i __X) { // CHECK-LABEL: @test_mm512_permutex_epi64 - // CHECK: @llvm.x86.avx512.mask.perm.di.512 - return _mm512_permutex_epi64(__X, 0); + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> + return _mm512_permutex_epi64(__X, 0); } __m512i test_mm512_mask_permutex_epi64(__m512i __W, __mmask8 __M, __m512i __X) { // CHECK-LABEL: @test_mm512_mask_permutex_epi64 - // CHECK: @llvm.x86.avx512.mask.perm.di.512 - return _mm512_mask_permutex_epi64(__W, __M, __X, 0); + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + return _mm512_mask_permutex_epi64(__W, __M, __X, 0); } __m512i test_mm512_maskz_permutex_epi64(__mmask8 __M, __m512i __X) { // CHECK-LABEL: @test_mm512_maskz_permutex_epi64 - // CHECK: @llvm.x86.avx512.mask.perm.di.512 - return _mm512_maskz_permutex_epi64(__M, __X, 0); + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + return _mm512_maskz_permutex_epi64(__M, __X, 0); } __m512d test_mm512_permutexvar_pd(__m512i __X, __m512d __Y) { Index: test/CodeGen/avx512vl-builtins.c =================================================================== --- test/CodeGen/avx512vl-builtins.c +++ test/CodeGen/avx512vl-builtins.c @@ -8,7 +8,7 @@ __mmask8 test_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) { // 
CHECK-LABEL: @test_mm_cmpeq_epu32_mask // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} - // CHECK: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> + // CHECK: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> return (__mmask8)_mm_cmpeq_epu32_mask(__a, __b); } @@ -22,6 +22,7 @@ __mmask8 test_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) { // CHECK-LABEL: @test_mm_cmpeq_epu64_mask // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}} + // CHECK: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> return (__mmask8)_mm_cmpeq_epu64_mask(__a, __b); } @@ -4614,56 +4615,56 @@ __m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 __U, __m128d __X) { // CHECK-LABEL: @test_mm_mask_permute_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_permute_pd(__W, __U, __X, 1); } __m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) { // CHECK-LABEL: @test_mm_maskz_permute_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_permute_pd(__U, __X, 1); } __m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) { // CHECK-LABEL: @test_mm256_mask_permute_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_permute_pd(__W, __U, __X, 5); } __m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) { // CHECK-LABEL: @test_mm256_maskz_permute_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_permute_pd(__U, __X, 5); } __m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) { // CHECK-LABEL: @test_mm_mask_permute_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_permute_ps(__W, __U, __X, 0x1b); } __m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) { // CHECK-LABEL: @test_mm_maskz_permute_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_permute_ps(__U, __X, 0x1b); } __m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) { // CHECK-LABEL: @test_mm256_mask_permute_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_permute_ps(__W, __U, __X, 0x1b); } __m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) { // CHECK-LABEL: @test_mm256_maskz_permute_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 
x i32> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_permute_ps(__U, __X, 0x1b); } @@ -5188,49 +5189,57 @@ __m128d test_mm_mask_shuffle_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_mask_shuffle_pd - // CHECK: @llvm.x86.avx512.mask.shuf.pd.128 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_mask_shuffle_pd(__W, __U, __A, __B, 3); } __m128d test_mm_maskz_shuffle_pd(__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_maskz_shuffle_pd - // CHECK: @llvm.x86.avx512.mask.shuf.pd.128 + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm_maskz_shuffle_pd(__U, __A, __B, 3); } __m256d test_mm256_mask_shuffle_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_mask_shuffle_pd - // CHECK: @llvm.x86.avx512.mask.shuf.pd.256 + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_shuffle_pd(__W, __U, __A, __B, 3); } __m256d test_mm256_maskz_shuffle_pd(__mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_maskz_shuffle_pd - // CHECK: @llvm.x86.avx512.mask.shuf.pd.256 + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_shuffle_pd(__U, __A, __B, 3); } __m128 test_mm_mask_shuffle_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_mask_shuffle_ps - // CHECK: @llvm.x86.avx512.mask.shuf.ps.128 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_mask_shuffle_ps(__W, __U, __A, __B, 4); } __m128 test_mm_maskz_shuffle_ps(__mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_maskz_shuffle_ps - // CHECK: @llvm.x86.avx512.mask.shuf.ps.128 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm_maskz_shuffle_ps(__U, __A, __B, 4); } __m256 test_mm256_mask_shuffle_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_mask_shuffle_ps - // CHECK: @llvm.x86.avx512.mask.shuf.ps.256 + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_shuffle_ps(__W, __U, __A, __B, 4); } __m256 test_mm256_maskz_shuffle_ps(__mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_maskz_shuffle_ps - // CHECK: @llvm.x86.avx512.mask.shuf.ps.256 + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_shuffle_ps(__U, __A, __B, 4); } @@ -6386,353 +6395,356 @@ return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); } +__m256d test_mm256_permutex_pd(__m256d __X) { + // CHECK-LABEL: @test_mm256_permutex_pd + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> + return 
_mm256_permutex_pd(__X, 3); +} + __m256d test_mm256_mask_permutex_pd(__m256d __W, __mmask8 __U, __m256d __X) { // CHECK-LABEL: @test_mm256_mask_permutex_pd - // CHECK: @llvm.x86.avx512.mask.perm.df.256 - return _mm256_mask_permutex_pd(__W, __U, __X, 1); + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_mask_permutex_pd(__W, __U, __X, 1); } __m256d test_mm256_maskz_permutex_pd(__mmask8 __U, __m256d __X) { // CHECK-LABEL: @test_mm256_maskz_permutex_pd - // CHECK: @llvm.x86.avx512.mask.perm.df.256 - return _mm256_maskz_permutex_pd(__U, __X, 1); + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_maskz_permutex_pd(__U, __X, 1); } -__m256d test_mm256_permutex_pd(__m256d __X) { - // CHECK-LABEL: @test_mm256_permutex_pd - // CHECK: @llvm.x86.avx512.mask.perm.df.256 - return _mm256_permutex_pd(__X, 3); +__m256i test_mm256_permutex_epi64(__m256i __X) { + // CHECK-LABEL: @test_mm256_permutex_epi64 + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> + return _mm256_permutex_epi64(__X, 3); } - __m256i test_mm256_mask_permutex_epi64(__m256i __W, __mmask8 __M, __m256i __X) { // CHECK-LABEL: @test_mm256_mask_permutex_epi64 - // CHECK: @llvm.x86.avx512.mask.perm.di.256 - return _mm256_mask_permutex_epi64(__W, __M, __X, 3); + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + return _mm256_mask_permutex_epi64(__W, __M, __X, 3); } __m256i test_mm256_maskz_permutex_epi64(__mmask8 __M, __m256i __X) { // CHECK-LABEL: @test_mm256_maskz_permutex_epi64 - // CHECK: @llvm.x86.avx512.mask.perm.di.256 - return _mm256_maskz_permutex_epi64(__M, __X, 3); -} - -__m256i test_mm256_permutex_epi64( __m256i __X) { - // CHECK-LABEL: @test_mm256_permutex_epi64 - // CHECK: @llvm.x86.avx512.mask.perm.di.256 - return _mm256_permutex_epi64(__X, 3); + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + return _mm256_maskz_permutex_epi64(__M, __X, 3); } __m256d test_mm256_permutexvar_pd(__m256i __X, __m256d __Y) { // CHECK-LABEL: @test_mm256_permutexvar_pd // CHECK: @llvm.x86.avx512.mask.permvar.df.256 - return _mm256_permutexvar_pd(__X, __Y); + return _mm256_permutexvar_pd(__X, __Y); } __m256d test_mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y) { // CHECK-LABEL: @test_mm256_mask_permutexvar_pd // CHECK: @llvm.x86.avx512.mask.permvar.df.256 - return _mm256_mask_permutexvar_pd(__W, __U, __X, __Y); + return _mm256_mask_permutexvar_pd(__W, __U, __X, __Y); } __m256d test_mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y) { // CHECK-LABEL: @test_mm256_maskz_permutexvar_pd // CHECK: @llvm.x86.avx512.mask.permvar.df.256 - return _mm256_maskz_permutexvar_pd(__U, __X, __Y); + return _mm256_maskz_permutexvar_pd(__U, __X, __Y); } __m256i test_mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: @test_mm256_maskz_permutexvar_epi64 // CHECK: @llvm.x86.avx512.mask.permvar.di.256 - return _mm256_maskz_permutexvar_epi64(__M, __X, __Y); + return _mm256_maskz_permutexvar_epi64(__M, __X, __Y); } __m256i test_mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: 
@test_mm256_mask_permutexvar_epi64 // CHECK: @llvm.x86.avx512.mask.permvar.di.256 - return _mm256_mask_permutexvar_epi64(__W, __M, __X, __Y); + return _mm256_mask_permutexvar_epi64(__W, __M, __X, __Y); } __m256 test_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { // CHECK-LABEL: @test_mm256_mask_permutexvar_ps // CHECK: @llvm.x86.avx512.mask.permvar.sf.256 - return _mm256_mask_permutexvar_ps(__W, __U, __X, __Y); + return _mm256_mask_permutexvar_ps(__W, __U, __X, __Y); } __m256 test_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { // CHECK-LABEL: @test_mm256_maskz_permutexvar_ps // CHECK: @llvm.x86.avx512.mask.permvar.sf.256 - return _mm256_maskz_permutexvar_ps(__U, __X, __Y); + return _mm256_maskz_permutexvar_ps(__U, __X, __Y); } __m256 test_mm256_permutexvar_ps(__m256i __X, __m256 __Y) { // CHECK-LABEL: @test_mm256_permutexvar_ps // CHECK: @llvm.x86.avx512.mask.permvar.sf.256 - return _mm256_permutexvar_ps( __X, __Y); + return _mm256_permutexvar_ps( __X, __Y); } __m256i test_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: @test_mm256_maskz_permutexvar_epi32 // CHECK: @llvm.x86.avx512.mask.permvar.si.256 - return _mm256_maskz_permutexvar_epi32(__M, __X, __Y); + return _mm256_maskz_permutexvar_epi32(__M, __X, __Y); } __m256i test_mm256_permutexvar_epi32(__m256i __X, __m256i __Y) { // CHECK-LABEL: @test_mm256_permutexvar_epi32 // CHECK: @llvm.x86.avx512.mask.permvar.si.256 - return _mm256_permutexvar_epi32(__X, __Y); + return _mm256_permutexvar_epi32(__X, __Y); } __m256i test_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { // CHECK-LABEL: @test_mm256_mask_permutexvar_epi32 // CHECK: @llvm.x86.avx512.mask.permvar.si.256 - return _mm256_mask_permutexvar_epi32(__W, __M, __X, __Y); + return _mm256_mask_permutexvar_epi32(__W, __M, __X, __Y); } __m128i test_mm_alignr_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_alignr_epi32 // CHECK: @llvm.x86.avx512.mask.valign.d.128 - return _mm_alignr_epi32(__A, __B, 1); + return _mm_alignr_epi32(__A, __B, 1); } __m128i test_mm_mask_alignr_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_alignr_epi32 // CHECK: @llvm.x86.avx512.mask.valign.d.128 - return _mm_mask_alignr_epi32(__W, __U, __A, __B, 1); + return _mm_mask_alignr_epi32(__W, __U, __A, __B, 1); } __m128i test_mm_maskz_alignr_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_alignr_epi32 // CHECK: @llvm.x86.avx512.mask.valign.d.128 - return _mm_maskz_alignr_epi32(__U, __A, __B, 1); + return _mm_maskz_alignr_epi32(__U, __A, __B, 1); } __m256i test_mm256_alignr_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_alignr_epi32 // CHECK: @llvm.x86.avx512.mask.valign.d.256 - return _mm256_alignr_epi32(__A, __B, 1); + return _mm256_alignr_epi32(__A, __B, 1); } __m256i test_mm256_mask_alignr_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_alignr_epi32 // CHECK: @llvm.x86.avx512.mask.valign.d.256 - return _mm256_mask_alignr_epi32(__W, __U, __A, __B, 1); + return _mm256_mask_alignr_epi32(__W, __U, __A, __B, 1); } __m256i test_mm256_maskz_alignr_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_alignr_epi32 // CHECK: @llvm.x86.avx512.mask.valign.d.256 - return _mm256_maskz_alignr_epi32(__U, __A, __B, 1); + return _mm256_maskz_alignr_epi32(__U, __A, __B, 1); } __m128i test_mm_alignr_epi64(__m128i __A, __m128i __B) { // 
CHECK-LABEL: @test_mm_alignr_epi64 // CHECK: @llvm.x86.avx512.mask.valign.q.128 - return _mm_alignr_epi64(__A, __B, 1); + return _mm_alignr_epi64(__A, __B, 1); } __m128i test_mm_mask_alignr_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_alignr_epi64 // CHECK: @llvm.x86.avx512.mask.valign.q.128 - return _mm_mask_alignr_epi64(__W, __U, __A, __B, 1); + return _mm_mask_alignr_epi64(__W, __U, __A, __B, 1); } __m128i test_mm_maskz_alignr_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_alignr_epi64 // CHECK: @llvm.x86.avx512.mask.valign.q.128 - return _mm_maskz_alignr_epi64(__U, __A, __B, 1); + return _mm_maskz_alignr_epi64(__U, __A, __B, 1); } __m256i test_mm256_alignr_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_alignr_epi64 // CHECK: @llvm.x86.avx512.mask.valign.q.256 - return _mm256_alignr_epi64(__A, __B, 1); + return _mm256_alignr_epi64(__A, __B, 1); } __m256i test_mm256_mask_alignr_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_alignr_epi64 // CHECK: @llvm.x86.avx512.mask.valign.q.256 - return _mm256_mask_alignr_epi64(__W, __U, __A, __B, 1); + return _mm256_mask_alignr_epi64(__W, __U, __A, __B, 1); } __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_alignr_epi64 // CHECK: @llvm.x86.avx512.mask.valign.q.256 - return _mm256_maskz_alignr_epi64(__U, __A, __B, 1); + return _mm256_maskz_alignr_epi64(__U, __A, __B, 1); } __m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_mask_movehdup_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_mask_movehdup_ps(__W, __U, __A); + return _mm_mask_movehdup_ps(__W, __U, __A); } __m128 test_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_maskz_movehdup_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_maskz_movehdup_ps(__U, __A); + return _mm_maskz_movehdup_ps(__U, __A); } __m256 test_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_mask_movehdup_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_movehdup_ps(__W, __U, __A); + return _mm256_mask_movehdup_ps(__W, __U, __A); } __m256 test_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_maskz_movehdup_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_movehdup_ps(__U, __A); + return _mm256_maskz_movehdup_ps(__U, __A); } __m128 test_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_mask_moveldup_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_mask_moveldup_ps(__W, __U, __A); + return _mm_mask_moveldup_ps(__W, __U, __A); } __m128 test_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_maskz_moveldup_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> // CHECK: select <4 x i1> %{{.*}} <4 x float> 
%{{.*}}, <4 x float> %{{.*}} - return _mm_maskz_moveldup_ps(__U, __A); + return _mm_maskz_moveldup_ps(__U, __A); } __m256 test_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_mask_moveldup_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_moveldup_ps(__W, __U, __A); + return _mm256_mask_moveldup_ps(__W, __U, __A); } __m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_maskz_moveldup_ps // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_moveldup_ps(__U, __A); + return _mm256_maskz_moveldup_ps(__U, __A); } __m128i test_mm_mask_shuffle_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_shuffle_epi32 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} - return _mm_mask_shuffle_epi32(__W, __U, __A, 1); + return _mm_mask_shuffle_epi32(__W, __U, __A, 1); } __m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_shuffle_epi32 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} - return _mm_maskz_shuffle_epi32(__U, __A, 2); + return _mm_maskz_shuffle_epi32(__U, __A, 2); } __m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_shuffle_epi32 // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} - return _mm256_mask_shuffle_epi32(__W, __U, __A, 2); + return _mm256_mask_shuffle_epi32(__W, __U, __A, 2); } __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_shuffle_epi32 // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} - return _mm256_maskz_shuffle_epi32(__U, __A, 2); + return _mm256_maskz_shuffle_epi32(__U, __A, 2); } __m128d test_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) { // CHECK-LABEL: @test_mm_mask_mov_pd // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_mask_mov_pd(__W, __U, __A); + return _mm_mask_mov_pd(__W, __U, __A); } __m128d test_mm_maskz_mov_pd(__mmask8 __U, __m128d __A) { // CHECK-LABEL: @test_mm_maskz_mov_pd // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} - return _mm_maskz_mov_pd(__U, __A); + return _mm_maskz_mov_pd(__U, __A); } __m256d test_mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A) { // CHECK-LABEL: @test_mm256_mask_mov_pd // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_mov_pd(__W, __U, __A); + return _mm256_mask_mov_pd(__W, __U, __A); } __m256d test_mm256_maskz_mov_pd(__mmask8 __U, __m256d __A) { // CHECK-LABEL: @test_mm256_maskz_mov_pd // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_maskz_mov_pd(__U, __A); + return _mm256_maskz_mov_pd(__U, __A); } __m128 test_mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_mask_mov_ps // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return 
_mm_mask_mov_ps(__W, __U, __A); + return _mm_mask_mov_ps(__W, __U, __A); } __m128 test_mm_maskz_mov_ps(__mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_maskz_mov_ps // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm_maskz_mov_ps(__U, __A); + return _mm_maskz_mov_ps(__U, __A); } __m256 test_mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_mask_mov_ps // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_mov_ps(__W, __U, __A); + return _mm256_mask_mov_ps(__W, __U, __A); } __m256 test_mm256_maskz_mov_ps(__mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_maskz_mov_ps // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_mov_ps(__U, __A); + return _mm256_maskz_mov_ps(__U, __A); } __m128 test_mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_cvtph_ps // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.128 - return _mm_mask_cvtph_ps(__W, __U, __A); + return _mm_mask_cvtph_ps(__W, __U, __A); } __m128 test_mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_cvtph_ps // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.128 - return _mm_maskz_cvtph_ps(__U, __A); + return _mm_maskz_cvtph_ps(__U, __A); } __m256 test_mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm256_mask_cvtph_ps // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.256 - return _mm256_mask_cvtph_ps(__W, __U, __A); + return _mm256_mask_cvtph_ps(__W, __U, __A); } __m256 test_mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm256_maskz_cvtph_ps // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.256 - return _mm256_maskz_cvtph_ps(__U, __A); + return _mm256_maskz_cvtph_ps(__U, __A); } __m128i test_mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_mask_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 - return _mm_mask_cvtps_ph(__W, __U, __A); + return _mm_mask_cvtps_ph(__W, __U, __A); } __m128i test_mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_maskz_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 - return _mm_maskz_cvtps_ph(__U, __A); + return _mm_maskz_cvtps_ph(__U, __A); } __m128i test_mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_mask_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 - return _mm256_mask_cvtps_ph(__W, __U, __A); + return _mm256_mask_cvtps_ph(__W, __U, __A); } __m128i test_mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A) { @@ -6744,25 +6756,25 @@ __m128i test_mm_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_mask_cvt_roundps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 - return _mm_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); } __m128i test_mm_maskz_cvt_roundps_ph(__mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_maskz_cvt_roundps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 - return _mm_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); } __m128i test_mm256_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_mask_cvt_roundps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 - return _mm256_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm256_mask_cvt_roundps_ph(__W, __U, __A, 
_MM_FROUND_CUR_DIRECTION);
 }
 __m128i test_mm256_maskz_cvt_roundps_ph(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_maskz_cvt_roundps_ph
   // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
-  return _mm256_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION);
+  return _mm256_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION);
 }
 __mmask8 test_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
Index: test/CodeGen/xray-attributes-supported.cpp
===================================================================
--- /dev/null
+++ test/CodeGen/xray-attributes-supported.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -fxray-instrument -std=c++11 -x c++ -emit-llvm -o - -triple x86_64-unknown-linux-gnu | FileCheck %s
+
+// Make sure that the LLVM attributes for XRay-annotated functions do show up.
+[[clang::xray_always_instrument]] void foo() {
+// CHECK: define void @_Z3foov() #0
+};
+
+[[clang::xray_never_instrument]] void bar() {
+// CHECK: define void @_Z3barv() #1
+};
+
+// CHECK: #0 = {{.*}}"function-instrument"="xray-always"
+// CHECK: #1 = {{.*}}"function-instrument"="xray-never"
Index: test/Driver/sparc-as.c
===================================================================
--- test/Driver/sparc-as.c
+++ test/Driver/sparc-as.c
@@ -76,6 +76,38 @@
 // RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
 // RUN: | FileCheck -check-prefix=SPARC-V8PLUSD %s
+// RUN: %clang -mcpu=leon2 -no-canonical-prefixes -target sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: | FileCheck -check-prefix=SPARC-V8 %s
+
+// RUN: %clang -mcpu=at697e -no-canonical-prefixes -target sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: | FileCheck -check-prefix=SPARC-V8 %s
+
+// RUN: %clang -mcpu=at697f -no-canonical-prefixes -target sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: | FileCheck -check-prefix=SPARC-V8 %s
+
+// RUN: %clang -mcpu=leon3 -no-canonical-prefixes -target sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: | FileCheck -check-prefix=SPARC-V8 %s
+
+// RUN: %clang -mcpu=ut699 -no-canonical-prefixes -target sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: | FileCheck -check-prefix=SPARC-V8 %s
+
+// RUN: %clang -mcpu=gr712rc -no-canonical-prefixes -target sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: | FileCheck -check-prefix=SPARC-V8 %s
+
+// RUN: %clang -mcpu=leon4 -no-canonical-prefixes -target sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: | FileCheck -check-prefix=SPARC-V8 %s
+
+// RUN: %clang -mcpu=gr740 -no-canonical-prefixes -target sparc \
+// RUN: -no-integrated-as --sysroot=%S/Inputs/basic_netbsd_tree %s -### 2>&1 \
+// RUN: | FileCheck -check-prefix=SPARC-V8 %s
+
 // SPARC: as{{.*}}" "-32" "-Av8" "-o"
 // SPARC-V8: as{{.*}}" "-32" "-Av8" "-o"
 // SPARC-SPARCLITE: as{{.*}}" "-32" "-Asparclite" "-o"
Index: test/OpenMP/distribute_parallel_for_simd_aligned_messages.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/distribute_parallel_for_simd_aligned_messages.cpp
@@ -0,0 +1,306 @@
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s
+
+struct B {
+  static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
+  static constexpr int bfoo() { return 8; }
+};
+namespace X {
+  B x; // expected-note {{'x' defined here}}
+};
+constexpr int bfoo() { return 4; }
+
+int **z;
+const int C1 = 1;
+const int C2 = 2;
+void test_aligned_colons(int *&rp)
+{
+  int *B = 0;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(B:bfoo())
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}}
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(B:B::bfoo())
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(z:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}}
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(B:B::bfoo())
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(X::x : ::z) // expected-error {{integral constant expression must have integral or unscoped enumeration type, not 'int **'}} expected-error {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'B'}}
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(B,rp,::z: X::x) // expected-error {{integral constant expression must have integral or unscoped enumeration type, not 'B'}}
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(::z)
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp distribute parallel for simd aligned(B::bfoo()) // expected-error {{expected variable name}}
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(B::ib,B:C1+C2) // expected-warning {{aligned clause will be ignored because the requested alignment is not a power of 2}}
+  for (int i = 0; i < 10; ++i) ;
+}
+
+// expected-note@+1 {{'num' defined here}}
+template<int L, class T, class N> T test_template(T* arr, N num) {
+  N i;
+  T sum = (T)0;
+  T ind2 = - num * L;
+  // Negative number is passed as L.
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(arr:L) // expected-error {{argument to 'aligned' clause must be a strictly positive integer value}}
+  for (i = 0; i < num; ++i) {
+    T cur = arr[(int)ind2];
+    ind2 += L;
+    sum += cur;
+  }
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(num:4) // expected-error {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'int'}}
+  for (i = 0; i < num; ++i);
+
+  return T();
+}
+
+template<int LEN> int test_warn() {
+  int *ind2 = 0;
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned(ind2:LEN) // expected-error {{argument to 'aligned' clause must be a strictly positive integer value}}
+  for (int i = 0; i < 100; i++) {
+    ind2 += LEN;
+  }
+  return 0;
+}
+
+struct S1; // expected-note 2 {{declared here}}
+extern S1 a; // expected-note {{'a' declared here}}
+class S2 {
+  mutable int a;
+public:
+  S2():a(0) { }
+};
+const S2 b; // expected-note 1 {{'b' defined here}}
+const S2 ba[5];
+class S3 {
+  int a;
+public:
+  S3():a(0) { }
+};
+const S3 ca[5];
+class S4 {
+  int a;
+  S4();
+public:
+  S4(int v):a(v) { }
+};
+class S5 {
+  int a;
+  S5():a(0) {}
+public:
+  S5(int v):a(v) { }
+};
+
+S3 h; // expected-note 2 {{'h' defined here}}
+#pragma omp threadprivate(h)
+
+template<class I, class C> int foomain(I argc, C **argv) {
+  I e(argc);
+  I g(argc);
+  int i; // expected-note {{declared here}} expected-note {{'i' defined here}}
+  // expected-note@+2 {{declared here}}
+  // expected-note@+1 {{reference to 'i' is not a constant expression}}
+  int &j = i;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned // expected-error {{expected '(' after 'aligned'}}
+  for (I k = 0; k < argc; ++k) ++k;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (I k = 0; k < argc; ++k) ++k;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned () // expected-error {{expected expression}}
+  for (I k = 0; k < argc; ++k) ++k;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (I k = 0; k < argc; ++k) ++k;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (I k = 0; k < argc; ++k) ++k;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd aligned (argc > 0 ?
argv[1] : argv[2]) // expected-error {{expected variable name}} + for (I k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned (argc : 5) // expected-warning {{aligned clause will be ignored because the requested alignment is not a power of 2}} + for (I k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned (S1) // expected-error {{'S1' does not refer to a value}} + for (I k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned (argv[1]) // expected-error {{expected variable name}} + for (I k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned(e, g) + for (I k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned(h) // expected-error {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'S3'}} + for (I k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned(i) // expected-error {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'int'}} + for (I k = 0; k < argc; ++k) ++k; + + #pragma omp parallel + { + int *v = 0; + I i; + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd aligned(v:16) + for (I k = 0; k < argc; ++k) { i = k; v += 2; } + } + float *f; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned(f) + for (I k = 0; k < argc; ++k) ++k; + + int v = 0; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned(f:j) // expected-note {{initializer of 'j' is not a constant expression}} expected-error {{expression is not an integral constant expression}} + + for (I k = 0; k < argc; ++k) { ++k; v += j; } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned(f) + for (I k = 0; k < argc; ++k) ++k; + + return 0; +} + +// expected-note@+1 2 {{'argc' defined here}} +int main(int argc, char **argv) { + double darr[100]; + // expected-note@+1 {{in instantiation of function template specialization 'test_template<-4, double, int>' requested here}} + test_template<-4>(darr, 4); + test_warn<4>(); // ok + // expected-note@+1 {{in instantiation of function template specialization 'test_warn<0>' requested here}} + test_warn<0>(); + + int i; + int &j = i; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned // expected-error {{expected '(' after 'aligned'}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned () // expected-error {{expected expression}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned (argv // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned (argc, // expected-error {{expected expression}} expected-error 
{{expected ')'}} expected-note {{to match this '('}} expected-error {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'int'}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned (argc) // expected-error {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'int'}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned (S1) // expected-error {{'S1' does not refer to a value}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned (a, b) // expected-error {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'S1'}} expected-error {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'S2'}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned (argv[1]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned(h) // expected-error {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'S3'}} + for (int k = 0; k < argc; ++k) ++k; + + int *pargc = &argc; + // expected-note@+1 {{in instantiation of function template specialization 'foomain' requested here}} + foomain(pargc,argv); + return 0; +} + Index: test/OpenMP/distribute_parallel_for_simd_ast_print.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_ast_print.cpp @@ -0,0 +1,152 @@ +// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +struct S { + S(): a(0) {} + S(int v) : a(v) {} + int a; + typedef int type; +}; + +template +class S7 : public T { +protected: + T a; + S7() : a(0) {} + +public: + S7(typename T::type v) : a(v) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(a) private(this->a) private(T::a) + for (int k = 0; k < a.a; ++k) + ++this->a.a; + } + S7 &operator=(S7 &s) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(a) private(this->a) + for (int k = 0; k < s.a.a; ++k) + ++s.a.a; + return *this; + } +}; + +// CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) private(this->S::a) +// CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) private(T::a) +// CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) + +class S8 : public S7 { + S8() {} + +public: + S8(int v) : S7(v){ +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(a) private(this->a) private(S7::a) + for (int k = 0; k < a.a; ++k) + ++this->a.a; + } + S8 &operator=(S8 &s) { +#pragma omp target 
+#pragma omp teams +#pragma omp distribute parallel for simd private(a) private(this->a) + for (int k = 0; k < s.a.a; ++k) + ++s.a.a; + return *this; + } +}; + +// CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) private(this->S7::a) +// CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) + +template +T tmain(T argc) { + T b = argc, c, d, e, f, h; + static T a; +// CHECK: static T a; + static T g; +#pragma omp threadprivate(g) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule(static, a) schedule(dynamic) default(none) copyin(g) firstprivate(a) + // CHECK: #pragma omp distribute parallel for simd dist_schedule(static, a) schedule(dynamic) default(none) copyin(g) + for (int i = 0; i < 2; ++i) + a = 2; +// CHECK-NEXT: for (int i = 0; i < 2; ++i) +// CHECK-NEXT: a = 2; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) schedule(static, N) if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h) dist_schedule(static,N) + for (int i = 0; i < 2; ++i) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + for (int i = 0; i < 2; ++i) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + for (int j = 0; j < 2; ++j) + a++; + // CHECK: #pragma omp distribute parallel for simd private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) schedule(static, N) if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h) dist_schedule(static, N) + // CHECK-NEXT: for (int i = 0; i < 2; ++i) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int i = 0; i < 2; ++i) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: for (int j = 0; j < 2; ++j) + // CHECK-NEXT: a++; + return T(); +} + +int main(int argc, char **argv) { + int b = argc, c, d, e, f, h; + int x[200]; + static int a; +// CHECK: static int a; + static float g; +#pragma omp threadprivate(g) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule(guided, argc) default(none) copyin(g) dist_schedule(static, a) private(a) + // CHECK: #pragma omp distribute parallel for simd schedule(guided, argc) default(none) copyin(g) dist_schedule(static, a) private(a) + for (int i = 0; i < 2; ++i) + a = 2; +// CHECK-NEXT: for (int i = 0; i < 2; ++i) +// CHECK-NEXT: a = 2; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argc, b), firstprivate(argv, c), lastprivate(d, f) collapse(2) schedule(auto) if (argc) num_threads(a) default(shared) shared(e) reduction(+ : h) dist_schedule(static, b) + for (int i = 0; i < 10; ++i) + for (int j = 0; j < 10; ++j) + a++; + // CHECK: #pragma omp distribute parallel for simd private(argc,b) firstprivate(argv,c) lastprivate(d,f) collapse(2) schedule(auto) if(argc) num_threads(a) default(shared) shared(e) reduction(+: h) dist_schedule(static, b) + // CHECK-NEXT: for (int i = 0; i < 10; ++i) + // CHECK-NEXT: for (int j = 0; j < 10; ++j) + // CHECK-NEXT: a++; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd aligned(x:8) linear(h:2) safelen(8) 
simdlen(8)
+  for (int i = 0; i < 100; i++)
+    for (int j = 0; j < 200; j++)
+      a += h + x[j];
+  // CHECK: #pragma omp distribute parallel for simd aligned(x: 8) linear(h: 2) safelen(8) simdlen(8)
+  // CHECK-NEXT: for (int i = 0; i < 100; i++)
+  // CHECK-NEXT: for (int j = 0; j < 200; j++)
+  // CHECK-NEXT: a += h + x[j];
+
+  return (tmain(argc) + tmain(argv[0][0]));
+}
+
+#endif
Index: test/OpenMP/distribute_parallel_for_simd_collapse_messages.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/distribute_parallel_for_simd_collapse_messages.cpp
@@ -0,0 +1,154 @@
+// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
+
+void foo() {
+}
+
+#if __cplusplus >= 201103L
+// expected-note@+2 4 {{declared here}}
+#endif
+bool foobool(int argc) {
+  return argc;
+}
+
+struct S1; // expected-note {{declared here}}
+
+template <class S, class T, int N, int ST> // expected-note {{declared here}}
+T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd collapse // expected-error {{expected '(' after 'collapse'}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd collapse () // expected-error {{expected expression}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+#pragma omp target
+#pragma omp teams
+  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
+  // expected-error@+2 2 {{expression is not an integral constant expression}}
+  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+#pragma omp distribute parallel for simd collapse (argc
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+#pragma omp target
+#pragma omp teams
+  // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}}
+#pragma omp distribute parallel for simd collapse (ST // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd collapse (1)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd collapse ((ST > 0) ?
1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp distribute parallel for simd', but found only 1}} + // expected-error@+8 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'collapse' clause}} + // expected-error@+7 2 {{argument to 'collapse' clause must be a strictly positive integer value}} + // expected-error@+6 2 {{expression is not an integral constant expression}} +#if __cplusplus >= 201103L + // expected-note@+4 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (foobool(argc)), collapse (true), collapse (-5) + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (S) // expected-error {{'S' does not refer to a value}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; +#if __cplusplus <= 199711L + // expected-error@+6 2 {{expression is not an integral constant expression}} +#else + // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (1) + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (N) // expected-error {{argument to 'collapse' clause must be a strictly positive integer value}} + for (T i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (2) // expected-note {{as specified in 'collapse' clause}} + foo(); // expected-error {{expected 2 for loops after '#pragma omp distribute parallel for simd'}} + return argc; +} + +int main(int argc, char **argv) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse // expected-error {{expected '(' after 'collapse'}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse () // expected-error {{expected expression}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (4 // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{as specified in 'collapse' clause}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp distribute parallel for simd', but found only 1}} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (2+2)) // expected-warning 
{{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} expected-note {{as specified in 'collapse' clause}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp distribute parallel for simd', but found only 1}} + // expected-error@+6 {{expression is not an integral constant expression}} +#if __cplusplus >= 201103L + // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (foobool(1) > 0 ? 1 : 2) + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + // expected-error@+8 {{expression is not an integral constant expression}} +#if __cplusplus >= 201103L + // expected-note@+6{{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif + // expected-error@+4 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'collapse' clause}} + // expected-error@+3 2 {{argument to 'collapse' clause must be a strictly positive integer value}} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (foobool(argc)), collapse (true), collapse (-5) + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; +#if __cplusplus <= 199711L + // expected-error@+6 {{expression is not an integral constant expression}} +#else + // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + // expected-error@+5 {{statement after '#pragma omp distribute parallel for simd' must be a for loop}} + // expected-note@+3 {{in instantiation of function template specialization 'tmain' requested here}} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse(collapse(tmain(argc, argv) // expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}} + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd collapse (2) // expected-note {{as specified in 'collapse' clause}} + foo(); // expected-error {{expected 2 for loops after '#pragma omp distribute parallel for simd'}} + // expected-note@+1 {{in instantiation of function template specialization 'tmain' requested here}} + return tmain(argc, argv); +} + Index: test/OpenMP/distribute_parallel_for_simd_copyin_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_copyin_messages.cpp @@ -0,0 +1,190 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note 2 {{declared here}} +class S2 { + mutable int a; + +public: + S2() : a(0) {} + S2 &operator=(S2 &s2) { return *this; } +}; +class S3 { + int a; + +public: + S3() : a(0) {} + S3 &operator=(S3 &s3) { return *this; } +}; +class S4 { + int a; + S4(); + S4 &operator=(const S4 &s4); // expected-note 3 
{{implicitly declared private here}} + +public: + S4(int v) : a(v) {} +}; +class S5 { + int a; + S5() : a(0) {} + S5 &operator=(const S5 &s5) { return *this; } // expected-note 3 {{implicitly declared private here}} + +public: + S5(int v) : a(v) {} +}; +template +class ST { +public: + static T s; +}; + +S2 k; +S3 h; +S4 l(3); +S5 m(4); +#pragma omp threadprivate(h, k, l, m) + +namespace A { +double x; +#pragma omp threadprivate(x) +} +namespace B { +using A::x; +} + +template +T tmain(T argc, S **argv) { + T i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin // expected-error {{expected '(' after 'copyin'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin() // expected-error {{expected expression}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(k // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(h, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(l) // expected-error 2 {{'operator=' is a private member of 'S4'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(S1) // expected-error {{'S1' does not refer to a value}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(argv[1]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(i) // expected-error {{copyin variable must be threadprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(m) // expected-error 2 {{'operator=' is a private member of 'S5'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(ST::s, B::x) // expected-error {{copyin variable must be threadprivate}} + for (i = 0; i < argc; ++i) + foo(); +} + +int main(int argc, char **argv) { + int i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin // expected-error {{expected '(' after 'copyin'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin() // expected-error {{expected expression}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams 
+#pragma omp distribute parallel for simd copyin(k // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(h, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(l) // expected-error {{'operator=' is a private member of 'S4'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(S1) // expected-error {{'S1' does not refer to a value}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(argv[1]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(i) // expected-error {{copyin variable must be threadprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(m) // expected-error {{'operator=' is a private member of 'S5'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd copyin(ST::s, B::x) // expected-error {{copyin variable must be threadprivate}} + for (i = 0; i < argc; ++i) + foo(); + + return tmain(argc, argv); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} +} Index: test/OpenMP/distribute_parallel_for_simd_default_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_default_messages.cpp @@ -0,0 +1,100 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s + +void foo(); + +template +T tmain(T argc) { + int i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default // expected-error {{expected '(' after 'default'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) // expected-error 2 {{variable 'argc' must have explicitly specified data sharing attributes}} + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default(shared), default(shared) // expected-error {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'default' clause}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default(x) // expected-error 
{{expected 'none' or 'shared' in OpenMP clause 'default'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default(none) + for (i = 0; i < argc; ++i) // expected-error 2 {{variable 'argc' must have explicitly specified data sharing attributes}} + foo(); + +#pragma omp parallel default(none) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default(shared) + for (i = 0; i < argc; ++i) + foo(); + + return T(); +} + +int main(int argc, char **argv) { + int i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default // expected-error {{expected '(' after 'default'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}} + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default(shared), default(shared) // expected-error {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'default' clause}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default(none) + for (i = 0; i < argc; ++i) // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}} + foo(); + +#pragma omp parallel default(none) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd default(shared) + for (i = 0; i < argc; ++i) + foo(); + + return (tmain(argc) + tmain(argv[0][0])); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} +} Index: test/OpenMP/distribute_parallel_for_simd_dist_schedule_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_dist_schedule_messages.cpp @@ -0,0 +1,103 @@ +// RUN: %clang_cc1 -triple x86_64-apple-macos10.7.0 -verify -fopenmp -ferror-limit 100 -o - %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note {{declared here}} expected-note {{declared here}} + +template +T tmain(T argc) { + T b = argc, c, d, e, f, g; + char ** argv; + static T a; +// CHECK: static T a; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule // expected-error {{expected '(' after 'dist_schedule'}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule ( // expected-error {{expected 
'static' in OpenMP clause 'dist_schedule'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule () // expected-error {{expected 'static' in OpenMP clause 'dist_schedule'}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (argc)) // expected-error {{expected 'static' in OpenMP clause 'dist_schedule'}} expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static, argc > 0 ? argv[1] : argv[2]) // expected-error2 {{expression must have integral or unscoped enumeration type, not 'char *'}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static), dist_schedule (static, 1) // expected-error {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'dist_schedule' clause}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static, S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error3 {{expression must have integral or unscoped enumeration type, not 'char *'}} + for (int i = 0; i < 10; ++i) foo(); + return T(); +} + +int main(int argc, char **argv) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule // expected-error {{expected '(' after 'dist_schedule'}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule ( // expected-error {{expected 'static' in OpenMP clause 'dist_schedule'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule () // expected-error {{expected 'static' in OpenMP clause 'dist_schedule'}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (argc)) // expected-error {{expected 'static' 
in OpenMP clause 'dist_schedule'}} expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static, argc > 0 ? argv[1] : argv[2]) // expected-error {{expression must have integral or unscoped enumeration type, not 'char *'}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static), dist_schedule (static, 1) // expected-error {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'dist_schedule' clause}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static, S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 0; i < 10; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd dist_schedule (static, argv[1]=2) // expected-error {{expression must have integral or unscoped enumeration type, not 'char *'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) foo(); + return (tmain(argc) + tmain(argv[0][0])); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} +} Index: test/OpenMP/distribute_parallel_for_simd_firstprivate_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_firstprivate_messages.cpp @@ -0,0 +1,359 @@ +// RUN: %clang_cc1 -verify -fopenmp %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + +public: + S2() : a(0) {} + S2(const S2 &s2) : a(s2.a) {} + static float S2s; + static const float S2sc; +}; +const float S2::S2sc = 0; +const S2 b; +const S2 ba[5]; +class S3 { + int a; + S3 &operator=(const S3 &s3); + +public: + S3() : a(0) {} + S3(const S3 &s3) : a(s3.a) {} +}; +const S3 c; +const S3 ca[5]; +extern const int f; +class S4 { + int a; + S4(); + S4(const S4 &s4); // expected-note 2 {{implicitly declared private here}} + +public: + S4(int v) : a(v) {} +}; +class S5 { + int a; + S5(const S5 &s5) : a(s5.a) {} // expected-note 4 {{implicitly declared private here}} + +public: + S5() : a(0) {} + S5(int v) : a(v) {} +}; +class S6 { + int a; + S6() : a(0) {} + +public: + S6(const S6 &s6) : a(s6.a) {} + S6(int v) : a(v) {} +}; + +S3 h; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template +int foomain(int argc, char **argv) { + I e(4); + C g(5); + int i; + int &j = i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate // expected-error {{expected '(' after 'firstprivate'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate() // expected-error {{expected expression}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp 
target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(argc) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(S1) // expected-error {{'S1' does not refer to a value}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(a, b) // expected-error {{firstprivate variable with incomplete type 'S1'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(argv[1]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(h) // expected-error {{threadprivate or thread local variable cannot be firstprivate}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel + { + int v = 0; + int i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i) + for (int k = 0; k < argc; ++k) { + i = k; + v += i; + } + } +#pragma omp parallel shared(i) +#pragma omp parallel private(i) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(j) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel private(i) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp distribute parallel for simd' directive may not be firstprivate, predetermined as linear}} + foo(); +#pragma omp parallel reduction(+ : i) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp distribute parallel for simd' directive may not be firstprivate, predetermined as linear}} + foo(); + return 0; +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { 
+using A::x; +} + +int main(int argc, char **argv) { + const int d = 5; + const int da[5] = {0}; + S4 e(4); + S5 g(5); + S3 m; + S6 n(2); + int i; + int &j = i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate // expected-error {{expected '(' after 'firstprivate'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate() // expected-error {{expected expression}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(argc) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(S1) // expected-error {{'S1' does not refer to a value}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(a, b, c, d, f) // expected-error {{firstprivate variable with incomplete type 'S1'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(argv[1]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(2 * 2) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(ba) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(ca) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(da) // OK + for (i = 0; i < argc; ++i) + foo(); + int xa; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(xa) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(S2::S2s) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(S2::S2sc) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd safelen(5) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private 
constructor of class 'S5'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(m) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(h, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be firstprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(xa), firstprivate(xa) // expected-error {{private variable cannot be firstprivate}} expected-note {{defined as private}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp distribute parallel for simd' directive may not be firstprivate, predetermined as linear}} + foo(); +#pragma omp parallel shared(xa) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(xa) // OK: may be firstprivate + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(j) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(n) firstprivate(n) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp parallel + { + int v = 0; + int i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i) + for (int k = 0; k < argc; ++k) { + i = k; + v += i; + } + } +#pragma omp parallel private(i) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp distribute parallel for simd' directive may not be firstprivate, predetermined as linear}} + foo(); +#pragma omp parallel reduction(+ : i) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp distribute parallel for simd' directive may not be firstprivate, predetermined as linear}} + foo(); + static int si; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(si) // OK + for (i = 0; i < argc; ++i) + si = i + 1; + + return foomain(argc, argv); // expected-note {{in instantiation of function template specialization 'foomain' requested here}} +} Index: test/OpenMP/distribute_parallel_for_simd_if_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_if_messages.cpp @@ -0,0 +1,179 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note {{declared here}} + +template // expected-note {{declared here}} +int tmain(T argc, S **argv) { + T i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel 
for simd if // expected-error {{expected '(' after 'if'}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if () // expected-error {{expected expression}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (argc)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (argc > 0 ? argv[1] : argv[2]) + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (foobool(argc)), if (true) // expected-error {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'if' clause}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (S) // expected-error {{'S' does not refer to a value}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(argc) + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel // expected-warning {{missing ':' after directive name modifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel : // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel : argc) + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel : argc) if (for:argc) // expected-error {{directive name modifier 'for' is not allowed for '#pragma omp distribute parallel for simd'}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel : argc) if (parallel:argc) // expected-error {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'if' clause with 'parallel' name modifier}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams 
+#pragma omp distribute parallel for simd if(parallel : argc) if (argc) // expected-error {{no more 'if' clause is allowed}} expected-note {{previous clause with directive name modifier specified here}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(distribute : argc) // expected-error {{directive name modifier 'distribute' is not allowed for '#pragma omp distribute parallel for simd'}} + for (i = 0; i < argc; ++i) foo(); + + return 0; +} + +int main(int argc, char **argv) { + int i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if // expected-error {{expected '(' after 'if'}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if () // expected-error {{expected expression}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (argc)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (argc > 0 ? argv[1] : argv[2]) + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (foobool(argc)), if (true) // expected-error {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'if' clause}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (S1) // expected-error {{'S1' does not refer to a value}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (1 0) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(if(tmain(argc, argv) // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel // expected-warning {{missing ':' after directive name modifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel : // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) 
foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel : argc) + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel : argc) if (for:argc) // expected-error {{directive name modifier 'for' is not allowed for '#pragma omp distribute parallel for simd'}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel : argc) if (parallel:argc) // expected-error {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'if' clause with 'parallel' name modifier}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(parallel : argc) if (argc) // expected-error {{no more 'if' clause is allowed}} expected-note {{previous clause with directive name modifier specified here}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if(distribute : argc) // expected-error {{directive name modifier 'distribute' is not allowed for '#pragma omp distribute parallel for simd'}} + for (i = 0; i < argc; ++i) foo(); + + return tmain(argc, argv); +} Index: test/OpenMP/distribute_parallel_for_simd_lastprivate_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_lastprivate_messages.cpp @@ -0,0 +1,333 @@ +// RUN: %clang_cc1 -verify -fopenmp %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + +public: + S2() : a(0) {} + S2(S2 &s2) : a(s2.a) {} + const S2 &operator =(const S2&) const; + S2 &operator =(const S2&); + static float S2s; // expected-note {{static data member is predetermined as shared}} + static const float S2sc; +}; +const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const S2 b; +const S2 ba[5]; +class S3 { + int a; + S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + +public: + S3() : a(0) {} + S3(S3 &s3) : a(s3.a) {} +}; +const S3 c; // expected-note {{global variable is predetermined as shared}} +const S3 ca[5]; // expected-note {{global variable is predetermined as shared}} +extern const int f; // expected-note {{global variable is predetermined as shared}} +class S4 { + int a; + S4(); // expected-note 3 {{implicitly declared private here}} + S4(const S4 &s4); + +public: + S4(int v) : a(v) {} +}; +class S5 { + int a; + S5() : a(0) {} // expected-note {{implicitly declared private here}} + +public: + S5(const S5 &s5) : a(s5.a) {} + S5(int v) : a(v) {} +}; +class S6 { + int a; + S6() : a(0) {} + +public: + S6(const S6 &s6) : a(s6.a) {} + S6(int v) : a(v) {} +}; + +S3 h; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template +int foomain(int argc, char **argv) { + I e(4); + I g(5); + int i; + int &j = i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate // expected-error {{expected '(' after 'lastprivate'}} + for (int k = 0; k < argc; ++k) + ++k; 
+#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate() // expected-error {{expected expression}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(argc) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(S1) // expected-error {{'S1' does not refer to a value}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(a, b) // expected-error {{lastprivate variable with incomplete type 'S1'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(argv[1]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(e, g) // expected-error 2 {{calling a private constructor of class 'S4'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(h) // expected-error {{threadprivate or thread local variable cannot be lastprivate}} + for (int k = 0; k < argc; ++k) + ++k; + + int v = 0; +#pragma omp target +#pragma omp teams + { +#pragma omp distribute parallel for simd lastprivate(i) + for (int k = 0; k < argc; ++k) { + i = k; + v += i; + } + } +#pragma omp target +#pragma omp teams private(i) +#pragma omp distribute parallel for simd lastprivate(j) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(i) + for (int k = 0; k < argc; ++k) + ++k; + return 0; +} + +void bar(S4 a[2]) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(a) + for (int i = 0; i < 2; ++i) + foo(); +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + const int d = 5; // expected-note {{constant variable is predetermined as shared}} + const int da[5] = {0}; // expected-note {{constant variable is predetermined as shared}} + S4 e(4); + S5 g(5); + S3 m; + S6 n(2); + int i; + int &j = i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate // expected-error {{expected '(' after 'lastprivate'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute 
parallel for simd lastprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate() // expected-error {{expected expression}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(argc) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(S1) // expected-error {{'S1' does not refer to a value}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(a, b, c, d, f) // expected-error {{lastprivate variable with incomplete type 'S1'}} expected-error 3 {{shared variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(argv[1]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(2 * 2) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(ba) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(ca) // expected-error {{shared variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(da) // expected-error {{shared variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); + int xa; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(xa) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(S2::S2s) // expected-error {{shared variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(S2::S2sc) // expected-error {{shared variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd safelen(5) // OK + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd 
lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(h) // expected-error {{threadprivate or thread local variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(B::x) // expected-error {{threadprivate or thread local variable cannot be lastprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(xa), lastprivate(xa) // expected-error {{private variable cannot be lastprivate}} expected-note {{defined as private}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(i) // expected-note {{defined as lastprivate}} + for (i = 0; i < argc; ++i) // expected-error{{loop iteration variable in the associated loop of 'omp distribute parallel for simd' directive may not be lastprivate, predetermined as linear}} + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(xa) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(xa) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(j) + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(n) firstprivate(n) // OK + for (i = 0; i < argc; ++i) + foo(); + static int si; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd lastprivate(si) // OK + for (i = 0; i < argc; ++i) + si = i + 1; + return foomain(argc, argv); // expected-note {{in instantiation of function template specialization 'foomain' requested here}} +} Index: test/OpenMP/distribute_parallel_for_simd_linear_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_linear_messages.cpp @@ -0,0 +1,338 @@ +// RUN: %clang_cc1 -verify -fopenmp %s + +namespace X { + int x; +}; + +struct B { + static int ib; // expected-note {{'B::ib' declared here}} + static int bfoo() { return 8; } +}; + +int bfoo() { return 4; } + +int z; +const int C1 = 1; +const int C2 = 2; +void test_linear_colons() +{ + int B = 0; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(B:bfoo()) + for (int i = 0; i < 10; ++i) ; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}} + for (int i = 0; i < 10; ++i) ; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}} + for (int i = 0; i < 10; ++i) ; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}} + for (int i = 0; i < 10; ++i) ; + +#pragma omp target +#pragma omp teams 
+#pragma omp distribute parallel for simd linear(B:B::bfoo())
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear(X::x : ::z)
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear(B,::z, X::x)
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear(::z)
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear(B::bfoo()) // expected-error {{expected variable name}}
+  for (int i = 0; i < 10; ++i) ;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear(B::ib,B:C1+C2)
+  for (int i = 0; i < 10; ++i) ;
+}
+
+template <int L, class T, class N> T test_template(T* arr, N num) {
+  N i;
+  T sum = (T)0;
+  T ind2 = - num * L; // expected-note {{'ind2' defined here}}
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear(ind2:L) // expected-error {{argument of a linear clause should be of integral or pointer type}}
+  for (i = 0; i < num; ++i) {
+    T cur = arr[(int)ind2];
+    ind2 += L;
+    sum += cur;
+  }
+  return T();
+}
+
+template <int LEN> int test_warn() {
+  int ind2 = 0;
+  #pragma omp target
+  #pragma omp teams
+  #pragma omp parallel for simd linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}}
+  for (int i = 0; i < 100; i++) {
+    ind2 += LEN;
+  }
+  return ind2;
+}
+
+struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}}
+extern S1 a;
+class S2 {
+  mutable int a;
+public:
+  S2():a(0) { }
+};
+const S2 b; // expected-note 2 {{'b' defined here}}
+const S2 ba[5];
+class S3 {
+  int a;
+public:
+  S3():a(0) { }
+};
+const S3 ca[5];
+class S4 {
+  int a;
+  S4();
+public:
+  S4(int v):a(v) { }
+};
+class S5 {
+  int a;
+  S5():a(0) {}
+public:
+  S5(int v):a(v) { }
+};
+
+S3 h;
+#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
+
+template <class I, class C> int foomain(I argc, C **argv) {
+  I e(4);
+  I g(5);
+  int i;
+  int &j = i;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear // expected-error {{expected '(' after 'linear'}}
+  for (int k = 0; k < argc; ++k) ++k;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear () // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k) ++k;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd linear (argc > 0 ? 
argv[1] : argv[2]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear (argc : 5) + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear (S1) // expected-error {{'S1' does not refer to a value}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear (a, b:B::ib) // expected-error {{linear variable with incomplete type 'S1'}} expected-error {{const-qualified variable cannot be linear}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear (argv[1]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(e, g) + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(i) + for (int k = 0; k < argc; ++k) ++k; + + #pragma omp parallel + { + int v = 0; + int i; + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd linear(v:i) + for (int k = 0; k < argc; ++k) { i = k; v += i; } + } + +#pragma omp target +#pragma omp teams +#pragma omp parallel for simd linear(j) + for (int k = 0; k < argc; ++k) ++k; + + int v = 0; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(v:j) + for (int k = 0; k < argc; ++k) { ++k; v += j; } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(i) + for (int k = 0; k < argc; ++k) ++k; + return 0; +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace C { +using A::x; +} + +int main(int argc, char **argv) { + double darr[100]; + // expected-note@+1 {{in instantiation of function template specialization 'test_template<-4, double, int>' requested here}} + test_template<-4>(darr, 4); + // expected-note@+1 {{in instantiation of function template specialization 'test_warn<0>' requested here}} + test_warn<0>(); + + S4 e(4); // expected-note {{'e' defined here}} + S5 g(5); // expected-note {{'g' defined here}} + int i; + int &j = i; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear // expected-error {{expected '(' after 'linear'}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear () // expected-error {{expected expression}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error 
{{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear (argc) + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear (S1) // expected-error {{'S1' does not refer to a value}} + for (int k = 0; k < argc; ++k) ++k; + + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear (a, b) // expected-error {{linear variable with incomplete type 'S1'}} expected-error {{const-qualified variable cannot be linear}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear (argv[1]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(e, g) // expected-error {{argument of a linear clause should be of integral or pointer type, not 'S4'}} expected-error {{argument of a linear clause should be of integral or pointer type, not 'S5'}} + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}} + for (int k = 0; k < argc; ++k) ++k; + + #pragma omp parallel + { + int i; + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd linear(i) + for (int k = 0; k < argc; ++k) ++k; + + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd linear(i : 4) + for (int k = 0; k < argc; ++k) { ++k; i += 4; } + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(j) + for (int k = 0; k < argc; ++k) ++k; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd linear(i) + for (int k = 0; k < argc; ++k) ++k; + + foomain(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain' requested here}} + return 0; +} + Index: test/OpenMP/distribute_parallel_for_simd_num_threads_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_num_threads_messages.cpp @@ -0,0 +1,107 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note {{declared here}} + +template // expected-note {{declared here}} +T tmain(T argc, S **argv) { + T i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads // expected-error {{expected '(' after 'num_threads'}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads () // expected-error {{expected expression}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (argc // expected-error 
{{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (argc)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads ((argc > 0) ? argv[1] : argv[2]) // expected-error 2 {{expression must have integral or unscoped enumeration type, not 'char *'}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (foobool(argc)), num_threads (true), num_threads (-5) // expected-error 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'num_threads' clause}} expected-error {{argument to 'num_threads' clause must be a strictly positive integer value}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (S) // expected-error {{'S' does not refer to a value}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error 2 {{expression must have integral or unscoped enumeration type, not 'char *'}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (argc) + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (N) // expected-error {{argument to 'num_threads' clause must be a strictly positive integer value}} + for (i = 0; i < argc; ++i) foo(); + + return argc; +} + +int main(int argc, char **argv) { + int i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads // expected-error {{expected '(' after 'num_threads'}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads () // expected-error {{expected expression}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (argc)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (argc > 0 ? 
argv[1] : argv[2]) // expected-error {{integral }} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (foobool(argc)), num_threads (true), num_threads (-5) // expected-error 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'num_threads' clause}} expected-error {{argument to 'num_threads' clause must be a strictly positive integer value}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (S1) // expected-error {{'S1' does not refer to a value}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{expression must have integral or unscoped enumeration type, not 'char *'}} + for (i = 0; i < argc; ++i) foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads (num_threads(tmain(argc, argv) // expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} + for (i = 0; i < argc; ++i) foo(); + + return tmain(argc, argv); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} +} Index: test/OpenMP/distribute_parallel_for_simd_private_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_private_messages.cpp @@ -0,0 +1,315 @@ +// RUN: %clang_cc1 -verify -fopenmp %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + +public: + S2() : a(0) {} +}; +const S2 b; +const S2 ba[5]; +class S3 { + int a; + +public: + S3() : a(0) {} +}; +const S3 ca[5]; +class S4 { + int a; + S4(); // expected-note {{implicitly declared private here}} + +public: + S4(int v) : a(v) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(a) private(this->a) + for (int k = 0; k < v; ++k) + ++this->a; + } +}; +class S5 { + int a; + S5() : a(0) {} // expected-note {{implicitly declared private here}} + +public: + S5(int v) : a(v) {} + S5 &operator=(S5 &s) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(a) private(this->a) private(s.a) // expected-error {{expected variable name or data member of current class}} + for (int k = 0; k < s.a; ++k) + ++s.a; + return *this; + } +}; + +template +class S6 { +public: + T a; + + S6() : a(0) {} + S6(T v) : a(v) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(a) private(this->a) + for (int k = 0; k < v; ++k) + ++this->a; + } + S6 &operator=(S6 &s) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(a) private(this->a) private(s.a) // expected-error {{expected variable name or data member of current class}} + for (int k = 0; k < s.a; ++k) + ++s.a; + return *this; + } +}; + +template +class S7 : public T { + T a; + S7() : a(0) {} + +public: + S7(T v) : a(v) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(a) private(this->a) private(T::a) + for (int k = 0; k < a.a; ++k) + ++this->a.a; + } + S7 &operator=(S7 &s) { 
+#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(a) private(this->a) private(s.a) private(s.T::a) // expected-error 2 {{expected variable name or data member of current class}} + for (int k = 0; k < s.a.a; ++k) + ++s.a.a; + return *this; + } +}; + +S3 h; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template +int foomain(I argc, C **argv) { + I e(4); + I g(5); + int i; + int &j = i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private // expected-error {{expected '(' after 'private'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private() // expected-error {{expected expression}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argc) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(S1) // expected-error {{'S1' does not refer to a value}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(a, b) // expected-error {{private variable with incomplete type 'S1'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argv[1]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(e, g) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(h) // expected-error {{threadprivate or thread local variable cannot be private}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd nowait // expected-error {{unexpected OpenMP clause 'nowait' in directive '#pragma omp distribute parallel for simd'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel + { + int v = 0; + int i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(i) + for (int k = 0; k < argc; ++k) { + i = k; + v += i; + } + } +#pragma omp parallel shared(i) +#pragma omp parallel private(i) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(j) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(i) + for (int k = 0; k < argc; ++k) + ++k; + return 
0; +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + S4 e(4); + S5 g(5); + S6 s6(0.0) , s6_0(1.0); + S7 > s7(0.0) , s7_0(1.0); + int i; + int &j = i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private // expected-error {{expected '(' after 'private'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private() // expected-error {{expected expression}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argc) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(S1) // expected-error {{'S1' does not refer to a value}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(a, b) // expected-error {{private variable with incomplete type 'S1'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(argv[1]) // expected-error {{expected variable name}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(h, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be private}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd nowait // expected-error {{unexpected OpenMP clause 'nowait' in directive '#pragma omp distribute parallel for simd'}} + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp parallel + { + int i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(i) + for (int k = 0; k < argc; ++k) + ++k; + } +#pragma omp parallel shared(i) +#pragma omp parallel private(i) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(j) + for (int k = 0; k < argc; ++k) + ++k; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(i) + for (int k = 0; k < argc; ++k) + ++k; + static int m; +#pragma omp target +#pragma omp teams +#pragma omp distribute 
parallel for simd private(m) + for (int k = 0; k < argc; ++k) + m = k + 2; + + s6 = s6_0; // expected-note {{in instantiation of member function 'S6::operator=' requested here}} + s7 = s7_0; // expected-note {{in instantiation of member function 'S7 >::operator=' requested here}} + return foomain(argc, argv); // expected-note {{in instantiation of function template specialization 'foomain' requested here}} +} + Index: test/OpenMP/distribute_parallel_for_simd_proc_bind_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_proc_bind_messages.cpp @@ -0,0 +1,101 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s + +void foo(); + +template +T tmain(T argc, S **argv) { + T i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind // expected-error {{expected '(' after 'proc_bind'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind( // expected-error {{expected 'master', 'close' or 'spread' in OpenMP clause 'proc_bind'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind() // expected-error {{expected 'master', 'close' or 'spread' in OpenMP clause 'proc_bind'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(master // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(close), proc_bind(spread) // expected-error {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'proc_bind' clause}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(x) // expected-error {{expected 'master', 'close' or 'spread' in OpenMP clause 'proc_bind'}} + for (i = 0; i < argc; ++i) + foo(); + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(master) + for (i = 0; i < argc; ++i) + foo(); + +#pragma omp parallel proc_bind(close) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(spread) + for (i = 0; i < argc; ++i) + foo(); + + return T(); +} + +int main(int argc, char **argv) { + int i; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind // expected-error {{expected '(' after 'proc_bind'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind( // expected-error {{expected 'master', 'close' or 'spread' in OpenMP clause 'proc_bind'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind() // expected-error {{expected 'master', 'close' or 'spread' in OpenMP clause 'proc_bind'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(master // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd 
proc_bind(close), proc_bind(spread) // expected-error {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'proc_bind' clause}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(x) // expected-error {{expected 'master', 'close' or 'spread' in OpenMP clause 'proc_bind'}} + for (i = 0; i < argc; ++i) + foo(); + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(master) + for (i = 0; i < argc; ++i) + foo(); + +#pragma omp parallel proc_bind(close) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(spread) + for (i = 0; i < argc; ++i) + foo(); + return tmain(argc, argv); +} Index: test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp @@ -0,0 +1,441 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note {{declared here}} expected-note 4 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + S2 &operator+(const S2 &arg) { return (*this); } // expected-note 3 {{implicitly declared private here}} + +public: + S2() : a(0) {} + S2(S2 &s2) : a(s2.a) {} + static float S2s; // expected-note 2 {{static data member is predetermined as shared}} + static const float S2sc; +}; +const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +S2 b; // expected-note 3 {{'b' defined here}} +const S2 ba[5]; // expected-note 2 {{'ba' defined here}} +class S3 { + int a; + +public: + int b; + S3() : a(0) {} + S3(const S3 &s3) : a(s3.a) {} + S3 operator+(const S3 &arg1) { return arg1; } +}; +int operator+(const S3 &arg1, const S3 &arg2) { return 5; } +S3 c; // expected-note 3 {{'c' defined here}} +const S3 ca[5]; // expected-note 2 {{'ca' defined here}} +extern const int f; // expected-note 4 {{'f' declared here}} +class S4 { + int a; + S4(); // expected-note {{implicitly declared private here}} + S4(const S4 &s4); + S4 &operator+(const S4 &arg) { return (*this); } + +public: + S4(int v) : a(v) {} +}; +S4 &operator&=(S4 &arg1, S4 &arg2) { return arg1; } +class S5 { + int a; + S5() : a(0) {} // expected-note {{implicitly declared private here}} + S5(const S5 &s5) : a(s5.a) {} + S5 &operator+(const S5 &arg); + +public: + S5(int v) : a(v) {} +}; +class S6 { // expected-note 3 {{candidate function (the implicit copy assignment operator) not viable: no known conversion from 'int' to 'const S6' for 1st argument}} +#if __cplusplus >= 201103L // C++11 or later +// expected-note@-2 3 {{candidate function (the implicit move assignment operator) not viable}} +#endif + int a; + +public: + S6() : a(6) {} + operator int() { return 6; } +} o; + +S3 h, k; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template // expected-note {{declared here}} +T tmain(T argc) { + const T d = T(); // expected-note 4 {{'d' defined here}} + const T da[5] = {T()}; // expected-note 2 {{'da' defined here}} + T qa[5] = {T()}; + T i; + T &j = i; // expected-note 4 {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const T &r = da[(int)i]; // expected-note 2 
{{'r' defined here}} + T &q = qa[(int)i]; // expected-note 2 {{'q' defined here}} + T fl; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction // expected-error {{expected '(' after 'reduction'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(& : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(|| : argc ? 
i : argc) // expected-error 2 {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(foo : argc) //expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'float'}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'int'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(&& : argc) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(^ : T) // expected-error {{'T' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 3 {{const-qualified list item cannot be reduction}} expected-error 2 {{'operator+' is a private member of 'S2'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 4 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 3 {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}} expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : h, k) // expected-error {{threadprivate or thread local variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : o) // expected-error 2 
{{no viable overloaded '='}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(i), reduction(+ : j), reduction(+ : q) // expected-error 4 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(k) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : p), reduction(+ : p) // expected-error 2 {{variable can appear only once in OpenMP 'reduction' clause}} expected-note 2 {{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : r) // expected-error 2 {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(fl) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel reduction(* : fl) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); + + return T(); +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + const int d = 5; // expected-note 2 {{'d' defined here}} + const int da[5] = {0}; // expected-note {{'da' defined here}} + int qa[5] = {0}; + S4 e(4); + S5 g(5); + int i; + int &j = i; // expected-note 2 {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const int &r = da[i]; // expected-note {{'r' defined here}} + int &q = qa[i]; // expected-note {{'q' defined here}} + float fl; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction // expected-error {{expected '(' after 'reduction'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target 
+#pragma omp teams +#pragma omp distribute parallel for simd reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(foo : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(|| : argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(~ : argc) // expected-error {{expected unqualified-id}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(&& : argc) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(^ : S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{const-qualified list item cannot be reduction}} expected-error {{'operator+' is a private member of 'S2'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 2 {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(- : da) 
// expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : o) // expected-error {{no viable overloaded '='}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(i), reduction(+ : j), reduction(+ : q) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(k) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : p), reduction(+ : p) // expected-error {{variable can appear only once in OpenMP 'reduction' clause}} expected-note {{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : r) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(max : j) // expected-error {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(fl) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel reduction(* : fl) +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); + static int m; +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd reduction(+ : m) // OK + for (int i = 0; i < 10; ++i) + m++; + + return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function 
template specialization 'tmain<int>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<float>' requested here}}
+}
Index: test/OpenMP/distribute_parallel_for_simd_safelen_messages.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/distribute_parallel_for_simd_safelen_messages.cpp
@@ -0,0 +1,177 @@
+// RUN: %clang_cc1 -verify -fopenmp %s
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s
+
+void foo() {
+}
+
+#if __cplusplus >= 201103L
+// expected-note@+2 4 {{declared here}}
+#endif
+bool foobool(int argc) {
+  return argc;
+}
+
+struct S1; // expected-note {{declared here}}
+
+template <class T, typename S, int N, int ST> // expected-note {{declared here}}
+T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd safelen // expected-error {{expected '(' after 'safelen'}}
+  for (int i = ST; i < N; i++)
+    argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++)
+    argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd safelen () // expected-error {{expected expression}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd safelen (argc // expected-note {{to match this '('}} expected-error 2 {{expression is not an integral constant expression}} expected-note 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} expected-error {{expected ')'}}
+  for (int i = ST; i < N; i++)
+    argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd safelen (ST // expected-error {{argument to 'safelen' clause must be a strictly positive integer value}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++)
+    argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd safelen (1)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}}
+  for (int i = ST; i < N; i++)
+    argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd safelen ((ST > 0) ?
1 + ST : 2) + for (int i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + +#if __cplusplus >= 201103L + // expected-note@+4 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) // expected-error 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'safelen' clause}} expected-error 2 {{argument to 'safelen' clause must be a strictly positive integer value}} expected-error 2 {{expression is not an integral constant expression}} + for (int i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd safelen (S) // expected-error {{'S' does not refer to a value}} + for (int i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + +#if __cplusplus <= 199711L + // expected-error@+6 2 {{expression is not an integral constant expression}} +#else + // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd safelen (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd safelen (4) + for (int i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd safelen (N) // expected-error {{argument to 'safelen' clause must be a strictly positive integer value}} + for (T i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + + return argc; +} + +int main(int argc, char **argv) { +#pragma omp target +#pragma omp teams +#pragma omp parallel for simd safelen // expected-error {{expected '(' after 'safelen'}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#pragma omp target +#pragma omp teams +#pragma omp parallel for simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#pragma omp target +#pragma omp teams +#pragma omp parallel for simd safelen () // expected-error {{expected expression}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#pragma omp target +#pragma omp teams +#pragma omp parallel for simd safelen (4 // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#pragma omp target +#pragma omp teams +#pragma omp parallel for simd safelen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel for simd' are ignored}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#if __cplusplus >= 201103L + // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp parallel for simd safelen (foobool(1) > 0 ? 
1 : 2) // expected-error {{expression is not an integral constant expression}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#if __cplusplus >= 201103L + // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp parallel for simd safelen (foobool(argc)), safelen (true), safelen (-5) // expected-error 2 {{argument to 'safelen' clause must be a strictly positive integer value}} expected-error 2 {{directive '#pragma omp parallel for simd' cannot contain more than one 'safelen' clause}} expected-error {{expression is not an integral constant expression}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#pragma omp target +#pragma omp teams +#pragma omp parallel for simd safelen (S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#if __cplusplus <= 199711L + // expected-error@+6 {{expression is not an integral constant expression}} +#else + // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd safelen (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + + // expected-note@+3 {{in instantiation of function template specialization 'tmain' requested here}} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd safelen(safelen(tmain(argc, argv) // expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}} + foo(); // expected-error {{statement after '#pragma omp distribute parallel for simd' must be a for loop}} + + // expected-note@+1 {{in instantiation of function template specialization 'tmain' requested here}} + return tmain(argc, argv); +} + Index: test/OpenMP/distribute_parallel_for_simd_schedule_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_schedule_messages.cpp @@ -0,0 +1,151 @@ +// RUN: %clang_cc1 -verify -fopenmp %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note {{declared here}} + +template // expected-note {{declared here}} +T tmain(T argc, S **argv) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule // expected-error {{expected '(' after 'schedule'}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule ( // expected-error {{expected 'static', 'dynamic', 'guided', 'auto', 'runtime', 'monotonic', 'nonmonotonic' or 'simd' in OpenMP clause 'schedule'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule () // expected-error {{expected 'static', 'dynamic', 'guided', 'auto', 'runtime', 'monotonic', 'nonmonotonic' or 'simd' in OpenMP clause 'schedule'}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (auto // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 
ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (auto_dynamic // expected-error {{expected 'static', 'dynamic', 'guided', 'auto', 'runtime', 'monotonic', 'nonmonotonic' or 'simd' in OpenMP clause 'schedule'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (auto, // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (runtime, 3) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (guided argc + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + // expected-error@+3 2 {{argument to 'schedule' clause must be a strictly positive integer value}} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (static, ST // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (dynamic, 1)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (guided, (ST > 0) ? 
1 + ST : 2) + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + // expected-error@+4 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'schedule' clause}} + // expected-error@+3 {{argument to 'schedule' clause must be a strictly positive integer value}} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (static, foobool(argc)), schedule (dynamic, true), schedule (guided, -5) + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (static, S) // expected-error {{'S' does not refer to a value}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + // expected-error@+3 2 {{expression must have integral or unscoped enumeration type, not 'char *'}} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (guided, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (dynamic, 1) + for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (static, N) // expected-error {{argument to 'schedule' clause must be a strictly positive integer value}} + for (T i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; + return argc; +} + +int main(int argc, char **argv) { + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule // expected-error {{expected '(' after 'schedule'}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule ( // expected-error {{expected 'static', 'dynamic', 'guided', 'auto', 'runtime', 'monotonic', 'nonmonotonic' or 'simd' in OpenMP clause 'schedule'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule () // expected-error {{expected 'static', 'dynamic', 'guided', 'auto', 'runtime', 'monotonic', 'nonmonotonic' or 'simd' in OpenMP clause 'schedule'}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (auto // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (auto_dynamic // expected-error {{expected 'static', 'dynamic', 'guided', 'auto', 'runtime', 'monotonic', 'nonmonotonic' or 'simd' in OpenMP clause 'schedule'}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (auto, // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (runtime, 3) // expected-error {{expected ')'}} expected-note 
{{to match this '('}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (guided, 4 // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (static, 2+2)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (dynamic, foobool(1) > 0 ? 1 : 2) + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + // expected-error@+4 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'schedule' clause}} + // expected-error@+3 {{argument to 'schedule' clause must be a strictly positive integer value}} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (guided, foobool(argc)), schedule (static, true), schedule (dynamic, -5) + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (guided, S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + // expected-error@+3 {{expression must have integral or unscoped enumeration type, not 'char *'}} + #pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule (static, argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; + // expected-error@+5 {{statement after '#pragma omp distribute parallel for simd' must be a for loop}} + // expected-note@+3 {{in instantiation of function template specialization 'tmain' requested here}} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd schedule(dynamic, schedule(tmain(argc, argv) // expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}} + foo(); + // expected-note@+1 {{in instantiation of function template specialization 'tmain' requested here}} + return tmain(argc, argv); +} + Index: test/OpenMP/distribute_parallel_for_simd_shared_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_shared_messages.cpp @@ -0,0 +1,396 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s + + +struct S1; // expected-note 2 {{declared here}} +extern S1 a; +class S2 { + mutable int a; +public: + S2():a(0) { } + S2(S2 &s2):a(s2.a) { } +}; +const S2 b; +const S2 ba[5]; +class S3 { + int a; +public: + S3():a(0) { } + S3(S3 &s3):a(s3.a) { } +}; +const S3 c; +const S3 ca[5]; +extern const int f; +class S4 { + int a; + S4(); + S4(const S4 &s4); +public: + S4(int v):a(v) { } +}; +class S5 { + int a; + S5():a(0) {} + S5(const S5 &s5):a(s5.a) { } +public: + S5(int v):a(v) { } +}; + +S3 h; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note 2 {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +template +T tmain(T argc, S **argv) { + const int d = 5; + const int da[5] = { 0 }; + S4 e(4); + S5 
g(5); + int i; + int &j = i; + int acc = 0; + int n = 1000; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared // expected-error {{expected '(' after 'shared'}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared () // expected-error {{expected expression}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (argc) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (S1) // expected-error {{'S1' does not refer to a value}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (a, b, c, d, f) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (argv[1]) // expected-error {{expected variable name}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(ba) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(ca) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(da) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(e, g) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(h, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be shared}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(i), shared(i) // expected-error {{private variable cannot be shared}} expected-note {{defined as private}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i), shared(i) // expected-error {{firstprivate variable cannot be shared}} expected-note {{defined as firstprivate}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(i) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd 
shared(i) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(j) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(i) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(j) + for(int k = 0 ; k < n ; k++) { + acc++; + } +return T(); +} + + +int main(int argc, char **argv) { + const int d = 5; + const int da[5] = { 0 }; + S4 e(4); + S5 g(5); + int i; + int &j = i; + int acc = 0; + int n = argc; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared // expected-error {{expected '(' after 'shared'}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared () // expected-error {{expected expression}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (argc > 0 ? 
argv[1] : argv[2]) // expected-error {{expected variable name}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (argc) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (S1) // expected-error {{'S1' does not refer to a value}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (a, b, c, d, f) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared (argv[1]) // expected-error {{expected variable name}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(ba) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(ca) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(da) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(e, g) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(h, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be shared}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(i), shared(i) // expected-error {{private variable cannot be shared}} expected-note {{defined as private}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i), shared(i) // expected-error {{firstprivate variable cannot be shared}} expected-note {{defined as firstprivate}} + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd private(i) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(i) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(j) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd firstprivate(i) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(i) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd shared(j) + for(int k = 0 ; k < n ; k++) { + acc++; + } + +return tmain(argc, argv); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} +} Index: test/OpenMP/distribute_parallel_for_simd_simdlen_messages.cpp =================================================================== --- /dev/null +++ test/OpenMP/distribute_parallel_for_simd_simdlen_messages.cpp @@ -0,0 +1,181 @@ +// RUN: %clang_cc1 -verify -fopenmp %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s + +void foo() { +} + +#if __cplusplus >= 201103L +// expected-note@+2 4 {{declared here}} +#endif +bool foobool(int argc) { + 
return argc;
+}
+
+struct S1; // expected-note {{declared here}}
+
+template <class T, typename S, int N, int ST> // expected-note {{declared here}}
+T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd simdlen // expected-error {{expected '(' after 'simdlen'}}
+  for (int i = ST; i < N; i++)
+    argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd simdlen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++)
+    argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd simdlen () // expected-error {{expected expression}}
+  for (int i = ST; i < N; i++)
+    argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+// expected-error@+5 {{expected ')'}} expected-note@+5 {{to match this '('}}
+// expected-error@+4 2 {{expression is not an integral constant expression}}
+// expected-note@+3 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd simdlen (argc
+  for (int i = ST; i < N; i++)
+    argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+// expected-error@+3 {{argument to 'simdlen' clause must be a strictly positive integer value}}
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd simdlen (ST // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++)
+    argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd simdlen (1)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}}
+  for (int i = ST; i < N; i++)
+    argv[0][i] = argv[0][i] - argv[0][i-ST];
+
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd simdlen ((ST > 0) ?
1 + ST : 2) + for (int i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + +#if __cplusplus >= 201103L + // expected-note@+4 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) // expected-error 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'simdlen' clause}} expected-error 2 {{argument to 'simdlen' clause must be a strictly positive integer value}} expected-error 2 {{expression is not an integral constant expression}} + for (int i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen (S) // expected-error {{'S' does not refer to a value}} + for (int i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + +#if __cplusplus <= 199711L + // expected-error@+6 2 {{expression is not an integral constant expression}} +#else + // expected-error@+4 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen (4) + for (int i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen (N) // expected-error {{argument to 'simdlen' clause must be a strictly positive integer value}} + for (T i = ST; i < N; i++) + argv[0][i] = argv[0][i] - argv[0][i-ST]; + + return argc; +} + +int main(int argc, char **argv) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen // expected-error {{expected '(' after 'simdlen'}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen () // expected-error {{expected expression}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen (4 // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#if __cplusplus >= 201103L + // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen (foobool(1) > 0 ? 
1 : 2) // expected-error {{expression is not an integral constant expression}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + + +#if __cplusplus >= 201103L + // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) // expected-error {{expression is not an integral constant expression}} expected-error 2 {{directive '#pragma omp distribute parallel for simd' cannot contain more than one 'simdlen' clause}} expected-error 2 {{argument to 'simdlen' clause must be a strictly positive integer value}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen (S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#if __cplusplus <= 199711L + // expected-error@+6 {{expression is not an integral constant expression}} +#else + // expected-error@+4 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}} +#endif +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 4; i < 12; i++) + argv[0][i] = argv[0][i] - argv[0][i-4]; + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd simdlen(simdlen(tmain(argc, argv) // expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}} expected-note {{in instantiation of function template specialization 'tmain' requested here}} + foo(); // expected-error {{statement after '#pragma omp distribute parallel for simd' must be a for loop}} + + // expected-note@+1 {{in instantiation of function template specialization 'tmain' requested here}} + return tmain(argc, argv); +} + Index: test/OpenMP/nesting_of_regions.cpp =================================================================== --- test/OpenMP/nesting_of_regions.cpp +++ test/OpenMP/nesting_of_regions.cpp @@ -143,6 +143,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp parallel + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'parallel' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // SIMD DIRECTIVE #pragma omp simd @@ -323,6 +329,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp simd + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int j = 0; j < 10; ++j) + ; + } // FOR DIRECTIVE #pragma omp for @@ -516,6 +528,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp for + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'for' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int j = 0; j < 10; ++j) + ; + } // FOR SIMD DIRECTIVE #pragma omp for simd @@ -697,6 +715,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp for simd + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int j = 0; j < 10; ++j) + ; + } // 
SECTIONS DIRECTIVE #pragma omp sections @@ -895,6 +919,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp sections + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'sections' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // SECTION DIRECTIVE #pragma omp section // expected-error {{orphaned 'omp section' directives are prohibited, it must be closely nested to a sections region}} @@ -1148,6 +1178,13 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp sections + { +#pragma omp section +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'section' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // SINGLE DIRECTIVE #pragma omp single @@ -1332,6 +1369,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp single + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'single' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // MASTER DIRECTIVE #pragma omp master @@ -1516,6 +1559,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp master + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'master' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // CRITICAL DIRECTIVE #pragma omp critical @@ -1714,6 +1763,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp critical + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'critical' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // PARALLEL FOR DIRECTIVE #pragma omp parallel for @@ -1912,6 +1967,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp parallel for + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'parallel for' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int j = 0; j < 10; ++j) + ; + } // PARALLEL FOR SIMD DIRECTIVE #pragma omp parallel for simd @@ -2111,6 +2172,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int j = 0; j < 10; ++j) + ; + } // PARALLEL SECTIONS DIRECTIVE #pragma omp parallel sections @@ -2298,6 +2365,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp parallel sections + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'parallel sections' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // TASK DIRECTIVE #pragma omp task @@ -2432,6 +2505,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp task + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'task' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 
0; i < 10; ++i) + ; + } // ORDERED DIRECTIVE #pragma omp ordered @@ -2637,6 +2716,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp ordered + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'ordered' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // ATOMIC DIRECTIVE #pragma omp atomic @@ -2866,6 +2951,14 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp atomic + // expected-error@+2 {{the statement for 'atomic' must be an expression statement of form '++x;', '--x;', 'x++;', 'x--;', 'x binop= expr;', 'x = x binop expr' or 'x = expr binop x', where x is an l-value expression with scalar type}} + // expected-note@+1 {{expected an expression statement}} + { +#pragma omp distribute parallel for simd // expected-error {{OpenMP constructs may not be nested inside an atomic region}} + for (int i = 0; i < 10; ++i) + ; + } // TARGET DIRECTIVE #pragma omp target @@ -3016,6 +3109,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp target + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'target' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // TARGET PARALLEL DIRECTIVE #pragma omp target parallel @@ -3160,6 +3259,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp target parallel + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'target parallel' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // TARGET PARALLEL FOR DIRECTIVE #pragma omp target parallel for @@ -3358,6 +3463,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp target parallel for + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'target parallel for' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int j = 0; j < 10; ++j) + ; + } // TEAMS DIRECTIVE #pragma omp target @@ -3536,6 +3647,17 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp target +#pragma omp teams + { +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) + ; +#pragma omp distribute parallel for simd + for (int j = 0; j < 10; ++j) + ; + } + // TASKLOOP DIRECTIVE #pragma omp taskloop for (int i = 0; i < 10; ++i) { @@ -3724,7 +3846,12 @@ for (int j = 0; j < 10; ++j) ++a; } - +#pragma omp taskloop + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'taskloop' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int j = 0; j < 10; ++j) + ++a; + } // DISTRIBUTE DIRECTIVE #pragma omp target @@ -4183,162 +4310,401 @@ #pragma omp target update to(a) // expected-error {{region cannot be nested inside 'target' region}} ++a; } -} -void foo() { - int a = 0; -// PARALLEL DIRECTIVE -#pragma omp parallel -#pragma omp for - for (int i = 0; i < 10; ++i) - ; -#pragma omp parallel -#pragma omp simd - for (int i = 0; i < 10; ++i) - ; -#pragma omp parallel -#pragma omp for simd - for (int i = 0; i < 10; ++i) - ; -#pragma omp parallel -#pragma omp sections - { - bar(); - } -#pragma omp parallel -#pragma omp section // expected-error {{'omp 
section' directive must be closely nested to a sections region, not a parallel region}} - { - bar(); - } -#pragma omp parallel -#pragma omp sections - { - bar(); - } -#pragma omp parallel -#pragma omp single - bar(); -#pragma omp parallel -#pragma omp master - bar(); -#pragma omp parallel -#pragma omp critical - bar(); -#pragma omp parallel -#pragma omp parallel for - for (int i = 0; i < 10; ++i) - ; -#pragma omp parallel -#pragma omp parallel for simd - for (int i = 0; i < 10; ++i) - ; -#pragma omp parallel -#pragma omp parallel sections - { - bar(); - } -#pragma omp parallel -#pragma omp task - { - bar(); - } -#pragma omp parallel - { -#pragma omp taskyield - bar(); - } -#pragma omp parallel - { -#pragma omp barrier - bar(); - } -#pragma omp parallel - { -#pragma omp taskwait - bar(); - } -#pragma omp parallel - { -#pragma omp flush - bar(); - } -#pragma omp parallel - { -#pragma omp ordered // expected-error {{region cannot be closely nested inside 'parallel' region; perhaps you forget to enclose 'omp ordered' directive into a for or a parallel for region with 'ordered' clause?}} - bar(); - } -#pragma omp parallel - { -#pragma omp atomic - ++a; - } -#pragma omp parallel - { +// DISTRIBUTE PARALLEL FOR SIMD DIRECTIVE #pragma omp target - ++a; - } -#pragma omp parallel - { -#pragma omp target parallel - ++a; - } -#pragma omp parallel -#pragma omp target parallel for - for (int i = 0; i < 10; ++i) - ; -#pragma omp parallel - { -#pragma omp target enter data map(to: a) - ++a; - } -#pragma omp parallel - { -#pragma omp target exit data map(from: a) - ++a; - } -#pragma omp parallel - { -#pragma omp teams // expected-error {{region cannot be closely nested inside 'parallel' region; perhaps you forget to enclose 'omp teams' directive into a target region?}} - ++a; - } -#pragma omp parallel - { -#pragma omp taskloop - for (int i = 0; i < 10; ++i) - ++a; - } -#pragma omp parallel - { -#pragma omp distribute // expected-error {{region cannot be closely nested inside 'parallel' region; perhaps you forget to enclose 'omp distribute' directive into a teams region?}} +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} for (int i = 0; i < 10; ++i) ; } -#pragma omp parallel - { -#pragma omp target update to(a) - a++; - } -#pragma omp parallel - { -#pragma omp distribute parallel for // expected-error {{region cannot be closely nested inside 'parallel' region; perhaps you forget to enclose 'omp distribute parallel for' directive into a teams region?}} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp distribute // expected-error {{OpenMP constructs may not be nested inside a simd region}} for (int i = 0; i < 10; ++i) ; } - -// SIMD DIRECTIVE -#pragma omp simd +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd for (int i = 0; i < 10; ++i) { #pragma omp for // expected-error {{OpenMP constructs may not be nested inside a simd region}} for (int i = 0; i < 10; ++i) ; } -#pragma omp simd +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd for (int i = 0; i < 10; ++i) { #pragma omp simd // expected-warning {{OpenMP only allows an ordered construct with the simd clause nested in a simd construct}} for (int i = 0; i < 10; ++i) ; } -#pragma omp simd +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel 
for simd + for (int i = 0; i < 10; ++i) { +#pragma omp for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp parallel // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp sections // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp section // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp single // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp master // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp critical // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp parallel // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { +#pragma omp single + { + bar(); + } + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp parallel for // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp parallel for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp parallel sections // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp task // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp taskyield // expected-error {{OpenMP constructs may not be nested inside a simd region}} + bar(); + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp barrier // expected-error {{OpenMP constructs may not be nested inside a simd region}} + bar(); + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp taskwait // expected-error {{OpenMP constructs may not be nested inside a simd region}} + bar(); + 
} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp flush // expected-error {{OpenMP constructs may not be nested inside a simd region}} + bar(); + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp ordered // expected-error {{OpenMP constructs may not be nested inside a simd region}} + bar(); + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp atomic // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target parallel // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target parallel for // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target enter data map(to: a) // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target exit data map(from: a) // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp teams // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target update to(a) // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +} + +void foo() { + int a = 0; +// PARALLEL DIRECTIVE +#pragma omp parallel +#pragma omp for + for (int i = 0; i < 10; ++i) + ; +#pragma omp parallel +#pragma omp simd + for (int i = 0; i < 10; ++i) + ; +#pragma omp parallel +#pragma omp for simd + for (int i = 0; i < 10; ++i) + ; +#pragma omp parallel +#pragma omp sections + { + bar(); + } +#pragma omp parallel +#pragma omp section // expected-error {{'omp section' directive must be closely nested to a sections region, not a parallel region}} + { + bar(); + } +#pragma omp parallel +#pragma omp sections + { + bar(); + } +#pragma omp parallel +#pragma omp single + bar(); +#pragma omp parallel +#pragma omp master + bar(); +#pragma omp parallel +#pragma omp critical + bar(); +#pragma omp parallel +#pragma omp parallel for + for (int i = 0; i < 10; ++i) + ; +#pragma omp parallel +#pragma omp parallel for simd + for (int i = 0; i < 10; ++i) + ; +#pragma omp parallel +#pragma omp parallel sections + { + bar(); + } +#pragma omp parallel +#pragma omp task + { + bar(); + } +#pragma omp parallel + { +#pragma omp taskyield + bar(); + } +#pragma omp parallel + { +#pragma omp barrier + bar(); + } +#pragma omp parallel + { +#pragma omp taskwait 
+ bar(); + } +#pragma omp parallel + { +#pragma omp flush + bar(); + } +#pragma omp parallel + { +#pragma omp ordered // expected-error {{region cannot be closely nested inside 'parallel' region; perhaps you forget to enclose 'omp ordered' directive into a for or a parallel for region with 'ordered' clause?}} + bar(); + } +#pragma omp parallel + { +#pragma omp atomic + ++a; + } +#pragma omp parallel + { +#pragma omp target + ++a; + } +#pragma omp parallel + { +#pragma omp target parallel + ++a; + } +#pragma omp parallel +#pragma omp target parallel for + for (int i = 0; i < 10; ++i) + ; +#pragma omp parallel + { +#pragma omp target enter data map(to: a) + ++a; + } +#pragma omp parallel + { +#pragma omp target exit data map(from: a) + ++a; + } +#pragma omp parallel + { +#pragma omp teams // expected-error {{region cannot be closely nested inside 'parallel' region; perhaps you forget to enclose 'omp teams' directive into a target region?}} + ++a; + } +#pragma omp parallel + { +#pragma omp taskloop + for (int i = 0; i < 10; ++i) + ++a; + } +#pragma omp parallel + { +#pragma omp distribute // expected-error {{region cannot be closely nested inside 'parallel' region; perhaps you forget to enclose 'omp distribute' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp parallel + { +#pragma omp target update to(a) + a++; + } +#pragma omp parallel + { +#pragma omp distribute parallel for // expected-error {{region cannot be closely nested inside 'parallel' region; perhaps you forget to enclose 'omp distribute parallel for' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp parallel + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'parallel' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } + +// SIMD DIRECTIVE +#pragma omp simd + for (int i = 0; i < 10; ++i) { +#pragma omp for // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp simd + for (int i = 0; i < 10; ++i) { +#pragma omp simd // expected-warning {{OpenMP only allows an ordered construct with the simd clause nested in a simd construct}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp simd for (int i = 0; i < 10; ++i) { #pragma omp for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} for (int i = 0; i < 10; ++i) @@ -4488,6 +4854,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp simd + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int j = 0; j < 10; ++j) + ; + } // FOR DIRECTIVE #pragma omp for @@ -4672,6 +5044,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp for + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'for' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int j = 0; j < 10; ++j) + ; + } // FOR SIMD DIRECTIVE #pragma omp for simd @@ -4836,6 +5214,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp for simd + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int j = 0; j < 10; ++j) + ; + } // SECTIONS DIRECTIVE #pragma omp sections @@ 
-5009,6 +5393,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp sections + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'sections' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // SECTION DIRECTIVE #pragma omp section // expected-error {{orphaned 'omp section' directives are prohibited, it must be closely nested to a sections region}} @@ -5272,6 +5662,13 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp sections + { +#pragma omp section +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'section' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // SINGLE DIRECTIVE #pragma omp single @@ -5446,6 +5843,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp single + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'single' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // MASTER DIRECTIVE #pragma omp master @@ -5630,6 +6033,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp master + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'master' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // CRITICAL DIRECTIVE #pragma omp critical @@ -5833,6 +6242,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp critical + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'critical' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // PARALLEL FOR DIRECTIVE #pragma omp parallel for @@ -6032,6 +6447,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp parallel for + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'parallel for' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int j = 0; j < 10; ++j) + ; + } // PARALLEL FOR SIMD DIRECTIVE #pragma omp parallel for simd @@ -6231,6 +6652,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int j = 0; j < 10; ++j) + ; + } // PARALLEL SECTIONS DIRECTIVE #pragma omp parallel sections @@ -6410,7 +6837,13 @@ } #pragma omp parallel sections { -#pragma omp distribute parallel for // expected-error {{region cannot be closely nested inside 'parallel sections' region; perhaps you forget to enclose 'omp distribute parallel for' directive into a teams region?}} +#pragma omp distribute parallel for // expected-error {{region cannot be closely nested inside 'parallel sections' region; perhaps you forget to enclose 'omp distribute parallel for' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp parallel sections + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'parallel sections' region; perhaps you forget to enclose 'omp distribute 
parallel for simd' directive into a teams region?}} for (int i = 0; i < 10; ++i) ; } @@ -6547,6 +6980,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp task + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'task' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // ATOMIC DIRECTIVE #pragma omp atomic @@ -6775,6 +7214,14 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp atomic + // expected-error@+2 {{the statement for 'atomic' must be an expression statement of form '++x;', '--x;', 'x++;', 'x--;', 'x binop= expr;', 'x = x binop expr' or 'x = expr binop x', where x is an l-value expression with scalar type}} + // expected-note@+1 {{expected an expression statement}} + { +#pragma omp distribute parallel for simd // expected-error {{OpenMP constructs may not be nested inside an atomic region}} + for (int i = 0; i < 10; ++i) + ; + } // TARGET DIRECTIVE #pragma omp target @@ -6922,6 +7369,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp target + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'target' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // TARGET PARALLEL DIRECTIVE #pragma omp target parallel @@ -7066,7 +7519,12 @@ for (int i = 0; i < 10; ++i) ; } - +#pragma omp target parallel + { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'target parallel' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // TARGET PARALLEL FOR DIRECTIVE #pragma omp target parallel for @@ -7266,6 +7724,12 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp target parallel for + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'target parallel for' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int j = 0; j < 10; ++j) + ; + } // TEAMS DIRECTIVE #pragma omp target @@ -7448,6 +7912,17 @@ for (int j = 0; j < 10; ++j) ; } +#pragma omp target +#pragma omp teams + { +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) + ; +#pragma omp distribute parallel for simd + for (int j = 0; j < 10; ++j) + ; + } + // TASKLOOP DIRECTIVE #pragma omp taskloop for (int i = 0; i < 10; ++i) { @@ -7636,6 +8111,12 @@ for (int i = 0; i < 10; ++i) ; } +#pragma omp taskloop + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'taskloop' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ; + } // DISTRIBUTE DIRECTIVE #pragma omp target @@ -7870,6 +8351,14 @@ for (int i = 0; i < 10; ++i) ++a; } +#pragma omp target +#pragma omp teams +#pragma omp distribute + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{region cannot be closely nested inside 'distribute' region; perhaps you forget to enclose 'omp distribute parallel for simd' directive into a teams region?}} + for (int i = 0; i < 10; ++i) + ++a; + } // DISTRIBUTE PARALLEL FOR DIRECTIVE #pragma omp target @@ -8105,4 +8594,239 @@ #pragma omp target update to(a) // 
expected-error {{region cannot be nested inside 'target' region}} ++a; } + +// DISTRIBUTE PARALLEL FOR SIMD DIRECTIVE +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp distribute parallel for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp distribute // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp for // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp simd // expected-warning {{OpenMP only allows an ordered construct with the simd clause nested in a simd construct}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp for simd // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp parallel // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp sections // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp section // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp single // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp master // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp critical // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp parallel // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { +#pragma omp single + { + bar(); + } + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp parallel for // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp parallel for simd // expected-error 
{{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp parallel sections // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp task // expected-error {{OpenMP constructs may not be nested inside a simd region}} + { + bar(); + } + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp taskyield // expected-error {{OpenMP constructs may not be nested inside a simd region}} + bar(); + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp barrier // expected-error {{OpenMP constructs may not be nested inside a simd region}} + bar(); + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp taskwait // expected-error {{OpenMP constructs may not be nested inside a simd region}} + bar(); + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp flush // expected-error {{OpenMP constructs may not be nested inside a simd region}} + bar(); + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp ordered // expected-error {{OpenMP constructs may not be nested inside a simd region}} + bar(); + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp atomic // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target parallel // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target parallel for // expected-error {{OpenMP constructs may not be nested inside a simd region}} + for (int i = 0; i < 10; ++i) + ; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp teams // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } + return foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target enter data map(to: a) // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target exit data map(from: a) // expected-error {{OpenMP constructs may not be nested inside a simd region}} + ++a; + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd + for (int i = 0; i < 10; ++i) { +#pragma omp target update to(a) // 
expected-error {{OpenMP constructs may not be nested inside a simd region}}
+    ++a;
+  }
 }
Index: test/Sema/xray-always-instrument-attr.c
===================================================================
--- /dev/null
+++ test/Sema/xray-always-instrument-attr.c
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only -std=c11
+void foo() __attribute__((xray_always_instrument));
+
+struct __attribute__((xray_always_instrument)) a { int x; }; // expected-warning {{'xray_always_instrument' attribute only applies to functions and methods}}
+
+void bar() __attribute__((xray_always_instrument("not-supported"))); // expected-error {{'xray_always_instrument' attribute takes no arguments}}
Index: test/Sema/xray-always-instrument-attr.cpp
===================================================================
--- /dev/null
+++ test/Sema/xray-always-instrument-attr.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 %s -verify -fsyntax-only -std=c++11 -x c++
+void foo [[clang::xray_always_instrument]] ();
+
+struct [[clang::xray_always_instrument]] a { int x; }; // expected-warning {{'xray_always_instrument' attribute only applies to functions and methods}}
+
+class b {
+  void c [[clang::xray_always_instrument]] ();
+};
+
+void baz [[clang::xray_always_instrument("not-supported")]] (); // expected-error {{'xray_always_instrument' attribute takes no arguments}}
Index: test/SemaOpenCL/clang-builtin-version.cl
===================================================================
--- /dev/null
+++ test/SemaOpenCL/clang-builtin-version.cl
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 %s -fblocks -verify -pedantic -fsyntax-only -ferror-limit 100
+
+// Confirm CL2.0 Clang builtins are not available in earlier versions
+
+kernel void dse_builtins() {
+  int tmp;
+  enqueue_kernel(tmp, tmp, tmp, ^(void) { // expected-warning{{implicit declaration of function 'enqueue_kernel' is invalid in C99}}
+    return;
+  });
+  unsigned size = get_kernel_work_group_size(^(void) { // expected-warning{{implicit declaration of function 'get_kernel_work_group_size' is invalid in C99}}
+    return;
+  });
+  size = get_kernel_preferred_work_group_size_multiple(^(void) { // expected-warning{{implicit declaration of function 'get_kernel_preferred_work_group_size_multiple' is invalid in C99}}
+    return;
+  });
+}
+
+void pipe_builtins() {
+  int tmp;
+
+  read_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'read_pipe' is invalid in C99}}
+  write_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'write_pipe' is invalid in C99}}
+
+  reserve_read_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'reserve_read_pipe' is invalid in C99}}
+  reserve_write_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'reserve_write_pipe' is invalid in C99}}
+
+  work_group_reserve_read_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'work_group_reserve_read_pipe' is invalid in C99}}
+  work_group_reserve_write_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'work_group_reserve_write_pipe' is invalid in C99}}
+
+  sub_group_reserve_write_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'sub_group_reserve_write_pipe' is invalid in C99}}
+  sub_group_reserve_read_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'sub_group_reserve_read_pipe' is invalid in C99}}
+
+  commit_read_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'commit_read_pipe' is invalid in C99}}
+  commit_write_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'commit_write_pipe' is invalid in C99}}
+
+  work_group_commit_read_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'work_group_commit_read_pipe' is invalid in C99}}
+  work_group_commit_write_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'work_group_commit_write_pipe' is invalid in C99}}
+
+  sub_group_commit_write_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'sub_group_commit_write_pipe' is invalid in C99}}
+  sub_group_commit_read_pipe(tmp, tmp); // expected-warning{{implicit declaration of function 'sub_group_commit_read_pipe' is invalid in C99}}
+
+  get_pipe_num_packets(tmp); // expected-warning{{implicit declaration of function 'get_pipe_num_packets' is invalid in C99}}
+  get_pipe_max_packets(tmp); // expected-warning{{implicit declaration of function 'get_pipe_max_packets' is invalid in C99}}
+}
Index: test/SemaOpenCL/to_addr_builtin.cl
===================================================================
--- test/SemaOpenCL/to_addr_builtin.cl
+++ test/SemaOpenCL/to_addr_builtin.cl
@@ -10,43 +10,44 @@
   glob = to_global(glob, loc);
 #if __OPENCL_C_VERSION__ < CL_VERSION_2_0
-  // expected-error@-2{{'to_global' requires OpenCL version 2.0 or above}}
+  // expected-warning@-2{{implicit declaration of function 'to_global' is invalid in C99}}
+  // expected-warning@-3{{incompatible integer to pointer conversion assigning to '__global int *' from 'int'}}
 #else
-  // expected-error@-4{{invalid number of arguments to function: 'to_global'}}
+  // expected-error@-5{{invalid number of arguments to function: 'to_global'}}
 #endif
 
   int x;
   glob = to_global(x);
 #if __OPENCL_C_VERSION__ < CL_VERSION_2_0
-  // expected-error@-2{{'to_global' requires OpenCL version 2.0 or above}}
+  // expected-warning@-2{{incompatible integer to pointer conversion assigning to '__global int *' from 'int'}}
 #else
   // expected-error@-4{{invalid argument x to function: 'to_global', expecting a generic pointer argument}}
 #endif
 
   glob = to_global(con);
 #if __OPENCL_C_VERSION__ < CL_VERSION_2_0
-  // expected-error@-2{{'to_global' requires OpenCL version 2.0 or above}}
+  // expected-warning@-2{{incompatible integer to pointer conversion assigning to '__global int *' from 'int'}}
 #else
   // expected-error@-4{{invalid argument con to function: 'to_global', expecting a generic pointer argument}}
 #endif
 
   glob = to_global(con_typedef);
 #if __OPENCL_C_VERSION__ < CL_VERSION_2_0
-  // expected-error@-2{{'to_global' requires OpenCL version 2.0 or above}}
+  // expected-warning@-2{{incompatible integer to pointer conversion assigning to '__global int *' from 'int'}}
 #else
   // expected-error@-4{{invalid argument con_typedef to function: 'to_global', expecting a generic pointer argument}}
 #endif
 
   loc = to_global(glob);
 #if __OPENCL_C_VERSION__ < CL_VERSION_2_0
-  // expected-error@-2{{'to_global' requires OpenCL version 2.0 or above}}
+  // expected-warning@-2{{incompatible integer to pointer conversion assigning to '__local int *' from 'int'}}
 #else
   // expected-error@-4{{assigning '__global int *' to '__local int *' changes address space of pointer}}
 #endif
 
   global char *glob_c = to_global(loc);
 #if __OPENCL_C_VERSION__ < CL_VERSION_2_0
-  // expected-error@-2{{'to_global' requires OpenCL version 2.0 or above}}
+  // expected-warning@-2{{incompatible integer to pointer conversion initializing '__global char *' with an expression of type 'int'}}
 #else
   // expected-warning@-4{{incompatible pointer types initializing '__global char *' with an expression of type '__global int *'}}
 #endif
Index: tools/libclang/CIndex.cpp
===================================================================
--- tools/libclang/CIndex.cpp
+++ tools/libclang/CIndex.cpp
@@ -1971,6 +1971,8 @@
   void VisitOMPDistributeDirective(const OMPDistributeDirective *D);
   void VisitOMPDistributeParallelForDirective(
       const OMPDistributeParallelForDirective *D);
+  void VisitOMPDistributeParallelForSimdDirective(
+      const OMPDistributeParallelForSimdDirective *D);
 
 private:
   void AddDeclarationNameInfo(const Stmt *S);
@@ -2729,6 +2731,11 @@
   VisitOMPLoopDirective(D);
 }
 
+void EnqueueVisitor::VisitOMPDistributeParallelForSimdDirective(
+    const OMPDistributeParallelForSimdDirective *D) {
+  VisitOMPLoopDirective(D);
+}
+
 void CursorVisitor::EnqueueWorkList(VisitorWorkList &WL, const Stmt *S) {
   EnqueueVisitor(WL, MakeCXCursor(S, StmtParent, TU,RegionOfInterest)).Visit(S);
 }
@@ -4847,6 +4854,8 @@
     return cxstring::createRef("OMPDistributeDirective");
   case CXCursor_OMPDistributeParallelForDirective:
     return cxstring::createRef("OMPDistributeParallelForDirective");
+  case CXCursor_OMPDistributeParallelForSimdDirective:
+    return cxstring::createRef("OMPDistributeParallelForSimdDirective");
   case CXCursor_OverloadCandidate:
     return cxstring::createRef("OverloadCandidate");
   case CXCursor_TypeAliasTemplateDecl:
Index: tools/libclang/CXCursor.cpp
===================================================================
--- tools/libclang/CXCursor.cpp
+++ tools/libclang/CXCursor.cpp
@@ -637,6 +637,9 @@
   case Stmt::OMPDistributeParallelForDirectiveClass:
     K = CXCursor_OMPDistributeParallelForDirective;
     break;
+  case Stmt::OMPDistributeParallelForSimdDirectiveClass:
+    K = CXCursor_OMPDistributeParallelForSimdDirective;
+    break;
   }
 
   CXCursor C = { K, 0, { Parent, S, TU } };
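
For reference, a minimal usage sketch of the new composite directive (not part of the patch; the function and variable names are illustrative only). The enclosing 'target' and 'teams' regions mirror the nesting rules exercised in nesting_of_regions.cpp above, where the directive is rejected in any other closely nested context:

void saxpy(int n, float a, float *x, float *y) {
#pragma omp target map(to: x[0:n]) map(tofrom: y[0:n])
#pragma omp teams
#pragma omp distribute parallel for simd
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}

Likewise, assuming the CIndex.cpp and CXCursor.cpp hunks above are applied, a libclang client could detect the new statement kind through the stable C API roughly as follows; 'visitNode' and the printf reporting are illustrative, not part of this change, and the callback is meant to be passed to clang_visitChildren():

#include <clang-c/Index.h>
#include <stdio.h>

static enum CXChildVisitResult visitNode(CXCursor C, CXCursor Parent,
                                         CXClientData ClientData) {
  (void)Parent;
  (void)ClientData;
  if (clang_getCursorKind(C) == CXCursor_OMPDistributeParallelForSimdDirective) {
    // Per the CIndex.cpp hunk above, the spelling should read
    // "OMPDistributeParallelForSimdDirective".
    CXString S = clang_getCursorKindSpelling(clang_getCursorKind(C));
    printf("found: %s\n", clang_getCString(S));
    clang_disposeString(S);
  }
  return CXChildVisit_Recurse;
}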