diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2588,7 +2588,11 @@
    */
   CXCursor_OMPMaskedDirective = 292,
 
-  CXCursor_LastStmt = CXCursor_OMPMaskedDirective,
+  /** OpenMP unroll directive.
+   */
+  CXCursor_OMPUnrollDirective = 293,
+
+  CXCursor_LastStmt = CXCursor_OMPUnrollDirective,
 
   /**
    * Cursor that represents the translation unit itself.
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -888,6 +888,114 @@
   }
 };
 
+/// Representation of the 'full' clause of the '#pragma omp unroll' directive.
+///
+/// \code
+/// #pragma omp unroll full
+/// for (int i = 0; i < 64; ++i)
+/// \endcode
+class OMPFullClause final : public OMPClause {
+  friend class OMPClauseReader;
+
+  /// Build an empty clause.
+  explicit OMPFullClause() : OMPClause(llvm::omp::OMPC_full, {}, {}) {}
+
+public:
+  /// Build an AST node for a 'full' clause.
+  ///
+  /// \param C        Context of the AST.
+  /// \param StartLoc Starting location of the clause.
+  /// \param EndLoc   Ending location of the clause.
+  static OMPFullClause *Create(const ASTContext &C, SourceLocation StartLoc,
+                               SourceLocation EndLoc);
+
+  /// Build an empty 'full' AST node for deserialization.
+  ///
+  /// \param C Context of the AST.
+  static OMPFullClause *CreateEmpty(const ASTContext &C);
+
+  child_range children() { return {child_iterator(), child_iterator()}; }
+  const_child_range children() const {
+    return {const_child_iterator(), const_child_iterator()};
+  }
+
+  child_range used_children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+  const_child_range used_children() const {
+    return const_child_range(const_child_iterator(), const_child_iterator());
+  }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == llvm::omp::OMPC_full;
+  }
+};
+
+/// Representation of the 'partial' clause of the '#pragma omp unroll'
+/// directive.
+///
+/// \code
+/// #pragma omp unroll partial(4)
+/// for (int i = start; i < end; ++i)
+/// \endcode
+class OMPPartialClause final : public OMPClause {
+  friend class OMPClauseReader;
+
+  /// Location of '('.
+  SourceLocation LParenLoc;
+
+  /// Optional argument to the clause (unroll factor).
+  Stmt *Factor;
+
+  /// Build an empty clause.
+  explicit OMPPartialClause() : OMPClause(llvm::omp::OMPC_partial, {}, {}) {}
+
+  /// Set the unroll factor.
+  void setFactor(Expr *E) { Factor = E; }
+
+  /// Sets the location of '('.
+  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
+public:
+  /// Build an AST node for a 'partial' clause.
+  ///
+  /// \param C         Context of the AST.
+  /// \param StartLoc  Location of the 'partial' identifier.
+  /// \param LParenLoc Location of '('.
+  /// \param EndLoc    Location of ')'.
+  /// \param Factor    Clause argument.
+  static OMPPartialClause *Create(const ASTContext &C, SourceLocation StartLoc,
+                                  SourceLocation LParenLoc,
+                                  SourceLocation EndLoc, Expr *Factor);
+
+  /// Build an empty 'partial' AST node for deserialization.
+  ///
+  /// \param C     Context of the AST.
+  static OMPPartialClause *CreateEmpty(const ASTContext &C);
+
+  /// Returns the location of '('.
+  SourceLocation getLParenLoc() const { return LParenLoc; }
+
+  /// Returns the argument of the clause or nullptr if not set.
+  Expr *getFactor() const { return cast_or_null<Expr>(Factor); }
+
+  child_range children() { return child_range(&Factor, &Factor + 1); }
+  const_child_range children() const {
+    return const_child_range(&Factor, &Factor + 1);
+  }
+
+  child_range used_children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+  const_child_range used_children() const {
+    return const_child_range(const_child_iterator(), const_child_iterator());
+  }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == llvm::omp::OMPC_partial;
+  }
+};
+
 /// This represents 'collapse' clause in the '#pragma omp ...'
 /// directive.
 ///
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2841,6 +2841,9 @@
 DEF_TRAVERSE_STMT(OMPTileDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
+DEF_TRAVERSE_STMT(OMPUnrollDirective,
+                  { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
 DEF_TRAVERSE_STMT(OMPForDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
@@ -3097,6 +3100,17 @@
   return true;
 }
 
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPFullClause(OMPFullClause *C) {
+  return true;
+}
+
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPPartialClause(OMPPartialClause *C) {
+  TRY_TO(TraverseStmt(C->getFactor()));
+  return true;
+}
+
 template <typename Derived>
 bool
 RecursiveASTVisitor<Derived>::VisitOMPCollapseClause(OMPCollapseClause *C) {
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -5067,6 +5067,78 @@
   }
 };
 
+/// This represents the '#pragma omp unroll' loop transformation directive.
+///
+/// \code
+/// #pragma omp unroll
+/// for (int i = 0; i < 64; ++i)
+/// \endcode
+class OMPUnrollDirective final : public OMPLoopBasedDirective {
+  friend class ASTStmtReader;
+  friend class OMPExecutableDirective;
+
+  /// Default list of offsets.
+  enum {
+    PreInitsOffset = 0,
+    TransformedStmtOffset,
+  };
+
+  explicit OMPUnrollDirective(SourceLocation StartLoc, SourceLocation EndLoc)
+      : OMPLoopBasedDirective(OMPUnrollDirectiveClass, llvm::omp::OMPD_unroll,
+                              StartLoc, EndLoc, 1) {}
+
+  /// Set the pre-init statements.
+  void setPreInits(Stmt *PreInits) {
+    Data->getChildren()[PreInitsOffset] = PreInits;
+  }
+
+  /// Set the de-sugared statement.
+  void setTransformedStmt(Stmt *S) {
+    Data->getChildren()[TransformedStmtOffset] = S;
+  }
+
+public:
+  /// Create a new AST node representation for '#pragma omp unroll'.
+  ///
+  /// \param C         Context of the AST.
+  /// \param StartLoc  Location of the introducer (e.g. the 'omp' token).
+  /// \param EndLoc    Location of the directive's end (e.g. the tok::eod).
+  /// \param Clauses   The directive's clauses.
+  /// \param AssociatedStmt The outermost associated loop.
+  /// \param TransformedStmt The loop nest after unrolling, or nullptr in
+  ///                        dependent contexts.
+  /// \param PreInits   Helper preinits statements for the loop nest.
+  static OMPUnrollDirective *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+         Stmt *TransformedStmt, Stmt *PreInits);
+
+  /// Build an empty '#pragma omp unroll' AST node for deserialization.
+  ///
+  /// \param C          Context of the AST.
+  /// \param NumClauses Number of clauses to allocate.
+  static OMPUnrollDirective *CreateEmpty(const ASTContext &C,
+                                         unsigned NumClauses);
+
+  /// Get the de-sugared associated loops after unrolling.
+  ///
+  /// This is only used if the unrolled loop becomes an associated loop of
+  /// another directive, otherwise the loop is emitted directly using loop
+  /// transformation metadata. When the unrolled loop cannot be used by another
+  /// directive (e.g. because of the full clause), the transformed stmt can also
+  /// be nullptr.
+  Stmt *getTransformedStmt() const {
+    return Data->getChildren()[TransformedStmtOffset];
+  }
+
+  /// Return the pre-init statements.
+  Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == OMPUnrollDirectiveClass;
+  }
+};
+
 /// This represents '#pragma omp scan' directive.
 ///
 /// \code
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10748,6 +10748,9 @@
 def err_omp_dispatch_statement_call
     : Error<"statement after '#pragma omp dispatch' must be a direct call"
             " to a target function or an assignment to one">;
+def err_omp_unroll_full_variable_trip_count : Error<
+  "loop to be fully unrolled must have a constant trip count">;
+def note_omp_directive_here : Note<"'%0' directive found here">;
 } // end of OpenMP category
 
 let CategoryName = "Related Result Type Issue" in {
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -224,6 +224,7 @@
 def OMPParallelDirective : StmtNode<OMPExecutableDirective>;
 def OMPSimdDirective : StmtNode<OMPLoopDirective>;
 def OMPTileDirective : StmtNode<OMPLoopBasedDirective>;
+def OMPUnrollDirective : StmtNode<OMPLoopBasedDirective>;
 def OMPForDirective : StmtNode<OMPLoopDirective>;
 def OMPForSimdDirective : StmtNode<OMPLoopDirective>;
 def OMPSectionsDirective : StmtNode<OMPExecutableDirective>;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -32,6 +32,7 @@
 #include "clang/AST/NSAPI.h"
 #include "clang/AST/PrettyPrinter.h"
 #include "clang/AST/StmtCXX.h"
+#include "clang/AST/StmtOpenMP.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/AST/TypeOrdering.h"
 #include "clang/Basic/BitmaskEnum.h"
@@ -10220,7 +10221,8 @@
   void DestroyDataSharingAttributesStack();
   ExprResult
   VerifyPositiveIntegerConstantInClause(Expr *Op, OpenMPClauseKind CKind,
-                                        bool StrictlyPositive = true);
+                                        bool StrictlyPositive = true,
+                                        bool SuppressExprDiags = false);
   /// Returns OpenMP nesting level for current directive.
   unsigned getOpenMPNestingLevel() const;
 
@@ -10238,6 +10240,25 @@
   /// Pop OpenMP function region for non-capturing function.
   void popOpenMPFunctionRegion(const sema::FunctionScopeInfo *OldFSI);
 
+  /// Analyzes and checks a loop nest for use by a loop transformation.
+  ///
+  /// \param Kind          The loop transformation directive kind.
+  /// \param NumLoops      How many nested loops the directive is expecting.
+  /// \param AStmt         Associated statement of the transformation directive.
+  /// \param LoopHelpers   [out] The loop analysis result.
+  /// \param Body          [out] The body code nested in \p NumLoops loops.
+  /// \param OriginalInits [out] Collection of statements and declarations that
+  ///                      must have been executed/declared before entering the
+  ///                      loop.
+  ///
+  /// \return Whether there was any error.
+  bool checkTransformableLoopNest(
+      OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
+      SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+      Stmt *&Body,
+      SmallVectorImpl<SmallVector<llvm::PointerUnion<Stmt *, Decl *>, 0>>
+          &OriginalInits);
+
   /// Helper to keep information about the current `omp begin/end declare
   /// variant` nesting.
   struct OMPDeclareVariantScope {
@@ -10543,6 +10564,11 @@
   StmtResult ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
                                       Stmt *AStmt, SourceLocation StartLoc,
                                       SourceLocation EndLoc);
+  /// Called on well-formed '#pragma omp unroll' after parsing of its clauses
+  /// and the associated statement.
+  StmtResult ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
+                                        Stmt *AStmt, SourceLocation StartLoc,
+                                        SourceLocation EndLoc);
   /// Called on well-formed '\#pragma omp for' after parsing
   /// of the associated statement.
   StmtResult
@@ -10898,6 +10924,13 @@
                                     SourceLocation StartLoc,
                                     SourceLocation LParenLoc,
                                     SourceLocation EndLoc);
+  /// Called on well-formed 'full' clauses.
+  OMPClause *ActOnOpenMPFullClause(SourceLocation StartLoc,
+                                   SourceLocation EndLoc);
+  /// Called on well-formed 'partial' clauses.
+  OMPClause *ActOnOpenMPPartialClause(Expr *FactorExpr, SourceLocation StartLoc,
+                                      SourceLocation LParenLoc,
+                                      SourceLocation EndLoc);
   /// Called on well-formed 'collapse' clause.
   OMPClause *ActOnOpenMPCollapseClause(Expr *NumForLoops,
                                        SourceLocation StartLoc,
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1894,6 +1894,7 @@
   STMT_OMP_PARALLEL_DIRECTIVE,
   STMT_OMP_SIMD_DIRECTIVE,
   STMT_OMP_TILE_DIRECTIVE,
+  STMT_OMP_UNROLL_DIRECTIVE,
   STMT_OMP_FOR_DIRECTIVE,
   STMT_OMP_FOR_SIMD_DIRECTIVE,
   STMT_OMP_SECTIONS_DIRECTIVE,
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -942,6 +942,36 @@
   return new (Mem) OMPSizesClause(NumSizes);
 }
 
+OMPFullClause *OMPFullClause::Create(const ASTContext &C,
+                                     SourceLocation StartLoc,
+                                     SourceLocation EndLoc) {
+  OMPFullClause *Clause = CreateEmpty(C);
+  Clause->setLocStart(StartLoc);
+  Clause->setLocEnd(EndLoc);
+  return Clause;
+}
+
+OMPFullClause *OMPFullClause::CreateEmpty(const ASTContext &C) {
+  return new (C) OMPFullClause();
+}
+
+OMPPartialClause *OMPPartialClause::Create(const ASTContext &C,
+                                           SourceLocation StartLoc,
+                                           SourceLocation LParenLoc,
+                                           SourceLocation EndLoc,
+                                           Expr *Factor) {
+  OMPPartialClause *Clause = CreateEmpty(C);
+  Clause->setLocStart(StartLoc);
+  Clause->setLParenLoc(LParenLoc);
+  Clause->setLocEnd(EndLoc);
+  Clause->setFactor(Factor);
+  return Clause;
+}
+
+OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) {
+  return new (C) OMPPartialClause();
+}
+
 OMPAllocateClause *
 OMPAllocateClause::Create(const ASTContext &C, SourceLocation StartLoc,
                           SourceLocation LParenLoc, Expr *Allocator,
@@ -1602,6 +1632,18 @@
   OS << ")";
 }
 
+void OMPClausePrinter::VisitOMPFullClause(OMPFullClause *Node) { OS << "full"; }
+
+void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) {
+  OS << "partial";
+
+  if (Expr *Factor = Node->getFactor()) {
+    OS << '(';
+    Factor->printPretty(OS, nullptr, Policy, 0);
+    OS << ')';
+  }
+}
+
 void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) {
   OS << "allocator(";
   Node->getAllocator()->printPretty(OS, nullptr, Policy, 0);
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -129,9 +129,24 @@
         OnTransformationCallback) {
   CurStmt = CurStmt->IgnoreContainers();
   for (unsigned Cnt = 0; Cnt < NumLoops; ++Cnt) {
-    while (auto *Dir = dyn_cast<OMPTileDirective>(CurStmt)) {
-      OnTransformationCallback(Dir);
-      CurStmt = Dir->getTransformedStmt();
+    while (true) {
+      auto *OrigStmt = CurStmt;
+      if (auto *Dir = dyn_cast<OMPTileDirective>(OrigStmt)) {
+        OnTransformationCallback(Dir);
+        CurStmt = Dir->getTransformedStmt();
+      } else if (auto *Dir = dyn_cast<OMPUnrollDirective>(OrigStmt)) {
+        OnTransformationCallback(Dir);
+        CurStmt = Dir->getTransformedStmt();
+      } else {
+        break;
+      }
+
+      if (!CurStmt) {
+        // May happen if the loop transformation does not result in a generated
+        // loop (such as full unrolling).
+        CurStmt = OrigStmt;
+        break;
+      }
     }
     if (auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(CurStmt))
       CurStmt = CanonLoop->getLoopStmt();
@@ -359,6 +374,25 @@
       SourceLocation(), SourceLocation(), NumLoops);
 }
 
+OMPUnrollDirective *
+OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc,
+                           SourceLocation EndLoc, ArrayRef<OMPClause *> Clauses,
+                           Stmt *AssociatedStmt, Stmt *TransformedStmt,
+                           Stmt *PreInits) {
+  auto *Dir = createDirective<OMPUnrollDirective>(
+      C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
+  Dir->setTransformedStmt(TransformedStmt);
+  Dir->setPreInits(PreInits);
+  return Dir;
+}
+
+OMPUnrollDirective *OMPUnrollDirective::CreateEmpty(const ASTContext &C,
+                                                    unsigned NumClauses) {
+  return createEmptyDirective<OMPUnrollDirective>(
+      C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1,
+      SourceLocation(), SourceLocation());
+}
+
 OMPForSimdDirective *
 OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
                             SourceLocation EndLoc, unsigned CollapsedNum,
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -668,6 +668,11 @@
   PrintOMPExecutableDirective(Node);
 }
 
+void StmtPrinter::VisitOMPUnrollDirective(OMPUnrollDirective *Node) {
+  Indent() << "#pragma omp unroll";
+  PrintOMPExecutableDirective(Node);
+}
+
 void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) {
   Indent() << "#pragma omp for";
   PrintOMPExecutableDirective(Node);
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -468,6 +468,13 @@
       Profiler->VisitExpr(E);
 }
 
+void OMPClauseProfiler::VisitOMPFullClause(const OMPFullClause *C) {}
+
+void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) {
+  if (const Expr *Factor = C->getFactor())
+    Profiler->VisitExpr(Factor);
+}
+
 void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
   if (C->getAllocator())
     Profiler->VisitStmt(C->getAllocator());
@@ -908,6 +915,10 @@
   VisitOMPLoopBasedDirective(S);
 }
 
+void StmtProfiler::VisitOMPUnrollDirective(const OMPUnrollDirective *S) {
+  VisitOMPLoopBasedDirective(S);
+}
+
 void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) {
   VisitOMPLoopDirective(S);
 }
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -452,7 +452,8 @@
          DKind == OMPD_target_teams_distribute ||
          DKind == OMPD_target_teams_distribute_parallel_for ||
          DKind == OMPD_target_teams_distribute_parallel_for_simd ||
-         DKind == OMPD_target_teams_distribute_simd || DKind == OMPD_tile;
+         DKind == OMPD_target_teams_distribute_simd || DKind == OMPD_tile ||
+         DKind == OMPD_unroll;
 }
 
 bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
@@ -580,7 +581,7 @@
 }
 
 bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
-  return DKind == OMPD_tile;
+  return DKind == OMPD_tile || DKind == OMPD_unroll;
 }
 
 void clang::getOpenMPCaptureRegions(
@@ -668,6 +669,7 @@
     CaptureRegions.push_back(OMPD_unknown);
     break;
   case OMPD_tile:
+  case OMPD_unroll:
     // loop transformations do not introduce captures.
     break;
   case OMPD_threadprivate:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6678,6 +6678,7 @@
   case OMPD_task:
   case OMPD_simd:
   case OMPD_tile:
+  case OMPD_unroll:
   case OMPD_sections:
   case OMPD_section:
   case OMPD_single:
@@ -6996,6 +6997,7 @@
   case OMPD_task:
   case OMPD_simd:
   case OMPD_tile:
+  case OMPD_unroll:
   case OMPD_sections:
   case OMPD_section:
   case OMPD_single:
@@ -9603,6 +9605,7 @@
     case OMPD_task:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_sections:
     case OMPD_section:
     case OMPD_single:
@@ -10432,6 +10435,7 @@
     case OMPD_task:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_sections:
     case OMPD_section:
     case OMPD_single:
@@ -11144,6 +11148,7 @@
     case OMPD_task:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_sections:
     case OMPD_section:
     case OMPD_single:
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -208,6 +208,9 @@
   case Stmt::OMPTileDirectiveClass:
     EmitOMPTileDirective(cast<OMPTileDirective>(*S));
     break;
+  case Stmt::OMPUnrollDirectiveClass:
+    EmitOMPUnrollDirective(cast<OMPUnrollDirective>(*S));
+    break;
   case Stmt::OMPForDirectiveClass:
     EmitOMPForDirective(cast<OMPForDirective>(*S));
     break;
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -176,6 +176,8 @@
       PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
     } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
       PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
+    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
+      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
     } else {
       llvm_unreachable("Unknown loop-based directive kind.");
     }
@@ -1821,6 +1823,8 @@
   if (SimplifiedS == NextLoop) {
     if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
       SimplifiedS = Dir->getTransformedStmt();
+    if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS))
+      SimplifiedS = Dir->getTransformedStmt();
     if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
       SimplifiedS = CanonLoop->getLoopStmt();
     if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
@@ -2579,6 +2583,28 @@
   EmitStmt(S.getTransformedStmt());
 }
 
+void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
+  // This function is only called if the unrolled loop is not consumed by any
+  // other loop-associated construct. Such a loop-associated construct will have
+  // used the transformed AST.
+
+  // Set the unroll metadata for the next emitted loop.
+  LoopStack.setUnrollState(LoopAttributes::Enable);
+
+  if (S.hasClausesOfKind<OMPFullClause>()) {
+    LoopStack.setUnrollState(LoopAttributes::Full);
+  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
+    if (Expr *FactorExpr = PartialClause->getFactor()) {
+      uint64_t Factor =
+          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
+      assert(Factor >= 1 && "Only positive factors are valid");
+      LoopStack.setUnrollCount(Factor);
+    }
+  }
+
+  EmitStmt(S.getAssociatedStmt());
+}
+
 void CodeGenFunction::EmitOMPOuterLoop(
     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
     CodeGenFunction::OMPPrivateScope &LoopScope,
@@ -5762,6 +5788,8 @@
   case OMPC_safelen:
   case OMPC_simdlen:
   case OMPC_sizes:
+  case OMPC_full:
+  case OMPC_partial:
   case OMPC_allocator:
   case OMPC_allocate:
   case OMPC_collapse:
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3441,6 +3441,7 @@
   void EmitOMPParallelDirective(const OMPParallelDirective &S);
   void EmitOMPSimdDirective(const OMPSimdDirective &S);
   void EmitOMPTileDirective(const OMPTileDirective &S);
+  void EmitOMPUnrollDirective(const OMPUnrollDirective &S);
   void EmitOMPForDirective(const OMPForDirective &S);
   void EmitOMPForSimdDirective(const OMPForSimdDirective &S);
   void EmitOMPSectionsDirective(const OMPSectionsDirective &S);
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2166,6 +2166,7 @@
   case OMPD_parallel:
   case OMPD_simd:
   case OMPD_tile:
+  case OMPD_unroll:
   case OMPD_task:
   case OMPD_taskyield:
   case OMPD_barrier:
@@ -2403,6 +2404,7 @@
   case OMPD_parallel:
   case OMPD_simd:
   case OMPD_tile:
+  case OMPD_unroll:
   case OMPD_for:
   case OMPD_for_simd:
   case OMPD_sections:
@@ -2793,6 +2795,7 @@
   case OMPC_novariants:
   case OMPC_nocontext:
   case OMPC_filter:
+  case OMPC_partial:
     // OpenMP [2.5, Restrictions]
     //  At most one num_threads clause can appear on the directive.
     // OpenMP [2.8.1, simd construct, Restrictions]
@@ -2824,7 +2827,8 @@
       ErrorFound = true;
     }
 
-    if (CKind == OMPC_ordered && PP.LookAhead(/*N=*/0).isNot(tok::l_paren))
+    if ((CKind == OMPC_ordered || CKind == OMPC_partial) &&
+        PP.LookAhead(/*N=*/0).isNot(tok::l_paren))
       Clause = ParseOpenMPClause(CKind, WrongDirective);
     else
       Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective);
@@ -2887,6 +2891,7 @@
   case OMPC_unified_shared_memory:
   case OMPC_reverse_offload:
   case OMPC_dynamic_allocators:
+  case OMPC_full:
     // OpenMP [2.7.1, Restrictions, p. 9]
     //  Only one ordered clause can appear on a loop directive.
     // OpenMP [2.7.1, Restrictions, C/C++, p. 4]
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1462,6 +1462,7 @@
   case Stmt::OMPSectionsDirectiveClass:
   case Stmt::OMPSimdDirectiveClass:
   case Stmt::OMPTileDirectiveClass:
+  case Stmt::OMPUnrollDirectiveClass:
   case Stmt::OMPSingleDirectiveClass:
   case Stmt::OMPTargetDataDirectiveClass:
   case Stmt::OMPTargetDirectiveClass:
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -3810,6 +3810,11 @@
     VisitStmt(S);
   }
 
+  void VisitOMPUnrollDirective(OMPUnrollDirective *S) {
+    // #pragma omp unroll does not introduce data sharing.
+    VisitStmt(S);
+  }
+
   void VisitStmt(Stmt *S) {
     for (Stmt *C : S->children()) {
       if (C) {
@@ -3976,6 +3981,7 @@
   case OMPD_master:
   case OMPD_masked:
   case OMPD_tile:
+  case OMPD_unroll:
     break;
   case OMPD_simd:
   case OMPD_for:
@@ -5847,6 +5853,10 @@
     Res =
         ActOnOpenMPTileDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc);
     break;
+  case OMPD_unroll:
+    Res = ActOnOpenMPUnrollDirective(ClausesWithImplicit, AStmt, StartLoc,
+                                     EndLoc);
+    break;
   case OMPD_for:
     Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc,
                                   VarsWithInheritedDSA);
@@ -8967,8 +8977,14 @@
             return false;
           },
           [&SemaRef, &Captures](OMPLoopBasedDirective *Transform) {
-            Stmt *DependentPreInits =
-                cast<OMPTileDirective>(Transform)->getPreInits();
+            Stmt *DependentPreInits;
+            if (auto *Dir = dyn_cast<OMPTileDirective>(Transform)) {
+              DependentPreInits = Dir->getPreInits();
+            } else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform)) {
+              DependentPreInits = Dir->getPreInits();
+            } else {
+              llvm_unreachable("Unexpected loop transformation");
+            }
             if (!DependentPreInits)
               return;
             for (Decl *C : cast<DeclStmt>(DependentPreInits)->getDeclGroup()) {
@@ -12543,6 +12559,55 @@
       Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
 }
 
+bool Sema::checkTransformableLoopNest(
+    OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
+    SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+    Stmt *&Body,
+    SmallVectorImpl<SmallVector<llvm::PointerUnion<Stmt *, Decl *>, 0>>
+        &OriginalInits) {
+  OriginalInits.emplace_back();
+  bool Result = OMPLoopBasedDirective::doForAllLoops(
+      AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops,
+      [this, &LoopHelpers, &Body, &OriginalInits, Kind](unsigned Cnt,
+                                                        Stmt *CurStmt) {
+        VarsWithInheritedDSAType TmpDSA;
+        unsigned SingleNumLoops =
+            checkOpenMPLoop(Kind, nullptr, nullptr, CurStmt, *this, *DSAStack,
+                            TmpDSA, LoopHelpers[Cnt]);
+        if (SingleNumLoops == 0)
+          return true;
+        assert(SingleNumLoops == 1 && "Expect single loop iteration space");
+        if (auto *For = dyn_cast<ForStmt>(CurStmt)) {
+          OriginalInits.back().push_back(For->getInit());
+          Body = For->getBody();
+        } else {
+          assert(isa<CXXForRangeStmt>(CurStmt) &&
+                 "Expected canonical for or range-based for loops.");
+          auto *CXXFor = cast<CXXForRangeStmt>(CurStmt);
+          OriginalInits.back().push_back(CXXFor->getBeginStmt());
+          Body = CXXFor->getBody();
+        }
+        OriginalInits.emplace_back();
+        return false;
+      },
+      [&OriginalInits](OMPLoopBasedDirective *Transform) {
+        Stmt *DependentPreInits;
+        if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else
+          llvm_unreachable("Unhandled loop transformation");
+        if (!DependentPreInits)
+          return;
+        for (Decl *C : cast<DeclStmt>(DependentPreInits)->getDeclGroup())
+          OriginalInits.back().push_back(C);
+      });
+  assert(OriginalInits.back().empty() && "No preinit after innermost loop");
+  OriginalInits.pop_back();
+  return Result;
+}
+
 StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
                                           Stmt *AStmt, SourceLocation StartLoc,
                                           SourceLocation EndLoc) {
@@ -12563,38 +12628,9 @@
   SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
   Stmt *Body = nullptr;
   SmallVector<SmallVector<llvm::PointerUnion<Stmt *, Decl *>, 0>, 4>
-      OriginalInits(1);
-  if (!OMPLoopBasedDirective::doForAllLoops(
-          AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false,
-          NumLoops,
-          [this, &LoopHelpers, &Body, &OriginalInits](unsigned Cnt,
-                                                      Stmt *CurStmt) {
-            VarsWithInheritedDSAType TmpDSA;
-            unsigned SingleNumLoops =
-                checkOpenMPLoop(OMPD_tile, nullptr, nullptr, CurStmt, *this,
-                                *DSAStack, TmpDSA, LoopHelpers[Cnt]);
-            if (SingleNumLoops == 0)
-              return true;
-            assert(SingleNumLoops == 1 && "Expect single loop iteration space");
-            if (auto *For = dyn_cast<ForStmt>(CurStmt)) {
-              OriginalInits.back().push_back(For->getInit());
-              Body = For->getBody();
-            } else {
-              assert(isa<CXXForRangeStmt>(CurStmt) &&
-                     "Expected canonical for or range-based for loops.");
-              auto *CXXFor = cast<CXXForRangeStmt>(CurStmt);
-              OriginalInits.back().push_back(CXXFor->getBeginStmt());
-              Body = CXXFor->getBody();
-            }
-            OriginalInits.emplace_back();
-            return false;
-          },
-          [&OriginalInits](OMPLoopBasedDirective *Transform) {
-            Stmt *DependentPreInits =
-                cast<OMPTileDirective>(Transform)->getPreInits();
-            for (Decl *C : cast<DeclStmt>(DependentPreInits)->getDeclGroup())
-              OriginalInits.back().push_back(C);
-          }))
+      OriginalInits;
+  if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body,
+                                  OriginalInits))
     return StmtError();
 
   // Delay tiling to when template is completely instantiated.
@@ -12783,6 +12819,282 @@
                                   buildPreInits(Context, PreInits));
 }
 
+StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
+                                            Stmt *AStmt,
+                                            SourceLocation StartLoc,
+                                            SourceLocation EndLoc) {
+  // Empty statement should only be possible if there already was an error.
+  if (!AStmt)
+    return StmtError();
+
+  if (checkMutuallyExclusiveClauses(*this, Clauses, {OMPC_partial, OMPC_full}))
+    return StmtError();
+
+  const OMPFullClause *FullClause =
+      OMPExecutableDirective::getSingleClause<OMPFullClause>(Clauses);
+  const OMPPartialClause *PartialClause =
+      OMPExecutableDirective::getSingleClause<OMPPartialClause>(Clauses);
+  assert(!(FullClause && PartialClause) &&
+         "mutual exclusivity must have been checked before");
+
+  constexpr unsigned NumLoops = 1;
+  Stmt *Body = nullptr;
+  SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
+      NumLoops);
+  SmallVector<SmallVector<llvm::PointerUnion<Stmt *, Decl *>, 0>, NumLoops + 1>
+      OriginalInits;
+  if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers,
+                                  Body, OriginalInits))
+    return StmtError();
+
+  // Delay unrolling to when template is completely instantiated.
+  if (CurContext->isDependentContext())
+    return OMPUnrollDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+                                      nullptr, nullptr);
+
+  OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers.front();
+
+  if (FullClause) {
+    if (!VerifyPositiveIntegerConstantInClause(
+             LoopHelper.NumIterations, OMPC_full, /*StrictlyPositive=*/false,
+             /*SuppressExprDiags=*/true)
+             .isUsable()) {
+      Diag(AStmt->getBeginLoc(), diag::err_omp_unroll_full_variable_trip_count);
+      Diag(FullClause->getBeginLoc(), diag::note_omp_directive_here)
+          << "#pragma omp unroll full";
+      return StmtError();
+    }
+  }
+
+  // The generated loop may only be passed to other loop-associated directive
+  // when a partial clause is specified. Without the requirement it is
+  // sufficient to generate loop unroll metadata at code-generation.
+  if (!PartialClause)
+    return OMPUnrollDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+                                      nullptr, nullptr);
+
+  // Otherwise, we need to provide a de-sugared/transformed AST that can be
+  // associated with another loop directive.
+  //
+  // The canonical loop analysis return by checkTransformableLoopNest assumes
+  // the following structure to be the same loop without transformations or
+  // directives applied: \code OriginalInits; LoopHelper.PreInits;
+  // LoopHelper.Counters;
+  // for (; IV < LoopHelper.NumIterations; ++IV) {
+  //   LoopHelper.Updates;
+  //   Body;
+  // }
+  // \endcode
+  // where IV is a variable declared and initialized to 0 in LoopHelper.PreInits
+  // and referenced by LoopHelper.IterationVarRef.
+  //
+  // The unrolling directive transforms this into the following loop:
+  // \code
+  // OriginalInits;         \
+  // LoopHelper.PreInits;    > NewPreInits
+  // LoopHelper.Counters;   /
+  // for (auto UIV = 0; UIV < LoopHelper.NumIterations; UIV+=Factor) {
+  //   #pragma clang loop unroll_count(Factor)
+  //   for (IV = UIV; IV < UIV + Factor && IV < LoopHelper.NumIterations; ++IV)
+  //   {
+  //     LoopHelper.Updates;
+  //     Body;
+  //   }
+  // }
+  // \endcode
+  // where UIV is a new logical iteration counter. IV must be the same VarDecl
+  // as the original LoopHelper.IterationVarRef because LoopHelper.Updates
+  // references it. If the partially unrolled loop is associated with another
+  // loop directive (like an OMPForDirective), it will use checkOpenMPLoop to
+  // analyze this loop, i.e. the outer loop must fulfill the constraints of an
+  // OpenMP canonical loop. The inner loop is not an associable canonical loop
+  // and only exists to defer its unrolling to LLVM's LoopUnroll instead of
+  // doing it in the frontend (by adding loop metadata). NewPreInits becomes a
+  // property of the OMPLoopBasedDirective instead of statements in
+  // CompoundStatement. This is to allow the loop to become a non-outermost loop
+  // of a canonical loop nest where these PreInits are emitted before the
+  // outermost directive.
+
+  // Determine the PreInit declarations.
+  SmallVector<Decl *, 4> PreInits;
+  assert(OriginalInits.size() == 1 &&
+         "Expecting a single-dimensional loop iteration space");
+  for (auto &P : OriginalInits[0]) {
+    if (auto *D = P.dyn_cast<Decl *>())
+      PreInits.push_back(D);
+    else if (auto *PI = dyn_cast_or_null<DeclStmt>(P.dyn_cast<Stmt *>()))
+      PreInits.append(PI->decl_begin(), PI->decl_end());
+  }
+  if (auto *PI = cast_or_null<DeclStmt>(LoopHelper.PreInits))
+    PreInits.append(PI->decl_begin(), PI->decl_end());
+  // Gather declarations for the data members used as counters.
+  for (Expr *CounterRef : LoopHelper.Counters) {
+    auto *CounterDecl = cast<DeclRefExpr>(CounterRef)->getDecl();
+    if (isa<OMPCapturedExprDecl>(CounterDecl))
+      PreInits.push_back(CounterDecl);
+  }
+
+  auto *IterationVarRef = cast<DeclRefExpr>(LoopHelper.IterationVarRef);
+  QualType IVTy = IterationVarRef->getType();
+  assert(LoopHelper.Counters.size() == 1 &&
+         "Expecting a single-dimensional loop iteration space");
+  auto *OrigVar = cast<DeclRefExpr>(LoopHelper.Counters.front());
+
+  // Determine the unroll factor.
+  uint64_t Factor;
+  SourceLocation FactorLoc;
+  if (Expr *FactorVal = PartialClause->getFactor()) {
+    Factor =
+        FactorVal->getIntegerConstantExpr(Context).getValue().getZExtValue();
+    FactorLoc = FactorVal->getExprLoc();
+  } else {
+    // TODO: Use a better profitability model.
+    Factor = 2;
+  }
+  assert(Factor > 0 && "Expected positive unroll factor");
+  auto MakeFactorExpr = [this, Factor, IVTy, FactorLoc]() {
+    return IntegerLiteral::Create(
+        Context, llvm::APInt(Context.getIntWidth(IVTy), Factor), IVTy,
+        FactorLoc);
+  };
+
+  // Iteration variable SourceLocations.
+  SourceLocation OrigVarLoc = OrigVar->getExprLoc();
+  SourceLocation OrigVarLocBegin = OrigVar->getBeginLoc();
+  SourceLocation OrigVarLocEnd = OrigVar->getEndLoc();
+
+  // Internal variable names.
+  std::string OrigVarName = OrigVar->getNameInfo().getAsString();
+  std::string OuterIVName = (Twine(".unrolled.iv.") + OrigVarName).str();
+  std::string InnerIVName = (Twine(".unroll_inner.iv.") + OrigVarName).str();
+  std::string InnerTripCountName =
+      (Twine(".unroll_inner.tripcount.") + OrigVarName).str();
+
+  // Create the iteration variable for the unrolled loop.
+  VarDecl *OuterIVDecl =
+      buildVarDecl(*this, {}, IVTy, OuterIVName, nullptr, OrigVar);
+  auto MakeOuterRef = [this, OuterIVDecl, IVTy, OrigVarLoc]() {
+    return buildDeclRefExpr(*this, OuterIVDecl, IVTy, OrigVarLoc);
+  };
+
+  // Iteration variable for the inner loop: Reuse the iteration variable created
+  // by checkOpenMPLoop.
+  auto *InnerIVDecl = cast<VarDecl>(IterationVarRef->getDecl());
+  InnerIVDecl->setDeclName(&PP.getIdentifierTable().get(InnerIVName));
+  auto MakeInnerRef = [this, InnerIVDecl, IVTy, OrigVarLoc]() {
+    return buildDeclRefExpr(*this, InnerIVDecl, IVTy, OrigVarLoc);
+  };
+
+  // Make a copy of the NumIterations expression for each use: By the AST
+  // constraints, every expression object in a DeclContext must be unique.
+  CaptureVars CopyTransformer(*this);
+  auto MakeNumIterations = [&CopyTransformer, &LoopHelper]() -> Expr * {
+    return AssertSuccess(
+        CopyTransformer.TransformExpr(LoopHelper.NumIterations));
+  };
+
+  // Inner For init-statement: auto .unroll_inner.iv = .unrolled.iv
+  ExprResult LValueConv = DefaultLvalueConversion(MakeOuterRef());
+  AddInitializerToDecl(InnerIVDecl, LValueConv.get(), /*DirectInit=*/false);
+  StmtResult InnerInit = new (Context)
+      DeclStmt(DeclGroupRef(InnerIVDecl), OrigVarLocBegin, OrigVarLocEnd);
+  if (!InnerInit.isUsable())
+    return StmtError();
+
+  // Inner For cond-expression:
+  // \code
+  //   .unroll_inner.iv < .unrolled.iv + Factor &&
+  //   .unroll_inner.iv < NumIterations
+  // \endcode
+  // This conjunction of two conditions allows ScalarEvolution to derive the
+  // maximum trip count of the inner loop.
+  ExprResult EndOfTile = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(),
+                                    BO_Add, MakeOuterRef(), MakeFactorExpr());
+  if (!EndOfTile.isUsable())
+    return StmtError();
+  ExprResult InnerCond1 = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(),
+                                     BO_LT, MakeInnerRef(), EndOfTile.get());
+  if (!InnerCond1.isUsable())
+    return StmtError();
+  ExprResult InnerCond2 =
+      BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeInnerRef(),
+                 MakeNumIterations());
+  if (!InnerCond2.isUsable())
+    return StmtError();
+  ExprResult InnerCond =
+      BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LAnd,
+                 InnerCond1.get(), InnerCond2.get());
+  if (!InnerCond.isUsable())
+    return StmtError();
+
+  // Inner For incr-statement: ++.unroll_inner.iv
+  ExprResult InnerIncr = BuildUnaryOp(CurScope, LoopHelper.Inc->getExprLoc(),
+                                      UO_PreInc, MakeInnerRef());
+  if (!InnerIncr.isUsable())
+    return StmtError();
+
+  // Inner For statement.
+  SmallVector<Stmt *> InnerBodyStmts;
+  InnerBodyStmts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end());
+  InnerBodyStmts.push_back(Body);
+  CompoundStmt *InnerBody = CompoundStmt::Create(
+      Context, InnerBodyStmts, Body->getBeginLoc(), Body->getEndLoc());
+  ForStmt *InnerFor = new (Context)
+      ForStmt(Context, InnerInit.get(), InnerCond.get(), nullptr,
+              InnerIncr.get(), InnerBody, LoopHelper.Init->getBeginLoc(),
+              LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
+
+  // Unroll metadata for the inner loop.
+  // This needs to take into account the remainder portion of the unrolled loop,
+  // hence `unroll(full)` does not apply here, even though the LoopUnroll pass
+  // supports multiple loop exits. Instead, unroll using a factor equivalent to
+  // the maximum trip count, which will also generate a remainder loop. Just
+  // `unroll(enable)` (which could have been useful if the user has not
+  // specified a concrete factor; even though the outer loop cannot be
+  // influenced anymore, would avoid more code bloat than necessary) will refuse
+  // the loop because "Won't unroll; remainder loop could not be generated when
+  // assuming runtime trip count". Even if it did work, it must not choose a
+  // larger unroll factor than the maximum loop length, or it would always just
+  // execute the remainder loop.
+  LoopHintAttr *UnrollHintAttr =
+      LoopHintAttr::CreateImplicit(Context, LoopHintAttr::UnrollCount,
+                                   LoopHintAttr::Numeric, MakeFactorExpr());
+  AttributedStmt *InnerUnrolled =
+      AttributedStmt::Create(Context, StartLoc, {UnrollHintAttr}, InnerFor);
+
+  // Outer For init-statement: auto .unrolled.iv = 0
+  AddInitializerToDecl(
+      OuterIVDecl, ActOnIntegerConstant(LoopHelper.Init->getExprLoc(), 0).get(),
+      /*DirectInit=*/false);
+  StmtResult OuterInit = new (Context)
+      DeclStmt(DeclGroupRef(OuterIVDecl), OrigVarLocBegin, OrigVarLocEnd);
+  if (!OuterInit.isUsable())
+    return StmtError();
+
+  // Outer For cond-expression: .unrolled.iv < NumIterations
+  ExprResult OuterCond =
+      BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeOuterRef(),
+                 MakeNumIterations());
+  if (!OuterCond.isUsable())
+    return StmtError();
+
+  // Outer For incr-statement: .unrolled.iv += Factor
+  ExprResult OuterIncr =
+      BuildBinOp(CurScope, LoopHelper.Inc->getExprLoc(), BO_AddAssign,
+                 MakeOuterRef(), MakeFactorExpr());
+  if (!OuterIncr.isUsable())
+    return StmtError();
+
+  // Outer For statement.
+  ForStmt *OuterFor = new (Context)
+      ForStmt(Context, OuterInit.get(), OuterCond.get(), nullptr,
+              OuterIncr.get(), InnerUnrolled, LoopHelper.Init->getBeginLoc(),
+              LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
+
+  return OMPUnrollDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+                                    OuterFor, buildPreInits(Context, PreInits));
+}
+
 OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
                                              SourceLocation StartLoc,
                                              SourceLocation LParenLoc,
@@ -12843,6 +13155,9 @@
   case OMPC_filter:
     Res = ActOnOpenMPFilterClause(Expr, StartLoc, LParenLoc, EndLoc);
     break;
+  case OMPC_partial:
+    Res = ActOnOpenMPPartialClause(Expr, StartLoc, LParenLoc, EndLoc);
+    break;
   case OMPC_device:
   case OMPC_if:
   case OMPC_default:
@@ -13045,6 +13360,7 @@
     case OMPD_end_declare_target:
     case OMPD_teams:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_sections:
     case OMPD_section:
@@ -13123,6 +13439,7 @@
     case OMPD_teams:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13204,6 +13521,7 @@
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13283,6 +13601,7 @@
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13363,6 +13682,7 @@
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_sections:
     case OMPD_section:
     case OMPD_single:
@@ -13442,6 +13762,7 @@
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13522,6 +13843,7 @@
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13603,6 +13925,7 @@
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13886,17 +14209,33 @@
 
 ExprResult Sema::VerifyPositiveIntegerConstantInClause(Expr *E,
                                                        OpenMPClauseKind CKind,
-                                                       bool StrictlyPositive) {
+                                                       bool StrictlyPositive,
+                                                       bool SuppressExprDiags) {
   if (!E)
     return ExprError();
   if (E->isValueDependent() || E->isTypeDependent() ||
       E->isInstantiationDependent() || E->containsUnexpandedParameterPack())
     return E;
+
   llvm::APSInt Result;
-  ExprResult ICE =
-      VerifyIntegerConstantExpression(E, &Result, /*FIXME*/ AllowFold);
+  ExprResult ICE;
+  if (SuppressExprDiags) {
+    // Use a custom diagnoser that suppresses 'note' diagnostics about the
+    // expression.
+    struct SuppressedDiagnoser : public Sema::VerifyICEDiagnoser {
+      SuppressedDiagnoser() : VerifyICEDiagnoser(/*Suppress=*/true) {}
+      Sema::SemaDiagnosticBuilder diagnoseNotICE(Sema &S,
+                                                 SourceLocation Loc) override {
+        llvm_unreachable("Diagnostic suppressed");
+      }
+    } Diagnoser;
+    ICE = VerifyIntegerConstantExpression(E, &Result, Diagnoser, AllowFold);
+  } else {
+    ICE = VerifyIntegerConstantExpression(E, &Result, /*FIXME*/ AllowFold);
+  }
   if (ICE.isInvalid())
     return ExprError();
+
   if ((StrictlyPositive && !Result.isStrictlyPositive()) ||
       (!StrictlyPositive && !Result.isNonNegative())) {
     Diag(E->getExprLoc(), diag::err_omp_negative_expression_in_clause)
@@ -14301,6 +14640,29 @@
                                 SizeExprs);
 }
 
+OMPClause *Sema::ActOnOpenMPFullClause(SourceLocation StartLoc,
+                                       SourceLocation EndLoc) {
+  return OMPFullClause::Create(Context, StartLoc, EndLoc);
+}
+
+OMPClause *Sema::ActOnOpenMPPartialClause(Expr *FactorExpr,
+                                          SourceLocation StartLoc,
+                                          SourceLocation LParenLoc,
+                                          SourceLocation EndLoc) {
+  if (FactorExpr) {
+    // If an argument is specified, it must be a constant (or an unevaluated
+    // template expression).
+    ExprResult FactorResult = VerifyPositiveIntegerConstantInClause(
+        FactorExpr, OMPC_partial, /*StrictlyPositive=*/true);
+    if (FactorResult.isInvalid())
+      return nullptr;
+    FactorExpr = FactorResult.get();
+  }
+
+  return OMPPartialClause::Create(Context, StartLoc, LParenLoc, EndLoc,
+                                  FactorExpr);
+}
+
 OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause(
     OpenMPClauseKind Kind, ArrayRef<unsigned> Argument, Expr *Expr,
     SourceLocation StartLoc, SourceLocation LParenLoc,
@@ -14603,6 +14965,12 @@
                                    /*LParenLoc=*/SourceLocation(),
                                    /*VarLoc=*/SourceLocation(), EndLoc);
     break;
+  case OMPC_full:
+    Res = ActOnOpenMPFullClause(StartLoc, EndLoc);
+    break;
+  case OMPC_partial:
+    Res = ActOnOpenMPPartialClause(nullptr, StartLoc, /*LParenLoc=*/{}, EndLoc);
+    break;
   case OMPC_if:
   case OMPC_final:
   case OMPC_num_threads:
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1633,6 +1633,20 @@
     return getSema().ActOnOpenMPSizesClause(Sizes, StartLoc, LParenLoc, EndLoc);
   }
 
+  /// Build a new OpenMP 'full' clause.
+  OMPClause *RebuildOMPFullClause(SourceLocation StartLoc,
+                                  SourceLocation EndLoc) {
+    return getSema().ActOnOpenMPFullClause(StartLoc, EndLoc);
+  }
+
+  /// Build a new OpenMP 'partial' clause.
+  OMPClause *RebuildOMPPartialClause(Expr *Factor, SourceLocation StartLoc,
+                                     SourceLocation LParenLoc,
+                                     SourceLocation EndLoc) {
+    return getSema().ActOnOpenMPPartialClause(Factor, StartLoc, LParenLoc,
+                                              EndLoc);
+  }
+
   /// Build a new OpenMP 'allocator' clause.
   ///
   /// By default, performs semantic analysis to build the new OpenMP clause.
@@ -8521,6 +8535,17 @@
   return Res;
 }
 
+template <typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformOMPUnrollDirective(OMPUnrollDirective *D) {
+  DeclarationNameInfo DirName;
+  getDerived().getSema().StartOpenMPDSABlock(D->getDirectiveKind(), DirName,
+                                             nullptr, D->getBeginLoc());
+  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+  getDerived().getSema().EndOpenMPDSABlock(Res.get());
+  return Res;
+}
+
 template <typename Derived>
 StmtResult
 TreeTransform<Derived>::TransformOMPForDirective(OMPForDirective *D) {
@@ -9220,6 +9245,28 @@
                                C->getLParenLoc(), C->getEndLoc());
 }
 
+template <typename Derived>
+OMPClause *TreeTransform<Derived>::TransformOMPFullClause(OMPFullClause *C) {
+  if (!getDerived().AlwaysRebuild())
+    return C;
+  return RebuildOMPFullClause(C->getBeginLoc(), C->getEndLoc());
+}
+
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPPartialClause(OMPPartialClause *C) {
+  ExprResult T = getDerived().TransformExpr(C->getFactor());
+  if (T.isInvalid())
+    return nullptr;
+  Expr *Factor = T.get();
+  bool Changed = Factor != C->getFactor();
+
+  if (!Changed && !getDerived().AlwaysRebuild())
+    return C;
+  return RebuildOMPPartialClause(Factor, C->getBeginLoc(), C->getLParenLoc(),
+                                 C->getEndLoc());
+}
+
 template <typename Derived>
 OMPClause *
 TreeTransform<Derived>::TransformOMPCollapseClause(OMPCollapseClause *C) {
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11720,6 +11720,12 @@
     C = OMPSizesClause::CreateEmpty(Context, NumSizes);
     break;
   }
+  case llvm::omp::OMPC_full:
+    C = OMPFullClause::CreateEmpty(Context);
+    break;
+  case llvm::omp::OMPC_partial:
+    C = OMPPartialClause::CreateEmpty(Context);
+    break;
   case llvm::omp::OMPC_allocator:
     C = new (Context) OMPAllocatorClause();
     break;
@@ -12032,6 +12038,13 @@
   C->setLParenLoc(Record.readSourceLocation());
 }
 
+void OMPClauseReader::VisitOMPFullClause(OMPFullClause *C) {}
+
+void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) {
+  C->setFactor(Record.readSubExpr());
+  C->setLParenLoc(Record.readSourceLocation());
+}
+
 void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
   C->setAllocator(Record.readExpr());
   C->setLParenLoc(Record.readSourceLocation());
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2321,6 +2321,10 @@
   VisitOMPLoopBasedDirective(D);
 }
 
+void ASTStmtReader::VisitOMPUnrollDirective(OMPUnrollDirective *D) {
+  VisitOMPLoopBasedDirective(D);
+}
+
 void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
   VisitOMPLoopDirective(D);
   D->setHasCancel(Record.readBool());
@@ -3201,6 +3205,14 @@
       break;
     }
 
+    case STMT_OMP_UNROLL_DIRECTIVE: {
+      unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
+      assert(NumLoops == 1 && "Unroll directive accepts only a single loop");
+      unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
+      S = OMPUnrollDirective::CreateEmpty(Context, NumClauses);
+      break;
+    }
+
     case STMT_OMP_FOR_DIRECTIVE: {
       unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields];
       unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -6057,6 +6057,13 @@
   Record.AddSourceLocation(C->getLParenLoc());
 }
 
+void OMPClauseWriter::VisitOMPFullClause(OMPFullClause *C) {}
+
+void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) {
+  Record.AddStmt(C->getFactor());
+  Record.AddSourceLocation(C->getLParenLoc());
+}
+
 void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
   Record.AddStmt(C->getAllocator());
   Record.AddSourceLocation(C->getLParenLoc());
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2222,6 +2222,11 @@
   Code = serialization::STMT_OMP_TILE_DIRECTIVE;
 }
 
+void ASTStmtWriter::VisitOMPUnrollDirective(OMPUnrollDirective *D) {
+  VisitOMPLoopBasedDirective(D);
+  Code = serialization::STMT_OMP_UNROLL_DIRECTIVE;
+}
+
 void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) {
   VisitOMPLoopDirective(D);
   Record.writeBool(D->hasCancel());
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1297,7 +1297,8 @@
     case Stmt::OMPInteropDirectiveClass:
     case Stmt::OMPDispatchDirectiveClass:
     case Stmt::OMPMaskedDirectiveClass:
-    case Stmt::CapturedStmtClass: {
+    case Stmt::CapturedStmtClass:
+    case Stmt::OMPUnrollDirectiveClass: {
       const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState());
       Engine.addAbortedBlock(node, currBldrCtx->getBlock());
       break;
diff --git a/clang/test/OpenMP/unroll_ast_print.cpp b/clang/test/OpenMP/unroll_ast_print.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/unroll_ast_print.cpp
@@ -0,0 +1,127 @@
+// Check no warnings/errors
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+// Check AST and unparsing
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -ast-dump  %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -ast-print %s | FileCheck %s --check-prefix=PRINT --match-full-lines
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -ast-print    %s | FileCheck %s --check-prefix=PRINT --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+void body(...);
+
+
+// PRINT-LABEL: void func_unroll() {
+// DUMP-LABEL:  FunctionDecl {{.*}} func_unroll
+void func_unroll() {
+  // PRINT:  #pragma omp unroll
+  // DUMP:   OMPUnrollDirective
+  #pragma omp unroll
+  // PRINT-NEXT: for (int i = 7; i < 17; i += 3)
+  // DUMP-NEXT: ForStmt
+  for (int i = 7; i < 17; i += 3)
+    // PRINT-NEXT: body(i);
+    // DUMP: CallExpr
+    body(i);
+}
+
+
+// PRINT-LABEL: void func_unroll_full() {
+// DUMP-LABEL:  FunctionDecl {{.*}} func_unroll_full
+void func_unroll_full() {
+  // PRINT:     #pragma omp unroll full
+  // DUMP:      OMPUnrollDirective
+  // DUMP-NEXT:   OMPFullClause
+  #pragma omp unroll full
+  // PRINT-NEXT: for (int i = 7; i < 17; i += 3)
+  // DUMP-NEXT: ForStmt
+  for (int i = 7; i < 17; i += 3)
+    // PRINT-NEXT: body(i);
+    // DUMP: CallExpr
+    body(i);
+}
+
+
+// PRINT-LABEL: void func_unroll_partial() {
+// DUMP-LABEL:  FunctionDecl {{.*}} func_unroll_partial
+void func_unroll_partial() {
+  // PRINT:     #pragma omp unroll partial
+  // DUMP:      OMPUnrollDirective
+  // DUMP-NEXT:   OMPPartialClause
+  // DUMP-NEXT:     <<<NULL>>>
+  #pragma omp unroll partial
+  // PRINT-NEXT: for (int i = 7; i < 17; i += 3)
+  // DUMP-NEXT: ForStmt
+  for (int i = 7; i < 17; i += 3)
+    // PRINT: body(i);
+    // DUMP: CallExpr
+    body(i);
+}
+
+
+// PRINT-LABEL: void func_unroll_partial_factor() {
+// DUMP-LABEL:  FunctionDecl {{.*}} func_unroll_partial_factor
+void func_unroll_partial_factor() {
+  // PRINT:     #pragma omp unroll partial(4)
+  // DUMP:      OMPUnrollDirective
+  // DUMP-NEXT:   OMPPartialClause
+  // DUMP-NEXT:     ConstantExpr
+  // DUMP-NEXT:       value: Int 4
+  // DUMP-NEXT:       IntegerLiteral {{.*}} 4
+  #pragma omp unroll partial(4)
+  // PRINT-NEXT: for (int i = 7; i < 17; i += 3)
+  // DUMP-NEXT: ForStmt
+  for (int i = 7; i < 17; i += 3)
+    // PRINT-NEXT: body(i);
+    // DUMP: CallExpr
+    body(i);
+}
+
+
+// PRINT-LABEL: void func_unroll_partial_factor_for() {
+// DUMP-LABEL:  FunctionDecl {{.*}} func_unroll_partial_factor_for
+void func_unroll_partial_factor_for() {
+  // PRINT:     #pragma omp for
+  // DUMP:      OMPForDirective
+  #pragma omp for
+  // PRINT:       #pragma omp unroll partial(2)
+  // DUMP:        OMPUnrollDirective
+  // DUMP-NEXT:     OMPPartialClause
+  #pragma omp unroll partial(2)
+  // PRINT-NEXT: for (int i = 7; i < 17; i += 3)
+  // DUMP: ForStmt
+  for (int i = 7; i < 17; i += 3)
+    // PRINT-NEXT: body(i);
+    // DUMP: CallExpr
+    body(i);
+}
+
+
+// PRINT-LABEL: template <typename T, T Start, T End, T Step, int Factor> void unroll_templated() {
+// DUMP-LABEL:  FunctionTemplateDecl {{.*}} unroll_templated
+template<typename T, T Start, T End, T Step, int Factor>
+void unroll_templated() {
+  // PRINT: #pragma omp unroll partial(Factor)
+  // DUMP:      OMPUnrollDirective
+  // DUMP-NEXT: OMPPartialClause
+  // DUMP-NEXT:   DeclRefExpr {{.*}} 'Factor' 'int'
+  #pragma omp unroll partial(Factor)
+    // PRINT-NEXT: for (T i = Start; i < End; i += Step)
+    // DUMP-NEXT:  ForStmt
+    for (T i = Start; i < End; i += Step)
+      // PRINT-NEXT: body(i);
+      // DUMP:  CallExpr
+      body(i);
+}
+void unroll_template() {
+  unroll_templated<int,0,1024,1,4>();
+}
+
+#endif
diff --git a/clang/test/OpenMP/unroll_codegen_factor.cpp b/clang/test/OpenMP/unroll_codegen_factor.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_factor.cpp
@@ -0,0 +1,65 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP0:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP0]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp slt i32 %[[TMP1]], %[[TMP2]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP3]], i32 %[[TMP4]], i32 %[[TMP5]], i32 %[[TMP6]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add nsw i32 %[[TMP8]], %[[TMP7]]
+// IR-NEXT:    store i32 %[[ADD]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp unroll partial(4)
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 4}
+// IR: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.enable"}
diff --git a/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp b/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp
@@ -0,0 +1,251 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i64, align 8
+// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP1:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[J:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_4:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_5:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_7:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_10:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_12:.+]] = alloca i64, align 8
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_J:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i64, align 8
+// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i64, align 8
+// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i64, align 8
+// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I22:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_J23:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV_J:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP1_1:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP1_1]], i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP4]], i32* %[[J]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP5]], i32* %[[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP6]], i32* %[[DOTCAPTURE_EXPR_5]], align 4
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_5]], align 4
+// IR-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP8]], %[[TMP9]]
+// IR-NEXT:    %[[SUB8:.+]] = sub i32 %[[SUB]], 1
+// IR-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB8]], %[[TMP10]]
+// IR-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP11]]
+// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[DIV]], 1
+// IR-NEXT:    store i32 %[[SUB9]], i32* %[[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    %[[ADD11:.+]] = add i32 %[[TMP12]], 1
+// IR-NEXT:    store i32 %[[ADD11]], i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[SUB13:.+]] = sub i32 %[[TMP13]], %[[TMP14]]
+// IR-NEXT:    %[[SUB14:.+]] = sub i32 %[[SUB13]], 1
+// IR-NEXT:    %[[TMP15:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD15:.+]] = add i32 %[[SUB14]], %[[TMP15]]
+// IR-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[DIV16:.+]] = udiv i32 %[[ADD15]], %[[TMP16]]
+// IR-NEXT:    %[[CONV:.+]] = zext i32 %[[DIV16]] to i64
+// IR-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[SUB17:.+]] = sub i32 %[[TMP17]], -1
+// IR-NEXT:    %[[DIV18:.+]] = udiv i32 %[[SUB17]], 2
+// IR-NEXT:    %[[CONV19:.+]] = zext i32 %[[DIV18]] to i64
+// IR-NEXT:    %[[MUL:.+]] = mul nsw i64 %[[CONV]], %[[CONV19]]
+// IR-NEXT:    %[[SUB20:.+]] = sub nsw i64 %[[MUL]], 1
+// IR-NEXT:    store i64 %[[SUB20]], i64* %[[DOTCAPTURE_EXPR_12]], align 8
+// IR-NEXT:    %[[TMP18:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    store i32 %[[TMP18]], i32* %[[I]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV_J]], align 4
+// IR-NEXT:    %[[TMP19:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP20:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp slt i32 %[[TMP19]], %[[TMP20]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[LAND_LHS_TRUE:.+]], label %[[OMP_PRECOND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_LHS_TRUE]]:
+// IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[CMP21:.+]] = icmp ult i32 0, %[[TMP21]]
+// IR-NEXT:    br i1 %[[CMP21]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_THEN]]:
+// IR-NEXT:    store i64 0, i64* %[[DOTOMP_LB]], align 8
+// IR-NEXT:    %[[TMP22:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_12]], align 8
+// IR-NEXT:    store i64 %[[TMP22]], i64* %[[DOTOMP_UB]], align 8
+// IR-NEXT:    store i64 1, i64* %[[DOTOMP_STRIDE]], align 8
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-NEXT:    %[[TMP23:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
+// IR-NEXT:    %[[TMP24:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_12]], align 8
+// IR-NEXT:    %[[CMP24:.+]] = icmp sgt i64 %[[TMP23]], %[[TMP24]]
+// IR-NEXT:    br i1 %[[CMP24]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE]]:
+// IR-NEXT:    %[[TMP25:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_12]], align 8
+// IR-NEXT:    br label %[[COND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE]]:
+// IR-NEXT:    %[[TMP26:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
+// IR-NEXT:    br label %[[COND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END]]:
+// IR-NEXT:    %[[COND:.+]] = phi i64 [ %[[TMP25]], %[[COND_TRUE]] ], [ %[[TMP26]], %[[COND_FALSE]] ]
+// IR-NEXT:    store i64 %[[COND]], i64* %[[DOTOMP_UB]], align 8
+// IR-NEXT:    %[[TMP27:.+]] = load i64, i64* %[[DOTOMP_LB]], align 8
+// IR-NEXT:    store i64 %[[TMP27]], i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
+// IR-NEXT:    %[[TMP28:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    %[[TMP29:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
+// IR-NEXT:    %[[CMP25:.+]] = icmp sle i64 %[[TMP28]], %[[TMP29]]
+// IR-NEXT:    br i1 %[[CMP25]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
+// IR-NEXT:    %[[TMP30:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[CONV26:.+]] = sext i32 %[[TMP30]] to i64
+// IR-NEXT:    %[[TMP31:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[SUB27:.+]] = sub i32 %[[TMP32]], -1
+// IR-NEXT:    %[[DIV28:.+]] = udiv i32 %[[SUB27]], 2
+// IR-NEXT:    %[[MUL29:.+]] = mul i32 1, %[[DIV28]]
+// IR-NEXT:    %[[CONV30:.+]] = zext i32 %[[MUL29]] to i64
+// IR-NEXT:    %[[DIV31:.+]] = sdiv i64 %[[TMP31]], %[[CONV30]]
+// IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[CONV32:.+]] = sext i32 %[[TMP33]] to i64
+// IR-NEXT:    %[[MUL33:.+]] = mul nsw i64 %[[DIV31]], %[[CONV32]]
+// IR-NEXT:    %[[ADD34:.+]] = add nsw i64 %[[CONV26]], %[[MUL33]]
+// IR-NEXT:    %[[CONV35:.+]] = trunc i64 %[[ADD34]] to i32
+// IR-NEXT:    store i32 %[[CONV35]], i32* %[[I22]], align 4
+// IR-NEXT:    %[[TMP34:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    %[[TMP35:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    %[[TMP36:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[SUB36:.+]] = sub i32 %[[TMP36]], -1
+// IR-NEXT:    %[[DIV37:.+]] = udiv i32 %[[SUB36]], 2
+// IR-NEXT:    %[[MUL38:.+]] = mul i32 1, %[[DIV37]]
+// IR-NEXT:    %[[CONV39:.+]] = zext i32 %[[MUL38]] to i64
+// IR-NEXT:    %[[DIV40:.+]] = sdiv i64 %[[TMP35]], %[[CONV39]]
+// IR-NEXT:    %[[TMP37:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[SUB41:.+]] = sub i32 %[[TMP37]], -1
+// IR-NEXT:    %[[DIV42:.+]] = udiv i32 %[[SUB41]], 2
+// IR-NEXT:    %[[MUL43:.+]] = mul i32 1, %[[DIV42]]
+// IR-NEXT:    %[[CONV44:.+]] = zext i32 %[[MUL43]] to i64
+// IR-NEXT:    %[[MUL45:.+]] = mul nsw i64 %[[DIV40]], %[[CONV44]]
+// IR-NEXT:    %[[SUB46:.+]] = sub nsw i64 %[[TMP34]], %[[MUL45]]
+// IR-NEXT:    %[[MUL47:.+]] = mul nsw i64 %[[SUB46]], 2
+// IR-NEXT:    %[[ADD48:.+]] = add nsw i64 0, %[[MUL47]]
+// IR-NEXT:    %[[CONV49:.+]] = trunc i64 %[[ADD48]] to i32
+// IR-NEXT:    store i32 %[[CONV49]], i32* %[[DOTUNROLLED_IV_J23]], align 4
+// IR-NEXT:    %[[TMP38:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J23]], align 4
+// IR-NEXT:    store i32 %[[TMP38]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP39:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    %[[TMP40:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J23]], align 4
+// IR-NEXT:    %[[ADD50:.+]] = add i32 %[[TMP40]], 2
+// IR-NEXT:    %[[CMP51:.+]] = icmp ule i32 %[[TMP39]], %[[ADD50]]
+// IR-NEXT:    br i1 %[[CMP51]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS]]:
+// IR-NEXT:    %[[TMP41:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    %[[TMP42:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    %[[ADD52:.+]] = add i32 %[[TMP42]], 1
+// IR-NEXT:    %[[CMP53:.+]] = icmp ule i32 %[[TMP41]], %[[ADD52]]
+// IR-NEXT:    br label %[[LAND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END]]:
+// IR-NEXT:    %[[TMP43:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP53]], %[[LAND_RHS]] ]
+// IR-NEXT:    br i1 %[[TMP43]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP44:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT:    %[[TMP45:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    %[[TMP46:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[MUL54:.+]] = mul i32 %[[TMP45]], %[[TMP46]]
+// IR-NEXT:    %[[ADD55:.+]] = add i32 %[[TMP44]], %[[MUL54]]
+// IR-NEXT:    store i32 %[[ADD55]], i32* %[[J]], align 4
+// IR-NEXT:    %[[TMP47:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP48:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP49:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP50:.+]] = load i32, i32* %[[I22]], align 4
+// IR-NEXT:    %[[TMP51:.+]] = load i32, i32* %[[J]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP47]], i32 %[[TMP48]], i32 %[[TMP49]], i32 %[[TMP50]], i32 %[[TMP51]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP52:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    %[[INC:.+]] = add i32 %[[TMP52]], 1
+// IR-NEXT:    store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
+// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
+// IR-NEXT:    %[[TMP53:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    %[[ADD56:.+]] = add nsw i64 %[[TMP53]], 1
+// IR-NEXT:    store i64 %[[ADD56]], i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_END]]:
+// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_LOOP_EXIT]]:
+// IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])
+// IR-NEXT:    br label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_END]]:
+// IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp for collapse(2)
+  for (int i = start; i < end; i+=step) {
+    #pragma omp unroll partial
+    for (int j = start; j < end; j+=step)
+        body(start, end, step, i, j);
+  }
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 2}
diff --git a/clang/test/OpenMP/unroll_codegen_for_partial.cpp b/clang/test/OpenMP/unroll_codegen_for_partial.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_for_partial.cpp
@@ -0,0 +1,187 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I12:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP1]], i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP4]], %[[TMP5]]
+// IR-NEXT:    %[[SUB4:.+]] = sub i32 %[[SUB]], 1
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP6]]
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP7]]
+// IR-NEXT:    %[[SUB5:.+]] = sub i32 %[[DIV]], 1
+// IR-NEXT:    store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD7:.+]] = add i32 %[[TMP8]], 1
+// IR-NEXT:    store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[TMP9]], -1
+// IR-NEXT:    %[[DIV10:.+]] = udiv i32 %[[SUB9]], 2
+// IR-NEXT:    %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
+// IR-NEXT:    store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp ult i32 0, %[[TMP10]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_THEN]]:
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    store i32 %[[TMP11]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    store i32 1, i32* %[[DOTOMP_STRIDE]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[CMP13:.+]] = icmp ugt i32 %[[TMP12]], %[[TMP13]]
+// IR-NEXT:    br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE]]:
+// IR-NEXT:    %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    br label %[[COND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE]]:
+// IR-NEXT:    %[[TMP15:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    br label %[[COND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END]]:
+// IR-NEXT:    %[[COND:.+]] = phi i32 [ %[[TMP14]], %[[COND_TRUE]] ], [ %[[TMP15]], %[[COND_FALSE]] ]
+// IR-NEXT:    store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    store i32 %[[TMP16]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
+// IR-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[TMP18:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[ADD14:.+]] = add i32 %[[TMP18]], 1
+// IR-NEXT:    %[[CMP15:.+]] = icmp ult i32 %[[TMP17]], %[[ADD14]]
+// IR-NEXT:    br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
+// IR-NEXT:    %[[TMP19:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[MUL:.+]] = mul i32 %[[TMP19]], 2
+// IR-NEXT:    %[[ADD16:.+]] = add i32 0, %[[MUL]]
+// IR-NEXT:    store i32 %[[ADD16]], i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    %[[TMP20:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    store i32 %[[TMP20]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP22:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    %[[ADD17:.+]] = add i32 %[[TMP22]], 2
+// IR-NEXT:    %[[CMP18:.+]] = icmp ule i32 %[[TMP21]], %[[ADD17]]
+// IR-NEXT:    br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS]]:
+// IR-NEXT:    %[[TMP23:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD19:.+]] = add i32 %[[TMP24]], 1
+// IR-NEXT:    %[[CMP20:.+]] = icmp ule i32 %[[TMP23]], %[[ADD19]]
+// IR-NEXT:    br label %[[LAND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END]]:
+// IR-NEXT:    %[[TMP25:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP20]], %[[LAND_RHS]] ]
+// IR-NEXT:    br i1 %[[TMP25]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP28:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[MUL21:.+]] = mul i32 %[[TMP27]], %[[TMP28]]
+// IR-NEXT:    %[[ADD22:.+]] = add i32 %[[TMP26]], %[[MUL21]]
+// IR-NEXT:    store i32 %[[ADD22]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP29:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP30:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP29]], i32 %[[TMP30]], i32 %[[TMP31]], i32 %[[TMP32]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[INC:.+]] = add i32 %[[TMP33]], 1
+// IR-NEXT:    store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
+// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
+// IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[ADD23:.+]] = add i32 %[[TMP34]], 1
+// IR-NEXT:    store i32 %[[ADD23]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_END]]:
+// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_LOOP_EXIT]]:
+// IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])
+// IR-NEXT:    br label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_END]]:
+// IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  int i;
+  #pragma omp for
+  #pragma omp unroll partial
+  for (i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 2}
diff --git a/clang/test/OpenMP/unroll_codegen_full.cpp b/clang/test/OpenMP/unroll_codegen_full.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_full.cpp
@@ -0,0 +1,52 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32 7, i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP0:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp slt i32 %[[TMP0]], 17
+// IR-NEXT:    br i1 %[[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP1]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add nsw i32 %[[TMP2]], 3
+// IR-NEXT:    store i32 %[[ADD]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func() {
+  #pragma omp unroll full
+  for (int i = 7; i < 17; i += 3)
+    body(i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.full"}
diff --git a/clang/test/OpenMP/unroll_codegen_heuristic.cpp b/clang/test/OpenMP/unroll_codegen_heuristic.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_heuristic.cpp
@@ -0,0 +1,64 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP0:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP0]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp slt i32 %[[TMP1]], %[[TMP2]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP3]], i32 %[[TMP4]], i32 %[[TMP5]], i32 %[[TMP6]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add nsw i32 %[[TMP8]], %[[TMP7]]
+// IR-NEXT:    store i32 %[[ADD]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp unroll
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
diff --git a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
@@ -0,0 +1,210 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// Placeholder function for the loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @2, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* %[[END_ADDR]], i32* %[[STEP_ADDR]], i32* %[[START_ADDR]])
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp parallel for
+  #pragma omp unroll partial(7)
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+
+// IR-LABEL: @.omp_outlined.(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[DOTGLOBAL_TID__ADDR:.+]] = alloca i32*, align 8
+// IR-NEXT:    %[[DOTBOUND_TID__ADDR:.+]] = alloca i32*, align 8
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32*, align 8
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32*, align 8
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32*, align 8
+// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I12:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32* %[[DOTGLOBAL_TID_:.+]], i32** %[[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT:    store i32* %[[DOTBOUND_TID_:.+]], i32** %[[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT:    store i32* %[[END:.+]], i32** %[[END_ADDR]], align 8
+// IR-NEXT:    store i32* %[[STEP:.+]], i32** %[[STEP_ADDR]], align 8
+// IR-NEXT:    store i32* %[[START:.+]], i32** %[[START_ADDR]], align 8
+// IR-NEXT:    %[[TMP0:.+]] = load i32*, i32** %[[END_ADDR]], align 8
+// IR-NEXT:    %[[TMP1:.+]] = load i32*, i32** %[[STEP_ADDR]], align 8
+// IR-NEXT:    %[[TMP2:.+]] = load i32*, i32** %[[START_ADDR]], align 8
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
+// IR-NEXT:    store i32 %[[TMP3]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[TMP2]], align 4
+// IR-NEXT:    store i32 %[[TMP4]], i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[TMP0]], align 4
+// IR-NEXT:    store i32 %[[TMP5]], i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[TMP1]], align 4
+// IR-NEXT:    store i32 %[[TMP6]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP7]], %[[TMP8]]
+// IR-NEXT:    %[[SUB4:.+]] = sub i32 %[[SUB]], 1
+// IR-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP9]]
+// IR-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP10]]
+// IR-NEXT:    %[[SUB5:.+]] = sub i32 %[[DIV]], 1
+// IR-NEXT:    store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD7:.+]] = add i32 %[[TMP11]], 1
+// IR-NEXT:    store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[TMP12]], -6
+// IR-NEXT:    %[[DIV10:.+]] = udiv i32 %[[SUB9]], 7
+// IR-NEXT:    %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
+// IR-NEXT:    store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp ult i32 0, %[[TMP13]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_THEN]]:
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    store i32 %[[TMP14]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    store i32 1, i32* %[[DOTOMP_STRIDE]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    %[[TMP15:.+]] = load i32*, i32** %[[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[TMP15]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP16]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP18:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[CMP13:.+]] = icmp ugt i32 %[[TMP17]], %[[TMP18]]
+// IR-NEXT:    br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE]]:
+// IR-NEXT:    %[[TMP19:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    br label %[[COND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE]]:
+// IR-NEXT:    %[[TMP20:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    br label %[[COND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END]]:
+// IR-NEXT:    %[[COND:.+]] = phi i32 [ %[[TMP19]], %[[COND_TRUE]] ], [ %[[TMP20]], %[[COND_FALSE]] ]
+// IR-NEXT:    store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    store i32 %[[TMP21]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
+// IR-NEXT:    %[[TMP22:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[TMP23:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[ADD14:.+]] = add i32 %[[TMP23]], 1
+// IR-NEXT:    %[[CMP15:.+]] = icmp ult i32 %[[TMP22]], %[[ADD14]]
+// IR-NEXT:    br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
+// IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[MUL:.+]] = mul i32 %[[TMP24]], 7
+// IR-NEXT:    %[[ADD16:.+]] = add i32 0, %[[MUL]]
+// IR-NEXT:    store i32 %[[ADD16]], i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    store i32 %[[TMP25]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    %[[ADD17:.+]] = add i32 %[[TMP27]], 7
+// IR-NEXT:    %[[CMP18:.+]] = icmp ule i32 %[[TMP26]], %[[ADD17]]
+// IR-NEXT:    br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS]]:
+// IR-NEXT:    %[[TMP28:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP29:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD19:.+]] = add i32 %[[TMP29]], 1
+// IR-NEXT:    %[[CMP20:.+]] = icmp ule i32 %[[TMP28]], %[[ADD19]]
+// IR-NEXT:    br label %[[LAND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END]]:
+// IR-NEXT:    %[[TMP30:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP20]], %[[LAND_RHS]] ]
+// IR-NEXT:    br i1 %[[TMP30]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[MUL21:.+]] = mul i32 %[[TMP32]], %[[TMP33]]
+// IR-NEXT:    %[[ADD22:.+]] = add i32 %[[TMP31]], %[[MUL21]]
+// IR-NEXT:    store i32 %[[ADD22]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[TMP2]], align 4
+// IR-NEXT:    %[[TMP35:.+]] = load i32, i32* %[[TMP0]], align 4
+// IR-NEXT:    %[[TMP36:.+]] = load i32, i32* %[[TMP1]], align 4
+// IR-NEXT:    %[[TMP37:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP34]], i32 %[[TMP35]], i32 %[[TMP36]], i32 %[[TMP37]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP38:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[INC:.+]] = add i32 %[[TMP38]], 1
+// IR-NEXT:    store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
+// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
+// IR-NEXT:    %[[TMP39:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[ADD23:.+]] = add i32 %[[TMP39]], 1
+// IR-NEXT:    store i32 %[[ADD23]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_END]]:
+// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_LOOP_EXIT]]:
+// IR-NEXT:    %[[TMP40:.+]] = load i32*, i32** %[[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT:    %[[TMP41:.+]] = load i32, i32* %[[TMP40]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP41]])
+// IR-NEXT:    br label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_END]]:
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 7}
diff --git a/clang/test/OpenMP/unroll_codegen_partial.cpp b/clang/test/OpenMP/unroll_codegen_partial.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_partial.cpp
@@ -0,0 +1,64 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// Placeholder function for the loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP0:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP0]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp slt i32 %[[TMP1]], %[[TMP2]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP3]], i32 %[[TMP4]], i32 %[[TMP5]], i32 %[[TMP6]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add nsw i32 %[[TMP8]], %[[TMP7]]
+// IR-NEXT:    store i32 %[[ADD]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp unroll partial
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
diff --git a/clang/test/OpenMP/unroll_codegen_tile_for.cpp b/clang/test/OpenMP/unroll_codegen_tile_for.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_tile_for.cpp
@@ -0,0 +1,245 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// Placeholder function for the loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_12:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_14:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTFLOOR_0_IV__UNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTFLOOR_0_IV__UNROLLED_IV_I18:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTTILE_0_IV__UNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP1]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP4]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]]
+// IR-NEXT:    %[[SUB4:.+]] = sub i32 %[[SUB]], 1
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]]
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]]
+// IR-NEXT:    %[[SUB5:.+]] = sub i32 %[[DIV]], 1
+// IR-NEXT:    store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD7:.+]] = add i32 %[[TMP9]], 1
+// IR-NEXT:    store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[TMP10]], -1
+// IR-NEXT:    %[[DIV10:.+]] = udiv i32 %[[SUB9]], 2
+// IR-NEXT:    %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
+// IR-NEXT:    store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[ADD13:.+]] = add i32 %[[TMP11]], 1
+// IR-NEXT:    store i32 %[[ADD13]], i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[SUB15:.+]] = sub i32 %[[TMP12]], -3
+// IR-NEXT:    %[[DIV16:.+]] = udiv i32 %[[SUB15]], 4
+// IR-NEXT:    %[[SUB17:.+]] = sub i32 %[[DIV16]], 1
+// IR-NEXT:    store i32 %[[SUB17]], i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTFLOOR_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp ult i32 0, %[[TMP13]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_THEN]]:
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    store i32 %[[TMP14]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    store i32 1, i32* %[[DOTOMP_STRIDE]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    %[[TMP15:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    %[[CMP19:.+]] = icmp ugt i32 %[[TMP15]], %[[TMP16]]
+// IR-NEXT:    br i1 %[[CMP19]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE]]:
+// IR-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    br label %[[COND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE]]:
+// IR-NEXT:    %[[TMP18:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    br label %[[COND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END]]:
+// IR-NEXT:    %[[COND:.+]] = phi i32 [ %[[TMP17]], %[[COND_TRUE]] ], [ %[[TMP18]], %[[COND_FALSE]] ]
+// IR-NEXT:    store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP19:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    store i32 %[[TMP19]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
+// IR-NEXT:    %[[TMP20:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[ADD20:.+]] = add i32 %[[TMP21]], 1
+// IR-NEXT:    %[[CMP21:.+]] = icmp ult i32 %[[TMP20]], %[[ADD20]]
+// IR-NEXT:    br i1 %[[CMP21]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
+// IR-NEXT:    %[[TMP22:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[MUL:.+]] = mul i32 %[[TMP22]], 4
+// IR-NEXT:    %[[ADD22:.+]] = add i32 0, %[[MUL]]
+// IR-NEXT:    store i32 %[[ADD22]], i32* %[[DOTFLOOR_0_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    %[[TMP23:.+]] = load i32, i32* %[[DOTFLOOR_0_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    store i32 %[[TMP23]], i32* %[[DOTTILE_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTTILE_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP25:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[ADD23:.+]] = add i32 %[[TMP25]], 1
+// IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTFLOOR_0_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    %[[ADD24:.+]] = add i32 %[[TMP26]], 4
+// IR-NEXT:    %[[CMP25:.+]] = icmp ult i32 %[[ADD23]], %[[ADD24]]
+// IR-NEXT:    br i1 %[[CMP25]], label %[[COND_TRUE26:.+]], label %[[COND_FALSE28:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE26]]:
+// IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[ADD27:.+]] = add i32 %[[TMP27]], 1
+// IR-NEXT:    br label %[[COND_END30:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE28]]:
+// IR-NEXT:    %[[TMP28:.+]] = load i32, i32* %[[DOTFLOOR_0_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    %[[ADD29:.+]] = add i32 %[[TMP28]], 4
+// IR-NEXT:    br label %[[COND_END30]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END30]]:
+// IR-NEXT:    %[[COND31:.+]] = phi i32 [ %[[ADD27]], %[[COND_TRUE26]] ], [ %[[ADD29]], %[[COND_FALSE28]] ]
+// IR-NEXT:    %[[CMP32:.+]] = icmp ult i32 %[[TMP24]], %[[COND31]]
+// IR-NEXT:    br i1 %[[CMP32]], label %[[FOR_BODY:.+]], label %[[FOR_END45:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP29:.+]] = load i32, i32* %[[DOTTILE_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[MUL33:.+]] = mul i32 %[[TMP29]], 2
+// IR-NEXT:    %[[ADD34:.+]] = add i32 0, %[[MUL33]]
+// IR-NEXT:    store i32 %[[ADD34]], i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP30:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    store i32 %[[TMP30]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND35:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND35]]:
+// IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[ADD36:.+]] = add i32 %[[TMP32]], 2
+// IR-NEXT:    %[[CMP37:.+]] = icmp ule i32 %[[TMP31]], %[[ADD36]]
+// IR-NEXT:    br i1 %[[CMP37]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS]]:
+// IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD38:.+]] = add i32 %[[TMP34]], 1
+// IR-NEXT:    %[[CMP39:.+]] = icmp ule i32 %[[TMP33]], %[[ADD38]]
+// IR-NEXT:    br label %[[LAND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END]]:
+// IR-NEXT:    %[[TMP35:.+]] = phi i1 [ false, %[[FOR_COND35]] ], [ %[[CMP39]], %[[LAND_RHS]] ]
+// IR-NEXT:    br i1 %[[TMP35]], label %[[FOR_BODY40:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY40]]:
+// IR-NEXT:    %[[TMP36:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP37:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP38:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[MUL41:.+]] = mul i32 %[[TMP37]], %[[TMP38]]
+// IR-NEXT:    %[[ADD42:.+]] = add i32 %[[TMP36]], %[[MUL41]]
+// IR-NEXT:    store i32 %[[ADD42]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP39:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP40:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP41:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP42:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP39]], i32 %[[TMP40]], i32 %[[TMP41]], i32 %[[TMP42]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP43:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[INC:.+]] = add i32 %[[TMP43]], 1
+// IR-NEXT:    store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND35]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    br label %[[FOR_INC43:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC43]]:
+// IR-NEXT:    %[[TMP44:.+]] = load i32, i32* %[[DOTTILE_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[INC44:.+]] = add i32 %[[TMP44]], 1
+// IR-NEXT:    store i32 %[[INC44]], i32* %[[DOTTILE_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP5:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END45]]:
+// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
+// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
+// IR-NEXT:    %[[TMP45:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[ADD46:.+]] = add i32 %[[TMP45]], 1
+// IR-NEXT:    store i32 %[[ADD46]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_END]]:
+// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_LOOP_EXIT]]:
+// IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])
+// IR-NEXT:    br label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_END]]:
+// IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp for
+  #pragma omp tile sizes(4)
+  #pragma omp unroll partial
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 2}
+// IR: ![[LOOP5]] = distinct !{![[LOOP5]], ![[LOOPPROP3]]}
diff --git a/clang/test/OpenMP/unroll_codegen_unroll_for.cpp b/clang/test/OpenMP/unroll_codegen_unroll_for.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_unroll_for.cpp
@@ -0,0 +1,239 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_12:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_14:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV__UNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV__UNROLLED_IV_I18:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP1]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP4]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]]
+// IR-NEXT:    %[[SUB4:.+]] = sub i32 %[[SUB]], 1
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]]
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]]
+// IR-NEXT:    %[[SUB5:.+]] = sub i32 %[[DIV]], 1
+// IR-NEXT:    store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD7:.+]] = add i32 %[[TMP9]], 1
+// IR-NEXT:    store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[TMP10]], -1
+// IR-NEXT:    %[[DIV10:.+]] = udiv i32 %[[SUB9]], 2
+// IR-NEXT:    %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
+// IR-NEXT:    store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[ADD13:.+]] = add i32 %[[TMP11]], 1
+// IR-NEXT:    store i32 %[[ADD13]], i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[SUB15:.+]] = sub i32 %[[TMP12]], -1
+// IR-NEXT:    %[[DIV16:.+]] = udiv i32 %[[SUB15]], 2
+// IR-NEXT:    %[[SUB17:.+]] = sub i32 %[[DIV16]], 1
+// IR-NEXT:    store i32 %[[SUB17]], i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp ult i32 0, %[[TMP13]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_THEN]]:
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    store i32 %[[TMP14]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    store i32 1, i32* %[[DOTOMP_STRIDE]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    %[[TMP15:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    %[[CMP19:.+]] = icmp ugt i32 %[[TMP15]], %[[TMP16]]
+// IR-NEXT:    br i1 %[[CMP19]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE]]:
+// IR-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    br label %[[COND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE]]:
+// IR-NEXT:    %[[TMP18:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    br label %[[COND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END]]:
+// IR-NEXT:    %[[COND:.+]] = phi i32 [ %[[TMP17]], %[[COND_TRUE]] ], [ %[[TMP18]], %[[COND_FALSE]] ]
+// IR-NEXT:    store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP19:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    store i32 %[[TMP19]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
+// IR-NEXT:    %[[TMP20:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[ADD20:.+]] = add i32 %[[TMP21]], 1
+// IR-NEXT:    %[[CMP21:.+]] = icmp ult i32 %[[TMP20]], %[[ADD20]]
+// IR-NEXT:    br i1 %[[CMP21]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
+// IR-NEXT:    %[[TMP22:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[MUL:.+]] = mul i32 %[[TMP22]], 2
+// IR-NEXT:    %[[ADD22:.+]] = add i32 0, %[[MUL]]
+// IR-NEXT:    store i32 %[[ADD22]], i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    %[[TMP23:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    store i32 %[[TMP23]], i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    %[[ADD23:.+]] = add i32 %[[TMP25]], 2
+// IR-NEXT:    %[[CMP24:.+]] = icmp ule i32 %[[TMP24]], %[[ADD23]]
+// IR-NEXT:    br i1 %[[CMP24]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS]]:
+// IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[ADD25:.+]] = add i32 %[[TMP27]], 1
+// IR-NEXT:    %[[CMP26:.+]] = icmp ule i32 %[[TMP26]], %[[ADD25]]
+// IR-NEXT:    br label %[[LAND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END]]:
+// IR-NEXT:    %[[TMP28:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP26]], %[[LAND_RHS]] ]
+// IR-NEXT:    br i1 %[[TMP28]], label %[[FOR_BODY:.+]], label %[[FOR_END41:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP29:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[MUL27:.+]] = mul i32 %[[TMP29]], 2
+// IR-NEXT:    %[[ADD28:.+]] = add i32 0, %[[MUL27]]
+// IR-NEXT:    store i32 %[[ADD28]], i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP30:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    store i32 %[[TMP30]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND29:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND29]]:
+// IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[ADD30:.+]] = add i32 %[[TMP32]], 2
+// IR-NEXT:    %[[CMP31:.+]] = icmp ule i32 %[[TMP31]], %[[ADD30]]
+// IR-NEXT:    br i1 %[[CMP31]], label %[[LAND_RHS32:.+]], label %[[LAND_END35:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS32]]:
+// IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD33:.+]] = add i32 %[[TMP34]], 1
+// IR-NEXT:    %[[CMP34:.+]] = icmp ule i32 %[[TMP33]], %[[ADD33]]
+// IR-NEXT:    br label %[[LAND_END35]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END35]]:
+// IR-NEXT:    %[[TMP35:.+]] = phi i1 [ false, %[[FOR_COND29]] ], [ %[[CMP34]], %[[LAND_RHS32]] ]
+// IR-NEXT:    br i1 %[[TMP35]], label %[[FOR_BODY36:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY36]]:
+// IR-NEXT:    %[[TMP36:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP37:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP38:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[MUL37:.+]] = mul i32 %[[TMP37]], %[[TMP38]]
+// IR-NEXT:    %[[ADD38:.+]] = add i32 %[[TMP36]], %[[MUL37]]
+// IR-NEXT:    store i32 %[[ADD38]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP39:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP40:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP41:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP42:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP39]], i32 %[[TMP40]], i32 %[[TMP41]], i32 %[[TMP42]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP43:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[INC:.+]] = add i32 %[[TMP43]], 1
+// IR-NEXT:    store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND29]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    br label %[[FOR_INC39:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC39]]:
+// IR-NEXT:    %[[TMP44:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[INC40:.+]] = add i32 %[[TMP44]], 1
+// IR-NEXT:    store i32 %[[INC40]], i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP5:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END41]]:
+// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
+// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
+// IR-NEXT:    %[[TMP45:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[ADD42:.+]] = add i32 %[[TMP45]], 1
+// IR-NEXT:    store i32 %[[ADD42]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_END]]:
+// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_LOOP_EXIT]]:
+// IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])
+// IR-NEXT:    br label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_END]]:
+// IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp for
+  #pragma omp unroll partial
+  #pragma omp unroll partial
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 2}
+// IR: ![[LOOP5]] = distinct !{![[LOOP5]], ![[LOOPPROP3]], ![[LOOPPROP4]]}
diff --git a/clang/test/OpenMP/unroll_messages.cpp b/clang/test/OpenMP/unroll_messages.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/unroll_messages.cpp
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=51 -fsyntax-only -Wuninitialized -verify %s
+
+void func(int n) {
+  // expected-error@+2 {{statement after '#pragma omp unroll' must be a for loop}}
+  #pragma omp unroll
+  func(n);
+
+  // expected-error@+2 {{statement after '#pragma omp unroll' must be a for loop}}
+  #pragma omp unroll
+    ;
+
+  // expected-error@+2 {{the loop condition expression depends on the current loop control variable}}
+  #pragma omp unroll
+  for (int i = 0; i < 2*(i-4); ++i) {}
+
+  // expected-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}}
+  #pragma omp unroll
+  for (int i = 0; i/3 < 7; ++i) {}
+
+  // expected-warning@+1 {{extra tokens at the end of '#pragma omp unroll' are ignored}}
+  #pragma omp unroll foo
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp unroll partial(
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp unroll partial(4
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp unroll partial(4+
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp unroll partial(for)
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{integral constant expression must have integral or unscoped enumeration type, not 'void (int)'}}
+  #pragma omp unroll partial(func)
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{expected expression}}
+  #pragma omp unroll partial()
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp unroll partial(4,4)
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+3 {{expression is not an integral constant expression}} expected-note@+3 {{read of non-const variable 'a' is not allowed in a constant expression}}
+  // expected-note@+1 {{declared here}}
+  int a;
+  #pragma omp unroll partial(a)
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{argument to 'partial' clause must be a strictly positive integer value}}
+  #pragma omp unroll partial(0)
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{directive '#pragma omp unroll' cannot contain more than one 'partial' clause}}
+  #pragma omp unroll partial partial
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{directive '#pragma omp unroll' cannot contain more than one 'partial' clause}}
+  #pragma omp unroll partial(4) partial
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{directive '#pragma omp unroll' cannot contain more than one 'full' clause}}
+  #pragma omp unroll full full
+  for (int i = 0; i < 128; ++i) {}
+
+  // expected-error@+1 {{'full' and 'partial' clause are mutually exclusive and may not appear on the same directive}} expected-note@+1 {{'partial' clause is specified here}}
+  #pragma omp unroll partial full
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{'partial' and 'full' clause are mutually exclusive and may not appear on the same directive}} expected-note@+1 {{'full' clause is specified here}}
+  #pragma omp unroll full partial
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+2 {{loop to be fully unrolled must have a constant trip count}} expected-note@+1 {{'#pragma omp unroll full' directive found here}}
+  #pragma omp unroll full
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+2 {{statement after '#pragma omp for' must be a for loop}}
+  #pragma omp for
+  #pragma omp unroll
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+2 {{statement after '#pragma omp for' must be a for loop}}
+  #pragma omp for
+  #pragma omp unroll full
+  for (int i = 0; i < 128; ++i) {}
+
+  // expected-error@+2 {{statement after '#pragma omp unroll' must be a for loop}}
+  #pragma omp unroll
+  #pragma omp unroll
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+2 {{statement after '#pragma omp tile' must be a for loop}}
+  #pragma omp tile sizes(4)
+  #pragma omp unroll
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+4 {{expected 2 for loops after '#pragma omp for', but found only 1}}
+  // expected-note@+1 {{as specified in 'collapse' clause}}
+  #pragma omp for collapse(2)
+  for (int i = 0; i < n; ++i) {
+    #pragma omp unroll full
+    for (int j = 0; j < 128; ++j) {}
+  }
+}
+
+
+template<typename T, int Factor>
+void templated_func(int n) {
+  // expected-error@+1 {{argument to 'partial' clause must be a strictly positive integer value}}
+  #pragma omp unroll partial(Factor)
+  for (T i = 0; i < n; ++i) {}
+
+  // expected-error@+2 {{loop to be fully unrolled must have a constant trip count}} expected-note@+1 {{'#pragma omp unroll full' directive found here}}
+  #pragma omp unroll full
+  for (int i = 0; i < n; i-=Factor) {}
+}
+
+void template_inst(int n) {
+  // expected-note@+1 {{in instantiation of function template specialization 'templated_func<int, -1>' requested here}}
+  templated_func<int, -1>(n);
+}
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2045,6 +2045,7 @@
   void VisitOMPParallelDirective(const OMPParallelDirective *D);
   void VisitOMPSimdDirective(const OMPSimdDirective *D);
   void VisitOMPTileDirective(const OMPTileDirective *D);
+  void VisitOMPUnrollDirective(const OMPUnrollDirective *D);
   void VisitOMPForDirective(const OMPForDirective *D);
   void VisitOMPForSimdDirective(const OMPForSimdDirective *D);
   void VisitOMPSectionsDirective(const OMPSectionsDirective *D);
@@ -2223,6 +2224,12 @@
     Visitor->AddStmt(E);
 }
 
+void OMPClauseEnqueue::VisitOMPFullClause(const OMPFullClause *C) {}
+
+void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) {
+  Visitor->AddStmt(C->getFactor());
+}
+
 void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
   Visitor->AddStmt(C->getAllocator());
 }
@@ -2896,6 +2903,10 @@
   VisitOMPLoopBasedDirective(D);
 }
 
+void EnqueueVisitor::VisitOMPUnrollDirective(const OMPUnrollDirective *D) {
+  VisitOMPLoopBasedDirective(D);
+}
+
 void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) {
   VisitOMPLoopDirective(D);
 }
@@ -5575,6 +5586,8 @@
     return cxstring::createRef("OMPSimdDirective");
   case CXCursor_OMPTileDirective:
     return cxstring::createRef("OMPTileDirective");
+  case CXCursor_OMPUnrollDirective:
+    return cxstring::createRef("OMPUnrollDirective");
   case CXCursor_OMPForDirective:
     return cxstring::createRef("OMPForDirective");
   case CXCursor_OMPForSimdDirective:
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -652,6 +652,9 @@
   case Stmt::OMPTileDirectiveClass:
     K = CXCursor_OMPTileDirective;
     break;
+  case Stmt::OMPUnrollDirectiveClass:
+    K = CXCursor_OMPUnrollDirective;
+    break;
   case Stmt::OMPForDirectiveClass:
     K = CXCursor_OMPForDirective;
     break;
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -68,6 +68,8 @@
   let flangClass = "OmpObjectList";
 }
 def OMPC_Sizes: Clause<"sizes"> { let clangClass = "OMPSizesClause"; }
+def OMPC_Full: Clause<"full"> { let clangClass = "OMPFullClause"; }
+def OMPC_Partial: Clause<"partial"> { let clangClass = "OMPPartialClause"; }
 def OMPC_FirstPrivate : Clause<"firstprivate"> {
   let clangClass = "OMPFirstprivateClause";
   let flangClass = "OmpObjectList";
@@ -400,6 +402,12 @@
     VersionedClause<OMPC_Sizes, 51>,
   ];
 }
+def OMP_Unroll : Directive<"unroll"> {
+  let allowedOnceClauses = [
+    VersionedClause<OMPC_Full, 51>,
+    VersionedClause<OMPC_Partial, 51>,
+  ];
+}
 def OMP_For : Directive<"for"> {
   let allowedClauses = [
     VersionedClause<OMPC_Private>,