Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -5051,6 +5051,8 @@
 is treated as a boolean value; if it exists, it signals that the branch
 or switch that it is attached to is completely unpredictable.
 
+.. _llvm.loop:
+
 '``llvm.loop``'
 ^^^^^^^^^^^^^^^
 
@@ -5084,6 +5086,13 @@
     !0 = !{!0, !1}
     !1 = !{!"llvm.loop.unroll.count", i32 4}
 
+'``llvm.loop.disable_nonforced``'
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata disables any non-explicit transformation of this loop,
+meaning no heuristic is applied that tries to optimize this loop. See
+:ref:`transformation-metadata` for details.
+
 '``llvm.loop.vectorize``' and '``llvm.loop.interleave``'
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -5142,6 +5151,29 @@
 0 or if the loop does not have this metadata the width will be
 determined automatically.
 
+'``llvm.loop.vectorize.followup_vectorized``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata defines which loop attributes the vectorized loop will
+have. See :ref:`transformation-metadata` for details.
+
+'``llvm.loop.vectorize.followup_remainder``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata defines which loop attributes the epilogue will have. The
+epilogue is not vectorized and is executed when either the vectorized
+loop is not known to preserving semantics (because e.g. two arrays it
+processes are found to alias by a runtime check) or for the last
+iterations that do not fill a complete vector lane. See
+:ref:`Transformation Metadata <transformation-metadata>` for details.
+
+'``llvm.loop.vectorize.followup_all``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Attributes in the metadata will be added to the vectorized as well as to
+the remainder loop. See
+:ref:`Transformation Metadata <transformation-metadata>` for details.
+
 '``llvm.loop.unroll``'
 ^^^^^^^^^^^^^^^^^^^^^^
 
@@ -5210,6 +5242,19 @@
 
    !0 = !{!"llvm.loop.unroll.full"}
 
+'``llvm.loop.unroll.followup``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata defines which loop attributes the unrolled loop will have.
+See :ref:`Transformation Metadata <transformation-metadata>` for details.
+
+'``llvm.loop.unroll.followup_remainder``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata defines which loop attributes the remainder loop after
+partial/runtime unrolling will have. See
+:ref:`Transformation Metadata <transformation-metadata>` for details.
+
 '``llvm.loop.unroll_and_jam``'
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -5263,6 +5308,43 @@
 
    !0 = !{!"llvm.loop.unroll_and_jam.enable"}
 
+'``llvm.loop.unroll_and_jam.followup_outer``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata defines which loop attributes the outer unrolled loop will
+have. See :ref:`Transformation Metadata <transformation-metadata>` for
+details.
+
+'``llvm.loop.unroll_and_jam.followup_inner``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata defines which loop attributes the inner jammed loop will
+have. See :ref:`Transformation Metadata <transformation-metadata>` for
+details.
+
+'``llvm.loop.unroll_and_jam.followup_remainder_outer``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata defines which attributes the epilogue of the outer loop
+will have. This loop is usually unrolled, meaning there is no such
+loop. This attribute will be ignored in this case. See
+:ref:`Transformation Metadata <transformation-metadata>` for details.
+
+'``llvm.loop.unroll_and_jam.followup_remainder_inner``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata defines which attributes the inner loop of the epilogue
+will have. The outer epilogue will usually be unrolled, meaning there
+can be multiple inner remainder loops. See
+:ref:`Transformation Metadata <transformation-metadata>` for details.
+
+'``llvm.loop.unroll_and_jam.followup_all``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Attributes specified in the metadata is added to all
+``llvm.loop.unroll_and_jam.*`` loops. See
+:ref:`Transformation Metadata <transformation-metadata>` for details.
+
 '``llvm.loop.licm_versioning.disable``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -5295,6 +5377,34 @@
 This metadata should be used in conjunction with ``llvm.loop`` loop
 identification metadata.
 
+'``llvm.loop.distribute.followup_coincident``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata defines which attributes extracted loops with no cyclic
+dependencies will have (i.e. can be vectorized). See
+:ref:`Transformation Metadata <transformation-metadata>` for details.
+
+'``llvm.loop.distribute.followup_sequential``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata defines which attributes the isolated loops with unsafe
+memory dependencies will have. See
+:ref:`Transformation Metadata <transformation-metadata>` for details.
+
+'``llvm.loop.distribute.followup_fallback``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If loop versioning is necessary, this metadata defined the attributes
+the non-distributed fallback version will have. See
+:ref:`Transformation Metadata <transformation-metadata>` for details.
+
+'``llvm.loop.distribute.followup_all``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Thes attributes in this metdata is added to all followup loops of the
+loop distribution pass. See
+:ref:`Transformation Metadata <transformation-metadata>` for details.
+
 '``llvm.mem``'
 ^^^^^^^^^^^^^^^
 
Index: docs/Passes.rst
===================================================================
--- docs/Passes.rst
+++ docs/Passes.rst
@@ -1215,3 +1215,8 @@
 Displays the post dominator tree using the GraphViz tool, but omitting function
 bodies.
 
+``-transform-warning``: Report missed forced transformations
+------------------------------------------------------------
+
+Emits warnings about not yet applied forced transformations (e.g. from
+``#pragma omp simd``).
Index: docs/TransformMetadata.rst
===================================================================
--- /dev/null
+++ docs/TransformMetadata.rst
@@ -0,0 +1,410 @@
+.. _transformation-metadata:
+
+============================
+Code Transformation Metadata
+============================
+
+.. contents::
+   :local:
+
+Overview
+========
+
+LLVM transformation passes can be controlled by attaching metadata to
+the code to transform. By default passes will apply a heuristic on
+whether to apply a transformation and using which parameters.
+Transformations are usually applied conservatively, i.e. will only be
+applied if it is unlikely to cause any slowdown for any workload, or
+such a slowdown would be minor. Therefore most optimizations will be
+missed out.
+
+Frontends can give additional hints to LLVM passes on which
+transformations it should apply. This can be additional knowledge that
+cannot be derived from the emitted IR, or directives passed from the
+user/programmer. OpenMP pragmas are an example for the latter.
+
+If any such metadata is dropped from the program, the code's semantics
+must not change.
+
+Metadata on Loops
+=================
+
+Attributes can be attached to loops as described in :ref:`llvm.loop`.
+Attributes can describe properties of the loop, disable transformations,
+force specific transformations and set transformation options.
+
+Because metadata nodes are immutable (with the exception of
+``MDNode::replaceOperandWith`` which is dangerous to use on uniqued
+metadata), in order to add or remove a loop attributes, a new ``MDNode``
+must be created and assigned as the new ``llvm.loop`` metadata. Any
+connection between the old ``MDNode`` and the loop is lost. The
+``llvm.loop`` node is also used as LoopID (``Loop::getLoopID()``), i.e.
+the loop effectively gets a new identifier. For instance,
+``llvm.mem.parallel_loop_access`` references the LoopID. Therefore, if
+the parallel access property is to be preserved after adding/removing
+loop attributes, any ``llvm.mem.parallel_loop_access`` reference must be
+updated to the new LoopID.
+
+Transformation Metadata Structure
+=================================
+
+Some attributes describe code transformations (Unrolling, Vectorizing,
+Loop Distribution, etc.). They can either be a hint to the optimizer
+that a transformation might be beneficial, instruction to use a specific
+option, or convey a mandatory declaration by the user ('forced'; e.g.
+``#pragma clang loop`` or ``#pragma omp simd``).
+
+If a transformation is forced but cannot be carried-out for any reason,
+an optimization missed warning must be emitted. Semantic information
+such as a transformation being safe (e.g.
+``llvm.mem.parallel_loop_access``) is separate.
+
+Unless explicitly disabled, any optimization pass may heuristically
+determine whether a transformation is beneficial and apply it. If
+metadata for another transformation was specified, applying a different
+transformation before it might be inadvertent due to being applied on a
+different loop or the loop not existing anymore. To avoid having to
+explicitly disable an unknown number of passes, the attribute
+``llvm.loop.disable_nonforced`` disables all non-forced transformation.
+
+The following example avoids that the loop is altered
+before being vectorized, for instance being unrolled.
+
+.. code-block:: llvm
+
+      br i1 %exitcond, label %for.exit, label %for.header, !llvm.loop !0
+    ...
+    !0 = distinct !{!0, !1, !2}
+    !1 = !{!"llvm.loop.vectorize.enable", i1 true}
+    !2 = !{!"llvm.loop.disable_nonforced"}
+
+After a transformation is applied, follow-up attributes are set on the
+transformed and/or new loop(s). This allows additional attributes
+including followup-transformations to be specified. Specifying multiple
+transformations in the same metadata node is possible for compatibility
+reasons, but their execution order is undefined. For instance, when
+``llvm.loop.vectorize.enable`` and ``llvm.loop.unroll.enable`` are
+specified at the same time, unrolling may occur either before or after
+vectorization.
+
+As an example, the following instructs a loop being vectorized and only
+then unrolled.
+
+.. code-block:: llvm
+
+    !0 = distinct !{!0, !1, !2, !3}
+    !1 = !{!"llvm.loop.vectorize.enable", i1 true}
+    !2 = !{!"llvm.loop.disable_nonforced"}
+    !3 = !{!"llvm.loop.vectorize.followup_vectorized", !{"llvm.loop.unroll.enable"}}
+
+If no followup is specified, the pass may add attributes itself. For
+instance, the vectorizer adds a ``llvm.loop.isvectorized`` attribute and
+all attributes from the original loop excluding its loop vectorizer
+attributes. To avoid this, an empty followup attribute can be used, e.g.
+
+.. code-block:: llvm
+
+    !3 = !{!"llvm.loop.vectorize.followup_vectorized"}
+
+The followup attributes of a transformation that cannot be applied will
+never be added to a loop and are therefore effectively ignored. This means
+that any followup-transformation in such attributes requires that its
+prior transformations are applied before the followup-transformation.
+The user should receive a warning about the first transformation in the
+transformation chain that could not be applied if it a forced
+transformation. All following transformations are skipped.
+
+Pass-Specific Transformation Metadata
+=====================================
+
+Transformation options are specific for each transformation. In the
+following we present the model for each LLVM loop optimization pass and
+the metadata to influence them.
+
+Loop Vectorization and Interleaving
+-----------------------------------
+
+Loop vectorization and interleaving is interpreted as a single
+transformation. It is interpreted as forced if
+``!{"llvm.loop.vectorize.enable", i1 true}`` is set.
+
+Assuming the pre-vectorization loop is
+
+.. code-block:: c
+
+    for (int i = 0; i < n; i+=1) // original loop
+      Stmt(i);
+
+then the code after vectorization will be approximately (assuming an
+SIMD width of 4):
+
+.. code-block:: c
+
+    int i = 0;
+    if (rtc) {
+      for (; i + 3 < n; i+=4) // vectorized/interleaved loop
+        Stmt(i:i+3);
+    }
+    for (; i < n; i+=1) // remainder loop
+      Stmt(i);
+
+``llvm.loop.vectorize.followup_vectorized`` will set the attributes for
+the vectorized loop. If not specified, ``llvm.loop.isvectorized`` is
+combined with the original loop's attributes to avoid it being
+vectorized multiple times.
+
+``llvm.loop.vectorize.followup_remainder`` will set the attributes for
+the remainder loop. If not specified, it will have the original loop's
+attributes combined with ``llvm.loop.isvectorized`` and
+``llvm.loop.unroll.runtime.disable`` (unless the original loop already
+has unroll metadata).
+
+The attributes specified by ``llvm.loop.vectorize.followup_all`` are
+added to both loops.
+
+Loop Unrolling
+--------------
+
+Unrolling is interpreted as forced any ``!{!"llvm.loop.unroll.enable"}``
+metadata or option (``llvm.loop.unroll.count``, ``llvm.loop.unroll.full``)
+is present. Unrolling can be full unrolling, partial unrolling of a loop
+with constant trip count or runtime unrolling of a loop with a trip
+count unknown at compile-time.
+
+If the loop has been unrolled fully, there is no followup-loop. For
+partial/runtime unrolling, the original loop of
+
+.. code-block:: c
+
+    for (int i = 0; i < n; i+=1) // original loop
+      Stmt(i);
+
+is transformed into (using an unroll factor of 4):
+
+.. code-block:: c
+
+    int i = 0;
+    for (; i + 3 < n; i+=4) // unrolled loop
+      Stmt(i);
+      Stmt(i+1);
+      Stmt(i+2);
+      Stmt(i+3);
+    }
+    for (; i < n; i+=1) // remainder loop
+      Stmt(i);
+
+``llvm.loop.unroll.followup_unrolled`` will set the loop attributes of
+the unrolled loop. If not specified, the attributes of the original loop
+without the ``llvm.loop.unroll.*`` attributes are copied and
+``llvm.loop.unroll.disable`` added to it.
+
+``llvm.loop.unroll.followup_remainder`` defines the attributes of the
+remainder loop. If not specified the remainder loop will have no
+attributes. The remainder loop might not be present due to being fully
+unrolled in which case this attribute has no effect.
+
+Attributes defined in ``llvm.loop.unroll.followup_all`` are added to the
+unrolled and remainder loops.
+
+Unroll-And-Jam
+--------------
+
+Unroll-and-jam uses the following transformation model (here with an
+unroll factor if 2). Currently, it does not support a fallback version
+when the transformation is unsafe.
+
+.. code-block:: c
+
+    for (int i = 0; i < n; i+=1) { // original outer loop
+      Fore(i);
+      for (int j = 0; j < m; j+=1) // original inner loop
+        SubLoop(i, j);
+      Aft(i);
+    }
+
+.. code-block:: c
+
+    int i = 0;
+    for (; i + 1 < n; i+=2) { // unrolled outer loop
+      Fore(i);
+      Fore(i+1);
+      for (int j = 0; j < m; j+=1) { // unrolled inner loop
+        SubLoop(i, j);
+        SubLoop(i+1, j);
+      }
+      Aft(i);
+      Aft(i+1);
+    }
+    for (; i < n; i+=1) { // remainder outer loop
+      Fore(i);
+      for (int j = 0; j < m; j+=1) // remainder inner loop
+        SubLoop(i, j);
+      Aft(i);
+    }
+
+``llvm.loop.unroll_and_jam.followup_outer`` will set the loop attributes
+of the unrolled outer loop. If not specified, the attributes of the
+original outer loop without the ``llvm.loop.unroll.*`` attributes are
+copied and ``llvm.loop.unroll.disable`` added to it.
+
+``llvm.loop.unroll_and_jam.followup_inner`` will set the loop attributes
+of the unrolled inner loop. If not specified, the attributes of the
+original inner loop are used unchanged.
+
+``llvm.loop.unroll_and_jam.followup_remainder_outer`` sets the loop
+attributes of the outer remainder loop. If not specified it will not
+have any attributes. The remainder loop might not be present due to
+being fully unrolled.
+
+``llvm.loop.unroll_and_jam.followup_remainder_inner`` sets the loop
+attributes of the inner remainder loop. If not specified it will have
+the attributes of the original inner loop. It the outer remainder loop
+is unrolled, the inner remainder loop might be present multiple times.
+
+Attributes defined in ``llvm.loop.unroll_and_jam.followup_all`` are
+added to all of the aforementioned output loops.
+
+Loop Distribution
+-----------------
+
+The LoopDistribution pass tries to separate vectorizable parts of a loop
+from the non-vectorizable part (which otherwise would make the entire
+loop non-vectorizable). Conceptually, it transforms a loop such as
+
+.. code-block:: c
+
+    for (int i = 1; i < n; i+=1) { // original loop
+      A[i] = i;
+      B[i] = 2 + B[i];
+      C[i] = 3 + C[i - 1];
+    }
+
+into the following code:
+
+.. code-block:: c
+
+    if (rtc) {
+      for (int i = 1; i < n; i+=1) // coincident loop
+        A[i] = i;
+      for (int i = 1; i < n; i+=1) // coincident loop
+        B[i] = 2 + B[i];
+      for (int i = 1; i < n; i+=1) // sequential loop
+        C[i] = 3 + C[i - 1];
+    } else {
+      for (int i = 1; i < n; i+=1) { // fallback loop
+        A[i] = i;
+        B[i] = 2 + B[i];
+        C[i] = 3 + C[i - 1];
+      }
+    }
+
+``llvm.loop.distribute.followup_coincident`` sets the loop attributes of
+all loops without loop-carried dependencies (i.e. vectorizable loops).
+There might be more than one such loops. If not defined, the loops will
+inherit the original loop's attributes.
+
+``llvm.loop.distribute.followup_sequential`` sets the loop attributes of the
+loop with potentially unsafe dependencies. There should be at most one
+such loop. If not defined, the loop will inherit the original loop's
+attributes.
+
+``llvm.loop.distribute.followup_fallback`` defines the loop attributes
+for the fallback loop, which is a copy of the original loop for when
+loop versioning is required. If undefined, the fallback loop inherits
+all attributes from the original loop.
+
+Attributes defined in ``llvm.loop.distribute.followup_all`` are added to
+all of the aforementioned output loops.
+
+Versioning LICM
+---------------
+
+The pass hoists code out of loops that are only loop-invariant when
+dynamic conditions apply. For instance, it transforms the loop
+
+.. code-block:: c
+
+    for (int i = 0; i < n; i+=1) // original loop
+      A[i] = B[0];
+
+into:
+
+.. code-block:: c
+
+    if (rtc) {
+      auto b = B[0];
+      for (int i = 0; i < n; i+=1) // versioned loop
+        A[i] = b;
+    } else {
+      for (int i = 0; i < n; i+=1) // unversioned loop
+        A[i] = B[0];
+    }
+
+The runtime condition (``rtc``) checks that the array ``A`` and the
+element `B[0]` do not alias.
+
+Currently, this transformation does not support followup-attributes.
+
+Loop Interchange
+----------------
+
+Currently, the ``LoopInterchange`` pass does not use any metadata.
+
+Ambiguous Transformation Order
+==============================
+
+If there multiple transformations defined, the order in which they are
+executed depends on the order in LLVM's pass pipeline, which is subject
+to change. The default optimization pipeline (anything higher than
+``-O0``) has the following order.
+
+When using the legacy pass manager:
+
+ - LoopInterchange (if enabled)
+ - SimpleLoopUnroll/LoopFullUnroll (only performs full unrolling)
+ - VersioningLICM (if enabled)
+ - LoopDistribute
+ - LoopVectorizer
+ - LoopUnrollAndJam (if enabled)
+ - LoopUnroll (partial and runtime unrolling)
+
+When using the legacy pass manager with LTO:
+
+ - LoopInterchange (if enabled)
+ - SimpleLoopUnroll/LoopFullUnroll (only performs full unrolling)
+ - LoopVectorizer
+ - LoopUnroll (partial and runtime unrolling)
+
+When using the new pass manager:
+
+ - SimpleLoopUnroll/LoopFullUnroll (only performs full unrolling)
+ - LoopDistribute
+ - LoopVectorizer
+ - LoopUnrollAndJam (if enabled)
+ - LoopUnroll (partial and runtime unrolling)
+
+Leftover Transformations
+========================
+
+Forced transformations that have not been applied after the last
+transformation pass must be reported to the user. The transformation
+passes themselves cannot be responsible because they might not be in the
+pipeline, they might be multiple passes being able to apply a
+transformation (e.g. ``LoopInterchange`` and Polly) or a transformation
+attribute is 'hidden' inside another passes' followup attribute.
+
+The pass ``-transform-warning`` (``WarnMissedTransformationsPass``)
+emits such warnings. It should be placed after the last transformation
+pass.
+
+The current pass pipeline has a fixed order in which transformations
+passes are executed. A transformation can be in the followup of a pass
+that is executed later and thus leftover. For instance, a loop nest
+cannot be distributed and then interchanged with the current pass
+pipeline. The loop distribution will execute, but there is no loop
+interchange pass following such that any loop interchange metadata will
+be ignored. The ``-transform-warning`` should emit a warning in this
+case.
+
+Future versions of LLVM may fix this by executing transformations in a
+fixpoint loop.
Index: docs/index.rst
===================================================================
--- docs/index.rst
+++ docs/index.rst
@@ -284,6 +284,7 @@
    Statepoints
    MergeFunctions
    TypeMetadata
+   TransformMetadata
    FaultMaps
    MIRLangRef
    Coroutines
Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -394,6 +394,7 @@
 void initializeVerifierLegacyPassPass(PassRegistry&);
 void initializeVirtRegMapPass(PassRegistry&);
 void initializeVirtRegRewriterPass(PassRegistry&);
+void initializeWarnMissedTransformationsLegacyPass(PassRegistry &);
 void initializeWasmEHPreparePass(PassRegistry&);
 void initializeWholeProgramDevirtPass(PassRegistry&);
 void initializeWinEHPreparePass(PassRegistry&);
Index: include/llvm/LinkAllPasses.h
===================================================================
--- include/llvm/LinkAllPasses.h
+++ include/llvm/LinkAllPasses.h
@@ -218,6 +218,7 @@
       (void) llvm::createFloat2IntPass();
       (void) llvm::createEliminateAvailableExternallyPass();
       (void) llvm::createScalarizeMaskedMemIntrinPass();
+      (void) llvm::createWarnMissedTransformationsPass();
 
       (void)new llvm::IntervalPartition();
       (void)new llvm::ScalarEvolutionWrapperPass();
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -491,6 +491,13 @@
 // primarily to help other loop passes.
 //
 Pass *createLoopSimplifyCFGPass();
+
+//===----------------------------------------------------------------------===//
+//
+// WarnMissedTransformations - This pass emits warnings for leftover forced
+// transformations.
+//
+Pass *createWarnMissedTransformationsPass();
 } // End llvm namespace
 
 #endif
Index: include/llvm/Transforms/Scalar/WarnMissedTransforms.h
===================================================================
--- /dev/null
+++ include/llvm/Transforms/Scalar/WarnMissedTransforms.h
@@ -0,0 +1,38 @@
+//===- WarnMissedTransforms.h -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit warnings if forced code transformations have not been performed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
+#define LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Function;
+class Loop;
+class LPMUpdater;
+
+// New pass manager boilerplate.
+class WarnMissedTransformationsPass
+    : public PassInfoMixin<WarnMissedTransformationsPass> {
+public:
+  explicit WarnMissedTransformationsPass() {}
+
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+// Legacy pass manager boilerplate.
+Pass *createWarnMissedTransformationsPass();
+void initializeWarnMissedTransformationsLegacyPass(PassRegistry &);
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
Index: include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- include/llvm/Transforms/Utils/LoopUtils.h
+++ include/llvm/Transforms/Utils/LoopUtils.h
@@ -481,6 +481,77 @@
 Optional<const MDOperand *> findStringMetadataForLoop(Loop *TheLoop,
                                                       StringRef Name);
 
+/// Find named metadata for a loop with an integer value.
+llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);
+
+/// Create a new loop identifier for a loop created from a loop transformation.
+///
+/// @param OrigLoopID The loop ID of the loop before the transformation.
+/// @param FollowupAttrs List of attribute names that contain attributes to be
+///                      added to the new loop ID.
+/// @param InheritAttrsExceptPrefix Selects which attributes should be inherited
+///                                 from the original loop. The following values
+///                                 are considered:
+///        nullptr   : Inherit all attributes from @p OrigLoopID.
+///        ""        : Do not inherit any attribute from @p OrigLoopID; only use
+///                    those specified by a followup attribute.
+///        "<prefix>": Inherit all attributes except those which start with
+///                    <prefix>; commonly used to remove metadata for the
+///                    applied transformation.
+/// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return
+///                  None.
+///
+/// @return The loop ID for the after-transformation loop. The following values
+///         can be returned:
+///         None         : No followup attribute was found; it is up to the
+///                        transformation to choose attributes that make sense.
+///         @p OrigLoopID: The original identifier can be reused.
+///         nullptr      : The new loop has no attributes.
+///         MDNode*      : A new unique loop identifier.
+Optional<MDNode *>
+makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs,
+                   const char *InheritOptionsAttrsPrefix = "",
+                   bool AlwaysNew = false);
+
+/// Look for the loop attribute that disables all transformation heuristic.
+bool hasDisableAllTransformsHint(const Loop *L);
+
+/// The mode sets how eager a transformation should be applied.
+enum TransformationMode {
+  /// The pass can use heuristics to determine whether a transformation should
+  /// be applied.
+  TM_Unspecified,
+
+  /// The transformation should be applied without considering a cost model.
+  TM_Enable,
+
+  /// The transformation should not be applied.
+  TM_Disable,
+
+  /// Force is a flag and should not be used alone.
+  TM_Force = 0x04,
+
+  /// The transformation is necessary for correctness. Unlike general loop
+  /// metadata, it must not be dropped. If the transformation could not be
+  /// applied, a warning should be emitted.
+  TM_ForcedByUser = TM_Enable | TM_Force,
+
+  /// The transformation must not be applied. For instance, `#pragma clang loop
+  /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
+  /// general loop metadata, it must not be dropped. Most passes should not
+  /// behave differently under TM_Disable and TM_SuppressedByUser.
+  TM_SuppressedByUser = TM_Disable | TM_Force
+};
+
+/// @{
+/// Get the mode for LLVM's supported loop transformations.
+TransformationMode hasUnrollTransformation(Loop *L);
+TransformationMode hasUnrollAndJamTransformation(Loop *L);
+TransformationMode hasVectorizeTransformation(Loop *L);
+TransformationMode hasDistributeTransformation(Loop *L);
+TransformationMode hasLICMVersioningTransformation(Loop *L);
+/// @}
+
 /// Set input string into loop metadata by keeping other values intact.
 void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
                              unsigned V = 0);
Index: include/llvm/Transforms/Utils/UnrollLoop.h
===================================================================
--- include/llvm/Transforms/Utils/UnrollLoop.h
+++ include/llvm/Transforms/Utils/UnrollLoop.h
@@ -35,6 +35,15 @@
 
 using NewLoopsMap = SmallDenseMap<const Loop *, Loop *, 4>;
 
+/// @{
+/// Metadata attribute names
+static const char *LLVMLoopUnrollFollowupAll = "llvm.loop.unroll.followup_all";
+static const char *LLVMLoopUnrollFollowupUnrolled =
+    "llvm.loop.unroll.followup_unrolled";
+static const char *LLVMLoopUnrollFollowupRemainder =
+    "llvm.loop.unroll.followup_remainder";
+/// @}
+
 const Loop* addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
                                      BasicBlock *ClonedBB, LoopInfo *LI,
                                      NewLoopsMap &NewLoops);
@@ -61,15 +70,16 @@
                             unsigned PeelCount, bool UnrollRemainder,
                             LoopInfo *LI, ScalarEvolution *SE,
                             DominatorTree *DT, AssumptionCache *AC,
-                            OptimizationRemarkEmitter *ORE, bool PreserveLCSSA);
+                            OptimizationRemarkEmitter *ORE, bool PreserveLCSSA,
+                            Loop **RemainderLoop = nullptr);
 
 bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                 bool AllowExpensiveTripCount,
                                 bool UseEpilogRemainder, bool UnrollRemainder,
-                                LoopInfo *LI,
-                                ScalarEvolution *SE, DominatorTree *DT,
-                                AssumptionCache *AC,
-                                bool PreserveLCSSA);
+                                LoopInfo *LI, ScalarEvolution *SE,
+                                DominatorTree *DT, AssumptionCache *AC,
+                                bool PreserveLCSSA,
+                                Loop **ResultLoop = nullptr);
 
 void computePeelCount(Loop *L, unsigned LoopSize,
                       TargetTransformInfo::UnrollingPreferences &UP,
@@ -84,7 +94,8 @@
                                   unsigned TripMultiple, bool UnrollRemainder,
                                   LoopInfo *LI, ScalarEvolution *SE,
                                   DominatorTree *DT, AssumptionCache *AC,
-                                  OptimizationRemarkEmitter *ORE);
+                                  OptimizationRemarkEmitter *ORE,
+                                  Loop **EpilogueLoop = nullptr);
 
 bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
                           DependenceInfo &DI);
@@ -96,7 +107,7 @@
                         unsigned MaxTripCount, unsigned &TripMultiple,
                         unsigned LoopSize,
                         TargetTransformInfo::UnrollingPreferences &UP,
-                        bool &UseUpperBound);
+                        bool &UseUpperBound, bool IgnoreUser = false);
 
 BasicBlock *foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI,
                                      ScalarEvolution *SE, DominatorTree *DT);
Index: include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
===================================================================
--- include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -113,7 +113,11 @@
   unsigned getWidth() const { return Width.Value; }
   unsigned getInterleave() const { return Interleave.Value; }
   unsigned getIsVectorized() const { return IsVectorized.Value; }
-  enum ForceKind getForce() const { return (ForceKind)Force.Value; }
+  enum ForceKind getForce() const {
+    if (Force.Value == FK_Undefined && hasDisableAllTransformsHint(TheLoop))
+      return FK_Disabled;
+    return (ForceKind)Force.Value;
+  }
 
   /// If hints are provided that force vectorization, use the AlwaysPrint
   /// pass name to force the frontend to print the diagnostic.
Index: lib/Analysis/LoopInfo.cpp
===================================================================
--- lib/Analysis/LoopInfo.cpp
+++ lib/Analysis/LoopInfo.cpp
@@ -248,23 +248,19 @@
 }
 
 void Loop::setLoopID(MDNode *LoopID) const {
-  assert(LoopID && "Loop ID should not be null");
-  assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand");
-  assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself");
+  assert((!LoopID || LoopID->getNumOperands() > 0) &&
+         "Loop ID needs at least one operand");
+  assert((!LoopID || LoopID->getOperand(0) == LoopID) &&
+         "Loop ID should refer to itself");
 
-  if (BasicBlock *Latch = getLoopLatch()) {
-    Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
-    return;
-  }
-
-  assert(!getLoopLatch() &&
-         "The loop should have no single latch at this point");
   BasicBlock *H = getHeader();
   for (BasicBlock *BB : this->blocks()) {
     TerminatorInst *TI = BB->getTerminator();
     for (BasicBlock *Successor : TI->successors()) {
-      if (Successor == H)
+      if (Successor == H) {
         TI->setMetadata(LLVMContext::MD_loop, LoopID);
+        break;
+      }
     }
   }
 }
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -61,7 +61,6 @@
 #include "llvm/Support/Regex.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
-#include "llvm/Transforms/Instrumentation/CGProfile.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
 #include "llvm/Transforms/IPO/CalledValuePropagation.h"
@@ -87,6 +86,7 @@
 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
 #include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/Instrumentation/BoundsChecking.h"
+#include "llvm/Transforms/Instrumentation/CGProfile.h"
 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
@@ -142,6 +142,7 @@
 #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
+#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
 #include "llvm/Transforms/Utils/AddDiscriminators.h"
 #include "llvm/Transforms/Utils/BreakCriticalEdges.h"
 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
@@ -810,6 +811,7 @@
         createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
   }
   OptimizePM.addPass(LoopUnrollPass(Level));
+  OptimizePM.addPass(WarnMissedTransformationsPass());
   OptimizePM.addPass(InstCombinePass());
   OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
   OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
Index: lib/Passes/PassRegistry.def
===================================================================
--- lib/Passes/PassRegistry.def
+++ lib/Passes/PassRegistry.def
@@ -217,6 +217,7 @@
 FUNCTION_PASS("verify<regions>", RegionInfoVerifierPass())
 FUNCTION_PASS("view-cfg", CFGViewerPass())
 FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
+FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
 #undef FUNCTION_PASS
 
 #ifndef LOOP_ANALYSIS
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -692,6 +692,8 @@
     MPM.add(createLICMPass());
  }
 
+  MPM.add(createWarnMissedTransformationsPass());
+
   // After vectorization and unrolling, assume intrinsics may tell us more
   // about pointer alignments.
   MPM.add(createAlignmentFromAssumptionsPass());
@@ -858,6 +860,8 @@
   if (!DisableUnrollLoops)
     PM.add(createLoopUnrollPass(OptLevel));
 
+  PM.add(createWarnMissedTransformationsPass());
+
   // Now that we've optimized loops (in particular loop induction variables),
   // we may have exposed more scalar opportunities. Run parts of the scalar
   // optimizer again at this point.
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -68,6 +68,7 @@
   StraightLineStrengthReduce.cpp
   StructurizeCFG.cpp
   TailRecursionElimination.cpp
+  WarnMissedTransforms.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
Index: lib/Transforms/Scalar/LoopDistribute.cpp
===================================================================
--- lib/Transforms/Scalar/LoopDistribute.cpp
+++ lib/Transforms/Scalar/LoopDistribute.cpp
@@ -78,6 +78,18 @@
 #define LDIST_NAME "loop-distribute"
 #define DEBUG_TYPE LDIST_NAME
 
+/// @{
+/// Metadata attribute names
+static const char *LLVMLoopDistributeFollowupAll =
+    "llvm.loop.distribute.followup_all";
+static const char *LLVMLoopDistributeFollowupCoincident =
+    "llvm.loop.distribute.followup_coincident";
+static const char *LLVMLoopDistributeFollowuSequential =
+    "llvm.loop.distribute.followup_sequential";
+static const char *LLVMLoopDistributeFollowupFallback =
+    "llvm.loop.distribute.followup_fallback";
+/// @}
+
 static cl::opt<bool>
     LDistVerify("loop-distribute-verify", cl::Hidden,
                 cl::desc("Turn on DominatorTree and LoopInfo verification "
@@ -186,7 +198,7 @@
   /// Returns the loop where this partition ends up after distribution.
   /// If this partition is mapped to the original loop then use the block from
   /// the loop.
-  const Loop *getDistributedLoop() const {
+  Loop *getDistributedLoop() const {
     return ClonedLoop ? ClonedLoop : OrigLoop;
   }
 
@@ -443,6 +455,9 @@
     assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
            "preheader not empty");
 
+    // Preserve the original loop ID for use after the transformation.
+    MDNode *OrigLoopID = L->getLoopID();
+
     // Create a loop for each partition except the last.  Clone the original
     // loop before PH along with adding a preheader for the cloned loop.  Then
     // update PH to point to the newly added preheader.
@@ -457,9 +472,13 @@
 
       Part->getVMap()[ExitBlock] = TopPH;
       Part->remapInstructions();
+      setNewLoopID(OrigLoopID, Part);
     }
     Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
 
+    // Also set a new loop ID for the last loop.
+    setNewLoopID(OrigLoopID, &PartitionContainer.back());
+
     // Now go in forward order and update the immediate dominator for the
     // preheaders with the exiting block of the previous loop.  Dominance
     // within the loop is updated in cloneLoopWithPreheader.
@@ -575,6 +594,19 @@
       }
     }
   }
+
+  /// Assign new LoopIDs for the partition's cloned loop.
+  void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
+    Optional<MDNode *> PartitionID = makeFollowupLoopID(
+        OrigLoopID,
+        {LLVMLoopDistributeFollowupAll,
+         Part->hasDepCycle() ? LLVMLoopDistributeFollowuSequential
+                             : LLVMLoopDistributeFollowupCoincident});
+    if (PartitionID.hasValue()) {
+      Loop *NewLoop = Part->getDistributedLoop();
+      NewLoop->setLoopID(PartitionID.getValue());
+    }
+  }
 };
 
 /// For each memory instruction, this class maintains difference of the
@@ -743,6 +775,9 @@
       return fail("TooManySCEVRuntimeChecks",
                   "too many SCEV run-time checks needed.\n");
 
+    if (!IsForced.getValueOr(false) && hasDisableAllTransformsHint(L))
+      return fail("HeuristicDisabled", "distribution heuristic disabled");
+
     LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
     // We're done forming the partitions set up the reverse mapping from
     // instructions to partitions.
@@ -762,6 +797,8 @@
                                                   RtPtrChecking);
 
     if (!Pred.isAlwaysTrue() || !Checks.empty()) {
+      MDNode *OrigLoopID = L->getLoopID();
+
       LLVM_DEBUG(dbgs() << "\nPointers:\n");
       LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
       LoopVersioning LVer(*LAI, L, LI, DT, SE, false);
@@ -769,6 +806,17 @@
       LVer.setSCEVChecks(LAI->getPSE().getUnionPredicate());
       LVer.versionLoop(DefsUsedOutside);
       LVer.annotateLoopWithNoAlias();
+
+      // The unversioned loop will not be changed, so we inherit all attributes
+      // from the original loop, but remove the loop distribution metadata to
+      // avoid to distribute it again.
+      MDNode *UnversionedLoopID =
+          makeFollowupLoopID(OrigLoopID,
+                             {LLVMLoopDistributeFollowupAll,
+                              LLVMLoopDistributeFollowupFallback},
+                             "llvm.loop.distribute.", true)
+              .getValue();
+      LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
     }
 
     // Create identical copies of the original loop for each partition and hook
Index: lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -56,6 +56,20 @@
 
 #define DEBUG_TYPE "loop-unroll-and-jam"
 
+/// @{
+/// Metadata attribute names
+static const char *LLVMLoopUnrollAndJamFollowupAll =
+    "llvm.loop.unroll_and_jam.followup_all";
+static const char *LLVMLoopUnrollAndJamFollowupInner =
+    "llvm.loop.unroll_and_jam.followup_inner";
+static const char *LLVMLoopUnrollAndJamFollowupOuter =
+    "llvm.loop.unroll_and_jam.followup_outer";
+static const char *LLVMLoopUnrollAndJamFollowupRemainderInner =
+    "llvm.loop.unroll_and_jam.followup_remainder_inner";
+static const char *LLVMLoopUnrollAndJamFollowupRemainderOuter =
+    "llvm.loop.unroll_and_jam.followup_remainder_outer";
+/// @}
+
 static cl::opt<bool>
     AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
                       cl::desc("Allows loops to be unroll-and-jammed."));
@@ -112,11 +126,6 @@
   return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");
 }
 
-// Returns true if the loop has an unroll_and_jam(disable) pragma.
-static bool HasUnrollAndJamDisablePragma(const Loop *L) {
-  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.disable");
-}
-
 // If loop has an unroll_and_jam_count pragma return the (necessarily
 // positive) value from the pragma.  Otherwise return 0.
 static unsigned UnrollAndJamCountPragmaValue(const Loop *L) {
@@ -156,9 +165,10 @@
   // We have already checked that the loop has no unroll.* pragmas.
   unsigned MaxTripCount = 0;
   bool UseUpperBound = false;
-  bool ExplicitUnroll = computeUnrollCount(
-      L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
-      OuterTripMultiple, OuterLoopSize, UP, UseUpperBound);
+  bool ExplicitUnroll =
+      computeUnrollCount(L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount,
+                         MaxTripCount, OuterTripMultiple, OuterLoopSize, UP,
+                         UseUpperBound, /* IgnoreUser */ true);
   if (ExplicitUnroll || UseUpperBound) {
     // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it
     // for the unroller instead.
@@ -299,13 +309,16 @@
                     << L->getHeader()->getParent()->getName() << "] Loop %"
                     << L->getHeader()->getName() << "\n");
 
+  TransformationMode EnableMode = hasUnrollAndJamTransformation(L);
+  if (EnableMode & TM_Disable)
+    return LoopUnrollResult::Unmodified;
+
   // A loop with any unroll pragma (enabling/disabling/count/etc) is left for
   // the unroller, so long as it does not explicitly have unroll_and_jam
   // metadata. This means #pragma nounroll will disable unroll and jam as well
   // as unrolling
-  if (HasUnrollAndJamDisablePragma(L) ||
-      (HasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
-       !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam."))) {
+  if (HasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
+      !HasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
     LLVM_DEBUG(dbgs() << "  Disabled due to pragma.\n");
     return LoopUnrollResult::Unmodified;
   }
@@ -344,6 +357,19 @@
     return LoopUnrollResult::Unmodified;
   }
 
+  // Save original loop IDs for after the transformation.
+  MDNode *OrigOuterLoopID = L->getLoopID();
+  MDNode *OrigSubLoopID = SubLoop->getLoopID();
+
+  // To assign the loop id of the epilogue, assign it before unrolling it so it
+  // is applied to every inner loop of the epilogue. We later apply the loop ID
+  // for the jammed inner loop.
+  Optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
+      OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
+                        LLVMLoopUnrollAndJamFollowupRemainderInner});
+  if (NewInnerEpilogueLoopID.hasValue())
+    SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
+
   // Find trip count and trip multiple
   unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
   unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
@@ -359,9 +385,39 @@
   if (OuterTripCount && UP.Count > OuterTripCount)
     UP.Count = OuterTripCount;
 
-  LoopUnrollResult UnrollResult =
-      UnrollAndJamLoop(L, UP.Count, OuterTripCount, OuterTripMultiple,
-                       UP.UnrollRemainder, LI, &SE, &DT, &AC, &ORE);
+  Loop *EpilogueOuterLoop = nullptr;
+  LoopUnrollResult UnrollResult = UnrollAndJamLoop(
+      L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
+      &SE, &DT, &AC, &ORE, &EpilogueOuterLoop);
+
+  // Assign new loop attributes.
+  if (EpilogueOuterLoop) {
+    Optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
+        OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
+                          LLVMLoopUnrollAndJamFollowupRemainderOuter});
+    if (NewOuterEpilogueLoopID.hasValue())
+      EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
+  }
+
+  Optional<MDNode *> NewInnerLoopID =
+      makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
+                                           LLVMLoopUnrollAndJamFollowupInner});
+  if (NewInnerLoopID.hasValue())
+    SubLoop->setLoopID(NewInnerLoopID.getValue());
+  else
+    SubLoop->setLoopID(OrigSubLoopID);
+
+  if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
+    Optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
+        OrigOuterLoopID,
+        {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
+    if (NewOuterLoopID.hasValue()) {
+      L->setLoopID(NewOuterLoopID.getValue());
+
+      // Do not setLoopAlreadyUnrolled if a followup was given.
+      return UnrollResult;
+    }
+  }
 
   // If loop has an unroll count pragma or unrolled by explicitly set count
   // mark loop as unrolled to prevent unrolling beyond that requested.
Index: lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -713,12 +713,22 @@
 
 // Returns true if unroll count was set explicitly.
 // Calculates unroll count and writes it to UP.Count.
-bool llvm::computeUnrollCount(
-    Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
-    ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
-    OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
-    unsigned &TripMultiple, unsigned LoopSize,
-    TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
+// Unless IgnoreUser is true, will also use metadata and command-line options
+// that are specific to to the LoopUnroll pass (which, for instance, are
+// irrelevant for the LoopUnrollAndJam pass).
+// FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes
+// many LoopUnroll-specific options. The shared functionality should be
+// refactored into it own function.
+bool llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
+                              DominatorTree &DT, LoopInfo *LI,
+                              ScalarEvolution &SE,
+                              const SmallPtrSetImpl<const Value *> &EphValues,
+                              OptimizationRemarkEmitter *ORE,
+                              unsigned &TripCount, unsigned MaxTripCount,
+                              unsigned &TripMultiple, unsigned LoopSize,
+                              TargetTransformInfo::UnrollingPreferences &UP,
+                              bool &UseUpperBound, bool IgnoreUser) {
+
   // Check for explicit Count.
   // 1st priority is unroll count set by "unroll-count" option.
   bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
@@ -748,6 +758,14 @@
       return false;
   }
 
+  if (!IgnoreUser) {
+    TransformationMode EnableMode = hasUnrollTransformation(L);
+    if (EnableMode & TM_Disable) {
+      UP.Count = 0;
+      return false;
+    }
+  }
+
   bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
   bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
                         PragmaEnableUnroll || UserUnrollCount;
@@ -1066,14 +1084,39 @@
   if (TripCount && UP.Count > TripCount)
     UP.Count = TripCount;
 
+  // Save loop properties before it is transformed.
+  MDNode *OrigLoopID = L->getLoopID();
+
   // Unroll the loop.
+  Loop *RemainderLoop = nullptr;
   LoopUnrollResult UnrollResult = UnrollLoop(
       L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
       UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
-      LI, &SE, &DT, &AC, &ORE, PreserveLCSSA);
+      LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
   if (UnrollResult == LoopUnrollResult::Unmodified)
     return LoopUnrollResult::Unmodified;
 
+  if (RemainderLoop) {
+    Optional<MDNode *> RemainderLoopID =
+        makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
+                                        LLVMLoopUnrollFollowupRemainder});
+    if (RemainderLoopID.hasValue())
+      RemainderLoop->setLoopID(RemainderLoopID.getValue());
+  }
+
+  if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
+    Optional<MDNode *> NewLoopID =
+        makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
+                                        LLVMLoopUnrollFollowupUnrolled});
+    if (NewLoopID.hasValue()) {
+      L->setLoopID(NewLoopID.getValue());
+
+      // Do not setLoopAlreadyUnrolled if loop attributes have been specified
+      // explicitly.
+      return UnrollResult;
+    }
+  }
+
   // If loop has an unroll count pragma or unrolled by explicitly set count
   // mark loop as unrolled to prevent unrolling beyond that requested.
   // If the loop was peeled, we already "used up" the profile information
Index: lib/Transforms/Scalar/LoopVersioningLICM.cpp
===================================================================
--- lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -594,6 +594,11 @@
 
   if (skipLoop(L))
     return false;
+
+  // Do not do the transformation if disabled by metadata.
+  if (hasLICMVersioningTransformation(L) & TM_Disable)
+    return false;
+
   // Get Analysis information.
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -72,6 +72,7 @@
   initializeLoopUnrollPass(Registry);
   initializeLoopUnrollAndJamPass(Registry);
   initializeLoopUnswitchPass(Registry);
+  initializeWarnMissedTransformationsLegacyPass(Registry);
   initializeLoopVersioningLICMPass(Registry);
   initializeLoopIdiomRecognizeLegacyPassPass(Registry);
   initializeLowerAtomicLegacyPassPass(Registry);
Index: lib/Transforms/Scalar/WarnMissedTransforms.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -0,0 +1,134 @@
+//===- LoopTransformWarning.cpp -  ----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Emit warnings if forced code transformations have not been performed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "transform-warning"
+
+/// Emit warnings for forced (i.e. user-defined) loop transformations which have
+/// still not been performed.
+static void warnAboutLeftoverTransformations(Loop *L,
+                                             OptimizationRemarkEmitter *ORE) {
+  if (hasUnrollTransformation(L) == TM_ForcedByUser) {
+    LLVM_DEBUG(dbgs() << "Leftover unroll transformation\n");
+    ORE->emit(
+        DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
+                                          "FailedRequestedUnrolling",
+                                          L->getStartLoc(), L->getHeader())
+        << "loop not unrolled: failed explicitly specified loop unrolling");
+  }
+
+  if (hasUnrollAndJamTransformation(L) == TM_ForcedByUser) {
+    LLVM_DEBUG(dbgs() << "Leftover unroll-and-jam transformation\n");
+    ORE->emit(DiagnosticInfoOptimizationFailure(
+                  DEBUG_TYPE, "FailedRequestedUnrollAndJamming",
+                  L->getStartLoc(), L->getHeader())
+              << "loop not unroll-and-jammed: failed explicitly specified loop "
+                 "unroll-and-jam");
+  }
+
+  if (hasVectorizeTransformation(L) == TM_ForcedByUser) {
+    LLVM_DEBUG(dbgs() << "Leftover vectorization transformation\n");
+    Optional<int> VectorizeWidth =
+        getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+    Optional<int> InterleaveCount =
+        getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
+
+    if (VectorizeWidth.getValueOr(0) != 1)
+      ORE->emit(DiagnosticInfoOptimizationFailure(
+                    DEBUG_TYPE, "FailedRequestedVectorization",
+                    L->getStartLoc(), L->getHeader())
+                << "loop not vectorized: "
+                << "failed explicitly specified loop vectorization");
+    else if (InterleaveCount.getValueOr(0) != 1)
+      ORE->emit(DiagnosticInfoOptimizationFailure(
+                    DEBUG_TYPE, "FailedRequestedInterleaving", L->getStartLoc(),
+                    L->getHeader())
+                << "loop not interleaved: "
+                << "failed explicitly specified loop interleaving");
+  }
+
+  if (hasDistributeTransformation(L) == TM_ForcedByUser) {
+    LLVM_DEBUG(dbgs() << "Leftover distribute transformation\n");
+    ORE->emit(DiagnosticInfoOptimizationFailure(
+                  DEBUG_TYPE, "FailedRequestedDistribution", L->getStartLoc(),
+                  L->getHeader())
+              << "loop not distributed: failed explicitly specified loop "
+                 "distribution");
+  }
+}
+
+static void warnAboutLeftoverTransformations(Function *F, LoopInfo *LI,
+                                             OptimizationRemarkEmitter *ORE) {
+  for (auto *L : LI->getLoopsInPreorder())
+    warnAboutLeftoverTransformations(L, ORE);
+}
+
+// New pass manager boilerplate
+PreservedAnalyses
+WarnMissedTransformationsPass::run(Function &F, FunctionAnalysisManager &AM) {
+  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  auto &LI = AM.getResult<LoopAnalysis>(F);
+
+  warnAboutLeftoverTransformations(&F, &LI, &ORE);
+
+  return PreservedAnalyses::all();
+}
+
+// Legacy pass manager boilerplate
+namespace {
+class WarnMissedTransformationsLegacy : public FunctionPass {
+public:
+  static char ID;
+
+  explicit WarnMissedTransformationsLegacy() : FunctionPass(ID) {
+    initializeWarnMissedTransformationsLegacyPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+
+    auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+    warnAboutLeftoverTransformations(&F, &LI, &ORE);
+    return false;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+
+    AU.setPreservesAll();
+  }
+};
+} // end anonymous namespace
+
+char WarnMissedTransformationsLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(WarnMissedTransformationsLegacy, "transform-warning",
+                      "Warn about non-applied transformations", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(WarnMissedTransformationsLegacy, "transform-warning",
+                    "Warn about non-applied transformations", false, false)
+
+Pass *llvm::createWarnMissedTransformationsPass() {
+  return new WarnMissedTransformationsLegacy();
+}
Index: lib/Transforms/Utils/LoopUnroll.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnroll.cpp
+++ lib/Transforms/Utils/LoopUnroll.cpp
@@ -330,12 +330,15 @@
 ///
 /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
 /// DominatorTree if they are non-null.
+///
+/// If RemainderLoop is non-null, it will receive the remainder loop (if
+/// required and not fully unrolled).
 LoopUnrollResult llvm::UnrollLoop(
     Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime,
     bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst,
     unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder,
     LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
-    OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) {
+    OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) {
 
   BasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
@@ -469,7 +472,7 @@
   if (RuntimeTripCount && TripMultiple % Count != 0 &&
       !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
                                   EpilogProfitability, UnrollRemainder, LI, SE,
-                                  DT, AC, PreserveLCSSA)) {
+                                  DT, AC, PreserveLCSSA, RemainderLoop)) {
     if (Force)
       RuntimeTripCount = false;
     else {
Index: lib/Transforms/Utils/LoopUnrollAndJam.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -167,12 +167,14 @@
 
   isSafeToUnrollAndJam should be used prior to calling this to make sure the
   unrolling will be valid. Checking profitablility is also advisable.
+
+  If EpilogueLoop is non-null, it receives the epilogue loop (if it was
+  necessary to create one and not fully unrolled).
 */
-LoopUnrollResult
-llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
-                       unsigned TripMultiple, bool UnrollRemainder,
-                       LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
-                       AssumptionCache *AC, OptimizationRemarkEmitter *ORE) {
+LoopUnrollResult llvm::UnrollAndJamLoop(
+    Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple,
+    bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+    AssumptionCache *AC, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
 
   // When we enter here we should have already checked that it is safe
   BasicBlock *Header = L->getHeader();
@@ -196,7 +198,8 @@
   if (TripMultiple == 1 || TripMultiple % Count != 0) {
     if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
                                     /*UseEpilogRemainder*/ true,
-                                    UnrollRemainder, LI, SE, DT, AC, true)) {
+                                    UnrollRemainder, LI, SE, DT, AC, true,
+                                    EpilogueLoop)) {
       LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
                            "generated when assuming runtime trip count\n");
       return LoopUnrollResult::Unmodified;
Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -380,6 +380,7 @@
   }
   if (CreateRemainderLoop) {
     Loop *NewLoop = NewLoops[L];
+    MDNode *LoopID = NewLoop->getLoopID();
     assert(NewLoop && "L should have been cloned");
 
     // Only add loop metadata if the loop is not going to be completely
@@ -387,6 +388,16 @@
     if (UnrollRemainder)
       return NewLoop;
 
+    Optional<MDNode *> NewLoopID = makeFollowupLoopID(
+        LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
+    if (NewLoopID.hasValue()) {
+      NewLoop->setLoopID(NewLoopID.getValue());
+
+      // Do not setLoopAlreadyUnrolled if loop attributes have been defined
+      // explicitly.
+      return NewLoop;
+    }
+
     // Add unroll disable metadata to disable future unrolling for this loop.
     NewLoop->setLoopAlreadyUnrolled();
     return NewLoop;
@@ -525,10 +536,10 @@
 bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                       bool AllowExpensiveTripCount,
                                       bool UseEpilogRemainder,
-                                      bool UnrollRemainder,
-                                      LoopInfo *LI, ScalarEvolution *SE,
-                                      DominatorTree *DT, AssumptionCache *AC,
-                                      bool PreserveLCSSA) {
+                                      bool UnrollRemainder, LoopInfo *LI,
+                                      ScalarEvolution *SE, DominatorTree *DT,
+                                      AssumptionCache *AC, bool PreserveLCSSA,
+                                      Loop **ResultLoop) {
   LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
   LLVM_DEBUG(L->dump());
   LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -897,16 +908,20 @@
       formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
   }
 
+  auto UnrollResult = LoopUnrollResult::Unmodified;
   if (remainderLoop && UnrollRemainder) {
     LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
-    UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
-               /*Force*/ false, /*AllowRuntime*/ false,
-               /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
-               /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
-               /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
-               /*ORE*/ nullptr, PreserveLCSSA);
+    UnrollResult =
+        UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
+                   /*Force*/ false, /*AllowRuntime*/ false,
+                   /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
+                   /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
+                   /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
+                   /*ORE*/ nullptr, PreserveLCSSA);
   }
 
+  if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
+    *ResultLoop = remainderLoop;
   NumRuntimeUnrolled++;
   return true;
 }
Index: lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- lib/Transforms/Utils/LoopUtils.cpp
+++ lib/Transforms/Utils/LoopUtils.cpp
@@ -1282,14 +1282,8 @@
   INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 }
 
-/// Find string metadata for loop
-///
-/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
-/// operand or null otherwise.  If the string metadata is not found return
-/// Optional's not-a-value.
-Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
-                                                            StringRef Name) {
-  MDNode *LoopID = TheLoop->getLoopID();
+static Optional<MDNode *> findOptionMDForLoopID(MDNode *LoopID,
+                                                StringRef Name) {
   // Return none if LoopID is false.
   if (!LoopID)
     return None;
@@ -1308,18 +1302,242 @@
       continue;
     // Return true if MDString holds expected MetaData.
     if (Name.equals(S->getString()))
-      switch (MD->getNumOperands()) {
-      case 1:
-        return nullptr;
-      case 2:
-        return &MD->getOperand(1);
-      default:
-        llvm_unreachable("loop metadata has 0 or 1 operand");
-      }
+      return MD;
   }
   return None;
 }
 
+static Optional<MDNode *> findOptionMDForLoop(const Loop *TheLoop,
+                                              StringRef Name) {
+  return findOptionMDForLoopID(TheLoop->getLoopID(), Name);
+}
+
+/// Find string metadata for loop
+///
+/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
+/// operand or null otherwise.  If the string metadata is not found return
+/// Optional's not-a-value.
+Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
+                                                            StringRef Name) {
+  auto MD = findOptionMDForLoop(TheLoop, Name).getValueOr(nullptr);
+  if (!MD)
+    return None;
+  switch (MD->getNumOperands()) {
+  case 1:
+    return nullptr;
+  case 2:
+    return &MD->getOperand(1);
+  default:
+    llvm_unreachable("loop metadata has 0 or 1 operand");
+  }
+}
+
+static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
+                                                   StringRef Name) {
+  Optional<MDNode *> MD = findOptionMDForLoop(TheLoop, Name);
+  if (!MD.hasValue())
+    return None;
+  MDNode *OptionNode = MD.getValue();
+  if (OptionNode == nullptr)
+    return None;
+  switch (OptionNode->getNumOperands()) {
+  case 1:
+    // When the value is absent it is interpreted as 'attribute set'.
+    return true;
+  case 2:
+    return mdconst::extract_or_null<ConstantInt>(
+        OptionNode->getOperand(1).get());
+  }
+  llvm_unreachable("unexpected number of options");
+}
+
+static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
+  return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
+}
+
+llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
+                                                      StringRef Name) {
+  const MDOperand *AttrMD =
+      findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
+  if (!AttrMD)
+    return None;
+
+  ConstantInt *IntMD = mdconst::extract_or_null<ConstantInt>(AttrMD->get());
+  if (!IntMD)
+    return None;
+
+  return IntMD->getSExtValue();
+}
+
+Optional<MDNode *> llvm::makeFollowupLoopID(
+    MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,
+    const char *InheritOptionsExceptPrefix, bool AlwaysNew) {
+  if (!OrigLoopID) {
+    if (AlwaysNew)
+      return nullptr;
+    return None;
+  }
+
+  assert(OrigLoopID->getOperand(0) == OrigLoopID);
+
+  bool InheritAllAttrs = !InheritOptionsExceptPrefix;
+  bool InheritSomeAttrs =
+      InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';
+  SmallVector<Metadata *, 8> MDs;
+  MDs.push_back(nullptr);
+
+  bool Changed = false;
+  if (InheritAllAttrs || InheritSomeAttrs) {
+    for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) {
+      MDNode *Op = cast<MDNode>(Existing.get());
+
+      // Do not inherit excluded attributes.
+      if (InheritSomeAttrs) {
+        auto AttrName = cast<MDString>(Op->getOperand(0).get())->getString();
+        if (AttrName.startswith(InheritOptionsExceptPrefix)) {
+          Changed = true;
+          continue;
+        }
+      }
+
+      MDs.push_back(Op);
+    }
+  } else {
+    // Modified if we dropped at least one attribute.
+    Changed = OrigLoopID->getNumOperands() > 1;
+  }
+
+  bool HasAnyFollowup = false;
+  for (StringRef OptionName : FollowupOptions) {
+    MDNode *FollowupNode =
+        findOptionMDForLoopID(OrigLoopID, OptionName).getValueOr(nullptr);
+    if (!FollowupNode)
+      continue;
+
+    HasAnyFollowup = true;
+    for (const MDOperand &Option : drop_begin(FollowupNode->operands(), 1)) {
+      MDs.push_back(Option.get());
+      Changed = true;
+    }
+  }
+
+  // Attributes of the followup loop not specified explicity, so signal to the
+  // transformation pass to add suitable attributes.
+  if (!AlwaysNew && !HasAnyFollowup)
+    return None;
+
+  // If no attributes were added or remove, the previous loop Id can be reused.
+  if (!AlwaysNew && !Changed)
+    return OrigLoopID;
+
+  // No attributes is equivalent to having no !llvm.loop metadata at all.
+  if (MDs.size() == 1)
+    return nullptr;
+
+  // Build the new loop ID.
+  MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);
+  FollowupLoopID->replaceOperandWith(0, FollowupLoopID);
+  return FollowupLoopID;
+}
+
+bool llvm::hasDisableAllTransformsHint(const Loop *L) {
+  return getBooleanLoopAttribute(L, "llvm.loop.disable_nonforced");
+}
+
+TransformationMode llvm::hasUnrollTransformation(Loop *L) {
+  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
+    return TM_SuppressedByUser;
+
+  Optional<int> Count =
+      getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
+  if (Count.hasValue())
+    return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+
+  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
+    return TM_ForcedByUser;
+
+  if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))
+    return TM_ForcedByUser;
+
+  if (hasDisableAllTransformsHint(L))
+    return TM_Disable;
+
+  return TM_Unspecified;
+}
+
+TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
+  if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
+    return TM_SuppressedByUser;
+
+  Optional<int> Count =
+      getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
+  if (Count.hasValue())
+    return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+
+  if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
+    return TM_ForcedByUser;
+
+  if (hasDisableAllTransformsHint(L))
+    return TM_Disable;
+
+  return TM_Unspecified;
+}
+
+TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
+  Optional<bool> Enable =
+      getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
+
+  if (Enable == false)
+    return TM_SuppressedByUser;
+
+  Optional<int> VectorizeWidth =
+      getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
+  Optional<int> InterleaveCount =
+      getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
+
+  if (Enable == true) {
+    // 'Forcing' vector width and interleave count to one effectively disables
+    // this tranformation.
+    if (VectorizeWidth == 1 && InterleaveCount == 1)
+      return TM_SuppressedByUser;
+    return TM_ForcedByUser;
+  }
+
+  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
+    return TM_Disable;
+
+  if (VectorizeWidth == 1 && InterleaveCount == 1)
+    return TM_Disable;
+
+  if (VectorizeWidth > 1 || InterleaveCount > 1)
+    return TM_Enable;
+
+  if (hasDisableAllTransformsHint(L))
+    return TM_Disable;
+
+  return TM_Unspecified;
+}
+
+TransformationMode llvm::hasDistributeTransformation(Loop *L) {
+  if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
+    return TM_ForcedByUser;
+
+  if (hasDisableAllTransformsHint(L))
+    return TM_Disable;
+
+  return TM_Unspecified;
+}
+
+TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
+  if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
+    return TM_SuppressedByUser;
+
+  if (hasDisableAllTransformsHint(L))
+    return TM_Disable;
+
+  return TM_Unspecified;
+}
+
 /// Does a BFS from a given node to all of its children inside a given loop.
 /// The returned vector of nodes includes the starting point.
 SmallVector<DomTreeNode *, 16>
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -151,6 +151,16 @@
 #define LV_NAME "loop-vectorize"
 #define DEBUG_TYPE LV_NAME
 
+/// @{
+/// Metadata attribute names
+static const char *LLVMLoopVectorizeFollowupAll =
+    "llvm.loop.vectorize.followup_all";
+static const char *LLVMLoopVectorizeFollowupVectorized =
+    "llvm.loop.vectorize.followup_vectorized";
+static const char *LLVMLoopVectorizeFollowupRemainder =
+    "llvm.loop.vectorize.followup_remainder";
+/// @}
+
 STATISTIC(LoopsVectorized, "Number of loops vectorized");
 STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
 
@@ -1139,27 +1149,6 @@
 
 } // end anonymous namespace
 
-static void emitMissedWarning(Function *F, Loop *L,
-                              const LoopVectorizeHints &LH,
-                              OptimizationRemarkEmitter *ORE) {
-  LH.emitRemarkWithHints();
-
-  if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
-    if (LH.getWidth() != 1)
-      ORE->emit(DiagnosticInfoOptimizationFailure(
-                    DEBUG_TYPE, "FailedRequestedVectorization",
-                    L->getStartLoc(), L->getHeader())
-                << "loop not vectorized: "
-                << "failed explicitly specified loop vectorization");
-    else if (LH.getInterleave() != 1)
-      ORE->emit(DiagnosticInfoOptimizationFailure(
-                    DEBUG_TYPE, "FailedRequestedInterleaving", L->getStartLoc(),
-                    L->getHeader())
-                << "loop not interleaved: "
-                << "failed explicitly specified loop interleaving");
-  }
-}
-
 namespace llvm {
 
 /// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1646,7 +1635,7 @@
 
   if (!Hints.getWidth()) {
     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n");
-    emitMissedWarning(Fn, OuterLp, Hints, ORE);
+    Hints.emitRemarkWithHints();
     return false;
   }
 
@@ -1654,7 +1643,7 @@
     // TODO: Interleave support is future work.
     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for "
                          "outer loops.\n");
-    emitMissedWarning(Fn, OuterLp, Hints, ORE);
+    Hints.emitRemarkWithHints();
     return false;
   }
 
@@ -2825,6 +2814,7 @@
   BasicBlock *OldBasicBlock = OrigLoop->getHeader();
   BasicBlock *VectorPH = OrigLoop->getLoopPreheader();
   BasicBlock *ExitBlock = OrigLoop->getExitBlock();
+  MDNode *OrigLoopID = OrigLoop->getLoopID();
   assert(VectorPH && "Invalid loop structure");
   assert(ExitBlock && "Must have an exit block");
 
@@ -2965,6 +2955,17 @@
   LoopVectorBody = VecBody;
   LoopScalarBody = OldBasicBlock;
 
+  Optional<MDNode *> VectorizedLoopID =
+      makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
+                                      LLVMLoopVectorizeFollowupVectorized});
+  if (VectorizedLoopID.hasValue()) {
+    Lp->setLoopID(VectorizedLoopID.getValue());
+
+    // Do not setAlreadyVectorized if loop attributes have been defined
+    // explicitly.
+    return LoopVectorPreHeader;
+  }
+
   // Keep all loop hints from the original loop on the vector loop (we'll
   // replace the vectorizer-specific hints below).
   if (MDNode *LID = OrigLoop->getLoopID())
@@ -7324,7 +7325,7 @@
                                 &Requirements, &Hints, DB, AC);
   if (!LVL.canVectorize(EnableVPlanNativePath)) {
     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
-    emitMissedWarning(F, L, Hints, ORE);
+    Hints.emitRemarkWithHints();
     return false;
   }
 
@@ -7397,7 +7398,7 @@
     ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(),
                                      "NoImplicitFloat", L)
               << "loop not vectorized due to NoImplicitFloat attribute");
-    emitMissedWarning(F, L, Hints, ORE);
+    Hints.emitRemarkWithHints();
     return false;
   }
 
@@ -7412,7 +7413,7 @@
     ORE->emit(
         createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L)
         << "loop not vectorized due to unsafe FP support.");
-    emitMissedWarning(F, L, Hints, ORE);
+    Hints.emitRemarkWithHints();
     return false;
   }
 
@@ -7454,7 +7455,7 @@
   if (Requirements.doesNotMeet(F, L, Hints)) {
     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
                          "requirements.\n");
-    emitMissedWarning(F, L, Hints, ORE);
+    Hints.emitRemarkWithHints();
     return false;
   }
 
@@ -7531,6 +7532,8 @@
   LVP.setBestPlan(VF.Width, IC);
 
   using namespace ore;
+  bool DisableRuntimeUnroll = false;
+  MDNode *OrigLoopID = L->getLoopID();
 
   if (!VectorizeLoop) {
     assert(IC > 1 && "interleave count should not be 1 or 0");
@@ -7557,7 +7560,7 @@
     // no runtime checks about strides and memory. A scalar loop that is
     // rarely used is not worth unrolling.
     if (!LB.areSafetyChecksAdded())
-      AddRuntimeUnrollDisableMetaData(L);
+      DisableRuntimeUnroll = true;
 
     // Report the vectorization decision.
     ORE->emit([&]() {
@@ -7569,8 +7572,18 @@
     });
   }
 
-  // Mark the loop as already vectorized to avoid vectorizing again.
-  Hints.setAlreadyVectorized();
+  Optional<MDNode *> RemainderLoopID =
+      makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
+                                      LLVMLoopVectorizeFollowupRemainder});
+  if (RemainderLoopID.hasValue()) {
+    L->setLoopID(RemainderLoopID.getValue());
+  } else {
+    if (DisableRuntimeUnroll)
+      AddRuntimeUnrollDisableMetaData(L);
+
+    // Mark the loop as already vectorized to avoid vectorizing again.
+    Hints.setAlreadyVectorized();
+  }
 
   LLVM_DEBUG(verifyFunction(*L->getHeader()->getParent()));
   return true;
Index: test/Other/new-pm-defaults.ll
===================================================================
--- test/Other/new-pm-defaults.ll
+++ test/Other/new-pm-defaults.ll
@@ -237,6 +237,7 @@
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
+; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
 ; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass
Index: test/Other/new-pm-thinlto-defaults.ll
===================================================================
--- test/Other/new-pm-thinlto-defaults.ll
+++ test/Other/new-pm-thinlto-defaults.ll
@@ -215,6 +215,7 @@
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
 ; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
+; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
 ; CHECK-POSTLINK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass
Index: test/Other/opt-O2-pipeline.ll
===================================================================
--- test/Other/opt-O2-pipeline.ll
+++ test/Other/opt-O2-pipeline.ll
@@ -249,6 +249,10 @@
 ; CHECK-NEXT:       Scalar Evolution Analysis
 ; CHECK-NEXT:       Loop Pass Manager
 ; CHECK-NEXT:         Loop Invariant Code Motion
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
+; CHECK-NEXT:       Optimization Remark Emitter
+; CHECK-NEXT:       Warn about non-applied transformations
 ; CHECK-NEXT:       Alignment from assumptions
 ; CHECK-NEXT:     Strip Unused Function Prototypes
 ; CHECK-NEXT:     Dead Global Elimination
Index: test/Other/opt-O3-pipeline.ll
===================================================================
--- test/Other/opt-O3-pipeline.ll
+++ test/Other/opt-O3-pipeline.ll
@@ -253,6 +253,10 @@
 ; CHECK-NEXT:       Scalar Evolution Analysis
 ; CHECK-NEXT:       Loop Pass Manager
 ; CHECK-NEXT:         Loop Invariant Code Motion
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
+; CHECK-NEXT:       Optimization Remark Emitter
+; CHECK-NEXT:       Warn about non-applied transformations
 ; CHECK-NEXT:       Alignment from assumptions
 ; CHECK-NEXT:     Strip Unused Function Prototypes
 ; CHECK-NEXT:     Dead Global Elimination
Index: test/Other/opt-Os-pipeline.ll
===================================================================
--- test/Other/opt-Os-pipeline.ll
+++ test/Other/opt-Os-pipeline.ll
@@ -236,6 +236,10 @@
 ; CHECK-NEXT:       Scalar Evolution Analysis
 ; CHECK-NEXT:       Loop Pass Manager
 ; CHECK-NEXT:         Loop Invariant Code Motion
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
+; CHECK-NEXT:       Optimization Remark Emitter
+; CHECK-NEXT:       Warn about non-applied transformations
 ; CHECK-NEXT:       Alignment from assumptions
 ; CHECK-NEXT:     Strip Unused Function Prototypes
 ; CHECK-NEXT:     Dead Global Elimination
Index: test/Transforms/LoopDistribute/disable-heuristic.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopDistribute/disable-heuristic.ll
@@ -0,0 +1,48 @@
+; RUN: opt -basicaa -loop-distribute -enable-loop-distribute=0 -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @disable_heuristic(
+; EXPLICIT-NOT: for.body.ldist1:
+define void @disable_heuristic(i32* noalias %a,
+                         i32* noalias %b,
+                         i32* noalias %c,
+                         i32* noalias %d,
+                         i32* noalias %e) {
+entry:
+  br label %for.body
+
+for.body:
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulA = mul i32 %loadB, %loadA
+
+  %add = add nuw nsw i64 %ind, 1
+  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+  store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+  %loadD = load i32, i32* %arrayidxD, align 4
+
+  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+  %loadE = load i32, i32* %arrayidxE, align 4
+
+  %mulC = mul i32 %loadD, %loadE
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+  store i32 %mulC, i32* %arrayidxC, align 4
+
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0, !{!"llvm.loop.transformations.disable_nonforced"}}
Index: test/Transforms/LoopDistribute/followup.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopDistribute/followup.ll
@@ -0,0 +1,62 @@
+; RUN: opt -basicaa -loop-distribute -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %a, i32* %b, i32* %c, i32* %d, i32* %e) {
+entry:
+  br label %for.body
+
+for.body:
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulA = mul i32 %loadB, %loadA
+
+  %add = add nuw nsw i64 %ind, 1
+  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+  store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+  %loadD = load i32, i32* %arrayidxD, align 4
+
+  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+  %loadE = load i32, i32* %arrayidxE, align 4
+
+  %mulC = mul i32 %loadD, %loadE
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+  store i32 %mulC, i32* %arrayidxC, align 4
+
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !1, !2, !3, !4, !5}
+!1 = !{!"llvm.loop.distribute.enable", i1 true}
+!2 = !{!"llvm.loop.distribute.followup_all", !{!"llvm.loop.unroll.runtime.disable"}}
+!3 = !{!"llvm.loop.distribute.followup_coincident", !{!"llvm.loop.vectorize.enable", i1 false}}
+!4 = !{!"llvm.loop.distribute.followup_sequential", !{!"llvm.loop.vectorize.width", i32 8}}
+!5 = !{!"llvm.loop.distribute.followup_fallback", !{!"llvm.loop.unroll.disable"}}
+
+; CHECK-LABEL: for.body.lver.orig:
+; CHECK: br i1 %exitcond.lver.orig, label %for.end, label %for.body.lver.orig, !llvm.loop ![[LOOP_ORIG:[0-9]+]]
+; CHECK-LABEL: for.body.ldist1:
+; CHECK: br i1 %exitcond.ldist1, label %for.body.ph, label %for.body.ldist1, !llvm.loop ![[LOOP_SEQUENTIAL:[0-9]+]]
+; CHECK-LABEL: for.body:
+; CHECK: br i1 %exitcond, label %for.end, label %for.body, !llvm.loop ![[LOOP_COINCIDENT:[0-9]+]]
+
+; CHECK: ![[LOOP_ORIG]] = distinct !{![[LOOP_ORIG]], ![[RUNTIME_DISABLE:[0-9]+]], ![[UNROLL_DISABLE:[0-9]+]]}
+; CHECK: ![[RUNTIME_DISABLE]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: ![[LOOP_SEQUENTIAL]] = distinct !{![[LOOP_SEQUENTIAL]], ![[RUNTIME_DISABLE]], ![[WIDTH:[0-9]+]]}
+; CHECK: ![[WIDTH]] = !{!"llvm.loop.vectorize.width", i32 8}
+; CHECK: ![[LOOP_COINCIDENT]] = distinct !{![[LOOP_COINCIDENT]], ![[RUNTIME_DISABLE]], ![[VECTORIZE_ENABLE:[0-9]+]]}
+; CHECK: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 false}
Index: test/Transforms/LoopTransformWarning/distribution-remarks-missed.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopTransformWarning/distribution-remarks-missed.ll
@@ -0,0 +1,99 @@
+; Legacy pass manager
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+; New pass manager
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+
+; CHECK: warning: source.cpp:19:5: loop not distributed: failed explicitly specified loop distribution
+
+; YAML:     --- !Failure
+; YAML-NEXT: Pass:            transform-warning
+; YAML-NEXT: Name:            FailedRequestedDistribution
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 19, Column: 5 }
+; YAML-NEXT: Function:        _Z17test_array_boundsPiS_i
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not distributed: failed explicitly specified loop distribution'
+; YAML-NEXT: ...
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) !dbg !8 {
+entry:
+  %cmp9 = icmp sgt i32 %Length, 0, !dbg !32
+  br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32
+
+for.body.preheader:                          
+  br label %for.body, !dbg !35
+
+for.body:                                    
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18
+  %idxprom1 = sext i32 %0 to i64, !dbg !35
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
+  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35
+  store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !50
+
+for.end.loopexit:                            
+  br label %for.end
+
+for.end:                                      
+  ret void, !dbg !36
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "source.cpp", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "source.cpp", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 10, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 16, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.5.0"}
+!12 = !DILocation(line: 3, column: 8, scope: !13)
+!13 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4)
+!16 = !DILocation(line: 4, column: 5, scope: !17)
+!17 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !13)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 5, column: 9, scope: !23)
+!23 = distinct !DILexicalBlock(line: 5, column: 9, file: !1, scope: !17)
+!24 = !DILocation(line: 8, column: 1, scope: !4)
+!25 = !DILocation(line: 12, column: 8, scope: !26)
+!26 = distinct !DILexicalBlock(line: 12, column: 3, file: !1, scope: !7)
+!30 = !DILocation(line: 13, column: 5, scope: !26)
+!31 = !DILocation(line: 14, column: 1, scope: !7)
+!32 = !DILocation(line: 18, column: 8, scope: !33)
+!33 = distinct !DILexicalBlock(line: 18, column: 3, file: !1, scope: !8)
+!35 = !DILocation(line: 19, column: 5, scope: !33)
+!36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, retainedNodes: !2)
+
+!50 = !{!50, !{!"llvm.loop.distribute.enable"}}
\ No newline at end of file
Index: test/Transforms/LoopTransformWarning/unrollandjam-remarks-missed.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopTransformWarning/unrollandjam-remarks-missed.ll
@@ -0,0 +1,99 @@
+; Legacy pass manager
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+; New pass manager
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+
+; CHECK: warning: source.cpp:19:5: loop not unroll-and-jammed: failed explicitly specified loop unroll-and-jam
+
+; YAML:     --- !Failure
+; YAML-NEXT: Pass:            transform-warning
+; YAML-NEXT: Name:            FailedRequestedUnrollAndJamming
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 19, Column: 5 }
+; YAML-NEXT: Function:        _Z17test_array_boundsPiS_i
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not unroll-and-jammed: failed explicitly specified loop unroll-and-jam'
+; YAML-NEXT: ...
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) !dbg !8 {
+entry:
+  %cmp9 = icmp sgt i32 %Length, 0, !dbg !32
+  br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32
+
+for.body.preheader:                          
+  br label %for.body, !dbg !35
+
+for.body:                                    
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18
+  %idxprom1 = sext i32 %0 to i64, !dbg !35
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
+  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35
+  store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !50
+
+for.end.loopexit:                            
+  br label %for.end
+
+for.end:                                      
+  ret void, !dbg !36
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "source.cpp", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "source.cpp", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 10, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 16, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.5.0"}
+!12 = !DILocation(line: 3, column: 8, scope: !13)
+!13 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4)
+!16 = !DILocation(line: 4, column: 5, scope: !17)
+!17 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !13)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 5, column: 9, scope: !23)
+!23 = distinct !DILexicalBlock(line: 5, column: 9, file: !1, scope: !17)
+!24 = !DILocation(line: 8, column: 1, scope: !4)
+!25 = !DILocation(line: 12, column: 8, scope: !26)
+!26 = distinct !DILexicalBlock(line: 12, column: 3, file: !1, scope: !7)
+!30 = !DILocation(line: 13, column: 5, scope: !26)
+!31 = !DILocation(line: 14, column: 1, scope: !7)
+!32 = !DILocation(line: 18, column: 8, scope: !33)
+!33 = distinct !DILexicalBlock(line: 18, column: 3, file: !1, scope: !8)
+!35 = !DILocation(line: 19, column: 5, scope: !33)
+!36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, retainedNodes: !2)
+
+!50 = !{!50, !{!"llvm.loop.unroll_and_jam.enable"}}
\ No newline at end of file
Index: test/Transforms/LoopTransformWarning/unrolling-remarks-missed.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopTransformWarning/unrolling-remarks-missed.ll
@@ -0,0 +1,99 @@
+; Legacy pass manager
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+; New pass manager
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+
+; CHECK: warning: source.cpp:19:5: loop not unrolled: failed explicitly specified loop unrolling
+
+; YAML:     --- !Failure
+; YAML-NEXT: Pass:            transform-warning
+; YAML-NEXT: Name:            FailedRequestedUnrolling
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 19, Column: 5 }
+; YAML-NEXT: Function:        _Z17test_array_boundsPiS_i
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not unrolled: failed explicitly specified loop unrolling'
+; YAML-NEXT: ...
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) !dbg !8 {
+entry:
+  %cmp9 = icmp sgt i32 %Length, 0, !dbg !32
+  br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32
+
+for.body.preheader:                          
+  br label %for.body, !dbg !35
+
+for.body:                                    
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18
+  %idxprom1 = sext i32 %0 to i64, !dbg !35
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
+  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35
+  store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !50
+
+for.end.loopexit:                            
+  br label %for.end
+
+for.end:                                      
+  ret void, !dbg !36
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "source.cpp", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "source.cpp", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 10, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 16, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.5.0"}
+!12 = !DILocation(line: 3, column: 8, scope: !13)
+!13 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4)
+!16 = !DILocation(line: 4, column: 5, scope: !17)
+!17 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !13)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 5, column: 9, scope: !23)
+!23 = distinct !DILexicalBlock(line: 5, column: 9, file: !1, scope: !17)
+!24 = !DILocation(line: 8, column: 1, scope: !4)
+!25 = !DILocation(line: 12, column: 8, scope: !26)
+!26 = distinct !DILexicalBlock(line: 12, column: 3, file: !1, scope: !7)
+!30 = !DILocation(line: 13, column: 5, scope: !26)
+!31 = !DILocation(line: 14, column: 1, scope: !7)
+!32 = !DILocation(line: 18, column: 8, scope: !33)
+!33 = distinct !DILexicalBlock(line: 18, column: 3, file: !1, scope: !8)
+!35 = !DILocation(line: 19, column: 5, scope: !33)
+!36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, retainedNodes: !2)
+
+!50 = !{!50, !{!"llvm.loop.unroll.enable"}}
\ No newline at end of file
Index: test/Transforms/LoopTransformWarning/vectorization-remarks-missed.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopTransformWarning/vectorization-remarks-missed.ll
@@ -0,0 +1,113 @@
+; Legacy pass manager
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+; New pass manager
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-missed=transform-warning -pass-remarks-analysis=transform-warning 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=transform-warning -disable-output -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+; C/C++ code for tests
+; void test(int *A, int Length) {
+; #pragma clang loop vectorize(enable) interleave(enable)
+;   for (int i = 0; i < Length; i++) {
+;     A[i] = i;
+;     if (A[i] > Length)
+;       break;
+;   }
+; }
+; File, line, and column should match those specified in the metadata
+; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization
+
+; YAML:     --- !Failure
+; YAML-NEXT: Pass:            transform-warning
+; YAML-NEXT: Name:            FailedRequestedVectorization
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 19, Column: 5 }
+; YAML-NEXT: Function:        _Z17test_array_boundsPiS_i
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not vectorized: '
+; YAML-NEXT:   - String:          failed explicitly specified loop vectorization
+; YAML-NEXT: ...
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) !dbg !8 {
+entry:
+  %cmp9 = icmp sgt i32 %Length, 0, !dbg !32
+  br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32, !llvm.loop !34
+
+for.body.preheader:                          
+  br label %for.body, !dbg !35
+
+for.body:                                    
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18
+  %idxprom1 = sext i32 %0 to i64, !dbg !35
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
+  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35
+  store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !34
+
+for.end.loopexit:                            
+  br label %for.end
+
+for.end:                                      
+  ret void, !dbg !36
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "source.cpp", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "source.cpp", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 10, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 16, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.5.0"}
+!12 = !DILocation(line: 3, column: 8, scope: !13)
+!13 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4)
+!14 = !{!14, !15, !15}
+!15 = !{!"llvm.loop.vectorize.enable", i1 true}
+!16 = !DILocation(line: 4, column: 5, scope: !17)
+!17 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !13)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 5, column: 9, scope: !23)
+!23 = distinct !DILexicalBlock(line: 5, column: 9, file: !1, scope: !17)
+!24 = !DILocation(line: 8, column: 1, scope: !4)
+!25 = !DILocation(line: 12, column: 8, scope: !26)
+!26 = distinct !DILexicalBlock(line: 12, column: 3, file: !1, scope: !7)
+!27 = !{!27, !28, !29}
+!28 = !{!"llvm.loop.interleave.count", i32 1}
+!29 = !{!"llvm.loop.vectorize.width", i32 1}
+!30 = !DILocation(line: 13, column: 5, scope: !26)
+!31 = !DILocation(line: 14, column: 1, scope: !7)
+!32 = !DILocation(line: 18, column: 8, scope: !33)
+!33 = distinct !DILexicalBlock(line: 18, column: 3, file: !1, scope: !8)
+!34 = !{!34, !15}
+!35 = !DILocation(line: 19, column: 5, scope: !33)
+!36 = !DILocation(line: 20, column: 1, scope: !8)
+!37 = distinct !DILexicalBlock(line: 24, column: 3, file: !1, scope: !46)
+!38 = !DILocation(line: 27, column: 3, scope: !37)
+!39 = !DILocation(line: 31, column: 3, scope: !37)
+!40 = !DILocation(line: 28, column: 9, scope: !37)
+!41 = !DILocation(line: 29, column: 11, scope: !37)
+!42 = !DILocation(line: 29, column: 7, scope: !37)
+!43 = !DILocation(line: 27, column: 32, scope: !37)
+!44 = !DILocation(line: 27, column: 30, scope: !37)
+!45 = !DILocation(line: 27, column: 21, scope: !37)
+!46 = distinct !DISubprogram(name: "test_multiple_failures", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 26, file: !1, scope: !5, type: !6, retainedNodes: !2)
Index: test/Transforms/LoopUnroll/disable_nonforced.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/disable_nonforced.ll
@@ -0,0 +1,26 @@
+; RUN: opt -loop-unroll -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @disable_nonforced(
+; CHECK: load
+; CHECK-NOT: load
+define void @disable_nonforced(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !{!"llvm.loop.transformations.disable_nonforced"}}
Index: test/Transforms/LoopUnroll/disable_nonforced_count.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/disable_nonforced_count.ll
@@ -0,0 +1,27 @@
+; RUN: opt -loop-unroll -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @disable_nonforced_count(
+; CHECK: store
+; CHECK: store
+; CHECK-NOT: store
+define void @disable_nonforced_count(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !{!"llvm.loop.transformations.disable_nonforced"}, !{!"llvm.loop.unroll.count", i32 2}}
Index: test/Transforms/LoopUnroll/disable_nonforced_enable.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/disable_nonforced_enable.ll
@@ -0,0 +1,27 @@
+; RUN: opt -loop-unroll -unroll-count=2 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @disable_nonforced_enable(
+; CHECK: store
+; CHECK: store
+; CHECK-NOT: store
+define void @disable_nonforced_enable(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !{!"llvm.loop.transformations.disable_nonforced"}, !{!"llvm.loop.unroll.enable"}}
Index: test/Transforms/LoopUnroll/disable_nonforced_full.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/disable_nonforced_full.ll
@@ -0,0 +1,29 @@
+; RUN: opt -loop-unroll -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @disable_nonforced_full(
+; CHECK: store
+; CHECK: store
+; CHECK: store
+; CHECK: store
+; CHECK-NOT: store
+define void @disable_nonforced_full(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !{!"llvm.loop.transformations.disable_nonforced"}, !{!"llvm.loop.unroll.full"}}
Index: test/Transforms/LoopUnroll/runtime-loop_transform.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/runtime-loop_transform.ll
@@ -0,0 +1,251 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true  | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=true  | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Tests for unrolling loops with run-time trip counts
+
+; EPILOG: %xtraiter = and i32 %n
+; EPILOG:  %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG:  br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
+
+; PROLOG: %xtraiter = and i32 %n
+; PROLOG:  %lcmp.mod = icmp ne i32 %xtraiter, 0
+; PROLOG:  br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
+
+; EPILOG: for.body.epil:
+; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ],  [ %indvars.iv.unr, %for.body.epil.preheader ]
+; EPILOG:  %epil.iter.sub = sub i32 %epil.iter, 1
+; EPILOG:  %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
+; EPILOG:  br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !2
+
+; PROLOG: for.body.prol:
+; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
+; PROLOG:  %prol.iter.sub = sub i32 %prol.iter, 1
+; PROLOG:  %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
+; PROLOG:  br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop !0
+
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.02
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
+
+; Still try to completely unroll loops with compile-time trip counts
+; even if the -unroll-runtime is specified
+
+; EPILOG: for.body:
+; EPILOG-NOT: for.body.epil:
+
+; PROLOG: for.body:
+; PROLOG-NOT: for.body.prol:
+
+define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.01
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 5
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add
+}
+
+; This is test 2007-05-09-UnknownTripCount.ll which can be unrolled now
+; if the -unroll-runtime option is turned on
+
+; EPILOG: bb72.2:
+; PROLOG: bb72.2:
+
+define void @foo(i32 %trips) {
+entry:
+        br label %cond_true.outer
+
+cond_true.outer:
+        %indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ]
+        br label %bb72
+
+bb72:
+        %indvar.next2 = add i32 %indvar1.ph, 1
+        %exitcond3 = icmp eq i32 %indvar.next2, %trips
+        br i1 %exitcond3, label %cond_true138, label %cond_true.outer
+
+cond_true138:
+        ret void
+}
+
+
+; Test run-time unrolling for a loop that counts down by -2.
+
+; EPILOG: for.body.epil:
+; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.cond.for.end_crit_edge.epilog-lcssa
+
+; PROLOG: for.body.prol:
+; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit
+
+define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
+entry:
+  %cmp2 = icmp eq i32 %len, 0
+  br i1 %cmp2, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ]
+  %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ]
+  %res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %incdec.ptr = getelementptr inbounds i16, i16* %p.addr.05, i64 1
+  %0 = load i16, i16* %p.addr.05, align 2
+  %conv = zext i16 %0 to i32
+  %add = add i32 %conv, %res.03
+  %sub = add nsw i32 %len.addr.04, -2
+  %cmp = icmp eq i32 %sub, 0
+  br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  %phitmp = trunc i32 %add to i16
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i16 %res.0.lcssa
+}
+
+; Test run-time unrolling disable metadata.
+; EPILOG: for.body:
+; EPILOG-NOT: for.body.epil:
+
+; PROLOG: for.body:
+; PROLOG-NOT: for.body.prol:
+
+define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
+entry:
+  %cmp2 = icmp eq i32 %len, 0
+  br i1 %cmp2, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ]
+  %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ]
+  %res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %incdec.ptr = getelementptr inbounds i16, i16* %p.addr.05, i64 1
+  %0 = load i16, i16* %p.addr.05, align 2
+  %conv = zext i16 %0 to i32
+  %add = add i32 %conv, %res.03
+  %sub = add nsw i32 %len.addr.04, -2
+  %cmp = icmp eq i32 %sub, 0
+  br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body, !llvm.loop !0
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  %phitmp = trunc i32 %add to i16
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i16 %res.0.lcssa
+}
+
+; dont unroll loop with multiple exit/exiting blocks, unless
+; -runtime-unroll-multi-exit=true
+; single exit, multiple exiting blocks.
+define void @unique_exit(i32 %arg) {
+; PROLOG: unique_exit(
+; PROLOG-NOT: .unr
+
+; EPILOG: unique_exit(
+; EPILOG-NOT: .unr
+entry:
+  %tmp = icmp sgt i32 undef, %arg
+  br i1 %tmp, label %preheader, label %returnblock
+
+preheader:                                 ; preds = %entry
+  br label %header
+
+LoopExit:                                ; preds = %header, %latch
+  %tmp2.ph = phi i32 [ %tmp4, %header ], [ -1, %latch ]
+  br label %returnblock
+
+returnblock:                                         ; preds = %LoopExit, %entry
+  %tmp2 = phi i32 [ -1, %entry ], [ %tmp2.ph, %LoopExit ]
+  ret void
+
+header:                                           ; preds = %preheader, %latch
+  %tmp4 = phi i32 [ %inc, %latch ], [ %arg, %preheader ]
+  %inc = add nsw i32 %tmp4, 1
+  br i1 true, label %LoopExit, label %latch
+
+latch:                                            ; preds = %header
+  %cmp = icmp slt i32 %inc, undef
+  br i1 %cmp, label %header, label %LoopExit
+}
+
+; multiple exit blocks. don't unroll
+define void @multi_exit(i64 %trip, i1 %cond) {
+; PROLOG: multi_exit(
+; PROLOG-NOT: .unr
+
+; EPILOG: multi_exit(
+; EPILOG-NOT: .unr
+entry:
+  br label %loop_header
+
+loop_header:
+  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+  br i1 %cond, label %loop_latch, label %loop_exiting_bb1
+
+loop_exiting_bb1:
+  br i1 false, label %loop_exiting_bb2, label %exit1
+
+loop_exiting_bb2:
+  br i1 false, label %loop_latch, label %exit3
+
+exit3:
+  ret void
+
+loop_latch:
+  %iv_next = add i64 %iv, 1
+  %cmp = icmp ne i64 %iv_next, %trip
+  br i1 %cmp, label %loop_header, label %exit2.loopexit
+
+exit1:
+ ret void
+
+exit2.loopexit:
+  ret void
+}
+!0 = distinct !{!0, !1, !2, !3}
+!1 = !{!"llvm.loop.unroll.runtime.disable"}
+!2 = !{!"llvm.loop.unroll.followup_unrolled", !{!"llvm.loop.unroll.disable"}}
+!3 = !{!"llvm.loop.unroll.followup_remainder", !{!"llvm.loop.unroll.disable"}}
+!4 = distinct !{!4, !2, !3}
+
+; EPILOG: !0 = distinct !{!0, !1}
+; EPILOG: !1 = !{!"llvm.loop.unroll.disable"}
+
+; PROLOG: !0 = distinct !{!0, !1}
+; PROLOG: !1 = !{!"llvm.loop.unroll.disable"}
Index: test/Transforms/LoopUnroll/unroll-count_transform.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/unroll-count_transform.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -S -loop-unroll -unroll-count=2 | FileCheck %s
+; Checks that "llvm.loop.unroll.disable" is set when
+; unroll with count set by user has been applied.
+;
+; CHECK-LABEL: @foo(
+; CHECK: llvm.loop.unroll.disable
+
+define void @foo(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = !{!0, !{!"llvm.loop.unroll.followup", !{!"llvm.loop.unroll.disable"}}}
Index: test/Transforms/LoopUnroll/unroll-pragmas-disabled_transform.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/unroll-pragmas-disabled_transform.ll
@@ -0,0 +1,150 @@
+; RUN: opt < %s -loop-unroll -S | FileCheck %s
+;
+; Verify that the unrolling pass removes existing unroll count metadata
+; and adds a disable unrolling node after unrolling is complete.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; #pragma clang loop  vectorize(enable) unroll_count(4) vectorize_width(8)
+;
+; Unroll count metadata should be replaced with unroll(disable).  Vectorize
+; metadata should be untouched.
+;
+; CHECK-LABEL: @unroll_count_4(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_1:.*]]
+define void @unroll_count_4(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!1 = !{!1, !3, !11}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
+!3 = !{!"llvm.loop.unroll.count", i32 4}
+!4 = !{!"llvm.loop.vectorize.width", i32 8}
+!11 = !{!"llvm.loop.unroll.followup_unrolled", !2, !4, !{!"llvm.loop.unroll.disable"}}
+
+; #pragma clang loop unroll(full)
+;
+; An unroll disable metadata node is only added for the unroll count case.
+; In this case, the loop has a full unroll metadata but can't be fully unrolled
+; because the trip count is dynamic.  The full unroll metadata should remain
+; after unrolling.
+;
+; CHECK-LABEL: @unroll_full(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]]
+define void @unroll_full(i32* nocapture %a, i32 %b) {
+entry:
+  %cmp3 = icmp sgt i32 %b, 0
+  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !5
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %b
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+!5 = !{!5, !6}
+!6 = !{!"llvm.loop.unroll.full"}
+
+; #pragma clang loop unroll(disable)
+;
+; Unroll metadata should not change.
+;
+; CHECK-LABEL: @unroll_disable(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_3:.*]]
+define void @unroll_disable(i32* nocapture %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 64
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!7 = !{!7, !8}
+!8 = !{!"llvm.loop.unroll.disable"}
+
+; This function contains two loops which share the same llvm.loop metadata node
+; with an llvm.loop.unroll.count 2 hint.  Both loops should be unrolled.  This
+; verifies that adding disable metadata to a loop after unrolling doesn't affect
+; other loops which previously shared the same llvm.loop metadata.
+;
+; CHECK-LABEL: @shared_metadata(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_4:.*]]
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_5:.*]]
+define void @shared_metadata(i32* nocapture %List) #0 {
+entry:
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds i32, i32* %List, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add4 = add nsw i32 %0, 10
+  store i32 %add4, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.body3.1.preheader, label %for.body3, !llvm.loop !9
+
+for.body3.1.preheader:                            ; preds = %for.body3
+  br label %for.body3.1
+
+for.body3.1:                                      ; preds = %for.body3.1.preheader, %for.body3.1
+  %indvars.iv.1 = phi i64 [ %1, %for.body3.1 ], [ 0, %for.body3.1.preheader ]
+  %1 = add nsw i64 %indvars.iv.1, 1
+  %arrayidx.1 = getelementptr inbounds i32, i32* %List, i64 %1
+  %2 = load i32, i32* %arrayidx.1, align 4
+  %add4.1 = add nsw i32 %2, 10
+  store i32 %add4.1, i32* %arrayidx.1, align 4
+  %exitcond.1 = icmp eq i64 %1, 4
+  br i1 %exitcond.1, label %for.inc5.1, label %for.body3.1, !llvm.loop !9
+
+for.inc5.1:                                       ; preds = %for.body3.1
+  ret void
+}
+!9 = !{!9, !10, !13}
+!10 = !{!"llvm.loop.unroll.count", i32 2}
+!13 = !{!"llvm.loop.unroll.followup_unrolled", !{!"llvm.loop.unroll.disable"}}
+
+; CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[VEC_ENABLE:.*]], ![[WIDTH_8:.*]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
+; CHECK: ![[WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8}
+; CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[UNROLL_FULL:.*]]}
+; CHECK: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"}
+; CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_DISABLE:.*]]}
Index: test/Transforms/LoopUnroll/unroll-pragmas_transform.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/unroll-pragmas_transform.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
+; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
+; RUN: opt < %s -loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
+;
+; Run loop unrolling twice to verify that loop unrolling metadata is properly
+; removed and further unrolling is disabled after the pass is run once.
+
+; #pragma clang loop unroll_count(1)
+; Loop should not be unrolled
+;
+; CHECK-LABEL: @unroll_1(
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @unroll_1(i32* nocapture %a, i32 %b) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+!10 = !{!10, !11, !18}
+!11 = !{!"llvm.loop.unroll.count", i32 1}
+!18 = !{!"llvm.loop.unroll.followup", !{!"llvm.loop.unroll.disable"}}
Index: test/Transforms/LoopUnrollAndJam/disable_nonforced.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnrollAndJam/disable_nonforced.ll
@@ -0,0 +1,47 @@
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK-LABEL: disable_nonforced
+; CHECK: load
+; CHECK-NOT: load
+define void @disable_nonforced(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !0
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0, !{!"llvm.loop.disable_nonforced"}}
Index: test/Transforms/LoopUnrollAndJam/disable_nonforced_count.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnrollAndJam/disable_nonforced_count.ll
@@ -0,0 +1,49 @@
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK-LABEL: @disable_nonforced_enable(
+; CHECK: load
+; CHECK: load
+; CHECK-NOT: load
+; CHECK: br i1
+define void @disable_nonforced_enable(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !0
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0, !{!"llvm.loop.disable_nonforced"}, !{!"llvm.loop.unroll_and_jam.count", i32 2}}
Index: test/Transforms/LoopUnrollAndJam/disable_nonforced_enable.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnrollAndJam/disable_nonforced_enable.ll
@@ -0,0 +1,49 @@
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=2 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK-LABEL: disable_nonforced_enable
+; CHECK: load
+; CHECK: load
+; CHECK-NOT: load
+; CHECK: br i1
+define void @disable_nonforced_enable(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !0
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0, !{!"llvm.loop.disable_nonforced"}, !{!"llvm.loop.unroll_and_jam.enable"}}
Index: test/Transforms/LoopUnrollAndJam/followup-metadata.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnrollAndJam/followup-metadata.ll
@@ -0,0 +1,63 @@
+; RUN: opt -basicaa -tbaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define void @followup(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp = icmp ne i32 %J, 0
+  %cmp122 = icmp ne i32 %I, 0
+  %or.cond = and i1 %cmp, %cmp122
+  br i1 %or.cond, label %for.outer.preheader, label %for.end
+
+for.outer.preheader:
+  br label %for.outer
+
+for.outer:
+  %i.us = phi i32 [ %add8.us, %for.latch ], [ 0, %for.outer.preheader ]
+  br label %for.inner
+
+for.inner:
+  %j.us = phi i32 [ 0, %for.outer ], [ %inc.us, %for.inner ]
+  %sum1.us = phi i32 [ 0, %for.outer ], [ %add.us, %for.inner ]
+  %arrayidx.us = getelementptr inbounds i32, i32* %B, i32 %j.us
+  %0 = load i32, i32* %arrayidx.us, align 4
+  %add.us = add i32 %0, %sum1.us
+  %inc.us = add nuw i32 %j.us, 1
+  %exitcond = icmp eq i32 %inc.us, %J
+  br i1 %exitcond, label %for.latch, label %for.inner
+
+for.latch:
+  %add.us.lcssa = phi i32 [ %add.us, %for.inner ]
+  %arrayidx6.us = getelementptr inbounds i32, i32* %A, i32 %i.us
+  store i32 %add.us.lcssa, i32* %arrayidx6.us, align 4
+  %add8.us = add nuw i32 %i.us, 1
+  %exitcond25 = icmp eq i32 %add8.us, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer, !llvm.loop !0
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !1, !2, !3, !4, !6}
+!1 = !{!"llvm.loop.unroll_and_jam.enable"}
+!2 = !{!"llvm.loop.unroll_and_jam.followup_outer", !{!"llvm.loop.unroll.disable"}}
+!3 = !{!"llvm.loop.unroll_and_jam.followup_inner", !{!"llvm.loop.vectorize.width", i32 4}}
+!4 = !{!"llvm.loop.unroll_and_jam.followup_all", !{!"llvm.loop.unroll.runtime.disable"}}
+!6 = !{!"llvm.loop.unroll_and_jam.followup_remainder_inner", !{!"llvm.loop.vectorize.width", i32 1}}
+
+; CHECK: br i1 %exitcond.3, label %for.latch, label %for.inner, !llvm.loop ![[LOOP_INNER:[0-9]+]]
+; CHECK: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit, label %for.outer, !llvm.loop ![[LOOP_OUTER:[0-9]+]]
+; CHECK: br i1 %exitcond.epil, label %for.latch.epil, label %for.inner.epil, !llvm.loop ![[LOOP_REMAINDER_INNER:[0-9]+]]
+; CHECK: br i1 %exitcond.epil.1, label %for.latch.epil.1, label %for.inner.epil.1, !llvm.loop ![[LOOP_REMAINDER_INNER]]
+; CHECK: br i1 %exitcond.epil.2, label %for.latch.epil.2, label %for.inner.epil.2, !llvm.loop ![[LOOP_REMAINDER_INNER]]
+
+; CHECK: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[RUNTIME_DISABLE:[0-9]+]], ![[VEC_WIDTH:[0-9]+]]}
+; CHECK: ![[RUNTIME_DISABLE]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: ![[VEC_WIDTH]] = !{!"llvm.loop.vectorize.width", i32 4}
+; CHECK: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[RUNTIME_DISABLE]], ![[UNROLL_DISABLE:[0-9]+]]}
+; CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: ![[LOOP_REMAINDER_INNER]] = distinct !{![[LOOP_REMAINDER_INNER]], ![[RUNTIME_DISABLE]], ![[VEC_DISABLE:[0-9]+]]}
+; CHECK: ![[VEC_DISABLE]] = !{!"llvm.loop.vectorize.width", i32 1}
Index: test/Transforms/LoopUnrollAndJam/pragma.ll
===================================================================
--- test/Transforms/LoopUnrollAndJam/pragma.ll
+++ test/Transforms/LoopUnrollAndJam/pragma.ll
@@ -316,4 +316,4 @@
 !8 = distinct !{!"llvm.loop.unroll.disable"}
 !9 = distinct !{!9, !10}
 !10 = distinct !{!"llvm.loop.unroll.enable"}
-!11 = distinct !{!11, !8, !6}
\ No newline at end of file
+!11 = distinct !{!11, !8, !6}
Index: test/Transforms/LoopVectorize/X86/already-vectorized_transform.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/X86/already-vectorized_transform.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -disable-loop-unrolling -debug-only=loop-vectorize -O3 -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; We want to make sure that we don't even try to vectorize loops again
+; The vectorizer used to mark the un-vectorized loop only as already vectorized
+; thus, trying to vectorize the vectorized loop again
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = external global [255 x i32]
+
+; Function Attrs: nounwind readonly uwtable
+define i32 @vect() {
+; CHECK: LV: Checking a loop in "vect"
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+; We need to make sure we did vectorize the loop
+; CHECK: LV: Found a loop: for.body
+; CHECK: LV: We can vectorize this loop!
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds [255 x i32], [255 x i32]* @a, i64 0, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %red.05
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 255
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+; If it did, we have two loops:
+; CHECK: vector.body:
+; CHECK: br {{.*}} label %vector.body, !llvm.loop [[vect:![0-9]+]]
+; CHECK: for.body:
+; CHECK: br {{.*}} label %for.body, !llvm.loop [[scalar:![0-9]+]]
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add
+}
+
+!0 = !{!0, !3, !4}
+!3 = !{!"llvm.loop.vectorize.followup_vectorized", !{!"llvm.loop.isvectorized", i32 1}}
+!4 = !{!"llvm.loop.vectorize.followup_remainder", !{!"llvm.loop.unroll.runtime.disable"}, !{!"llvm.loop.isvectorized", i32 1}}
+
+; Now, we check for the Hint metadata
+; CHECK: [[vect]] = distinct !{[[vect]], [[width:![0-9]+]]}
+; CHECK: [[width]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[scalar]] = distinct !{[[scalar]], [[runtime_unroll:![0-9]+]], [[width]]}
+; CHECK: [[runtime_unroll]] = !{!"llvm.loop.unroll.runtime.disable"}
+
Index: test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -1,9 +1,9 @@
-; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
-; RUN: opt < %s -loop-vectorize -o /dev/null -pass-remarks-output=%t.yaml
+; RUN: opt < %s -loop-vectorize -transform-warning -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -transform-warning -o /dev/null -pass-remarks-output=%t.yaml
 ; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
 
-; RUN: opt < %s -passes=loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
-; RUN: opt < %s -passes=loop-vectorize -o /dev/null -pass-remarks-output=%t.yaml
+; RUN: opt < %s -passes=loop-vectorize,transform-warning -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize'  2>&1 | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize,transform-warning -o /dev/null -pass-remarks-output=%t.yaml
 ; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
 
 ; C/C++ code for tests
@@ -94,7 +94,7 @@
 ; YAML-NEXT:   - String:          ')'
 ; YAML-NEXT: ...
 ; YAML-NEXT: --- !Failure
-; YAML-NEXT: Pass:            loop-vectorize
+; YAML-NEXT: Pass:            transform-warning
 ; YAML-NEXT: Name:            FailedRequestedVectorization
 ; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 19, Column: 5 }
 ; YAML-NEXT: Function:        _Z17test_array_boundsPiS_i
Index: test/Transforms/LoopVectorize/X86/x86_fp80-vector-store_transform.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/X86/x86_fp80-vector-store_transform.ll
@@ -0,0 +1,32 @@
+; RUN: opt -O3 -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@x = common global [1024 x x86_fp80] zeroinitializer, align 16
+
+;CHECK-LABEL: @example(
+;CHECK-NOT: bitcast x86_fp80* {{%[^ ]+}} to <{{[2-9][0-9]*}} x x86_fp80>*
+;CHECK: store
+;CHECK: ret void
+
+define void @example() nounwind ssp uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %conv = sitofp i32 1 to x86_fp80
+  %arrayidx = getelementptr inbounds [1024 x x86_fp80], [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv
+  store x86_fp80 %conv, x86_fp80* %arrayidx, align 16
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.followup", !{!"llvm.loop.isvectorized", i1 true}}
Index: test/Transforms/LoopVectorize/disable-heuristic.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/disable-heuristic.ll
@@ -0,0 +1,26 @@
+; RUN: opt -loop-vectorize -force-vector-interleave=1 -dce -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @disable_heuristic(
+; CHECK-NOT: x i32>
+define void @disable_heuristic(i32* nocapture %a, i32 %n) {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !{!"llvm.loop.transformations.disable_nonforced"}}
Index: test/Transforms/LoopVectorize/duplicated-metadata_transform.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/duplicated-metadata_transform.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -loop-vectorize -S 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; This test makes sure we don't duplicate the loop vectorizer's metadata
+; while marking them as already vectorized (by setting width = 1), even
+; at lower optimization levels, where no extra cleanup is done
+
+define void @_Z3fooPf(float* %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
+  %p = load float, float* %arrayidx, align 4
+  %mul = fmul float %p, 2.000000e+00
+  store float %mul, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = !{!0, !1, !2}
+!1 = !{!"llvm.loop.vectorize.width", i32 4}
+!2 = !{!"llvm.loop.vectorize.followup", !{!"llvm.loop.isvectorized", i32 1}}
+; CHECK-NOT: !{metadata !"llvm.loop.vectorize.width", i32 4}
+; CHECK: !{!"llvm.loop.isvectorized", i32 1}
Index: test/Transforms/LoopVectorize/followups.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/followups.ll
@@ -0,0 +1,41 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; CHECK-LABEL @followups(
+define void @followups(i32* nocapture %a, i32 %n) {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = !{!0, !1, !2, !3, !4, !5}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
+!2 = !{!"llvm.loop.vectorize.width", i32 4}
+!3 = !{!"llvm.loop.vectorize.followup_vectorized", !{!"llvm.loop.isvectorized", i1 true}}
+!4 = !{!"llvm.loop.vectorize.followup_remainder", !{!"llvm.loop.unroll.disable"}}
+!5 = !{!"llvm.loop.vectorize.followup_all", !{!"llvm.loop.unroll.runtime.disable"}}
+
+; CHECK-LABEL: vector.body:
+; CHECK: br i1 %13, label %middle.block, label %vector.body, !llvm.loop ![[LOOP_VECTOR:[0-9]+]]
+; CHECK-LABEL: for.body:
+; CHECK: br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop ![[LOOP_REMAINDER:[0-9]+]]
+
+; CHECK: ![[LOOP_VECTOR]] = distinct !{![[LOOP_VECTOR]], ![[RUNTIMEUNROLL_DISABLE:[0-9]+]], ![[ISVECTORIZED:[0-9]+]]}
+; CHECK: ![[RUNTIMEUNROLL_DISABLE]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: ![[ISVECTORIZED:[0-9]+]] = !{!"llvm.loop.isvectorized", i1 true}
+; CHECK: ![[LOOP_REMAINDER]] = distinct !{![[LOOP_REMAINDER]], ![[RUNTIMEUNROLL_DISABLE]], ![[UNROLLDISABLE:[0-9]+]]}
+; CHECK: ![[UNROLLDISABLE]] = !{!"llvm.loop.unroll.disable"}
Index: test/Transforms/LoopVectorize/hints-trans_transform.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/hints-trans_transform.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instsimplify -simplifycfg < %s | FileCheck %s
+; Note: -instsimplify -simplifycfg remove the (now dead) original loop, making
+; it easy to test that the llvm.loop.unroll.disable hint is still present.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: norecurse nounwind uwtable
+define void @foo(i32* nocapture %b) #0 {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  store i32 1, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 16
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !0
+}
+
+; CHECK-LABEL: @foo
+; CHECK: = !{!"llvm.loop.unroll.disable"}
+
+attributes #0 = { norecurse nounwind uwtable }
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.unroll.disable"}
+!2 = !{!"llvm.loop.vectorize.followup", !1}
Index: test/Transforms/LoopVectorize/multiple-strides-vectorization_transform.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/multiple-strides-vectorization_transform.ll
@@ -0,0 +1,67 @@
+; RUN: opt -loop-vectorize -force-vector-width=4 -S < %s | FileCheck %s
+
+; This is the test case from PR26314.
+; When we were retrying dependence checking with memchecks only,
+; the loop-invariant access in the inner loop was incorrectly determined to be wrapping
+; because it was not strided in the inner loop.
+; Improved wrapping detection allows vectorization in the following case.
+
+; #define Z 32
+; typedef struct s {
+;       int v1[Z];
+;       int v2[Z];
+;       int v3[Z][Z];
+; } s;
+;
+; void slow_function (s* const obj, int z) {
+;    for (int j=0; j<Z; j++) {
+;        for (int k=0; k<z; k++) {
+;            int x = obj->v1[k] + obj->v2[j];
+;            obj->v3[j][k] += x;
+;        }
+;    }
+; }
+
+; CHECK-LABEL: Test
+; CHECK: <4 x i64>
+; CHECK: <4 x i32>, <4 x i32>
+; CHECK: !{!"llvm.loop.isvectorized", i32 1}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.s = type { [32 x i32], [32 x i32], [32 x [32 x i32]] }
+
+define void @Test(%struct.s* nocapture %obj, i64 %z) #0 {
+  br label %.outer.preheader
+
+
+.outer.preheader:
+  %i = phi i64 [ 0, %0 ], [ %i.next, %.outer ]
+  %1 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 1, i64 %i
+  br label %.inner
+
+.exit:
+  ret void
+ 
+.outer:
+  %i.next = add nuw nsw i64 %i, 1
+  %exitcond.outer = icmp eq i64 %i.next, 32
+  br i1 %exitcond.outer, label %.exit, label %.outer.preheader
+
+.inner:
+  %j = phi i64 [ 0, %.outer.preheader ], [ %j.next, %.inner ]
+  %2 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 0, i64 %j
+  %3 = load i32, i32* %2
+  %4 = load i32, i32* %1
+  %5 = add nsw i32 %4, %3
+  %6 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 2, i64 %i, i64 %j
+  %7 = load i32, i32* %6
+  %8 = add nsw i32 %5, %7
+  store i32 %8, i32* %6  
+  %j.next = add nuw nsw i64 %j, 1
+  %exitcond.inner = icmp eq i64 %j.next, %z
+  br i1 %exitcond.inner, label %.outer, label %.inner, !llvm.loop !0
+}
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.followup", !{!"llvm.loop.isvectorized", i32 1}}
Index: test/Transforms/LoopVectorize/no_array_bounds.ll
===================================================================
--- test/Transforms/LoopVectorize/no_array_bounds.ll
+++ test/Transforms/LoopVectorize/no_array_bounds.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -transform-warning -S 2>&1 | FileCheck %s
 
 ; Verify warning is generated when vectorization/ interleaving is explicitly specified and fails to occur.
 ; CHECK: warning: no_array_bounds.cpp:5:5: loop not vectorized: failed explicitly specified loop vectorization
Index: test/Transforms/LoopVectorize/no_switch.ll
===================================================================
--- test/Transforms/LoopVectorize/no_switch.ll
+++ test/Transforms/LoopVectorize/no_switch.ll
@@ -1,13 +1,9 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S 2>&1 | FileCheck %s
-; RUN: opt < %s -loop-vectorize -force-vector-width=1 -S 2>&1 | FileCheck %s -check-prefix=NOANALYSIS
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -pass-remarks-missed='loop-vectorize' -S 2>&1 | FileCheck %s -check-prefix=MOREINFO
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -transform-warning -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -pass-remarks-missed='loop-vectorize' -transform-warning -S 2>&1 | FileCheck %s -check-prefix=MOREINFO
 
 ; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
 ; CHECK: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization
 
-; NOANALYSIS-NOT: remark: {{.*}}
-; NOANALYSIS: warning: source.cpp:4:5: loop not interleaved: failed explicitly specified loop interleaving
-
 ; MOREINFO: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
 ; MOREINFO: remark: source.cpp:4:5: loop not vectorized (Force=true, Vector Width=4)
 ; MOREINFO: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization
Index: test/Transforms/LoopVectorize/vectorize-once_transform.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/vectorize-once_transform.ll
@@ -0,0 +1,79 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;
+; We want to make sure that we are vectorizeing the scalar loop only once
+; even if the pass manager runs the vectorizer multiple times due to inlining.
+
+
+; This test checks that we add metadata to vectorized loops
+; CHECK-LABEL: @_Z4foo1Pii(
+; CHECK: <4 x i32>
+; CHECK: llvm.loop
+; CHECK: ret
+
+; This test comes from the loop:
+;
+;int foo (int *A, int n) {
+;  return std::accumulate(A, A + n, 0);
+;}
+define i32 @_Z4foo1Pii(i32* %A, i32 %n) #0 {
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i32, i32* %A, i64 %idx.ext
+  %cmp3.i = icmp eq i32 %n, 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
+  %0 = load i32, i32* %__first.addr.04.i, align 4
+  %add.i = add nsw i32 %0, %__init.addr.05.i
+  %incdec.ptr.i = getelementptr inbounds i32, i32* %__first.addr.04.i, i64 1
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i, !llvm.loop !2
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+; This test checks that we don't vectorize loops that are marked with the "width" == 1 metadata.
+; CHECK-LABEL: @_Z4foo2Pii(
+; CHECK-NOT: <4 x i32>
+; CHECK: llvm.loop
+; CHECK: ret
+define i32 @_Z4foo2Pii(i32* %A, i32 %n) #0 {
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i32, i32* %A, i64 %idx.ext
+  %cmp3.i = icmp eq i32 %n, 0
+  br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
+
+for.body.i:                                       ; preds = %entry, %for.body.i
+  %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
+  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
+  %0 = load i32, i32* %__first.addr.04.i, align 4
+  %add.i = add nsw i32 %0, %__init.addr.05.i
+  %incdec.ptr.i = getelementptr inbounds i32, i32* %__first.addr.04.i, i64 1
+  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i, !llvm.loop !0
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
+  %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ]
+  ret i32 %__init.addr.0.lcssa.i
+}
+
+attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
+
+; CHECK: !0 = distinct !{!0, !1}
+; CHECK: !1 = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: !2 = distinct !{!2, !3, !1}
+; CHECK: !3 = !{!"llvm.loop.unroll.runtime.disable"}
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.width", i32 1}
+!2 = !{!2, !3, !4}
+!3 = !{!"llvm.loop.vectorize.followup_vectorized", !{!"llvm.loop.isvectorized", i32 1}}
+!4 = !{!"llvm.loop.vectorize.followup_remainder", !{!"llvm.loop.unroll.runtime.disable"}, !{!"llvm.loop.isvectorized", i32 1}}