Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1625,11 +1625,17 @@
 private:
   unsigned NumPredStores = 0;
 
+  /// \return None if UserVF is zero or must be ignored, UserVF if it is safe,
+  /// otherwise UserVF clamped to the maximum safe VF. For a scalable UserVF,
+  /// the resulting feasible VF may be a fixed-width VF.
+  Optional<ElementCount> computeFeasibleUserVF(ElementCount UserVF,
+                                               unsigned WidestType);
+
   /// \return An upper bound for the vectorization factor, a power-of-2 larger
   /// than zero. One is returned if vectorization should best be avoided due
   /// to cost.
   ElementCount computeFeasibleMaxVF(unsigned ConstTripCount,
-                                    ElementCount UserVF);
+                                    unsigned SmallestType, unsigned WidestType);
 
   /// The vectorization cost is a combination of the cost itself and a boolean
   /// indicating whether any of the contributing operations will actually
@@ -5505,9 +5511,23 @@
     return None;
   }
 
+  MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
+  unsigned SmallestType, WidestType;
+  std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
+
+  auto GetFeasibleMaxVF = [&]() -> ElementCount {
+    // First analyze the UserVF, fall back if the UserVF should be ignored.
+    Optional<ElementCount> MaybeMaxVF =
+        computeFeasibleUserVF(UserVF, WidestType);
+    if (!MaybeMaxVF)
+      MaybeMaxVF = computeFeasibleMaxVF(TC, SmallestType, WidestType);
+
+    return MaybeMaxVF.getValue();
+  };
+
   switch (ScalarEpilogueStatus) {
   case CM_ScalarEpilogueAllowed:
-    return computeFeasibleMaxVF(TC, UserVF);
+    return GetFeasibleMaxVF();
   case CM_ScalarEpilogueNotAllowedUsePredicate:
     LLVM_FALLTHROUGH;
   case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -5545,7 +5565,7 @@
       LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
                            "scalar epilogue instead.\n");
       ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
-      return computeFeasibleMaxVF(TC, UserVF);
+      return GetFeasibleMaxVF();
     }
     return None;
   }
@@ -5562,7 +5582,7 @@
     InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
   }
 
-  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
+  ElementCount MaxVF = GetFeasibleMaxVF();
   assert(!MaxVF.isScalable() &&
          "Scalable vectors do not yet support tail folding");
   assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&
@@ -5623,97 +5643,91 @@
   return None;
 }
 
-ElementCount
-LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
-                                                 ElementCount UserVF) {
-  bool IgnoreScalableUserVF = UserVF.isScalable() &&
-                              !TTI.supportsScalableVectors() &&
-                              !ForceTargetSupportsScalableVectors;
-  if (IgnoreScalableUserVF) {
-    LLVM_DEBUG(
-        dbgs() << "LV: Ignoring VF=" << UserVF
-               << " because target does not support scalable vectors.\n");
-    ORE->emit([&]() {
-      return OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreScalableUserVF",
-                                        TheLoop->getStartLoc(),
-                                        TheLoop->getHeader())
-             << "Ignoring VF=" << ore::NV("UserVF", UserVF)
-             << " because target does not support scalable vectors.";
-    });
-  }
-
-  // Beyond this point two scenarios are handled. If UserVF isn't specified
-  // then a suitable VF is chosen. If UserVF is specified and there are
-  // dependencies, check if it's legal. However, if a UserVF is specified and
-  // there are no dependencies, then there's nothing to do.
-  if (UserVF.isNonZero() && !IgnoreScalableUserVF &&
-      Legal->isSafeForAnyVectorWidth())
-    return UserVF;
+Optional<ElementCount>
+LoopVectorizationCostModel::computeFeasibleUserVF(ElementCount UserVF,
+                                                  unsigned WidestType) {
+  if (!UserVF.isNonZero())
+    return None;
 
-  MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
-  unsigned SmallestType, WidestType;
-  std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
-  unsigned WidestRegister = TTI.getRegisterBitWidth(true);
+  if (UserVF.isScalable() && !TTI.supportsScalableVectors() &&
+      !ForceTargetSupportsScalableVectors) {
+    OptimizationRemarkAnalysis R(DEBUG_TYPE, "IgnoreScalableUserVF",
+                                 TheLoop->getStartLoc(), TheLoop->getHeader());
+    R << "Ignoring VF=" << ore::NV("UserVF", UserVF)
+      << " because target does not support scalable vectors.";
+    LLVM_DEBUG(dbgs() << "LV: " << R.getMsg() << "\n");
+    ORE->emit(R);
+    return None;
+  }
 
   // Get the maximum safe dependence distance in bits computed by LAA.
   // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
   // the memory accesses that is most restrictive (involved in the smallest
   // dependence distance).
   unsigned MaxSafeVectorWidthInBits = Legal->getMaxSafeVectorWidthInBits();
+  unsigned MaxSafeElements =
+      PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType);
 
-  // If the user vectorization factor is legally unsafe, clamp it to a safe
-  // value. Otherwise, return as is.
-  if (UserVF.isNonZero() && !IgnoreScalableUserVF) {
-    unsigned MaxSafeElements =
-        PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType);
-    ElementCount MaxSafeVF = ElementCount::getFixed(MaxSafeElements);
-
-    if (UserVF.isScalable()) {
-      Optional<unsigned> MaxVScale = TTI.getMaxVScale();
-
-      // Scale VF by vscale before checking if it's safe.
-      MaxSafeVF = ElementCount::getScalable(
-          MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
-
-      if (MaxSafeVF.isZero()) {
-        // The dependence distance is too small to use scalable vectors,
-        // fallback on fixed.
-        LLVM_DEBUG(
-            dbgs()
-            << "LV: Max legal vector width too small, scalable vectorization "
-               "unfeasible. Using fixed-width vectorization instead.\n");
-        ORE->emit([&]() {
-          return OptimizationRemarkAnalysis(DEBUG_TYPE, "ScalableVFUnfeasible",
-                                            TheLoop->getStartLoc(),
-                                            TheLoop->getHeader())
-                 << "Max legal vector width too small, scalable vectorization "
-                 << "unfeasible. Using fixed-width vectorization instead.";
-        });
-        return computeFeasibleMaxVF(
-            ConstTripCount, ElementCount::getFixed(UserVF.getKnownMinValue()));
-      }
-    }
+  // With no dependencies UserVF is legal as-is; otherwise check it below.
+  if (Legal->isSafeForAnyVectorWidth())
+    return UserVF;
 
-    LLVM_DEBUG(dbgs() << "LV: The max safe VF is: " << MaxSafeVF << ".\n");
+  ElementCount MaxSafeVF = ElementCount::getFixed(MaxSafeElements);
 
-    if (ElementCount::isKnownLE(UserVF, MaxSafeVF))
-      return UserVF;
+  if (UserVF.isScalable()) {
+    Optional<unsigned> MaxVScale = TTI.getMaxVScale();
 
-    LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
-                      << " is unsafe, clamping to max safe VF=" << MaxSafeVF
-                      << ".\n");
-    ORE->emit([&]() {
-      return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
-                                        TheLoop->getStartLoc(),
-                                        TheLoop->getHeader())
-             << "User-specified vectorization factor "
-             << ore::NV("UserVectorizationFactor", UserVF)
-             << " is unsafe, clamping to maximum safe vectorization factor "
-             << ore::NV("VectorizationFactor", MaxSafeVF);
-    });
-    return MaxSafeVF;
+    // Scale VF by vscale before checking if it's safe.
+    MaxSafeVF = ElementCount::getScalable(
+        MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
+
+    if (MaxSafeVF.isZero()) {
+      // The dependence distance is too small to use scalable vectors,
+      // fallback on fixed.
+      LLVM_DEBUG(
+          dbgs()
+          << "LV: Max legal vector width too small, scalable vectorization "
+             "unfeasible. Using fixed-width vectorization instead.\n");
+      ORE->emit([&]() {
+        return OptimizationRemarkAnalysis(DEBUG_TYPE, "ScalableVFUnfeasible",
+                                          TheLoop->getStartLoc(),
+                                          TheLoop->getHeader())
+               << "Max legal vector width too small, scalable vectorization "
+               << "unfeasible. Using fixed-width vectorization instead.";
+      });
+      return computeFeasibleUserVF(
+          ElementCount::getFixed(UserVF.getKnownMinValue()), WidestType);
+    }
   }
 
+  LLVM_DEBUG(dbgs() << "LV: The max safe VF is: " << MaxSafeVF << ".\n");
+  if (ElementCount::isKnownLE(UserVF, MaxSafeVF))
+    return UserVF;
+
+  LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
+                    << " is unsafe, clamping to max safe VF=" << MaxSafeVF
+                    << ".\n");
+  ORE->emit([&]() {
+    return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
+                                      TheLoop->getStartLoc(),
+                                      TheLoop->getHeader())
+           << "User-specified vectorization factor "
+           << ore::NV("UserVectorizationFactor", UserVF)
+           << " is unsafe, clamping to maximum safe vectorization factor "
+           << ore::NV("VectorizationFactor", MaxSafeVF);
+  });
+  return MaxSafeVF;
+}
+
+ElementCount LoopVectorizationCostModel::computeFeasibleMaxVF(
+    unsigned ConstTripCount, unsigned SmallestType, unsigned WidestType) {
+  // Get the maximum safe dependence distance in bits computed by LAA.
+  // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
+  // the memory access that is most restrictive (involved in the smallest
+  // dependence distance).
+  unsigned MaxSafeVectorWidthInBits = Legal->getMaxSafeVectorWidthInBits();
+
+  unsigned WidestRegister = TTI.getRegisterBitWidth(true);
   WidestRegister = std::min(WidestRegister, MaxSafeVectorWidthInBits);
 
   // Ensure MaxVF is a power of 2; the dependence distance bound may not be.