Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -194,6 +194,50 @@
   }
 };
 
+/// A class that can hold upto two possible vectorization factors, a fixed VF
+/// and a scalable VF. This can be used by the vectorizer to choose from a range
+/// of fixed and/or scalable VFs in order to find the most cost-effective VF to
+/// vectorize with.
+class MinMaxVFCandidates {
+  Optional<ElementCount> FixedVF;
+  Optional<ElementCount> ScalableVF;
+
+public:
+  /// Initializes both FixedVF/ScalableVF with None
+  MinMaxVFCandidates() : FixedVF({}), ScalableVF({}) {}
+
+  /// Initializes either FixedVF or ScalableVF with VF, the other with None.
+  MinMaxVFCandidates(const ElementCount &VF) : MinMaxVFCandidates() {
+    Optional<ElementCount> *LHSPtr = VF.isScalable() ? &ScalableVF : &FixedVF;
+    *LHSPtr = VF;
+  }
+
+  /// Initializes FixedVF and ScalableVF
+  MinMaxVFCandidates(const ElementCount &FixedVF,
+                     const ElementCount &ScalableVF)
+      : FixedVF(FixedVF), ScalableVF(ScalableVF) {}
+
+  static MinMaxVFCandidates None() { return MinMaxVFCandidates(); }
+
+  /// Assign VF to either FixedVF or ScalableVF, depending on whether VF
+  /// is fixed/scalable. If the VF value is already set, the value is
+  /// overwritten with the largest VF.
+  void addMaxVF(const ElementCount &VF) {
+    Optional<ElementCount> *LHSPtr =
+        VF.isScalable() ? &ScalableVF : &FixedVF;
+    if (LHSPtr->hasValue())
+      *LHSPtr = ElementCount::isKnownGT(**LHSPtr, VF) ? **LHSPtr : VF;
+    else
+      *LHSPtr = VF;
+  }
+
+  explicit operator bool() const { return FixedVF || ScalableVF; }
+  bool hasFixedVF() const { return FixedVF.hasValue(); }
+  bool hasScalableVF() const { return ScalableVF.hasValue(); }
+  Optional<ElementCount> getFixedVF() const { return FixedVF; }
+  Optional<ElementCount> getScalableVF() const { return ScalableVF; }
+};
+
 /// Planner drives the vectorization process after having passed
 /// Legality checks.
 class LoopVectorizationPlanner {
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1212,10 +1212,9 @@
       : ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal),
         TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F),
         Hints(Hints), InterleaveInfo(IAI) {}
-
   /// \return An upper bound for the vectorization factor, or None if
   /// vectorization and interleaving should be avoided up front.
-  Optional<ElementCount> computeMaxVF(ElementCount UserVF, unsigned UserIC);
+  MinMaxVFCandidates computeMaxVF(ElementCount UserVF, unsigned UserIC);
 
   /// \return True if runtime checks are required for vectorization, and false
   /// otherwise.
@@ -5545,8 +5544,8 @@
   return false;
 }
 
-Optional<ElementCount>
-LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
+MinMaxVFCandidates LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF,
+                                                            unsigned UserIC) {
   if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
     // TODO: It may by useful to do since it's still likely to be dynamically
     // uniform if the target can skip.
@@ -5554,7 +5553,7 @@
         "Not inserting runtime ptr check for divergent target",
         "runtime pointer checks needed. Not enabled for divergent target",
         "CantVersionLoopWithDivergentTarget", ORE, TheLoop);
-    return None;
+    return MinMaxVFCandidates::None();
   }
 
   unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
@@ -5563,10 +5562,10 @@
     reportVectorizationFailure("Single iteration (non) loop",
         "loop trip count is one, irrelevant for vectorization",
         "SingleIterationLoop", ORE, TheLoop);
-    return None;
+    return MinMaxVFCandidates::None();
   }
 
-  auto GetFeasibleMaxVF = [&]() -> ElementCount {
+  auto GetFeasibleMaxVF = [&]() -> MinMaxVFCandidates {
     MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
     unsigned SmallestType, WidestType;
     std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
@@ -5578,29 +5577,44 @@
     auto MaxSafeElements = ElementCount::getFixed(
         PowerOf2Floor(Legal->getMaxSafeVectorWidthInBits() / WidestType));
 
-    // Try to find a max scalable VF.
-    if (Hints->allowScalable() && TTI.supportsScalableVectors()) {
-      auto MaxSafeVF = clampFeasibleMaxVF(ElementCount::getScalable(1 << 16),
-                                          MaxSafeElements);
-      if (MaxSafeVF.isScalableVector()) {
-        ElementCount MaxScalableVF =
-            computeFeasibleMaxVF(TC, MaxSafeVF, SmallestType, WidestType);
-        if (MaxScalableVF.isScalable())
-          LLVM_DEBUG(dbgs() << "LV: Found feasible scalable VF = "
-                            << MaxScalableVF << "\n");
-        else
-          LLVM_DEBUG(dbgs() << "LV: No feasible scalable VF found.\n");
-      }
-    }
+    MinMaxVFCandidates Result;
+
+    // Always make sure to add at least a max fixed-VF of 1 (scalar) as a
+    // fallback when vectorization is not possible.
+    Result.addMaxVF(ElementCount::getFixed(1));
 
     // First analyze the UserVF, fall back if the UserVF should be ignored.
     if (auto MaybeMaxVF = getFeasibleUserVF(UserVF, MaxSafeElements))
-      return MaybeMaxVF.getValue();
+      Result.addMaxVF(*MaybeMaxVF);
+    else {
+      // If we don't force the use of scalable vectors or if the UserVF was
+      // ignored for some reason, we should consider auto-selecting a
+      // fixed-width VF.
+      if (!Hints->isForcedScalable() || UserVF.isNonZero()) {
+        auto MaxSafeVF = clampFeasibleMaxVF(ElementCount::getFixed(1 << 16),
+                                            MaxSafeElements);
+        Result.addMaxVF(
+            computeFeasibleMaxVF(TC, MaxSafeVF, SmallestType, WidestType));
+      }
+
+      // Try to find a max scalable VF as well if the target supports it.
+      if (Hints->allowScalable() && TTI.supportsScalableVectors()) {
+        auto MaxSafeVF = clampFeasibleMaxVF(ElementCount::getScalable(1 << 16),
+                                            MaxSafeElements);
+        if (MaxSafeVF.isScalableVector()) {
+          ElementCount MaxScalableVF =
+              computeFeasibleMaxVF(TC, MaxSafeVF, SmallestType, WidestType);
+          if (MaxScalableVF.isScalable())
+            LLVM_DEBUG(dbgs() << "LV: Found feasible scalable VF = "
+                              << MaxScalableVF << "\n");
+          else
+            LLVM_DEBUG(dbgs() << "LV: No feasible scalable VF found.\n");
+          Result.addMaxVF(MaxScalableVF);
+        }
+      }
+    }
 
-    // Try to automatically determine a suitable maximum VF.
-    auto MaxSafeVF =
-        clampFeasibleMaxVF(ElementCount::getFixed(1 << 16), MaxSafeElements);
-    return computeFeasibleMaxVF(TC, MaxSafeVF, SmallestType, WidestType);
+    return Result;
   };
 
   switch (ScalarEpilogueStatus) {
@@ -5627,7 +5641,7 @@
     // Bail if runtime checks are required, which are not good when optimising
     // for size.
     if (runtimeChecksRequired())
-      return None;
+      return MinMaxVFCandidates::None();
 
     break;
   }
@@ -5645,7 +5659,7 @@
       ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
       return GetFeasibleMaxVF();
     }
-    return None;
+    return MinMaxVFCandidates::None();
   }
 
   // Now try the tail folding
@@ -5660,26 +5674,27 @@
     InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
   }
 
-  ElementCount MaxVF = GetFeasibleMaxVF();
-  assert(!MaxVF.isScalable() &&
-         "Scalable vectors do not yet support tail folding");
-  assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&
-         "MaxVF must be a power of 2");
-  unsigned MaxVFtimesIC =
-      UserIC ? MaxVF.getFixedValue() * UserIC : MaxVF.getFixedValue();
-  // Avoid tail folding if the trip count is known to be a multiple of any VF we
-  // chose.
-  ScalarEvolution *SE = PSE.getSE();
-  const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
-  const SCEV *ExitCount = SE->getAddExpr(
-      BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
-  const SCEV *Rem = SE->getURemExpr(
-      SE->applyLoopGuards(ExitCount, TheLoop),
-      SE->getConstant(BackedgeTakenCount->getType(), MaxVFtimesIC));
-  if (Rem->isZero()) {
-    // Accept MaxVF if we do not have a tail.
-    LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n");
-    return MaxVF;
+  MinMaxVFCandidates MaxFactors = GetFeasibleMaxVF();
+  if (MaxFactors.hasFixedVF() && !MaxFactors.hasScalableVF()) {
+    ElementCount MaxFixedVF = *MaxFactors.getFixedVF();
+    assert((UserVF.isNonZero() || isPowerOf2_32(MaxFixedVF.getFixedValue())) &&
+           "MaxFixedVF must be a power of 2");
+    unsigned MaxVFtimesIC = UserIC ? MaxFixedVF.getFixedValue() * UserIC
+                                   : MaxFixedVF.getFixedValue();
+    // Avoid tail folding if the trip count is known to be a multiple of any VF
+    // we chose.
+    ScalarEvolution *SE = PSE.getSE();
+    const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
+    const SCEV *ExitCount = SE->getAddExpr(
+        BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
+    const SCEV *Rem = SE->getURemExpr(
+        SE->applyLoopGuards(ExitCount, TheLoop),
+        SE->getConstant(BackedgeTakenCount->getType(), MaxVFtimesIC));
+    if (Rem->isZero()) {
+      // Accept MaxFixedVF if we do not have a tail.
+      LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n");
+      return MaxFactors;
+    }
   }
 
   // If we don't know the precise trip count, or if the trip count that we
@@ -5688,7 +5703,7 @@
   // FIXME: look for a smaller MaxVF that does divide TC rather than masking.
   if (Legal->prepareToFoldTailByMasking()) {
     FoldTailByMasking = true;
-    return MaxVF;
+    return MaxFactors;
   }
 
   // If there was a tail-folding hint/switch, but we can't fold the tail by
@@ -5697,12 +5712,12 @@
     LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
                          "scalar epilogue instead.\n");
     ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
-    return MaxVF;
+    return MaxFactors;
   }
 
   if (ScalarEpilogueStatus == CM_ScalarEpilogueNotAllowedUsePredicate) {
     LLVM_DEBUG(dbgs() << "LV: Can't fold tail by masking: don't vectorize\n");
-    return None;
+    return MinMaxVFCandidates::None();
   }
 
   if (TC == 0) {
@@ -5710,7 +5725,7 @@
         "Unable to calculate the loop count due to complex control flow",
         "unable to calculate the loop count due to complex control flow",
         "UnknownLoopCountComplexCFG", ORE, TheLoop);
-    return None;
+    return MinMaxVFCandidates::None();
   }
 
   reportVectorizationFailure(
@@ -5719,7 +5734,7 @@
       "Enable vectorization of this loop with '#pragma clang loop "
       "vectorize(enable)' when compiling with -Os/-Oz",
       "NoTailLoopWithOptForSize", ORE, TheLoop);
-  return None;
+  return MinMaxVFCandidates::None();
 }
 
 ElementCount LoopVectorizationCostModel::clampFeasibleMaxVF(
@@ -7775,8 +7790,8 @@
 Optional<VectorizationFactor>
 LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
-  Optional<ElementCount> MaybeMaxVF = CM.computeMaxVF(UserVF, UserIC);
-  if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved.
+  MinMaxVFCandidates MaxFactors = CM.computeMaxVF(UserVF, UserIC);
+  if (!MaxFactors) // Cases that should not to be vectorized nor interleaved.
     return None;
 
   // Invalidate interleave groups if all blocks of loop will be predicated.
@@ -7793,7 +7808,10 @@
       CM.invalidateCostModelingDecisions();
   }
 
-  ElementCount MaxVF = MaybeMaxVF.getValue();
+  ElementCount MaxVF =
+      (UserVF && UserVF.isScalable() && MaxFactors.hasScalableVF())
+          ? *MaxFactors.getScalableVF()
+          : *MaxFactors.getFixedVF();
   assert(MaxVF.isNonZero() && "MaxVF is zero.");
 
   bool UserVFIsLegal = ElementCount::isKnownLE(UserVF, MaxVF);