diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -344,10 +344,14 @@
       VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
       const DenseMap<Instruction *, Instruction *> &SinkAfter);
 
-  /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
-  /// according to the information gathered by Legal when it checked if it is
-  /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
-  void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
+  /// Build VPlans for power-of-2 VF's for the ranges specified by Range1 and
+  /// Range2. according to the information gathered by Legal when it checked if
+  /// it is legal to vectorize the loop. This method creates VPlans using
+  /// VPRecipes. When the Maximum VF is set, the corresponding Minimum VF must
+  /// also be set.
+  void buildVPlansWithVPRecipes(VFRange Range1,
+                                VFRange Range2 = {ElementCount::getFixed(1),
+                                                  ElementCount::getFixed(1)});
 
   /// Adjust the recipes for any inloop reductions. The chain of instructions
   /// leading from the loop exit instr to the phi need to be converted to
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -432,6 +432,17 @@
   return None;
 }
 
+/// Generate power-of-2 VFs starting from the minimum VF up to and including
+/// maximum VF. For scalable VFs, it is only the minimum known value that is a
+/// power of 2, the eventual VF may not be if the runtime value for vscale is
+/// not also a power of two.
+static void genVFs(SmallVectorImpl<ElementCount> &List, const ElementCount &Min,
+                   const ElementCount &Max) {
+  assert((Min.isScalable() == Max.isScalable()) && "Scalable flags must match");
+  for (auto VF = Min; ElementCount::isKnownLE(VF, Max); VF *= 2)
+    List.push_back(VF);
+}
+
 // Forward declare GeneratedRTChecks.
 class GeneratedRTChecks;
 
@@ -1241,7 +1252,9 @@
   /// This method checks every power of two up to MaxVF. If UserVF is not ZERO
   /// then this vectorization factor will be selected if vectorization is
   /// possible.
-  VectorizationFactor selectVectorizationFactor(ElementCount MaxVF);
+  VectorizationFactor
+  selectVectorizationFactor(const MaxVFCandidates &MaxFactors);
+
   VectorizationFactor
   selectEpilogueVectorizationFactor(const ElementCount MaxVF,
                                     const LoopVectorizationPlanner &LVP);
@@ -1505,7 +1518,7 @@
 
   /// Returns true if the target machine supports all of the reduction
   /// variables found for the given VF.
-  bool canVectorizeReductions(ElementCount VF) {
+  bool canVectorizeReductions(ElementCount VF) const {
     return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
       RecurrenceDescriptor RdxDesc = Reduction.second;
       return TTI.isLegalToVectorizeReduction(RdxDesc, VF);
@@ -5973,6 +5986,14 @@
   InstructionCost::CostType CostA = *A.Cost.getValue();
   InstructionCost::CostType CostB = *B.Cost.getValue();
 
+  // When set to preferred, for now assume vscale may be larger than 1, so
+  // that scalable vectorization is slightly favorable over fixed-width
+  // vectorization.
+  if (Hints->isScalableVectorizationPreferred())
+    if (A.Width.isScalable() && !B.Width.isScalable())
+      return (CostA * B.Width.getKnownMinValue()) <=
+             (CostB * A.Width.getKnownMinValue());
+
   // To avoid the need for FP division:
   //      (CostA / A.Width) < (CostB / B.Width)
   // <=>  (CostA * B.Width) < (CostB * A.Width)
@@ -5980,13 +6001,8 @@
          (CostB * A.Width.getKnownMinValue());
 }
 
-VectorizationFactor
-LoopVectorizationCostModel::selectVectorizationFactor(ElementCount MaxVF) {
-  // FIXME: This can be fixed for scalable vectors later, because at this stage
-  // the LoopVectorizer will only consider vectorizing a loop with scalable
-  // vectors when the loop has a hint to enable vectorization for a given VF.
-  assert(!MaxVF.isScalable() && "scalable vectors not yet supported");
-
+VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
+    const MaxVFCandidates &MaxFactors) {
   InstructionCost ExpectedCost = expectedCost(ElementCount::getFixed(1)).first;
   LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
   assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop");
@@ -5995,15 +6011,20 @@
   VectorizationFactor ChosenFactor = ScalarCost;
 
   bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
-  if (ForceVectorization && MaxVF.isVector()) {
+  if (ForceVectorization && MaxFactors.hasVectorCandidates()) {
     // Ignore scalar width, because the user explicitly wants vectorization.
     // Initialize cost to max so that VF = 2 is, at least, chosen during cost
     // evaluation.
     ChosenFactor.Cost = std::numeric_limits<InstructionCost::CostType>::max();
   }
 
-  for (auto i = ElementCount::getFixed(2); ElementCount::isKnownLE(i, MaxVF);
-       i *= 2) {
+  // Iterate through all VFs, prioritising scalable vectors so they have a
+  // slight edge over fixed-width vectors when the cost is the same.
+  SmallVector<ElementCount> VFCandidates;
+  genVFs(VFCandidates, ElementCount::getScalable(1), MaxFactors.ScalableVF);
+  genVFs(VFCandidates, ElementCount::getFixed(2), MaxFactors.FixedVF);
+
+  for (const auto &i : VFCandidates) {
     // Notice that the vector loop needs to be executed less times, so
     // we need to divide the cost of the vector loops by the width of
     // the vector elements.
@@ -6011,10 +6032,10 @@
 
     assert(C.first.isValid() && "Unexpected invalid cost for vector loop");
     VectorizationFactor Candidate(i, C.first);
-    LLVM_DEBUG(
-        dbgs() << "LV: Vector loop of width " << i << " costs: "
-               << (*Candidate.Cost.getValue() / Candidate.Width.getFixedValue())
-               << ".\n");
+    LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: "
+                      << (*Candidate.Cost.getValue() /
+                          Candidate.Width.getKnownMinValue())
+                      << ".\n");
 
     if (!C.second && !ForceVectorization) {
       LLVM_DEBUG(
@@ -7927,17 +7948,15 @@
     // profitable to scalarize.
     CM.selectUserVectorizationFactor(UserVF);
     CM.collectInLoopReductions();
-    buildVPlansWithVPRecipes({UserVF}, {UserVF});
+    buildVPlansWithVPRecipes({UserVF, UserVF.getWithIncrement(1)});
     LLVM_DEBUG(printPlans(dbgs()));
     return {{UserVF, 0}};
   }
 
-  ElementCount MaxVF = MaxFactors.FixedVF;
-  assert(!MaxVF.isScalable() &&
-         "Scalable vectors not yet supported beyond this point");
-
-  for (ElementCount VF = ElementCount::getFixed(1);
-       ElementCount::isKnownLE(VF, MaxVF); VF *= 2) {
+  SmallVector<ElementCount> VFCandidates;
+  genVFs(VFCandidates, ElementCount::getFixed(1), MaxFactors.FixedVF);
+  genVFs(VFCandidates, ElementCount::getScalable(1), MaxFactors.ScalableVF);
+  for (auto &VF : VFCandidates) {
     // Collect Uniform and Scalar instructions after vectorization with VF.
     CM.collectUniformsAndScalars(VF);
 
@@ -7949,13 +7968,18 @@
 
   CM.collectInLoopReductions();
 
-  buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxVF);
+  VFRange FixedVFRange = {ElementCount::getFixed(1),
+                          MaxFactors.FixedVF.getWithIncrement(1)};
+  VFRange ScalableVFRange = {ElementCount::getScalable(1),
+                             MaxFactors.ScalableVF.getWithIncrement(1)};
+  buildVPlansWithVPRecipes(FixedVFRange, ScalableVFRange);
+
   LLVM_DEBUG(printPlans(dbgs()));
   if (!MaxFactors.hasVectorCandidates())
     return VectorizationFactor::Disabled();
 
   // Select the optimal vectorization factor.
-  auto SelectedVF = CM.selectVectorizationFactor(MaxVF);
+  auto SelectedVF = CM.selectVectorizationFactor(MaxFactors);
 
   // Check if it is profitable to vectorize with runtime checks.
   unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks();
@@ -8901,8 +8925,8 @@
   return toVPRecipeResult(tryToWiden(Instr, Operands));
 }
 
-void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
-                                                        ElementCount MaxVF) {
+void LoopVectorizationPlanner::buildVPlansWithVPRecipes(VFRange Range1,
+                                                        VFRange Range2) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
 
   // Collect instructions from the original loop that will become trivially dead
@@ -8926,13 +8950,17 @@
   for (Instruction *I : DeadInstructions)
     SinkAfter.erase(I);
 
-  auto MaxVFPlusOne = MaxVF.getWithIncrement(1);
-  for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) {
-    VFRange SubRange = {VF, MaxVFPlusOne};
-    VPlans.push_back(
-        buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter));
-    VF = SubRange.End;
-  }
+  auto CollectVPlans = [&](VFRange R) {
+    for (ElementCount VF = R.Start; ElementCount::isKnownLT(VF, R.End);) {
+      VFRange SubRange = {VF, R.End};
+      VPlans.push_back(
+          buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter));
+      VF = SubRange.End;
+    }
+  };
+
+  CollectVPlans(Range1);
+  CollectVPlans(Range2);
 }
 
 VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll
@@ -41,7 +41,7 @@
 for.inc:                                          ; preds = %for.body, %if.then
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %n
-  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !5
 }
 
 
@@ -90,3 +90,5 @@
 !2 = !{!"llvm.loop.vectorize.width", i32 4}
 !3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
 !4 = !{!"llvm.loop.vectorize.enable", i1 true}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.vectorize.scalable.enable", i1 false}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-analysis.ll
@@ -10,9 +10,13 @@
 define void @test0(i32* %a, i8* %b, i32* %c) {
 ; CHECK: LV: Checking a loop in "test0"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4.
 ; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4.
 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
+; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: vscale x 16.
 entry:
   br label %loop
 
@@ -39,9 +43,13 @@
 define void @test1(i32* %a, i8* %b) {
 ; CHECK: LV: Checking a loop in "test1"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4.
 ; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4.
 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
+; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16.
 entry:
   br label %loop
 
@@ -69,9 +77,13 @@
 define void @test2(i32* %a, i8* %b) {
 ; CHECK: LV: Checking a loop in "test2"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
+; CHECK_SCALABLE_ON: LV: Selecting VF: 4.
 ; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
+; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4.
 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
+; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16.
 entry:
   br label %loop
 
@@ -99,9 +111,13 @@
 define void @test3(i32* %a, i8* %b) {
 ; CHECK: LV: Checking a loop in "test3"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
+; CHECK_SCALABLE_ON: LV: Selecting VF: 4.
 ; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
+; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4.
 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
 ; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
+; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16.
 entry:
   br label %loop
 
@@ -124,15 +140,16 @@
   ret void
 }
 
-; Test the fallback mechanism when scalable vectors are not feasible due
-; to e.g. dependence distance. For the '-scalable-vectorization=exclusive'
-; it shouldn't try to vectorize with fixed-width vectors.
 define void @test4(i32* %a, i32* %b) {
 ; CHECK: LV: Checking a loop in "test4"
 ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_ON: LV: Selecting VF: 4.
 ; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4.
 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
 ; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
+; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 4.
 entry:
   br label %loop