Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6229,15 +6229,6 @@
     return Result;
   }
 
-  // FIXME: This can be fixed for scalable vectors later, because at this stage
-  // the LoopVectorizer will only consider vectorizing a loop with scalable
-  // vectors when the loop has a hint to enable vectorization for a given VF.
-  if (MainLoopVF.isScalable()) {
-    LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization for scalable vectors not "
-                         "yet supported.\n");
-    return Result;
-  }
-
   // Not really a cost consideration, but check for unsupported cases here to
   // simplify the logic.
   if (!isCandidateForEpilogueVectorization(*TheLoop, MainLoopVF)) {
@@ -6250,7 +6241,7 @@
   if (EpilogueVectorizationForceVF > 1) {
     LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n";);
     ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF);
-    if (LVP.hasPlanWithVFs({MainLoopVF, ForcedEC}))
+    if (LVP.hasPlanWithVFs({ForcedEC}))
       return {ForcedEC, 0};
     else {
       LLVM_DEBUG(
@@ -6268,14 +6259,24 @@
     return Result;
   }
 
-  if (!isEpilogueVectorizationProfitable(MainLoopVF))
+  auto FixedMainLoopVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
+  if (MainLoopVF.isScalable())
+    LLVM_DEBUG(
+        dbgs() << "LEV: Epilogue vectorization using scalable vectors not "
+                  "yet supported. Converting to fixed-width (VF="
+               << FixedMainLoopVF << ") instead\n");
+
+  if (!isEpilogueVectorizationProfitable(FixedMainLoopVF)) {
+    LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "
+                         "this loop\n");
     return Result;
+  }
 
   for (auto &NextVF : ProfitableVFs)
-    if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
+    if (ElementCount::isKnownLT(NextVF.Width, FixedMainLoopVF) &&
         (Result.Width.getFixedValue() == 1 ||
          isMoreProfitable(NextVF, Result)) &&
-        LVP.hasPlanWithVFs({MainLoopVF, NextVF.Width}))
+        LVP.hasPlanWithVFs({NextVF.Width}))
       Result = NextVF;
 
   if (Result != VectorizationFactor::Disabled())
@@ -8393,7 +8394,9 @@
   OldInduction = Legal->getPrimaryInduction();
   Type *IdxTy = Legal->getWidestInductionType();
   Value *StartIdx = ConstantInt::get(IdxTy, 0);
-  Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF);
+
+  IRBuilder<> B(&*Lp->getLoopPreheader()->getFirstInsertionPt());
+  Value *Step = getRuntimeVF(B, IdxTy, VF * UF);
   Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
   EPI.VectorTripCount = CountRoundDown;
   Induction =
@@ -10385,7 +10388,6 @@
                              F->getParent()->getDataLayout());
     if (!VF.Width.isScalar() || IC > 1)
       Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate());
-    VPlanPtr &BestPlan = LVP.getBestPlanFor(VF.Width, IC);
 
     using namespace ore;
     if (!VectorizeLoop) {
@@ -10394,6 +10396,7 @@
       // interleave it.
       InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
                                  &CM, BFI, PSI, Checks);
+      VPlanPtr &BestPlan = LVP.getBestPlanFor(VF.Width, IC);
       LVP.executePlan(BestPlan, Unroller, DT);
 
       ORE->emit([&]() {
@@ -10440,6 +10443,7 @@
       } else {
         InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
                                &LVL, &CM, BFI, PSI, Checks);
+        VPlanPtr &BestPlan = LVP.getBestPlanFor(VF.Width, IC);
         LVP.executePlan(BestPlan, LB, DT);
         ++LoopsVectorized;
 
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -0,0 +1,59 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=0 --debug-only=loop-vectorize -S -scalable-vectorization=preferred 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=8 --debug-only=loop-vectorize -S -scalable-vectorization=preferred 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-force-VF=8 --debug-only=loop-vectorize -S -scalable-vectorization=preferred 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s --check-prefix=DEBUG-FORCED
+
+target triple = "aarch64-linux-gnu"
+
+; DEBUG: LV: Checking a loop in "f1"
+; DEBUG: LEV: Epilogue vectorization using scalable vectors not yet supported. Converting to fixed-width (VF=16) instead
+; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)
+; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1
+
+; DEBUG-FORCED: LV: Checking a loop in "f1"
+; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced.
+; DEBUG-FORCED: Create Skeleton for epilogue vectorized loop (first pass)
+; DEBUG-FORCED: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1
+
+define void @f1(i8* %A) #0 {
+; CHECK-LABEL: @f1(
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[NUM_ITS:%.*]] = mul i64 [[VSCALE]], 32
+; CHECK-NEXT:    [[MIN_IT_CHECK:%.*]] = icmp ult i64 1024, [[NUM_ITS]]
+; CHECK-NEXT:    br i1 [[MIN_IT_CHECK]], label %vec.epilog.ph, label %vector.ph
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[VSCALE1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[VEC_ITS1:%.*]] = mul i64 [[VSCALE1]], {{.*}}
+; CHECK-NEXT:    [[VSCALE2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[VEC_ITS2:%.*]] = mul i64 [[VSCALE2]], {{.*}}
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[VEC_ITS2]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[IDX_NXT:%.*]], %vector.body ]
+; CHECK:         store <vscale x 16 x i8>
+; CHECK:         store <vscale x 16 x i8>
+; CHECK:         [[IDX_NXT]] = add nuw i64 [[IDX]], [[VEC_ITS1]]
+; CHECK-NEXT:    {{%.*}} = icmp eq i64 [[IDX_NXT]], [[N_VEC]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK:         store <8 x i8>
+; CHECK:       for.body:
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %iv
+  store i8 1, i8* %arrayidx, align 1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp ne i64 %iv.next, 1024
+  br i1 %exitcond, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
Index: llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
+++ llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll
@@ -1,11 +1,12 @@
 ; REQUIRES: asserts
-; RUN: opt < %s  -passes='loop-vectorize' -force-vector-width=2 -force-target-supports-scalable-vectors=true -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize -S -scalable-vectorization=on 2>&1 | FileCheck %s
+; RUN: opt < %s  -passes='loop-vectorize' -force-target-supports-scalable-vectors=true -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize -S -scalable-vectorization=on 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
 
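-; Currently we cannot handle scalable vectorization factors.
+; The epilogue loop cannot use a scalable VF yet, so check that the forced
+; fixed-width epilogue VF is used when scalable vectorization is enabled.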
 ; CHECK: LV: Checking a loop in "f1"
-; CHECK: LEV: Epilogue vectorization for scalable vectors not yet supported.
+; CHECK: LEV: Epilogue vectorization factor is forced.
+; CHECK: Epilogue Loop VF:2, Epilogue Loop UF:1
 
 define void @f1(i8* %A) {
 entry: