diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -267,7 +267,7 @@
   LoopVectorizationLegality *Legal;
 
   /// The profitability analysis.
-  LoopVectorizationCostModel &CM;
+  SmallVector<LoopVectorizationCostModel *> CMs;
 
   /// The interleaved access analysis.
   InterleavedAccessInfo &IAI;
@@ -292,16 +292,18 @@
   LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
                            const TargetTransformInfo *TTI,
                            LoopVectorizationLegality *Legal,
-                           LoopVectorizationCostModel &CM,
+                           ArrayRef<LoopVectorizationCostModel *> CMs,
                            InterleavedAccessInfo &IAI,
                            PredicatedScalarEvolution &PSE,
                            const LoopVectorizeHints &Hints,
                            OptimizationRemarkEmitter *ORE)
-      : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
-        PSE(PSE), Hints(Hints), ORE(ORE) {}
+      : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CMs(CMs),
+        IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
 
   /// Plan how to best vectorize, return the best VF and its cost, or
   /// std::nullopt if vectorization and interleaving should be avoided up front.
+  void plan(ElementCount UserVF, unsigned UserIC,
+            LoopVectorizationCostModel &CM);
   std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
 
   /// Use the VPlan-native path to plan how to best vectorize, return the best
@@ -331,7 +333,12 @@
         VPlans,
         [&](const std::pair<VPlanPtr,
                             SmallVector<std::pair<VectorizationFactor, bool>>>
-                &Plan) { return Plan.first->hasVF(VF); });
+                &Plan) {
+          return any_of(Plan.second,
+                        [VF](const std::pair<VectorizationFactor, bool> &P) {
+                          return P.first.Width == VF;
+                        });
+        });
   }
 
   /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
@@ -357,13 +364,14 @@
   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
   /// according to the information gathered by Legal when it checked if it is
   /// legal to vectorize the loop.
-  void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
+  void buildVPlans(ElementCount MinVF, ElementCount MaxVF,
+                   LoopVectorizationCostModel &CM);
 
 private:
   /// Build a VPlan according to the information gathered by Legal. \return a
   /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
   /// exclusive, possibly decreasing \p Range.End.
-  VPlanPtr buildVPlan(VFRange &Range);
+  VPlanPtr buildVPlan(VFRange &Range, LoopVectorizationCostModel &CM);
 
   /// Build a VPlan using VPRecipes according to the information gather by
   /// Legal. This method is only used for the legacy inner loop vectorizer.
@@ -371,13 +379,16 @@
   /// returned VPlan is valid for. If no VPlan can be built for the input range,
   /// set the largest included VF to the maximum VF for which no plan could be
   /// built.
-  std::optional<VPlanPtr> tryToBuildVPlanWithVPRecipes(
-      VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions);
+  std::optional<VPlanPtr>
+  tryToBuildVPlanWithVPRecipes(VFRange &Range,
+                               SmallPtrSetImpl<Instruction *> &DeadInstructions,
+                               LoopVectorizationCostModel &CM);
 
   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
   /// according to the information gathered by Legal when it checked if it is
   /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
-  void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
+  void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF,
+                                LoopVectorizationCostModel &CM);
 
   // Adjust the recipes for reductions. For in-loop reductions the chain of
   // instructions leading from the loop exit instr to the phi need to be
@@ -386,7 +397,8 @@
   // between the phi and live-out recipes when folding the tail.
   void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan,
                                   VPRecipeBuilder &RecipeBuilder,
-                                  ElementCount MinVF);
+                                  ElementCount MinVF,
+                                  LoopVectorizationCostModel &CM);
 
   std::optional<unsigned> getVScaleForTuning() const;
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5440,8 +5440,8 @@
 
   unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(OrigLoop);
 
-  if (!A.Width.isScalable() && !B.Width.isScalable() &&
-      CM.foldTailByMasking() && MaxTripCount) {
+  // TODO: Missing check for CM.foldTailByMasking here.
+  if (!A.Width.isScalable() && !B.Width.isScalable() && MaxTripCount) {
     // If we are folding the tail and the trip count is a known (possibly small)
     // constant, the trip count will be rounded up to an integer number of
     // iterations. The total cost will be PerIterationCost*ceil(TripCount/VF),
@@ -5489,7 +5489,8 @@
   return TTI->getVScaleForTuning();
 }
 VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
-  assert(!VPlans.empty());
+  if (VPlans.empty())
+    return VectorizationFactor::Disabled();
 
   InstructionCost ExpectedCost = VPlans[0].second[0].first.ScalarCost;
   const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost,
@@ -5508,10 +5509,6 @@
 
   // Filter out unprofitable VFs.
   for (auto &[Plan, VFsAndCosts] : VPlans) {
-    erase_if(VFsAndCosts, [this, &ChosenFactor](const auto &Candidate) {
-      return !isMoreProfitable(Candidate.first, ChosenFactor);
-    });
-
     for (const auto &[Candidate, HasVec] : VFsAndCosts) {
       if (Candidate.Width.isScalar())
         continue;
@@ -5602,11 +5599,12 @@
     return Result;
   }
 
-  if (!CM.isScalarEpilogueAllowed()) {
-    LLVM_DEBUG(dbgs() << "LEV: Unable to vectorize epilogue because no "
-                         "epilogue is allowed.\n");
-    return Result;
-  }
+  // TODO: check whether scalar epilogue is allowed.
+  /*  if (!CM.isScalarEpilogueAllowed()) {*/
+  /*LLVM_DEBUG(dbgs() << "LEV: Unable to vectorize epilogue because no "*/
+  /*"epilogue is allowed.\n");*/
+  /*return Result;*/
+  /*}*/
 
   // Not really a cost consideration, but check for unsupported cases here to
   // simplify the logic.
@@ -7511,7 +7509,7 @@
       VF = ElementCount::getFixed(determineVPlanVF(
           TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
               .getFixedValue(),
-          CM));
+          *CMs[0]));
       LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
 
       // Make sure we have a VF > 1 for stress testing.
@@ -7526,7 +7524,7 @@
            "VF needs to be a power of two");
     LLVM_DEBUG(dbgs() << "LV: Using " << (!UserVF.isZero() ? "user " : "")
                       << "VF " << VF << " to build VPlans.\n");
-    buildVPlans(VF, VF);
+    buildVPlans(VF, VF, *CMs[0]);
 
     // For VPlan build stress testing, we bail out after VPlan construction.
     if (VPlanBuildStressTest)
@@ -7543,13 +7541,39 @@
 
 std::optional<VectorizationFactor>
 LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
+  for (LoopVectorizationCostModel *CM : CMs)
+    plan(UserVF, UserIC, *CM);
+
+  if (VPlans.empty())
+    return std::nullopt;
+
+  if (UserVF && hasPlanWithVF(UserVF)) {
+    LLVM_DEBUG(dbgs() << "LV: Selecting User VF: " << UserVF << ".\n");
+    return {{UserVF, 0, 0}};
+  }
+  // Select the optimal vectorization factor.
+  VectorizationFactor VF = selectVectorizationFactor();
+  assert((VF.Width.isScalar() || VF.ScalarCost > 0) &&
+         "when vectorizing, the scalar cost must be non-zero.");
+
+  if (!hasPlanWithVF(VF.Width)) {
+    LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width
+                      << ".\n");
+    return std::nullopt;
+  }
+
+  return VF;
+}
+
+void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC,
+                                    LoopVectorizationCostModel &CM) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
   CM.collectValuesToIgnore();
   CM.collectElementTypesForWidening();
 
   FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC);
   if (!MaxFactors) // Cases that should not to be vectorized nor interleaved.
-    return std::nullopt;
+    return;
 
   // Invalidate interleave groups if all blocks of loop will be predicated.
   if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) &&
@@ -7576,15 +7600,13 @@
     if (CM.selectUserVectorizationFactor(UserVF)) {
       LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
       CM.collectInLoopReductions();
-      buildVPlansWithVPRecipes(UserVF, UserVF);
+      buildVPlansWithVPRecipes(UserVF, UserVF, CM);
       if (!hasPlanWithVF(UserVF)) {
         LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << UserVF
                           << ".\n");
-        return std::nullopt;
-      }
-
-      LLVM_DEBUG(printPlans(dbgs()));
-      return {{UserVF, 0, 0}};
+      } else
+        LLVM_DEBUG(printPlans(dbgs()));
+      return;
     } else
       reportVectorizationInfo("UserVF ignored because of invalid costs.",
                               "InvalidCost", ORE, OrigLoop);
@@ -7605,27 +7627,17 @@
 
     // Collect the instructions (and their associated costs) that will be more
     // profitable to scalarize.
-    if (VF.isVector())
+    if (VF.isVector()) {
       CM.collectInstsToScalarize(VF);
+    }
   }
 
   CM.collectInLoopReductions();
-  buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxFactors.FixedVF);
-  buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF);
+  buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxFactors.FixedVF, CM);
+  buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF,
+                           CM);
 
   LLVM_DEBUG(printPlans(dbgs()));
-  if (!MaxFactors.hasVector())
-    return VectorizationFactor::Disabled();
-
-  // Select the optimal vectorization factor.
-  VectorizationFactor VF = selectVectorizationFactor();
-  assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
-  if (!hasPlanWithVF(VF.Width)) {
-    LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width
-                      << ".\n");
-    return std::nullopt;
-  }
-  return VF;
 }
 
 VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
@@ -8086,11 +8098,12 @@
 /// vectorization decision can potentially shorten this sub-range during
 /// buildVPlan().
 void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
-                                           ElementCount MaxVF) {
+                                           ElementCount MaxVF,
+                                           LoopVectorizationCostModel &CM) {
   auto MaxVFTimes2 = MaxVF * 2;
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
     VFRange SubRange = {VF, MaxVFTimes2};
-    VPlans.emplace_back(buildVPlan(SubRange),
+    VPlans.emplace_back(buildVPlan(SubRange, CM),
                         SmallVector<std::pair<VectorizationFactor, bool>>());
     VF = SubRange.End;
   }
@@ -8696,8 +8709,8 @@
   return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
 }
 
-void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
-                                                        ElementCount MaxVF) {
+void LoopVectorizationPlanner::buildVPlansWithVPRecipes(
+    ElementCount MinVF, ElementCount MaxVF, LoopVectorizationCostModel &CM) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
 
   // Add assume instructions we need to drop to DeadInstructions, to prevent
@@ -8714,7 +8727,7 @@
   auto MaxVFTimes2 = MaxVF * 2;
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
     VFRange SubRange = {VF, MaxVFTimes2};
-    auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, DeadInstructions);
+    auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, DeadInstructions, CM);
     if (!Plan) {
       VF = SubRange.End;
       continue;
@@ -8723,6 +8736,8 @@
     for (ElementCount CostVF = VF;
          ElementCount::isKnownLT(CostVF, SubRange.End); CostVF *= 2) {
       auto [VecCost, IsVec] = CM.expectedCost(CostVF, &InvalidCosts);
+      if (!VecCost.isValid())
+        continue;
       Costs.emplace_back(VectorizationFactor(CostVF, VecCost, ScalarCost),
                          IsVec);
 #ifndef NDEBUG
@@ -8880,7 +8895,8 @@
 }
 
 std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
-    VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions) {
+    VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
+    LoopVectorizationCostModel &CM) {
 
   SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
 
@@ -8914,7 +8930,7 @@
   // placeholders for its members' Recipes which we'll be replacing with a
   // single VPInterleaveRecipe.
   for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
-    auto applyIG = [IG, this](ElementCount VF) -> bool {
+    auto applyIG = [IG, &CM](ElementCount VF) -> bool {
       return (VF.isVector() && // Query is illegal for VF == 1
               CM.getWideningDecision(IG->getInsertPos(), VF) ==
                   LoopVectorizationCostModel::CM_Interleave);
@@ -9061,7 +9077,7 @@
 
   // Adjust the recipes for any inloop reductions.
   adjustRecipesForReductions(cast<VPBasicBlock>(TopRegion->getExiting()), Plan,
-                             RecipeBuilder, Range.Start);
+                             RecipeBuilder, Range.Start, CM);
 
   // Sink users of fixed-order recurrence past the recipe defining the previous
   // value and introduce FirstOrderRecurrenceSplice VPInstructions.
@@ -9121,7 +9137,8 @@
   return std::make_optional(std::move(Plan));
 }
 
-VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
+VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range,
+                                              LoopVectorizationCostModel &CM) {
   // Outer loop handling: They may require CFG and instruction level
   // transformations before even evaluating whether vectorization is profitable.
   // Since we cannot modify the incoming IR, we need to build VPlan upfront in
@@ -9163,7 +9180,7 @@
 // and live-out recipes when folding the tail.
 void LoopVectorizationPlanner::adjustRecipesForReductions(
     VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
-    ElementCount MinVF) {
+    ElementCount MinVF, LoopVectorizationCostModel &CM) {
   for (const auto &Reduction : CM.getInLoopReductionChains()) {
     PHINode *Phi = Reduction.first;
     const RecurrenceDescriptor &RdxDesc =
@@ -9922,7 +9939,8 @@
   // Use the planner for outer loop vectorization.
   // TODO: CM is not used at this point inside the planner. Turn CM into an
   // optional argument if we don't need it in the future.
-  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE, Hints, ORE);
+  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, ArrayRef(&CM), IAI, PSE,
+                               Hints, ORE);
 
   // Get user vectorization factor.
   ElementCount UserVF = Hints.getWidth();
@@ -10262,7 +10280,8 @@
   LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE,
                                 F, &Hints, IAI);
   // Use the planner for vectorization.
-  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE, Hints, ORE);
+  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, {&CM}, IAI, PSE, Hints,
+                               ORE);
 
   // Get user vectorization factor and interleave count.
   ElementCount UserVF = Hints.getWidth();