diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -267,7 +267,7 @@
   LoopVectorizationLegality *Legal;
 
   /// The profitability analysis.
-  LoopVectorizationCostModel &CM;
+  SmallVector<LoopVectorizationCostModel *> CMs;
 
   /// The interleaved access analysis.
   InterleavedAccessInfo &IAI;
@@ -292,16 +292,18 @@
   LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
                            const TargetTransformInfo *TTI,
                            LoopVectorizationLegality *Legal,
-                           LoopVectorizationCostModel &CM,
+                           ArrayRef<LoopVectorizationCostModel *> CMs,
                            InterleavedAccessInfo &IAI,
                            PredicatedScalarEvolution &PSE,
                            const LoopVectorizeHints &Hints,
                            OptimizationRemarkEmitter *ORE)
-      : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
-        PSE(PSE), Hints(Hints), ORE(ORE) {}
+      : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CMs(CMs),
+        IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
 
   /// Plan how to best vectorize, return the best VF and its cost, or
   /// std::nullopt if vectorization and interleaving should be avoided up front.
+  void plan(ElementCount UserVF, unsigned UserIC,
+            LoopVectorizationCostModel &CM);
   std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
 
   /// Use the VPlan-native path to plan how to best vectorize, return the best
@@ -331,7 +333,12 @@
         VPlans,
         [&](const std::pair<VPlanPtr,
                             SmallVector<std::pair<VectorizationFactor, bool>>>
-                &Plan) { return Plan.first->hasVF(VF); });
+                &Plan) {
+          return any_of(Plan.second,
+                        [VF](const std::pair<VectorizationFactor, bool> &P) {
+                          return P.first.Width == VF;
+                        });
+        });
   }
 
   /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
@@ -357,24 +364,27 @@
   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
   /// according to the information gathered by Legal when it checked if it is
   /// legal to vectorize the loop.
-  void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
+  void buildVPlans(ElementCount MinVF, ElementCount MaxVF,
+                   LoopVectorizationCostModel &CM);
 
 private:
   /// Build a VPlan according to the information gathered by Legal. \return a
   /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
   /// exclusive, possibly decreasing \p Range.End.
-  VPlanPtr buildVPlan(VFRange &Range);
+  VPlanPtr buildVPlan(VFRange &Range, LoopVectorizationCostModel &CM);
 
   /// Build a VPlan using VPRecipes according to the information gather by
   /// Legal. This method is only used for the legacy inner loop vectorizer.
   VPlanPtr
   buildVPlanWithVPRecipes(VFRange &Range,
-                          SmallPtrSetImpl<Instruction *> &DeadInstructions);
+                          SmallPtrSetImpl<Instruction *> &DeadInstructions,
+                          LoopVectorizationCostModel &CM);
 
   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
   /// according to the information gathered by Legal when it checked if it is
   /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
-  void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
+  void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF,
+                                LoopVectorizationCostModel &CM);
 
   // Adjust the recipes for reductions. For in-loop reductions the chain of
   // instructions leading from the loop exit instr to the phi need to be
@@ -383,7 +393,8 @@
   // between the phi and live-out recipes when folding the tail.
   void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan,
                                   VPRecipeBuilder &RecipeBuilder,
-                                  ElementCount MinVF);
+                                  ElementCount MinVF,
+                                  LoopVectorizationCostModel &CM);
 
   std::optional<unsigned> getVScaleForTuning() const;
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5398,8 +5398,8 @@
 
   unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(OrigLoop);
 
-  if (!A.Width.isScalable() && !B.Width.isScalable() &&
-      CM.foldTailByMasking() && MaxTripCount) {
+  // TODO: Missing check for CM.foldTailByMasking here.
+  if (!A.Width.isScalable() && !B.Width.isScalable() && MaxTripCount) {
     // If we are folding the tail and the trip count is a known (possibly small)
     // constant, the trip count will be rounded up to an integer number of
     // iterations. The total cost will be PerIterationCost*ceil(TripCount/VF),
@@ -5447,7 +5447,8 @@
   return TTI->getVScaleForTuning();
 }
 VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
-  assert(!VPlans.empty());
+  if (VPlans.empty())
+    return VectorizationFactor::Disabled();
 
   InstructionCost ExpectedCost = VPlans[0].second[0].first.ScalarCost;
   const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost,
@@ -5466,10 +5467,6 @@
 
   // Filter out unprofitable VFs.
   for (auto &[Plan, VFsAndCosts] : VPlans) {
-    erase_if(VFsAndCosts, [this, &ChosenFactor](const auto &Candidate) {
-      return !isMoreProfitable(Candidate.first, ChosenFactor);
-    });
-
     for (const auto &[Candidate, HasVec] : VFsAndCosts) {
       if (Candidate.Width.isScalar())
         continue;
@@ -5558,12 +5555,13 @@
     return Result;
   }
 
-  if (!CM.isScalarEpilogueAllowed()) {
-    LLVM_DEBUG(
-        dbgs() << "LEV: Unable to vectorize epilogue because no epilogue is "
-                  "allowed.\n";);
-    return Result;
-  }
+  // TODO: check whether scalar epilogue is allowed.
+  /*  if (!CM.isScalarEpilogueAllowed()) {*/
+  /*LLVM_DEBUG(*/
+  /*dbgs() << "LEV: Unable to vectorize epilogue because no epilogue is "*/
+  /*"allowed.\n";);*/
+  /*return Result;*/
+  /*}*/
 
   // Not really a cost consideration, but check for unsupported cases here to
   // simplify the logic.
@@ -7470,7 +7468,7 @@
       VF = ElementCount::getFixed(determineVPlanVF(
           TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
               .getFixedValue(),
-          CM));
+          *CMs[0]));
       LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
 
       // Make sure we have a VF > 1 for stress testing.
@@ -7485,7 +7483,7 @@
            "VF needs to be a power of two");
     LLVM_DEBUG(dbgs() << "LV: Using " << (!UserVF.isZero() ? "user " : "")
                       << "VF " << VF << " to build VPlans.\n");
-    buildVPlans(VF, VF);
+    buildVPlans(VF, VF, *CMs[0]);
 
     // For VPlan build stress testing, we bail out after VPlan construction.
     if (VPlanBuildStressTest)
@@ -7502,13 +7500,32 @@
 
 std::optional<VectorizationFactor>
 LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
+  for (LoopVectorizationCostModel *CM : CMs)
+    plan(UserVF, UserIC, *CM);
+
+  if (VPlans.empty())
+    return std::nullopt;
+
+  if (UserVF && hasPlanWithVF(UserVF)) {
+    LLVM_DEBUG(dbgs() << "LV: Selecting User VF: " << UserVF << ".\n");
+    return {{UserVF, 0, 0}};
+  }
+  // Select the optimal vectorization factor.
+  VectorizationFactor VF = selectVectorizationFactor();
+  assert((VF.Width.isScalar() || VF.ScalarCost > 0) &&
+         "when vectorizing, the scalar cost must be non-zero.");
+  return VF;
+}
+
+void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC,
+                                    LoopVectorizationCostModel &CM) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
   CM.collectValuesToIgnore();
   CM.collectElementTypesForWidening();
 
   FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC);
   if (!MaxFactors) // Cases that should not to be vectorized nor interleaved.
-    return std::nullopt;
+    return;
 
   // Invalidate interleave groups if all blocks of loop will be predicated.
   if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) &&
@@ -7535,9 +7552,9 @@
     if (CM.selectUserVectorizationFactor(UserVF)) {
       LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
       CM.collectInLoopReductions();
-      buildVPlansWithVPRecipes(UserVF, UserVF);
+      buildVPlansWithVPRecipes(UserVF, UserVF, CM);
       LLVM_DEBUG(printPlans(dbgs()));
-      return {{UserVF, 0, 0}};
+      return;
     } else
       reportVectorizationInfo("UserVF ignored because of invalid costs.",
                               "InvalidCost", ORE, OrigLoop);
@@ -7558,81 +7575,17 @@
 
     // Collect the instructions (and their associated costs) that will be more
     // profitable to scalarize.
-    if (VF.isVector())
+    if (VF.isVector()) {
       CM.collectInstsToScalarize(VF);
+    }
   }
 
   CM.collectInLoopReductions();
-  buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxFactors.FixedVF);
-  buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF);
+  buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxFactors.FixedVF, CM);
+  buildVPlansWithVPRecipes(ElementCount::getScalable(1), MaxFactors.ScalableVF,
+                           CM);
 
   LLVM_DEBUG(printPlans(dbgs()));
-  if (!MaxFactors.hasVector())
-    return VectorizationFactor::Disabled();
-
-  // Emit a report of VFs with invalid costs in the loop.
-  if (!InvalidCosts.empty()) {
-    // Group the remarks per instruction, keeping the instruction order from
-    // InvalidCosts.
-    std::map<Instruction *, unsigned> Numbering;
-    unsigned I = 0;
-    for (auto &Pair : InvalidCosts)
-      if (!Numbering.count(Pair.first))
-        Numbering[Pair.first] = I++;
-
-    // Sort the list, first on instruction(number) then on VF.
-    llvm::sort(InvalidCosts,
-               [&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
-                 if (Numbering[A.first] != Numbering[B.first])
-                   return Numbering[A.first] < Numbering[B.first];
-                 ElementCountComparator ECC;
-                 return ECC(A.second, B.second);
-               });
-
-    // For a list of ordered instruction-vf pairs:
-    //   [(load, vf1), (load, vf2), (store, vf1)]
-    // Group the instructions together to emit separate remarks for:
-    //   load  (vf1, vf2)
-    //   store (vf1)
-    auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts);
-    auto Subset = ArrayRef<InstructionVFPair>();
-    do {
-      if (Subset.empty())
-        Subset = Tail.take_front(1);
-
-      Instruction *I = Subset.front().first;
-
-      // If the next instruction is different, or if there are no other pairs,
-      // emit a remark for the collated subset. e.g.
-      //   [(load, vf1), (load, vf2))]
-      // to emit:
-      //  remark: invalid costs for 'load' at VF=(vf, vf2)
-      if (Subset == Tail || Tail[Subset.size()].first != I) {
-        std::string OutString;
-        raw_string_ostream OS(OutString);
-        assert(!Subset.empty() && "Unexpected empty range");
-        OS << "Instruction with invalid costs prevented vectorization at VF=(";
-        for (const auto &Pair : Subset)
-          OS << (Pair.second == Subset.front().second ? "" : ", ")
-             << Pair.second;
-        OS << "):";
-        if (auto *CI = dyn_cast<CallInst>(I))
-          OS << " call to " << CI->getCalledFunction()->getName();
-        else
-          OS << " " << I->getOpcodeName();
-        OS.flush();
-        reportVectorizationInfo(OutString, "InvalidCost", ORE, OrigLoop, I);
-        Tail = Tail.drop_front(Subset.size());
-        Subset = {};
-      } else
-        // Grow the subset by one element
-        Subset = Tail.take_front(Subset.size() + 1);
-    } while (!Tail.empty());
-  }
-  // Select the optimal vectorization factor.
-  VectorizationFactor VF = selectVectorizationFactor();
-  assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
-  return VF;
 }
 
 VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
@@ -8094,11 +8047,12 @@
 /// vectorization decision can potentially shorten this sub-range during
 /// buildVPlan().
 void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
-                                           ElementCount MaxVF) {
+                                           ElementCount MaxVF,
+                                           LoopVectorizationCostModel &CM) {
   auto MaxVFPlusOne = MaxVF.getWithIncrement(1);
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) {
     VFRange SubRange = {VF, MaxVFPlusOne};
-    VPlans.emplace_back(buildVPlan(SubRange),
+    VPlans.emplace_back(buildVPlan(SubRange, CM),
                         SmallVector<std::pair<VectorizationFactor, bool>>());
     VF = SubRange.End;
   }
@@ -8709,8 +8663,8 @@
   return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
 }
 
-void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
-                                                        ElementCount MaxVF) {
+void LoopVectorizationPlanner::buildVPlansWithVPRecipes(
+    ElementCount MinVF, ElementCount MaxVF, LoopVectorizationCostModel &CM) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
 
   // Add assume instructions we need to drop to DeadInstructions, to prevent
@@ -8727,11 +8681,13 @@
   auto MaxVFPlusOne = MaxVF.getWithIncrement(1);
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) {
     VFRange SubRange = {VF, MaxVFPlusOne};
-    auto Plan = buildVPlanWithVPRecipes(SubRange, DeadInstructions);
+    auto Plan = buildVPlanWithVPRecipes(SubRange, DeadInstructions, CM);
     SmallVector<std::pair<VectorizationFactor, bool>> Costs;
     for (ElementCount CostVF = VF;
          ElementCount::isKnownLT(CostVF, SubRange.End); CostVF *= 2) {
       auto [VecCost, IsVec] = CM.expectedCost(CostVF, &InvalidCosts);
+      if (!VecCost.isValid())
+        continue;
       Costs.emplace_back(VectorizationFactor(CostVF, VecCost, ScalarCost),
                          IsVec);
 #ifndef NDEBUG
@@ -8889,7 +8845,8 @@
 }
 
 VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
-    VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions) {
+    VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
+    LoopVectorizationCostModel &CM) {
 
   SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
 
@@ -8923,7 +8880,7 @@
   // placeholders for its members' Recipes which we'll be replacing with a
   // single VPInterleaveRecipe.
   for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
-    auto applyIG = [IG, this](ElementCount VF) -> bool {
+    auto applyIG = [IG, &CM](ElementCount VF) -> bool {
       return (VF.isVector() && // Query is illegal for VF == 1
               CM.getWideningDecision(IG->getInsertPos(), VF) ==
                   LoopVectorizationCostModel::CM_Interleave);
@@ -9066,7 +9023,7 @@
 
   // Adjust the recipes for any inloop reductions.
   adjustRecipesForReductions(cast<VPBasicBlock>(TopRegion->getExiting()), Plan,
-                             RecipeBuilder, Range.Start);
+                             RecipeBuilder, Range.Start, CM);
 
   // Sink users of fixed-order recurrence past the recipe defining the previous
   // value and introduce FirstOrderRecurrenceSplice VPInstructions.
@@ -9124,7 +9081,8 @@
   return Plan;
 }
 
-VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
+VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range,
+                                              LoopVectorizationCostModel &CM) {
   // Outer loop handling: They may require CFG and instruction level
   // transformations before even evaluating whether vectorization is profitable.
   // Since we cannot modify the incoming IR, we need to build VPlan upfront in
@@ -9167,7 +9125,7 @@
 // and live-out recipes when folding the tail.
 void LoopVectorizationPlanner::adjustRecipesForReductions(
     VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
-    ElementCount MinVF) {
+    ElementCount MinVF, LoopVectorizationCostModel &CM) {
   for (const auto &Reduction : CM.getInLoopReductionChains()) {
     PHINode *Phi = Reduction.first;
     const RecurrenceDescriptor &RdxDesc =
@@ -9927,7 +9885,8 @@
   // Use the planner for outer loop vectorization.
   // TODO: CM is not used at this point inside the planner. Turn CM into an
   // optional argument if we don't need it in the future.
-  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE, Hints, ORE);
+  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, ArrayRef(&CM), IAI, PSE,
+                               Hints, ORE);
 
   // Get user vectorization factor.
   ElementCount UserVF = Hints.getWidth();
@@ -10267,7 +10226,8 @@
   LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE,
                                 F, &Hints, IAI);
   // Use the planner for vectorization.
-  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE, Hints, ORE);
+  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, {&CM}, IAI, PSE, Hints,
+                               ORE);
 
   // Get user vectorization factor and interleave count.
   ElementCount UserVF = Hints.getWidth();