diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -304,7 +304,7 @@
   /// Build a VPlan according to the information gathered by Legal. \return a
   /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
   /// exclusive, possibly decreasing \p Range.End.
-  VPlanPtr buildVPlan(VFRange &Range);
+  VPlanPtr buildVPlan(const VFRange &Range);
 
   /// Build a VPlan using VPRecipes according to the information gather by
   /// Legal. This method is only used for the legacy inner loop vectorizer.
@@ -312,10 +312,17 @@
       VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
       const DenseMap<Instruction *, Instruction *> &SinkAfter);
 
+  /// Returns a new VPlan based on \p OriginalPlan, but with all VPInstruction
+  /// in \p OriginalPlan lowered to widened recipes.
+  VPlanPtr
+  convertToVPRecipes(VPlan &OriginalPlan, VFRange &Range,
+                     const DenseMap<Instruction *, Instruction *> &SinkAfter);
+
   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
   /// according to the information gathered by Legal when it checked if it is
   /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
-  void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
+  void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF,
+                                VPlan &OriginalPlan);
 
   /// Adjust the recipes for any inloop reductions. The chain of instructions
   /// leading from the loop exit instr to the phi need to be converted to
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7775,7 +7775,8 @@
     // profitable to scalarize.
     CM.selectUserVectorizationFactor(VF);
     CM.collectInLoopReductions();
-    buildVPlansWithVPRecipes(VF, VF);
+    auto InitialPlan = buildVPlan({VF, VF});
+    buildVPlansWithVPRecipes(VF, VF, *InitialPlan);
     LLVM_DEBUG(printPlans(dbgs()));
     return {{VF, 0}};
   }
@@ -7796,7 +7797,8 @@
 
   CM.collectInLoopReductions();
 
-  buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxVF);
+  auto InitialPlan = buildVPlan({ElementCount::getFixed(1), MaxVF});
+  buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxVF, *InitialPlan);
   LLVM_DEBUG(printPlans(dbgs()));
   if (MaxVF.isScalar())
     return VectorizationFactor::Disabled();
@@ -8290,10 +8292,31 @@
 /// buildVPlan().
 void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
                                            ElementCount MaxVF) {
+  // Outer loop handling: They may require CFG and instruction level
+  // transformations before even evaluating whether vectorization is profitable.
+  // Since we cannot modify the incoming IR, we need to build VPlan upfront in
+  // the vectorization pipeline.
+  assert(!OrigLoop->isInnermost());
+  assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
+
   auto MaxVFPlusOne = MaxVF.getWithIncrement(1);
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) {
     VFRange SubRange = {VF, MaxVFPlusOne};
-    VPlans.push_back(buildVPlan(SubRange));
+    auto Plan = buildVPlan(SubRange);
+
+    if (EnableVPlanPredication) {
+      VPlanPredicator VPP(*Plan);
+      VPP.predicate();
+      // Avoid running transformation to recipes until masked code generation in
+      // VPlan-native path is in place.
+    } else {
+      SmallPtrSet<Instruction *, 1> DeadInstructions;
+      VPlanTransforms::VPInstructionsToVPRecipes(
+          OrigLoop, Plan, Legal->getInductionVars(), DeadInstructions,
+          *PSE.getSE());
+    }
+
+    VPlans.emplace_back(std::move(Plan));
     VF = SubRange.End;
   }
 }
@@ -8761,9 +8784,31 @@
 }
 
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
-                                                        ElementCount MaxVF) {
+                                                        ElementCount MaxVF,
+                                                        VPlan &OriginalPlan) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
 
+  // Remove the loop exit block and pre-header/entry block, to simplify the code
+  // dealing with VPlans for inner loops, as those blocks are not needed at the
+  // moment.
+  VPBasicBlock *Exit =
+      cast<VPBasicBlock>(OriginalPlan.getEntry()->getExitBasicBlock());
+  SmallVector<VPBlockBase *, 2> Preds(Exit->getPredecessors().begin(),
+                                      Exit->getPredecessors().end());
+  for (VPBlockBase *Pred : Preds)
+    VPBlockUtils::disconnectBlocks(Pred, Exit);
+  delete Exit;
+
+  auto *TopRegion = cast<VPRegionBlock>(OriginalPlan.getEntry());
+  VPBasicBlock *PreEntry = cast<VPBasicBlock>(TopRegion->getEntryBasicBlock());
+  assert(PreEntry->empty() && "Expecting empty pre-entry block.");
+  auto *NewEntry = PreEntry->getSingleSuccessor();
+  VPBlockUtils::disconnectBlocks(PreEntry, NewEntry);
+  assert(NewEntry->getNumPredecessors() == 1);
+  VPBlockUtils::disconnectBlocks(NewEntry->getPredecessors()[0], NewEntry);
+  TopRegion->setEntry(NewEntry);
+  delete PreEntry;
+
   // Collect instructions from the original loop that will become trivially dead
   // in the vectorized loop. We don't need to vectorize these instructions. For
   // example, original induction update instructions can become dead because we
@@ -8785,17 +8830,36 @@
   for (Instruction *I : DeadInstructions)
     SinkAfter.erase(I);
 
+  // Remove dead instructions from original VPlan.
+  auto *DummyVal = new VPValue();
+  OriginalPlan.addExternalDef(DummyVal);
+  for (VPBlockBase *Base :
+       depth_first<VPBlockBase *>(TopRegion->getEntryBasicBlock())) {
+    VPBasicBlock *OriginalVPBB = Base->getEntryBasicBlock();
+    for (VPRecipeBase &Ingredient :
+         make_early_inc_range(reverse(*OriginalVPBB))) {
+      VPInstruction *VPInst = dyn_cast<VPInstruction>(&Ingredient);
+      if (!VPInst)
+        continue;
+      Instruction *Instr = VPInst->getUnderlyingInstr();
+
+      if (DeadInstructions.contains(Instr) || isa<DbgInfoIntrinsic>(Instr)) {
+        Ingredient.getVPValue()->replaceAllUsesWith(DummyVal);
+        Ingredient.eraseFromParent();
+      }
+    }
+  }
+
   auto MaxVFPlusOne = MaxVF.getWithIncrement(1);
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) {
     VFRange SubRange = {VF, MaxVFPlusOne};
-    VPlans.push_back(
-        buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter));
+    VPlans.push_back(convertToVPRecipes(OriginalPlan, SubRange, SinkAfter));
     VF = SubRange.End;
   }
 }
 
-VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
-    VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
+VPlanPtr LoopVectorizationPlanner::convertToVPRecipes(
+    VPlan &OriginalPlan, VFRange &Range,
     const DenseMap<Instruction *, Instruction *> &SinkAfter) {
 
   SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
@@ -8853,31 +8917,38 @@
 
   // Create a dummy pre-entry VPBasicBlock to start building the VPlan.
   auto Plan = std::make_unique<VPlan>();
-  VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
-  Plan->setEntry(VPBB);
 
-  // Scan the body of the loop in a topological order to visit each basic block
-  // after having visited its predecessor basic blocks.
-  LoopBlocksDFS DFS(OrigLoop);
-  DFS.perform(LI);
+  ReversePostOrderTraversal<VPBlockBase *> RPOT(
+      OriginalPlan.getEntry()->getEntryBasicBlock());
+
+  VPBasicBlock *VPBB = nullptr;
+  for (VPBlockBase *Base : RPOT) {
+    VPBasicBlock *OriginalVPBB = Base->getEntryBasicBlock();
 
-  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
     // Relevant instructions from basic block BB will be grouped into VPRecipe
     // ingredients and fill a new VPBasicBlock.
     unsigned VPBBsForBB = 0;
-    auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
-    VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
+    auto *FirstVPBBForBB = new VPBasicBlock(OriginalVPBB->getName());
+    if (VPBB)
+      VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
+    else
+      Plan->setEntry(FirstVPBBForBB);
     VPBB = FirstVPBBForBB;
     Builder.setInsertPoint(VPBB);
 
     // Introduce each ingredient into VPlan.
-    // TODO: Model and preserve debug instrinsics in VPlan.
-    for (Instruction &I : BB->instructionsWithoutDebug()) {
-      Instruction *Instr = &I;
+    for (auto I = OriginalVPBB->begin(), E = OriginalVPBB->end(); I != E;) {
+      VPRecipeBase *Ingredient = &*I++;
+      VPInstruction *VPInst = dyn_cast<VPInstruction>(Ingredient);
+      Instruction *Instr =
+          VPInst
+              ? VPInst->getUnderlyingInstr()
+              : cast<PHINode>(
+                    cast<VPWidenPHIRecipe>(Ingredient)->getUnderlyingInstr());
 
       // First filter out irrelevant instructions, to ensure no recipes are
       // built for them.
-      if (isa<BranchInst>(Instr) || DeadInstructions.count(Instr))
+      if (isa<BranchInst>(Instr))
         continue;
 
       SmallVector<VPValue *, 4> Operands;
@@ -8914,21 +8985,11 @@
           RecipeBuilder.handleReplication(Instr, Range, VPBB, Plan);
       if (NextVPBB != VPBB) {
         VPBB = NextVPBB;
-        VPBB->setName(BB->hasName() ? BB->getName() + "." + Twine(VPBBsForBB++)
-                                    : "");
+        VPBB->setName(OriginalVPBB->getName() + "." + Twine(VPBBsForBB++));
       }
     }
   }
 
-  // Discard empty dummy pre-entry VPBasicBlock. Note that other VPBasicBlocks
-  // may also be empty, such as the last one VPBB, reflecting original
-  // basic-blocks with no recipes.
-  VPBasicBlock *PreEntry = cast<VPBasicBlock>(Plan->getEntry());
-  assert(PreEntry->empty() && "Expecting empty pre-entry block.");
-  VPBlockBase *Entry = Plan->setEntry(PreEntry->getSingleSuccessor());
-  VPBlockUtils::disconnectBlocks(PreEntry, Entry);
-  delete PreEntry;
-
   // ---------------------------------------------------------------------------
   // Transform initial VPlan: Apply previously taken decisions, in order, to
   // bring the VPlan to its final state.
@@ -9015,14 +9076,7 @@
   return Plan;
 }
 
-VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
-  // Outer loop handling: They may require CFG and instruction level
-  // transformations before even evaluating whether vectorization is profitable.
-  // Since we cannot modify the incoming IR, we need to build VPlan upfront in
-  // the vectorization pipeline.
-  assert(!OrigLoop->isInnermost());
-  assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
-
+VPlanPtr LoopVectorizationPlanner::buildVPlan(const VFRange &Range) {
   // Create new empty VPlan
   auto Plan = std::make_unique<VPlan>();
 
@@ -9034,19 +9088,6 @@
        VF *= 2)
     Plan->addVF(VF);
 
-  if (EnableVPlanPredication) {
-    VPlanPredicator VPP(*Plan);
-    VPP.predicate();
-
-    // Avoid running transformation to recipes until masked code generation in
-    // VPlan-native path is in place.
-    return Plan;
-  }
-
-  SmallPtrSet<Instruction *, 1> DeadInstructions;
-  VPlanTransforms::VPInstructionsToVPRecipes(OrigLoop, Plan,
-                                             Legal->getInductionVars(),
-                                             DeadInstructions, *PSE.getSE());
   return Plan;
 }