Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -58,6 +58,7 @@
 #include "LoopVectorizationPlanner.h"
 #include "VPRecipeBuilder.h"
 #include "VPlanHCFGBuilder.h"
+#include "VPlanHCFGTransforms.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -6294,12 +6295,23 @@
     if (VPlanBuildStressTest)
       return NoVectorization;
 
+    // No codegen support for outer loop VPlans for now.
+    return NoVectorization;
+  }
+
+  if (UserVF) {
+    LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
+    assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
+    // Collect the instructions (and their associated costs) that will be more
+    // profitable to scalarize.
+    CM.selectUserVectorizationFactor(UserVF);
+    buildVPlans(UserVF, UserVF);
+    LLVM_DEBUG(printPlans(dbgs()));
     return {UserVF, 0};
   }
 
-  LLVM_DEBUG(
-      dbgs() << "LV: Not vectorizing. Inner loops aren't supported in the "
-                "VPlan-native path.\n");
+  LLVM_DEBUG(dbgs() << "LV: Not vectorizing. Loop is not supported in the "
+                       "VPlan-native path yet.\n");
   return NoVectorization;
 }
 
@@ -6379,7 +6391,20 @@
 
   // 2. Copy and widen instructions from the old loop into the new loop.
   assert(VPlans.size() == 1 && "Not a single VPlan to execute.");
-  VPlans.front()->execute(&State);
+  if (EnableVPlanNativePath) {
+    // The native-path plan holds VPInstructions: sink them as Legal requests,
+    // then lower the plan to recipes, skipping trivially dead instructions.
+    VPlanHCFGTransforms::sinkInstructions(VPlans.front(),
+                                          Legal->getSinkAfter());
+    SmallPtrSet<Instruction *, 4> DeadInstructions;
+    collectTriviallyDeadInstructions(DeadInstructions);
+
+    VPlanPtr Widened = VPlanHCFGTransforms::VPInstructionsToVPRecipies(
+        OrigLoop, VPlans.front(), Legal->getInductionVars(), DeadInstructions);
+    Widened->execute(&State);
+  } else {
+    VPlans.front()->execute(&State);
+  }
 
   // 3. Fix the vectorized code: take care of header phi's, live-outs,
   //    predication, updating analyses.
@@ -7045,11 +7070,9 @@
 
 LoopVectorizationPlanner::VPlanPtr
 LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
-  // Outer loop handling: They may require CFG and instruction level
-  // transformations before even evaluating whether vectorization is profitable.
   // Since we cannot modify the incoming IR, we need to build VPlan upfront in
-  // the vectorization pipeline.
-  assert(!OrigLoop->empty());
+  // the vectorization pipeline so we can apply CFG and instruction level
+  // transformations.
   assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
 
   // Create new empty VPlan
@@ -7059,6 +7082,19 @@
   VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI);
   HCFGBuilder.buildHierarchicalCFG(*Plan.get());
 
+  std::string PlanName;
+  raw_string_ostream RSO(PlanName);
+  unsigned VF = Range.Start;
+  Plan->addVF(VF);
+  RSO << "Initial VPlan for VF={" << VF;
+  for (VF *= 2; VF < Range.End; VF *= 2) {
+    Plan->addVF(VF);
+    RSO << "," << VF;
+  }
+  RSO << "},UF>=1";
+  RSO.flush();
+  Plan->setName(PlanName);
+
   return Plan;
 }
 
@@ -7260,11 +7296,20 @@
       Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
 
   // Plan how to best vectorize, return the best VF and its cost.
-  LVP.planInVPlanNativePath(OptForSize, UserVF);
+  VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
 
-  // Returning false. We are currently not generating vector code in the VPlan
-  // native path.
-  return false;
+  if (VF.Width < 2)
+    return false;
+
+  LVP.setBestPlan(VF.Width, 1);
+
+  // If we decided that it is *legal* to vectorize the loop, then do it.
+  InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
+                         &CM);
+  LVP.executePlan(LB, DT);
+  ++LoopsVectorized;
+
+  return true;
 }
 
 bool LoopVectorizePass::processLoop(Loop *L) {
@@ -7330,7 +7375,7 @@
   // even evaluating whether vectorization is profitable. Since we cannot modify
   // the incoming IR, we need to build VPlan upfront in the vectorization
   // pipeline.
-  if (!L->empty())
+  if (EnableVPlanNativePath)
     return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
                                         ORE, Hints);
 
Index: lib/Transforms/Vectorize/VPRecipeBuilder.h
===================================================================
--- lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -115,6 +115,8 @@
   bool tryToCreateRecipe(Instruction *Instr, VFRange &Range, VPlanPtr &Plan,
                          VPBasicBlock *VPBB);
 
+  void setInsertPoint(VPBasicBlock *VPBB) { Builder.setInsertPoint(VPBB); }
+
   /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
   /// is predicated. \return \p VPBB augmented with this new recipe if \p I is
   /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new
Index: lib/Transforms/Vectorize/VPlan.h
===================================================================
--- lib/Transforms/Vectorize/VPlan.h
+++ lib/Transforms/Vectorize/VPlan.h
@@ -1090,6 +1090,8 @@
 
   bool hasVF(unsigned VF) { return VFs.count(VF); }
 
+  const SmallSet<unsigned, 2> &getVFs() const { return VFs; }
+
   const std::string &getName() const { return Name; }
 
   void setName(const Twine &newName) { Name = newName.str(); }
Index: lib/Transforms/Vectorize/VPlanHCFGTransforms.h
===================================================================
--- lib/Transforms/Vectorize/VPlanHCFGTransforms.h
+++ lib/Transforms/Vectorize/VPlanHCFGTransforms.h
@@ -15,22 +15,29 @@
 #define LLVM_TRANSFORMS_VECTORIZE_VPLANHCFGTRANSFORMS_H
 
 #include "LoopVectorizationPlanner.h"
+#include "VPRecipeBuilder.h"
 #include "VPlan.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/IR/Instruction.h"
-
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
 namespace llvm {
 
 class VPlanHCFGTransforms {
-  using VPlanPtr = std::unique_ptr<VPlan>;
 
 public:
   /// Sinks instructions in \p Plan, depending on their underlying values in
   /// \p SinkAfter.
-  // FIXME: Migrate to using a VPlan based mapping, once
-  // LoopVectorizationLegality::getSinkAfter is moved to VPlan.
   static void
   sinkInstructions(VPlanPtr &Plan,
                    DenseMap<Instruction *, Instruction *> &SinkAfter);
+
+  /// Builds and returns a new VPlan holding VPRecipes created from the
+  /// VPInstructions of \p OriginalPlan; \p DeadInstructions are skipped.
+  static VPlanPtr VPInstructionsToVPRecipies(
+      Loop *OrigLoop, VPlanPtr &OriginalPlan,
+      LoopVectorizationLegality::InductionList *Inductions,
+      SmallPtrSetImpl<Instruction *> &DeadInstructions);
 };
 
 } // namespace llvm
Index: lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
===================================================================
--- lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
+++ lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
@@ -14,6 +14,7 @@
 
 #include "VPlanHCFGTransforms.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -55,3 +56,111 @@
     }
   }
 }
+
+/// Builds and returns a new VPlan made of VPRecipes from the VPInstructions of
+/// \p OriginalPlan. \p OriginalPlan itself is only read, not modified.
+///
+/// The blocks reachable from the entry of \p OriginalPlan's top region are
+/// visited in reverse post-order. For every visited block, except the entry
+/// and exit of the top region, a new VPBasicBlock is created and inserted
+/// after the previously created one.
+///
+/// \param OrigLoop currently unused.
+/// \param OriginalPlan plan whose entry is a VPRegionBlock and whose blocks
+///        contain only VPInstructions with an underlying IR Instruction.
+/// \param Inductions used to tell integer/FP induction phis from other phis.
+/// \param DeadInstructions instructions for which no recipe is created.
+/// \returns the new plan, carrying the same VFs as \p OriginalPlan.
+VPlanPtr VPlanHCFGTransforms::VPInstructionsToVPRecipies(
+    Loop *OrigLoop, VPlanPtr &OriginalPlan,
+    LoopVectorizationLegality::InductionList *Inductions,
+    SmallPtrSetImpl<Instruction *> &DeadInstructions) {
+  // Create a dummy pre-entry VPBasicBlock to start building VPBBs for the
+  // VPlan. It only anchors the chain of new blocks and is discarded below.
+  VPBasicBlock *PreEntryVPBB = new VPBasicBlock("Pre-Entry");
+  VPBasicBlock *VPBB = PreEntryVPBB;
+
+  // The plan's entry is expected to be a region; cast<> asserts on that
+  // instead of letting a failed dyn_cast be dereferenced below.
+  VPRegionBlock *TopRegion = cast<VPRegionBlock>(OriginalPlan->getEntry());
+  ReversePostOrderTraversal<VPBlockBase *> RPOT(TopRegion->getEntry());
+  for (VPBlockBase *Base : RPOT) {
+    VPBasicBlock *OriginalVPBB = Base->getEntryBasicBlock();
+    // Skip entry and exit nodes for now. Currently the recipes will take
+    // care of creating instructions in entry and exit blocks.
+    if (OriginalVPBB == TopRegion->getEntry() ||
+        OriginalVPBB == TopRegion->getExit())
+      continue;
+
+    auto *FirstVPBBForBB = new VPBasicBlock(OriginalVPBB->getName());
+    VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
+    VPBB = FirstVPBBForBB;
+
+    // Introduce each ingredient into VPlan.
+    for (VPRecipeBase &Ingredient : *OriginalVPBB) {
+      // Only VPInstructions that wrap an IR instruction can be converted;
+      // cast<> asserts on anything else.
+      VPInstruction *VPInst = cast<VPInstruction>(&Ingredient);
+      Instruction *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
+      if (DeadInstructions.count(Inst) || isa<DbgInfoIntrinsic>(Inst))
+        continue;
+
+      // Create VPWidenMemoryInstructionRecipe for loads and stores.
+      if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) {
+        VPBB->appendRecipe(
+            new VPWidenMemoryInstructionRecipe(*Inst, nullptr /*Mask*/));
+        continue;
+      }
+      // Phis recorded as integer or FP inductions get an induction recipe,
+      // all other phis a plain widened-phi recipe.
+      if (PHINode *Phi = dyn_cast<PHINode>(Inst)) {
+        InductionDescriptor II = Inductions->lookup(Phi);
+        if (II.getKind() == InductionDescriptor::IK_IntInduction ||
+            II.getKind() == InductionDescriptor::IK_FpInduction)
+          VPBB->appendRecipe(new VPWidenIntOrFpInductionRecipe(Phi));
+        else
+          VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
+        continue;
+      }
+
+      // Create VPWidenRecipe to widen this instruction. We optimize the common
+      // case where consecutive instructions can be represented by a single
+      // recipe.
+      if (!VPBB->empty()) {
+        VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
+        if (LastWidenRecipe && LastWidenRecipe->appendInstruction(Inst))
+          continue;
+      }
+
+      VPBB->appendRecipe(new VPWidenRecipe(Inst));
+    }
+  }
+
+  // Detach and delete the dummy pre-entry block so that it is neither leaked
+  // nor left behind as a predecessor of the new plan's entry block.
+  assert(PreEntryVPBB->empty() && "Expecting empty pre-entry block.");
+  VPBlockBase *Entry = PreEntryVPBB->getSingleSuccessor();
+  if (Entry)
+    VPBlockUtils::disconnectBlocks(PreEntryVPBB, Entry);
+  delete PreEntryVPBB;
+
+  // Create VPlan.
+  auto Plan = llvm::make_unique<VPlan>(Entry);
+
+  // Name the plan after its VFs; the separator starts out empty so the list
+  // does not begin with a comma.
+  std::string PlanName;
+  raw_string_ostream RSO(PlanName);
+  RSO << "VPRecipe-based VPlan for VF={";
+  const char *Separator = "";
+  for (unsigned VF : OriginalPlan->getVFs()) {
+    Plan->addVF(VF);
+    RSO << Separator << VF;
+    Separator = ",";
+  }
+  RSO << "},UF>=1";
+  RSO.flush();
+  Plan->setName(PlanName);
+
+  return Plan;
+}
Index: test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
===================================================================
--- test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
+++ test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll
@@ -1,5 +1,6 @@
 ; REQUIRES: asserts
 ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -enable-vplan-native-path -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s
 ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s --check-prefix=INTER
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
Index: test/Transforms/LoopVectorize/i8-induction.ll
===================================================================
--- test/Transforms/LoopVectorize/i8-induction.ll
+++ test/Transforms/LoopVectorize/i8-induction.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-vplan-native-path -dce -instcombine -S
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
Index: test/Transforms/LoopVectorize/if-conversion.ll
===================================================================
--- test/Transforms/LoopVectorize/if-conversion.ll
+++ test/Transforms/LoopVectorize/if-conversion.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-vplan-native-path -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
Index: test/Transforms/LoopVectorize/increment.ll
===================================================================
--- test/Transforms/LoopVectorize/increment.ll
+++ test/Transforms/LoopVectorize/increment.ll
@@ -1,3 +1,4 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-vplan-native-path -dce -instcombine -S | FileCheck %s
 ; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
Index: test/Transforms/LoopVectorize/induction.ll
===================================================================
--- test/Transforms/LoopVectorize/induction.ll
+++ test/Transforms/LoopVectorize/induction.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
+; RUN: opt < %s -enable-vplan-native-path -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
Index: test/Transforms/LoopVectorize/induction_plus.ll
===================================================================
--- test/Transforms/LoopVectorize/induction_plus.ll
+++ test/Transforms/LoopVectorize/induction_plus.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s -enable-vplan-native-path -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
Index: test/Transforms/LoopVectorize/loop-scalars.ll
===================================================================
--- test/Transforms/LoopVectorize/loop-scalars.ll
+++ test/Transforms/LoopVectorize/loop-scalars.ll
@@ -1,5 +1,6 @@
 ; REQUIRES: asserts
 ; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -enable-vplan-native-path -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 
Index: test/Transforms/LoopVectorize/minmax_reduction.ll
===================================================================
--- test/Transforms/LoopVectorize/minmax_reduction.ll
+++ test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -1,4 +1,5 @@
-; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=1  < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=1 -enable-vplan-native-path < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"