diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1200,7 +1200,8 @@
                              InterleavedAccessInfo &IAI)
       : ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal),
         TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F),
-        Hints(Hints), InterleaveInfo(IAI) {}
+        Hints(Hints), InterleaveInfo(IAI),
+        UseVPlanNativePath(EnableVPlanNativePath && !L->isInnermost()) {}
 
   /// \return An upper bound for the vectorization factor, or None if
   /// vectorization and interleaving should be avoided up front.
@@ -1283,7 +1284,7 @@
 
     // Cost model is not run in the VPlan-native path - return conservative
     // result until this changes.
-    if (EnableVPlanNativePath)
+    if (UseVPlanNativePath)
      return false;
 
     auto Scalars = InstsToScalarize.find(VF);
@@ -1299,7 +1300,7 @@
 
     // Cost model is not run in the VPlan-native path - return conservative
     // result until this changes.
-    if (EnableVPlanNativePath)
+    if (UseVPlanNativePath)
      return false;
 
     auto UniformsPerVF = Uniforms.find(VF);
@@ -1315,7 +1316,7 @@
 
     // Cost model is not run in the VPlan-native path - return conservative
     // result until this changes.
-    if (EnableVPlanNativePath)
+    if (UseVPlanNativePath)
      return false;
 
     auto ScalarsPerVF = Scalars.find(VF);
@@ -1375,7 +1376,7 @@
     assert(VF.isVector() && "Expected VF to be a vector VF");
     // Cost model is not run in the VPlan-native path - return conservative
     // result until this changes.
-    if (EnableVPlanNativePath)
+    if (UseVPlanNativePath)
       return CM_GatherScatter;
 
     std::pair<Instruction *, ElementCount> InstOnVF = std::make_pair(I, VF);
@@ -1832,6 +1833,9 @@
 
   /// Profitable vector factors.
   SmallVector<VectorizationFactor, 8> ProfitableVFs;
+
+  /// Controls whether the VPlan native path is used or not.
+  bool UseVPlanNativePath;
 };
 } // end namespace llvm
 
@@ -3039,7 +3043,7 @@
 
 void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
   // VPlan-native path does not do any analysis for runtime checks currently.
-  if (EnableVPlanNativePath)
+  if (Cost->UseVPlanNativePath)
     return;
 
   // Reuse existing vector loop preheader for runtime memory checks.
@@ -3801,7 +3805,7 @@
 
   // Fix widened non-induction PHIs by setting up the PHI operands.
   if (OrigPHIsToFix.size()) {
-    assert(EnableVPlanNativePath &&
+    assert(Cost->UseVPlanNativePath &&
            "Unexpected non-induction PHIs for fixup in non VPlan-native path");
     fixNonInductionPHIs(State);
   }
@@ -4440,9 +4444,9 @@
                                              VPValue *StartVPV, VPValue *Def,
                                              VPTransformState &State) {
   PHINode *P = cast<PHINode>(PN);
-  if (EnableVPlanNativePath) {
-    // Currently we enter here in the VPlan-native path for non-induction
-    // PHIs where all control flow is uniform. We simply widen these PHIs.
+  if (Cost->UseVPlanNativePath) {
+    // We enter here in the VPlan-native path and when the loop is not the
+    // innermost loop. We handle non-induction PHIs here and simply widen them.
     // Create a vector phi with no operands - the vector phi operands will be
     // set at the end of vector code generation.
     Type *VecTy = (State.VF.isScalar())
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -599,7 +599,8 @@
   assert((EnableVPlanNativePath ||
           isa<UnreachableInst>(LastBB->getTerminator())) &&
          "Expected InnerLoop VPlan CFG to terminate with unreachable");
-  assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) &&
+  assert((!EnableVPlanNativePath ||
+          (L->isInnermost() || isa<BranchInst>(LastBB->getTerminator()))) &&
          "Expected VPlan CFG to terminate with branch in NativePath");
   LastBB->getTerminator()->eraseFromParent();
   BranchInst::Create(VectorLatchBB, LastBB);
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-single-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-single-loop.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-single-loop.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s
+
+; Test that when the VPlan native path is enabled and no loop is explicitly
+; marked to be vectorized, the innermost loop is still vectorized without any
+; issues. The result of the vectorization should be the same as using inner
+; loop vectorization without enabling the VPlan native path flag.
+; See PR42592 (https://bugs.llvm.org/show_bug.cgi?id=42592).
+
+target triple = "x86_64-unknown-linux-gnu"
+define void @kernel(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i64 %3, i64 %4) {
+; CHECK-LABEL: @kernel(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP2:%.*]] to i8*
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[TMP0:%.*]] to i8*
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4:%.*]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[TMP2]], i64 [[TMP4]]
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = bitcast float* [[SCEVGEP]] to i8*
+; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr float, float* [[TMP0]], i64 [[TMP4]]
+; CHECK-NEXT:    [[SCEVGEP23:%.*]] = bitcast float* [[SCEVGEP2]] to i8*
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr float, float* [[TMP1:%.*]], i64 [[TMP3:%.*]]
+; CHECK-NEXT:    [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8*
+; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr float, float* [[TMP1]], i64 [[TMP3]]
+; CHECK-NEXT:    [[SCEVGEP67:%.*]] = bitcast float* [[SCEVGEP6]] to i8*
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP67]], i64 1
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[TMP5]], [[SCEVGEP23]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TMP6]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    [[BOUND08:%.*]] = icmp ult i8* [[TMP5]], [[UGLYGEP]]
+; CHECK-NEXT:    [[BOUND19:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[FOUND_CONFLICT10:%.*]] = and i1 [[BOUND08]], [[BOUND19]]
+; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT10]]
+; CHECK-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
+; CHECK-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP13:%.*]] = add nuw <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP11]], i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = bitcast float* [[TMP14]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP15]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP12]], align 4, !alias.scope !3
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP16]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = fdiv <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 0
+; CHECK-NEXT:    [[TMP20:%.*]] = bitcast float* [[TMP19]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP17]], <4 x float>* [[TMP20]], align 4, !alias.scope !5, !noalias !7
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[KERNEL_EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[R:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 [[I_0]]
+; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 [[TMP3]]
+; CHECK-NEXT:    [[AA:%.*]] = add nuw i64 [[I_0]], 2
+; CHECK-NEXT:    [[C:%.*]] = load float, float* [[ARRAYIDX_I]], align 4
+; CHECK-NEXT:    [[D:%.*]] = load float, float* [[ARRAYIDX5_I]], align 4
+; CHECK-NEXT:    [[DIV_I:%.*]] = fdiv float [[C]], [[D]]
+; CHECK-NEXT:    [[ARRAYIDX9_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 [[I_0]]
+; CHECK-NEXT:    store float [[DIV_I]], float* [[ARRAYIDX9_I]], align 4
+; CHECK-NEXT:    [[R]] = add nuw i64 [[I_0]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[R]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[KERNEL_EXIT]], label [[FOR_BODY]], [[LOOP10:!llvm.loop !.*]]
+; CHECK:       kernel.exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i.0 = phi i64 [ 0, %entry ], [ %r, %for.body ]
+  %arrayidx.i = getelementptr inbounds float, float* %0, i64 %i.0
+  %arrayidx5.i = getelementptr inbounds float, float* %1, i64 %3
+  %aa = add nuw i64 %i.0, 2
+  %c = load float, float* %arrayidx.i, align 4
+  %d = load float, float* %arrayidx5.i, align 4
+  %div.i = fdiv float %c, %d
+  %arrayidx9.i = getelementptr inbounds float, float* %2, i64 %i.0
+  store float %div.i, float* %arrayidx9.i, align 4
+  %r = add nuw i64 %i.0, 1
+  %exitcond.not = icmp eq i64 %r, %4
+  br i1 %exitcond.not, label %kernel.exit, label %for.body
+
+kernel.exit:
+  ret void
+}