diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4440,9 +4440,9 @@
                                               VPValue *StartVPV, VPValue *Def,
                                               VPTransformState &State) {
   PHINode *P = cast<PHINode>(PN);
-  if (EnableVPlanNativePath) {
-    // Currently we enter here in the VPlan-native path for non-induction
-    // PHIs where all control flow is uniform. We simply widen these PHIs.
+  if (EnableVPlanNativePath && !OrigLoop->isInnermost()) {
+    // We enter here in the VPlan-native path and when the loop is not the
+    // innermost loop. We handle non-induction PHIs here and simply widen them.
     // Create a vector phi with no operands - the vector phi operands will be
     // set at the end of vector code generation.
     Type *VecTy = (State.VF.isScalar())
@@ -5181,7 +5181,8 @@
     // A uniform memory op is itself uniform. We exclude uniform stores
     // here as they demand the last lane, not the first one.
     if (isa<LoadInst>(I) && Legal->isUniformMemOp(*I)) {
-      assert(WideningDecision == CM_Scalarize);
+      assert(WideningDecision == CM_Scalarize ||
+             WideningDecision == CM_GatherScatter);
       return true;
     }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -599,7 +599,8 @@
   assert((EnableVPlanNativePath ||
           isa<UnreachableInst>(LastBB->getTerminator())) &&
          "Expected InnerLoop VPlan CFG to terminate with unreachable");
-  assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) &&
+  assert((!EnableVPlanNativePath ||
+          (L->isInnermost() || isa<BranchInst>(LastBB->getTerminator()))) &&
          "Expected VPlan CFG to terminate with branch in NativePath");
   LastBB->getTerminator()->eraseFromParent();
   BranchInst::Create(VectorLatchBB, LastBB);
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-single-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-single-loop.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-single-loop.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s
+
+; Test that when the VPlan native path is enabled and no loop is explicitly
+; marked to be vectorized, a single innermost loop is vectorized without issues.
+; See PR42592 (https://bugs.llvm.org/show_bug.cgi?id=42592).
+
+target triple = "x86_64-unknown-linux-gnu"
+define void @kernel(float* nocapture readonly %0, float* nocapture readonly %1, float* nocapture %2, i64 %3, i64 %4) {
+; CHECK-LABEL: @kernel(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4:%.*]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP0:%.*]], <4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP1:%.*]], i64 [[TMP3:%.*]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float*> poison, float* [[TMP6]], i32 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float*> [[DOTSPLATINSERT]], <4 x float*> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = add nuw <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[DOTSPLAT]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; CHECK-NEXT:    [[TMP8:%.*]] = fdiv <4 x float> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER1]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP2:%.*]], <4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> [[TMP8]], <4 x float*> [[TMP9]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[KERNEL_EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[R:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 [[I_0]]
+; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 [[TMP3]]
+; CHECK-NEXT:    [[AA:%.*]] = add nuw i64 [[I_0]], 2
+; CHECK-NEXT:    [[C:%.*]] = load float, float* [[ARRAYIDX_I]], align 4
+; CHECK-NEXT:    [[D:%.*]] = load float, float* [[ARRAYIDX5_I]], align 4
+; CHECK-NEXT:    [[DIV_I:%.*]] = fdiv float [[C]], [[D]]
+; CHECK-NEXT:    [[ARRAYIDX9_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 [[I_0]]
+; CHECK-NEXT:    store float [[DIV_I]], float* [[ARRAYIDX9_I]], align 4
+; CHECK-NEXT:    [[R]] = add nuw i64 [[I_0]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[R]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[KERNEL_EXIT]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
+; CHECK:       kernel.exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i.0 = phi i64 [ 0, %entry ], [ %r, %for.body ]
+  %arrayidx.i = getelementptr inbounds float, float* %0, i64 %i.0
+  %arrayidx5.i = getelementptr inbounds float, float* %1, i64 %3
+  %aa = add nuw i64 %i.0, 2
+  %c = load float, float* %arrayidx.i, align 4
+  %d = load float, float* %arrayidx5.i, align 4
+  %div.i = fdiv float %c, %d
+  %arrayidx9.i = getelementptr inbounds float, float* %2, i64 %i.0
+  store float %div.i, float* %arrayidx9.i, align 4
+  %r = add nuw i64 %i.0, 1
+  %exitcond.not = icmp eq i64 %r, %4
+  br i1 %exitcond.not, label %kernel.exit, label %for.body
+
+kernel.exit:
+  ret void
+}
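
For reference, the @kernel loop in the new test corresponds roughly to the following C function (a hypothetical reconstruction from the IR above; the names a, b, c, k, and n are invented). The loop-invariant load of b[k] is the uniform memory op that, per the CHECK lines, the VPlan-native path currently widens as a gather of a splatted pointer, which appears to be why the CM_Scalarize assertion in LoopVectorize.cpp is relaxed to also accept CM_GatherScatter.

/* Hypothetical C source for the tested IR kernel; for illustration only. */
void kernel(const float *a, const float *b, float *c, long k, long n) {
  for (long i = 0; i < n; ++i) {
    long aa = i + 2;      /* mirrors the dead "%aa = add nuw i64 %i.0, 2" */
    (void)aa;
    c[i] = a[i] / b[k];   /* b[k] is loop-invariant: a uniform load */
  }
}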