diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -44,7 +44,7 @@
              "with zero meaning no minimum size is assumed. A value of -1 "
              "means use Zvl*b extension. This is primarily used to enable "
              "autovectorization with fixed width vectors."),
-    cl::init(0), cl::Hidden);
+    cl::init(-1), cl::Hidden);
 
 static cl::opt<unsigned> RVVVectorLMULMax(
     "riscv-v-fixed-length-vector-lmul-max",
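The only functional change in this patch is the new default above; everything below is test fallout. As a rough sketch of what the sentinel means (a hypothetical helper, not the actual RISCVSubtarget code; the name and parameters are illustrative):

#include <cassert>

// Illustrative only: resolve the riscv-v-vector-bits-min value against the
// minimum VLEN guaranteed by the target's Zvl*b extension.
unsigned resolveMinRVVVectorBits(int OptionValue, unsigned ZvlLen) {
  if (OptionValue == -1)
    return ZvlLen;              // new default: trust the Zvl*b guarantee
  if (OptionValue == 0)
    return 0;                   // old default: assume no minimum at all
  assert(unsigned(OptionValue) >= ZvlLen &&
         "explicit minimum should not contradict Zvl*b");
  return unsigned(OptionValue); // explicit user override
}

Since V implies Zvl128b, -mattr=+v now yields a 128-bit fixed-length minimum by default, so fixed-width autovectorization kicks in without the flag; that is what perturbs the three tests below.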
diff --git a/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll b/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll
--- a/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll
@@ -15,16 +15,16 @@
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv1i1_i32 = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
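The cost drops above (112/56/28/14 down to a flat 5, and 224 down to 10) reflect that the fixed-length masks are no longer scalarized lane by lane once a minimum VLEN is known. As a reminder of what is being costed, a scalar model of the intrinsic (my sketch of the LangRef semantics, not LLVM code; the widened add reflects the no-overflow requirement):

#include <cstdint>
#include <vector>

// Scalar model of llvm.get.active.lane.mask.*: lane i of the mask is
// ((base + i) < n), with the sum computed wide enough that it cannot wrap.
std::vector<bool> activeLaneMask(uint32_t base, uint32_t n, unsigned lanes) {
  std::vector<bool> mask(lanes);
  for (unsigned i = 0; i < lanes; ++i)
    mask[i] = (uint64_t)base + i < n; // widened add: no wraparound
  return mask;
}

The 32-lane mask costing exactly twice the 16-lane one is consistent with the operation being split across two vector registers at the 128-bit minimum VLEN.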
diff --git a/llvm/test/CodeGen/RISCV/fold-vector-cmp.ll b/llvm/test/CodeGen/RISCV/fold-vector-cmp.ll
--- a/llvm/test/CodeGen/RISCV/fold-vector-cmp.ll
+++ b/llvm/test/CodeGen/RISCV/fold-vector-cmp.ll
@@ -1,14 +1,31 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -start-after codegenprepare -mtriple=riscv64 -mattr=-v -o - %s | FileCheck %s
-; RUN: llc -start-after codegenprepare -mtriple=riscv64 -mattr=+v -o - %s | FileCheck %s
+; RUN: llc -start-after codegenprepare -mtriple=riscv64 -mattr=-v -o - %s | FileCheck --check-prefix=CHECK-NOV %s
+; RUN: llc -start-after codegenprepare -mtriple=riscv64 -mattr=+v -o - %s | FileCheck --check-prefix=CHECK-V %s
 ; Reproducer for https://github.com/llvm/llvm-project/issues/55168.
 ; We should always return 1 (and not -1).
 
 define i32 @test(i32 %call.i) {
-; CHECK-LABEL: test:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 1
-; CHECK-NEXT:    ret
+; CHECK-NOV-LABEL: test:
+; CHECK-NOV:       # %bb.0:
+; CHECK-NOV-NEXT:    li a0, 1
+; CHECK-NOV-NEXT:    ret
+;
+; CHECK-V-LABEL: test:
+; CHECK-V:       # %bb.0:
+; CHECK-V-NEXT:    lui a1, 524288
+; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-V-NEXT:    vmv.v.x v8, a1
+; CHECK-V-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addiw a0, a1, 2
+; CHECK-V-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-V-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-V-NEXT:    vmv.v.i v8, 0
+; CHECK-V-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-V-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; CHECK-V-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-V-NEXT:    vmv.x.s a0, v8
+; CHECK-V-NEXT:    ret
   %t2 = insertelement <2 x i32> <i32 poison, i32 -2147483648>, i32 %call.i, i64 0
   %t3 = icmp slt <2 x i32> %t2, <i32 -2147483646, i32 -2147483646>
   %t4 = zext <2 x i1> %t3 to <2 x i32>
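Splitting the prefixes is necessary because with -mattr=+v the compare is now kept in vector registers rather than folded to a constant, yet the function must still return 1. Reading the constants out of the CHECK-V assembly (lui a1, 524288 gives INT32_MIN; addiw a0, a1, 2 gives INT32_MIN + 2), lane 1 of the compare does not depend on %call.i. An illustrative scalar model of that lane (not part of the test):

#include <cstdint>
#include <cstdio>

// Lane 1 of the compare in the CHECK-V sequence: INT32_MIN is signed-less
// than INT32_MIN + 2, so the zext'd lane is always 1 and never -1 (the
// miscompile tracked in issue 55168 returned -1).
int main() {
  int32_t lane1 = INT32_MIN;      // constant lane of the source vector
  int32_t bound = INT32_MIN + 2;  // splatted comparison operand
  std::printf("%d\n", lane1 < bound ? 1 : 0); // prints 1
  return 0;
}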
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
--- a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
@@ -101,17 +101,63 @@ define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) {
 ; CHECK-LABEL: @uniform_store_i1(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i64, i64* [[START:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64*> poison, i64* [[START]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64*> [[BROADCAST_SPLATINSERT]], <2 x i64*> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64*> poison, i64* [[START]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64*> [[BROADCAST_SPLATINSERT3]], <2 x i64*> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i64* [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <2 x i64> <i64 2, i64 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64*> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i64, i64* [[TMP3]], i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[TMP6]] to <2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, <2 x i64*> [[TMP1]], i64 1
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, <2 x i64*> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq <2 x i64*> [[TMP8]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq <2 x i64*> [[TMP9]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
+; CHECK-NEXT:    store i1 [[TMP12]], i1* [[DST:%.*]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
+; CHECK-NEXT:    store i1 [[TMP13]], i1* [[DST]], align 1
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
+; CHECK-NEXT:    store i1 [[TMP14]], i1* [[DST]], align 1
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
+; CHECK-NEXT:    store i1 [[TMP15]], i1* [[DST]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i64, i64* [[POINTER_PHI]], i64 4
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[FIRST_SROA:%.*]] = phi i64* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[FIRST_SROA:%.*]] = phi i64* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[FIRST_SROA]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = load i64, i64* [[FIRST_SROA]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i64, i64* [[FIRST_SROA]], i64 1
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64* [[INCDEC_PTR]], [[START]]
-; CHECK-NEXT:    store i1 [[CMP_NOT]], i1* [[DST:%.*]], align 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]], !llvm.loop [[LOOP0]]
+; CHECK-NEXT:    store i1 [[CMP_NOT]], i1* [[DST]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret void
 ;
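For reference, a C++ rendition of the @uniform_store_i1 loop that now vectorizes (reconstructed from the scalar IR above; parameter types are illustrative). The new CHECK lines show VF 2 with interleave 2: the loop-uniform i1 store is scalarized into four per-lane stores, and the pointer induction and index both step by 4 per vector iteration.

#include <cstdint>

// Scalar shape of @uniform_store_i1: trip count is N + 1, the loaded value
// feeds nothing, and the store of the pointer compare is loop-uniform.
void uniform_store_i1(bool *dst, int64_t *start, uint64_t n) {
  int64_t *p = start;
  for (uint64_t iv = 0; ; ++iv) {
    (void)*p;            // becomes the wide loads in the vector body
    ++p;                 // incdec.ptr
    *dst = (p == start); // uniform store, scalarized per lane when vectorized
    if (iv >= n)         // br i1 (icmp ult iv, N)
      break;
  }
}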