diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -462,6 +462,7 @@
   void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
   void emitVSETVLIs(MachineBasicBlock &MBB);
   void doLocalPrepass(MachineBasicBlock &MBB);
+  void doLocalPostpass(MachineBasicBlock &MBB);
   void doPRE(MachineBasicBlock &MBB);
 };

@@ -478,6 +479,15 @@
          MI.getOpcode() == RISCV::PseudoVSETIVLI;
 }

+/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
+/// VL and only sets VTYPE.
+static bool isVLPreservingConfig(const MachineInstr &MI) {
+  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
+    return false;
+  assert(RISCV::X0 == MI.getOperand(1).getReg());
+  return RISCV::X0 == MI.getOperand(0).getReg();
+}
+
 static MachineInstr *elideCopies(MachineInstr *MI,
                                  const MachineRegisterInfo *MRI) {
   while (true) {
@@ -1065,9 +1075,6 @@
 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
   VSETVLIInfo CurInfo;

-  // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI.
-  MachineInstr *PrevVSETVLIMI = nullptr;
-
   for (MachineInstr &MI : MBB) {
     // If this is an explicit VSETVLI or VSETIVLI, update our state.
     if (isVectorConfigInstr(MI)) {
@@ -1078,7 +1085,6 @@
       MI.getOperand(3).setIsDead(false);
       MI.getOperand(4).setIsDead(false);
       CurInfo = getInfoForVSETVLI(MI);
-      PrevVSETVLIMI = &MI;
       continue;
     }

@@ -1125,29 +1131,10 @@
         // vl/vtype for succesor blocks.
         if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
             needVSETVLI(NewInfo, CurInfo)) {
-          // If the previous VL/VTYPE is set by VSETVLI and do not use, Merge it
-          // with current VL/VTYPE.
-          bool NeedInsertVSETVLI = true;
-          if (PrevVSETVLIMI) {
-            // If these two VSETVLI have the same AVL and the same VLMAX,
-            // we could merge these two VSETVLI.
-            // TODO: If we remove this, we get a `vsetvli x0, x0, vtype'
-            // here.  We could simply let this be emitted, then remove
-            // the unused vsetvlis in a post-pass.
-            if (CurInfo.hasSameAVL(NewInfo) && CurInfo.hasSameVLMAX(NewInfo)) {
-              // WARNING: For correctness, it is essential the contents of VL
-              // and VTYPE stay the same after MI.  This greatly limits the
-              // mutation we can legally do here.
-              PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
-              NeedInsertVSETVLI = false;
-            }
-          }
-          if (NeedInsertVSETVLI)
-            insertVSETVLI(MBB, MI, NewInfo, CurInfo);
+          insertVSETVLI(MBB, MI, NewInfo, CurInfo);
           CurInfo = NewInfo;
         }
       }
-      PrevVSETVLIMI = nullptr;
     }

     // If this is something that updates VL/VTYPE that we don't know about, set
@@ -1155,7 +1142,6 @@
     if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
         MI.modifiesRegister(RISCV::VTYPE)) {
       CurInfo = VSETVLIInfo::getUnknown();
-      PrevVSETVLIMI = nullptr;
     }
   }

@@ -1378,6 +1364,52 @@
                 AvailableInfo, OldInfo);
 }

+void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
+  MachineInstr *PrevMI = nullptr;
+  bool UsedVL = false, UsedVTYPE = false;
+  SmallVector<MachineInstr *> ToDelete;
+  for (MachineInstr &MI : MBB) {
+    // Note: Must be *before* vsetvli handling to account for config cases
+    // which only change some subfields.
+    if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
+      UsedVL = true;
+    if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
+      UsedVTYPE = true;
+
+    if (!isVectorConfigInstr(MI))
+      continue;
+
+    if (PrevMI) {
+      if (!UsedVL && !UsedVTYPE) {
+        ToDelete.push_back(PrevMI);
+        // fallthrough
+      } else if (!UsedVTYPE && isVLPreservingConfig(MI)) {
+        // Note: `vsetvli x0, x0, vtype' is the canonical instruction
+        // for this case.  If you find yourself wanting to add other forms
+        // to this "unused VTYPE" case, we're probably missing a
+        // canonicalization earlier.
+        // Note: We don't need to explicitly check vtype compatibility
+        // here because this form is only legal (per ISA) when not
+        // changing VL.
+        PrevMI->getOperand(2).setImm(MI.getOperand(2).getImm());
+        ToDelete.push_back(&MI);
+        // Leave PrevMI unchanged
+        continue;
+      }
+    }
+    PrevMI = &MI;
+    UsedVL = false;
+    UsedVTYPE = false;
+    Register VRegDef = MI.getOperand(0).getReg();
+    if (VRegDef != RISCV::X0 &&
+        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
+      UsedVL = true;
+  }
+
+  for (auto *MI : ToDelete)
+    MI->eraseFromParent();
+}
+
 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
   // Skip if the vector extension is not enabled.
   const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
@@ -1443,6 +1475,15 @@
   for (MachineBasicBlock &MBB : MF)
     emitVSETVLIs(MBB);

+  // Now that all vsetvlis are explicit, go through and do block local
+  // DSE and peephole based demanded-fields transforms.  Note that
+  // this *must* be done outside the main dataflow so long as we allow
+  // any cross block analysis within the dataflow.  We can't have both
+  // demanded fields based mutation and non-local analysis in the
+  // dataflow at the same time without introducing inconsistencies.
+  for (MachineBasicBlock &MBB : MF)
+    doLocalPostpass(MBB);
+
   // Once we're fully done rewriting all the instructions, do a final pass
   // through to check for VSETVLIs which write to an unused destination.
   // For the non X0, X0 variant, we can replace the destination register
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -747,7 +747,6 @@
 ; CHECK-LABEL: vector_init_vsetvli_fv2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
 ; CHECK-NEXT:    vsetvli a3, zero, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:  .LBB15_1: # %for.body
@@ -812,14 +811,13 @@
 ; Demonstrates a case where mutation in phase3 is problematic.  We mutate the
 ; vsetvli without considering that it changes the compatibility result of the
 ; vadd in the second block.
-; FIXME: This currently crashes with strict asserts enabled.
 define <vscale x 4 x i32> @cross_block_mutate(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
 ; CHECK-LABEL: cross_block_mutate:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli a0, 6, e32, m2, tu, mu
 ; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; CHECK-NEXT:    ret
                                               <vscale x 4 x i1> %mask) {
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -328,8 +328,7 @@
 define double @test17(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test17:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli a0, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vfmv.f.s ft0, v8
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
@@ -468,7 +467,6 @@
 define void @avl_forward4(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
 ; CHECK-LABEL: avl_forward4:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -498,7 +496,6 @@
 define <vscale x 1 x i64> @vleNff(i64* %str, i64 %n, i64 %x) {
 ; CHECK-LABEL: vleNff:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
@@ -526,7 +523,6 @@
 ; CHECK-LABEL: avl_forward5:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    ret
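
A minimal before/after sketch of what doLocalPostpass does, mirroring the avl_forward5 diff above (illustration only, not part of the patch); the instruction sequence is assumed, not taken from a real function body:

    ; after emitVSETVLIs, before doLocalPostpass
    vsetvli zero, a1, e8, m4, ta, mu
    vsetvli zero, a1, e32, m1, ta, mu
    vle32.v v8, (a0)

    ; after doLocalPostpass: neither VL nor VTYPE written by the first
    ; config is read before the second one, so the first vsetvli is deleted
    vsetvli zero, a1, e32, m1, ta, mu
    vle32.v v8, (a0)

When only VTYPE is unused and the later instruction is the VL-preserving `vsetvli x0, x0, vtype' form, the pass instead folds the new VTYPE immediate into the earlier vsetvli and deletes the later one.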