Index: llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
===================================================================
--- llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
+++ llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
@@ -9,9 +9,14 @@
 /// \file This pass does a few optimisations related to MVE VPT blocks before
 /// register allocation is performed. The goal is to maximize the sizes of the
 /// blocks that will be created by the MVE VPT Block Insertion pass (which runs
-/// after register allocation). Currently, this pass replaces VCMPs with VPNOTs
-/// when possible, so the Block Insertion pass can delete them later to create
-/// larger VPT blocks.
+/// after register allocation). The first optimisation done by this pass is the
+/// replacement of VCMPs with VPNOTs when possible, so the Block Insertion pass
+/// can delete them later to create larger VPT blocks. The second optimisation
+/// replaces re-uses of old VPR values with VPNOTs inside blocks of predicated
+/// instructions. This is done to avoid spills/reloads of VPR in the middle of
+/// a block, which would prevent the Block Insertion pass from creating large
+/// blocks.
+//
 //===----------------------------------------------------------------------===//
 
 #include "ARM.h"
@@ -143,6 +148,35 @@
   return BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT));
 }
 
+// Transforms
+//    <Instr that uses %A ('A' at OpIdx)>
+// Into
+//    %K = VPNOT %Target
+//    <Instr that uses %K ('K' at OpIdx)>
+// And returns the inserted VPNOT (whose result is %K).
+// This optimisation is done in the hopes of preventing spills/reloads of VPR.
+MachineInstr &MVEVPTOptimisations::ReplaceUsageOfRegisterByVPNOT(
+    MachineBasicBlock &MBB, MachineInstr &Instr, unsigned OpIdx,
+    Register Target) {
+  MachineOperand &InstrOperand = Instr.getOperand(OpIdx);
+
+  Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
+  MachineInstrBuilder MIBuilder = BuildVPNOTBefore(MBB, Instr);
+  MIBuilder.add(MachineOperand::CreateReg(NewResult, /*isDef*/ true));
+  MIBuilder.add(MachineOperand::CreateReg(Target, /*isDef*/ false));
+  MIBuilder.addImm(0);
+  MIBuilder.addReg({});
+  InstrOperand.setReg(NewResult);
+
+  LLVM_DEBUG(dbgs() << "  Inserting VPNOT (for spill prevention): ";
+             MIBuilder.getInstr()->dump());
+
+  return *MIBuilder.getInstr();
+}
+
+// Replaces VCMPs by VPNOTs when possible, and tries to reduce spills by
+// replacing uses of old VPR values with VPNOTs inside predicated instruction
+// blocks.
 bool MVEVPTOptimisations::InsertVPNOTs(MachineBasicBlock &MBB) {
   SmallVector<MachineInstr *, 4> DeadInstructions;
 
@@ -184,6 +218,39 @@
     LLVM_DEBUG(dbgs() << "  Inserting VPNOT (to replace VCMP): ";
                MIBuilder.getInstr()->dump());
 
+    // While inside the block of predicated instructions, replace usages of
+    // old VCCR/VPR values with VPNOTs. That way, we avoid overlapping
+    // lifetimes of different VPR values (which always result in spills and
+    // reloads). Those VPNOTs can then be removed by the MVE VPT Block
+    // Insertion pass, leaving clean blocks like "TETE", "TEET", etc.
+
+    Register ValueReg = PrevVCMPResultReg;
+    Register InverseValueReg = Instr.getOperand(0).getReg();
+    Register VPNOTOperand = InverseValueReg;
+
+    // On each iteration, try to replace a usage of "ValueReg" with a VPNOT
+    // on "VPNOTOperand".
+    for (MachineBasicBlock::instr_iterator Iter = ++Instr.getIterator();
+         Iter != MBB.end(); ++Iter) {
+      // Stop as soon as we leave the block of predicated instructions.
+      if (getVPTInstrPredicate(*Iter) == ARMVCC::None)
+        break;
+
+      // Keep going until we find an instruction that uses ValueReg.
+      int Idx = Iter->findRegisterUseOperandIdx(ValueReg.id());
+      if (Idx == -1)
+        continue;
+
+      // Replace the usage of said register with a VPNOT on VPNOTOperand.
+      MachineInstr &VPNOT =
+          ReplaceUsageOfRegisterByVPNOT(MBB, *Iter, Idx, VPNOTOperand);
+
+      // Continue: the result of the VPNOT we just inserted becomes the new
+      // VPNOTOperand, and ValueReg/InverseValueReg are swapped.
+      VPNOTOperand = VPNOT.getOperand(0).getReg();
+      std::swap(ValueReg, InverseValueReg);
+    }
+
     // Finally, mark the old VCMP for removal and reset PrevVCMP.
     DeadInstructions.push_back(&Instr);
     PrevVCMP = nullptr;
Index: llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir
+++ llvm/test/CodeGen/Thumb2/mve-vpt-optimisations.mir
@@ -42,6 +42,11 @@
     ret <4 x float> %inactive1
   }
 
+  define arm_aapcs_vfpcc <4 x float> @spill_prevention(<4 x float> %inactive1) #0 {
+  entry:
+    ret <4 x float> %inactive1
+  }
+
   attributes #0 = { "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" }
 ...
 ---
@@ -519,3 +524,28 @@
     renamable $vpr = MVE_VCMPs32 renamable $q1, renamable $q0, 10, 0, $noreg
     tBX_RET 14, $noreg, implicit $q0
 ...
+---
+name: spill_prevention
+alignment: 4
+liveins:
+  - { reg: '$q0', virtual-reg: '' }
+  - { reg: '$q1', virtual-reg: '' }
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: spill_prevention
+    ; CHECK: [[MVE_VCMPs32_:%[0-9]+]]:vccr = MVE_VCMPs32 renamable $q0, renamable $q1, 10, 0, $noreg
+    ; CHECK: [[MVE_VPNOT:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VCMPs32_]], 0, $noreg
+    ; CHECK: [[MVE_VORR:%[0-9]+]]:mqpr = MVE_VORR renamable $q1, renamable $q1, 1, [[MVE_VPNOT]], undef [[MVE_VORR]]
+    ; CHECK: [[MVE_VPNOT1:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VPNOT]], 0, $noreg
+    ; CHECK: [[MVE_VORR1:%[0-9]+]]:mqpr = MVE_VORR [[MVE_VORR]], [[MVE_VORR]], 1, [[MVE_VPNOT1]], undef [[MVE_VORR1]]
+    ; CHECK: [[MVE_VPNOT2:%[0-9]+]]:vccr = MVE_VPNOT [[MVE_VPNOT1]], 0, $noreg
+    ; CHECK: [[MVE_VORR2:%[0-9]+]]:mqpr = MVE_VORR [[MVE_VORR1]], [[MVE_VORR1]], 1, [[MVE_VPNOT2]], undef [[MVE_VORR2]]
+    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0
+    %0:vccr = MVE_VCMPs32 renamable $q0, renamable $q1, 10, 0, $noreg
+    %1:vccr = MVE_VCMPs32 renamable $q1, renamable $q0, 12, 0, $noreg
+    %2:mqpr = MVE_VORR renamable $q1, renamable $q1, 1, %1, undef %2
+    %3:mqpr = MVE_VORR %2, %2, 1, %0:vccr, undef %3:mqpr
+    %4:mqpr = MVE_VORR %3, %3, 1, %1:vccr, undef %4:mqpr
+    tBX_RET 14, $noreg, implicit $q0
+...
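
As an illustration of the spill-prevention rewrite, consider the new
spill_prevention test above (register names below are schematic, not actual
codegen output). Before the pass runs, two complementary VCCR values are live
across the whole block: %0 and its inverse %1 (conditions 10 and 12 with
swapped operands encode inverse comparisons, which is why the existing
VCMP-to-VPNOT replacement fires on %1). Since VPR is a single physical
register, only one of the two values can live in it at a time, forcing a
spill/reload in the middle of the block:

    %p  = MVE_VCMPs32 %a, %b, 10, ...   ; P
    %np = MVE_VCMPs32 %b, %a, 12, ...   ; !P -- both values now live at once

After the pass, the second VCMP becomes a VPNOT, and each later re-use of the
"other" value is rewritten as a VPNOT of the most recently produced value, so
at most one VCCR value is live at any point:

    %p   = MVE_VCMPs32 %a, %b, 10, ...  ; P
    %np  = MVE_VPNOT %p, ...            ; !P
    ...  (instruction predicated on %np)
    %p2  = MVE_VPNOT %np, ...           ; P again
    ...  (instruction predicated on %p2)
    %np2 = MVE_VPNOT %p2, ...           ; !P again
    ...  (instruction predicated on %np2)

The MVE VPT Block Insertion pass can then delete these VPNOTs and fold the
alternation into the block mask, yielding the clean "TETE"/"TEET"-style blocks
mentioned in the file comment.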