diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -614,56 +614,6 @@
   }
 }
 
-static inline
-unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("unhandled vctp opcode");
-    break;
-  case ARM::MVE_VCTP8:
-    return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
-  case ARM::MVE_VCTP16:
-    return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
-  case ARM::MVE_VCTP32:
-    return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
-  case ARM::MVE_VCTP64:
-    return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
-  }
-  return 0;
-}
-
-static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("unhandled vctp opcode");
-  case ARM::MVE_VCTP8:  return 16;
-  case ARM::MVE_VCTP16: return 8;
-  case ARM::MVE_VCTP32: return 4;
-  case ARM::MVE_VCTP64: return 2;
-  }
-  return 0;
-}
-
-static inline bool isVCTP(const MachineInstr *MI) {
-  switch (MI->getOpcode()) {
-  default:
-    break;
-  case ARM::MVE_VCTP8:
-  case ARM::MVE_VCTP16:
-  case ARM::MVE_VCTP32:
-  case ARM::MVE_VCTP64:
-    return true;
-  }
-  return false;
-}
-
-static inline
-bool isLoopStart(MachineInstr &MI) {
-  return MI.getOpcode() == ARM::t2DoLoopStart ||
-         MI.getOpcode() == ARM::t2DoLoopStartTP ||
-         MI.getOpcode() == ARM::t2WhileLoopStart;
-}
-
 static inline
 bool isCondBranchOpcode(int Opc) {
   return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -19,6 +19,7 @@
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
+#include "MVETailPredUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -56,6 +56,7 @@
 #include "ARMBaseRegisterInfo.h"
 #include "ARMBasicBlockInfo.h"
 #include "ARMSubtarget.h"
+#include "MVETailPredUtils.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallSet.h"
@@ -1310,33 +1311,16 @@
 // another low register.
 void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
-  MachineBasicBlock *MBB = MI->getParent();
-  MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                                    TII->get(ARM::t2CMPri));
-  MIB.add(MI->getOperand(0));
-  MIB.addImm(0);
-  MIB.addImm(ARMCC::AL);
-  MIB.addReg(ARM::NoRegister);
-
   MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
   unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
     ARM::tBcc : ARM::t2Bcc;
 
-  MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
-  MIB.add(MI->getOperand(1));   // branch target
-  MIB.addImm(ARMCC::EQ);        // condition code
-  MIB.addReg(ARM::CPSR);
-  MI->eraseFromParent();
+  RevertWhileLoopStart(MI, TII, BrOpc);
 }
 
 void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {
   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);
-  MachineBasicBlock *MBB = MI->getParent();
-  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
-      .add(MI->getOperand(0))
-      .add(MI->getOperand(1))
-      .add(predOps(ARMCC::AL));
-  MI->eraseFromParent();
+  RevertDoLoopStart(MI, TII);
 }
 
 bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
@@ -1354,21 +1338,7 @@
   bool SetFlags =
       RDA->isSafeToDefRegAt(MI, MCRegister::from(ARM::CPSR), Ignore);
 
-  MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                                    TII->get(ARM::t2SUBri));
-  MIB.addDef(ARM::LR);
-  MIB.add(MI->getOperand(1));
-  MIB.add(MI->getOperand(2));
-  MIB.addImm(ARMCC::AL);
-  MIB.addReg(0);
-
-  if (SetFlags) {
-    MIB.addReg(ARM::CPSR);
-    MIB->getOperand(5).setIsDef(true);
-  } else
-    MIB.addReg(0);
-
-  MI->eraseFromParent();
+  llvm::RevertLoopDec(MI, TII, SetFlags);
   return SetFlags;
 }
 
@@ -1376,28 +1346,11 @@
 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
 
-  MachineBasicBlock *MBB = MI->getParent();
-  // Create cmp
-  if (!SkipCmp) {
-    MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                                      TII->get(ARM::t2CMPri));
-    MIB.addReg(ARM::LR);
-    MIB.addImm(0);
-    MIB.addImm(ARMCC::AL);
-    MIB.addReg(ARM::NoRegister);
-  }
-
   MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
   unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
     ARM::tBcc : ARM::t2Bcc;
 
-  // Create bne
-  MachineInstrBuilder MIB =
-    BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
-  MIB.add(MI->getOperand(1));   // branch target
-  MIB.addImm(ARMCC::NE);        // condition code
-  MIB.addReg(ARM::CPSR);
-  MI->eraseFromParent();
+  llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp);
 }
 
 // Perform dead code elimination on the loop iteration count setup expression.
diff --git a/llvm/lib/Target/ARM/MVETailPredUtils.h b/llvm/lib/Target/ARM/MVETailPredUtils.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/ARM/MVETailPredUtils.h
@@ -0,0 +1,165 @@
+//===-- MVETailPredUtils.h - Tail predication utility functions -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utility functions for low overhead and tail predicated
+// loops, shared between the ARMLowOverheadLoops pass and anywhere else that
+// needs them.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
+#define LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+namespace llvm {
+
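+// Map a VCTP opcode to the equivalent tail-predicated loop start opcode:
+// DLSTP for do-loops, WLSTP for while-loops.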
+static inline unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("unhandled vctp opcode");
+    break;
+  case ARM::MVE_VCTP8:
+    return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
+  case ARM::MVE_VCTP16:
+    return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
+  case ARM::MVE_VCTP32:
+    return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
+  case ARM::MVE_VCTP64:
+    return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
+  }
+  return 0;
+}
+
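+// Return the number of vector elements processed per iteration for a given
+// VCTP opcode (128-bit vector width / element size).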
+static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("unhandled vctp opcode");
+  case ARM::MVE_VCTP8:
+    return 16;
+  case ARM::MVE_VCTP16:
+    return 8;
+  case ARM::MVE_VCTP32:
+    return 4;
+  case ARM::MVE_VCTP64:
+    return 2;
+  }
+  return 0;
+}
+
+static inline bool isVCTP(const MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case ARM::MVE_VCTP8:
+  case ARM::MVE_VCTP16:
+  case ARM::MVE_VCTP32:
+  case ARM::MVE_VCTP64:
+    return true;
+  }
+  return false;
+}
+
+static inline bool isLoopStart(MachineInstr &MI) {
+  return MI.getOpcode() == ARM::t2DoLoopStart ||
+         MI.getOpcode() == ARM::t2DoLoopStartTP ||
+         MI.getOpcode() == ARM::t2WhileLoopStart;
+}
+
+// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
+// beq that branches to the exit block.
+inline void RevertWhileLoopStart(MachineInstr *MI, const TargetInstrInfo *TII,
+                                 unsigned BrOpc = ARM::t2Bcc) {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // Cmp
+  MachineInstrBuilder MIB =
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
+  MIB.add(MI->getOperand(0));
+  MIB.addImm(0);
+  MIB.addImm(ARMCC::AL);
+  MIB.addReg(ARM::NoRegister);
+
+  // Branch
+  MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+  MIB.add(MI->getOperand(1)); // branch target
+  MIB.addImm(ARMCC::EQ);      // condition code
+  MIB.addReg(ARM::CPSR);
+
+  MI->eraseFromParent();
+}
+
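+// Revert a t2DoLoopStart to a tMOVr of the loop count into the result reg.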
+inline void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII) {
+  MachineBasicBlock *MBB = MI->getParent();
+  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
+      .add(MI->getOperand(0))
+      .add(MI->getOperand(1))
+      .add(predOps(ARMCC::AL));
+
+  MI->eraseFromParent();
+}
+
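+// Revert a t2LoopDec to an equivalent t2SUBri.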
+inline void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII,
+                          bool SetFlags = false) {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  MachineInstrBuilder MIB =
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
+  MIB.add(MI->getOperand(0));
+  MIB.add(MI->getOperand(1));
+  MIB.add(MI->getOperand(2));
+  MIB.addImm(ARMCC::AL);
+  MIB.addReg(0);
+
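+  // If the decrement should set the flags, make the optional CPSR operand a
+  // def so the sub becomes a subs.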
+  if (SetFlags) {
+    MIB.addReg(ARM::CPSR);
+    MIB->getOperand(5).setIsDef(true);
+  } else
+    MIB.addReg(0);
+
+  MI->eraseFromParent();
+}
+
+// Generate a subs, or sub and cmp, and a branch instead of an LE.
+inline void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII,
+                          unsigned BrOpc = ARM::t2Bcc, bool SkipCmp = false) {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // Create cmp
+  if (!SkipCmp) {
+    MachineInstrBuilder MIB =
+        BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
+    MIB.add(MI->getOperand(0));
+    MIB.addImm(0);
+    MIB.addImm(ARMCC::AL);
+    MIB.addReg(ARM::NoRegister);
+  }
+
+  // Create bne
+  MachineInstrBuilder MIB =
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+  MIB.add(MI->getOperand(1)); // branch target
+  MIB.addImm(ARMCC::NE);      // condition code
+  MIB.addReg(ARM::CPSR);
+  MI->eraseFromParent();
+}
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
diff --git a/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
--- a/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
+++ b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
@@ -18,6 +18,7 @@
 #include "ARM.h"
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
+#include "MVETailPredUtils.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -58,6 +59,7 @@
   }
 
 private:
+  bool RevertLoopWithCall(MachineLoop *ML);
   bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
   MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
                                             MachineInstr &Instr,
@@ -156,6 +158,31 @@
   return true;
 }
 
+bool MVEVPTOptimisations::RevertLoopWithCall(MachineLoop *ML) {
+  LLVM_DEBUG(dbgs() << "RevertLoopWithCall on loop "
+                    << ML->getHeader()->getName() << "\n");
+
+  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
+  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
+    return false;
+
+  // A call in the loop is illegal: it clobbers LR, which holds the loop count.
+  // If we find one, revert the loop now before we get any further.
+  for (MachineBasicBlock *MBB : ML->blocks()) {
+    for (MachineInstr &MI : *MBB) {
+      if (MI.isCall()) {
+        LLVM_DEBUG(dbgs() << "Found call in loop, reverting: " << MI);
+        RevertDoLoopStart(LoopStart, TII);
+        RevertLoopDec(LoopDec, TII);
+        RevertLoopEnd(LoopEnd, TII);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
 // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
 // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
 // instruction, making the backend ARMLowOverheadLoops pass's job of finding the
@@ -662,7 +689,7 @@
   const ARMSubtarget &STI =
       static_cast<const ARMSubtarget &>(Fn.getSubtarget());
 
-  if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
+  if (!STI.isThumb2() || !STI.hasLOB())
     return false;
 
   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
@@ -674,8 +701,10 @@
                     << "********** Function: " << Fn.getName() << '\n');
 
   bool Modified = false;
-  for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder())
+  for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
+    Modified |= RevertLoopWithCall(ML);
     Modified |= ConvertTailPredLoop(ML, DT);
+  }
 
   for (MachineBasicBlock &MBB : Fn) {
     Modified |= ReplaceVCMPsByVPNOTs(MBB);
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
@@ -272,7 +272,7 @@
   ; CHECK:   renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg
   ; CHECK:   early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg
   ; CHECK:   early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39)
-  ; CHECK:   dead $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
+  ; CHECK:   dead renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK:   renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r0 = tMOVr $r7, 14 /* CC::al */, $noreg
   ; CHECK:   $r4 = tMOVr $r5, 14 /* CC::al */, $noreg
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
@@ -258,7 +258,7 @@
   ; CHECK:   renamable $r4 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2)
   ; CHECK:   renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38)
-  ; CHECK:   dead $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
+  ; CHECK:   dead renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK:   renamable $r6, renamable $r11 = t2SMLAL killed renamable $r8, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r4 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3)
   ; CHECK:   $r8 = tMOVr $r5, 14 /* CC::al */, $noreg
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
@@ -296,7 +296,7 @@
   ; CHECK:   renamable $r4, dead $cpsr = tEOR killed renamable $r4, killed renamable $r5, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r11, 14 /* CC::al */, $noreg
   ; CHECK:   tSTRi killed renamable $r4, killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep1)
-  ; CHECK:   t2CMPri killed $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2CMPri killed renamable $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   tBcc %bb.4, 1 /* CC::ne */, killed $cpsr
   ; CHECK:   tB %bb.5, 14 /* CC::al */, $noreg
   ; CHECK: bb.5.bb13:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
@@ -8,7 +8,7 @@
 # CHECK:   tBcc %bb.4, 0
 # CHECK:   tB %bb.2
 # CHECK: bb.3.while.body:
-# CHECK:   t2CMPri $lr, 0, 14
+# CHECK:   t2CMPri renamable $lr, 0, 14
 # CHECK:   tBcc %bb.3, 1
 # CHECK:   tB %bb.4
 # CHECK: bb.4.while.end:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir
@@ -0,0 +1,145 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+lob -run-pass=arm-mve-vpt-opts --verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+  @d = local_unnamed_addr global i32 0, align 4
+  @c = local_unnamed_addr global [1 x i32] zeroinitializer, align 4
+
+  define i32 @e() optsize {
+  entry:
+    %.pr = load i32, i32* @d, align 4
+    %cmp13 = icmp sgt i32 %.pr, -1
+    br i1 %cmp13, label %for.cond1.preheader.preheader, label %for.end9
+
+  for.cond1.preheader.preheader:                    ; preds = %entry
+    %0 = add i32 %.pr, 1
+    %1 = call i32 @llvm.start.loop.iterations.i32(i32 %0)
+    br label %for.cond1.preheader
+
+  for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.cond1.preheader
+    %2 = phi i32 [ %1, %for.cond1.preheader.preheader ], [ %3, %for.cond1.preheader ]
+    call void @llvm.memset.p0i8.i32(i8* nonnull align 4 dereferenceable(24) bitcast ([1 x i32]* @c to i8*), i8 0, i32 24, i1 false)
+    %3 = call i32 @llvm.loop.decrement.reg.i32(i32 %2, i32 1)
+    %4 = icmp ne i32 %3, 0
+    br i1 %4, label %for.cond1.preheader, label %for.cond.for.end9_crit_edge
+
+  for.cond.for.end9_crit_edge:                      ; preds = %for.cond1.preheader
+    store i32 -1, i32* @d, align 4
+    br label %for.end9
+
+  for.end9:                                         ; preds = %for.cond.for.end9_crit_edge, %entry
+    ret i32 undef
+  }
+
+  declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)
+  declare i32 @llvm.start.loop.iterations.i32(i32)
+  declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
+
+...
+---
+name:            e
+alignment:       2
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gprnopc, preferred-register: '' }
+  - { id: 1, class: gpr, preferred-register: '' }
+  - { id: 2, class: gprlr, preferred-register: '' }
+  - { id: 3, class: gpr, preferred-register: '' }
+  - { id: 4, class: rgpr, preferred-register: '' }
+  - { id: 5, class: rgpr, preferred-register: '' }
+  - { id: 6, class: gprlr, preferred-register: '' }
+  - { id: 7, class: rgpr, preferred-register: '' }
+  - { id: 8, class: rgpr, preferred-register: '' }
+  - { id: 9, class: gprlr, preferred-register: '' }
+  - { id: 10, class: gprlr, preferred-register: '' }
+  - { id: 11, class: rgpr, preferred-register: '' }
+  - { id: 12, class: rgpr, preferred-register: '' }
+  - { id: 13, class: gpr, preferred-register: '' }
+liveins:         []
+body:             |
+  ; CHECK-LABEL: name: e
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x50000000), %bb.4(0x30000000)
+  ; CHECK:   [[t2MOVi32imm:%[0-9]+]]:rgpr = t2MOVi32imm @d
+  ; CHECK:   [[t2LDRi12_:%[0-9]+]]:gprnopc = t2LDRi12 [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d)
+  ; CHECK:   t2CMPri [[t2LDRi12_]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2Bcc %bb.4, 4 /* CC::mi */, $cpsr
+  ; CHECK:   t2B %bb.1, 14 /* CC::al */, $noreg
+  ; CHECK: bb.1.for.cond1.preheader.preheader:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[t2LDRi12_]], 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   [[tMOVr:%[0-9]+]]:gprlr = tMOVr killed [[t2ADDri]], 14 /* CC::al */, $noreg
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr = COPY [[tMOVr]]
+  ; CHECK:   [[t2MOVi32imm1:%[0-9]+]]:rgpr = t2MOVi32imm @c
+  ; CHECK:   [[t2MOVi:%[0-9]+]]:rgpr = t2MOVi 24, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK: bb.2.for.cond1.preheader:
+  ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:gprlr = PHI [[COPY]], %bb.1, %3, %bb.2
+  ; CHECK:   ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+  ; CHECK:   $r0 = COPY [[t2MOVi32imm1]]
+  ; CHECK:   $r1 = COPY [[t2MOVi]]
+  ; CHECK:   tBL 14 /* CC::al */, $noreg, &__aeabi_memclr4, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp
+  ; CHECK:   ADJCALLSTACKUP 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+  ; CHECK:   [[t2SUBri:%[0-9]+]]:gprlr = t2SUBri [[PHI]], 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr = COPY [[t2SUBri]]
+  ; CHECK:   t2CMPri [[t2SUBri]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2Bcc %bb.2, 1 /* CC::ne */, $cpsr
+  ; CHECK:   t2B %bb.3, 14 /* CC::al */, $noreg
+  ; CHECK: bb.3.for.cond.for.end9_crit_edge:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   [[t2MOVi1:%[0-9]+]]:rgpr = t2MOVi -1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   t2STRi12 killed [[t2MOVi1]], [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (store 4 into @d)
+  ; CHECK: bb.4.for.end9:
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF
+  ; CHECK:   $r0 = COPY [[DEF]]
+  ; CHECK:   tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.4(0x30000000)
+
+    %4:rgpr = t2MOVi32imm @d
+    %0:gprnopc = t2LDRi12 %4, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d)
+    t2CMPri %0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    t2Bcc %bb.4, 4 /* CC::mi */, $cpsr
+    t2B %bb.1, 14 /* CC::al */, $noreg
+
+  bb.1.for.cond1.preheader.preheader:
+    successors: %bb.2(0x80000000)
+
+    %5:rgpr = t2ADDri %0, 1, 14 /* CC::al */, $noreg, $noreg
+    %6:gprlr = t2DoLoopStart killed %5
+    %1:gpr = COPY %6
+    %7:rgpr = t2MOVi32imm @c
+    %8:rgpr = t2MOVi 24, 14 /* CC::al */, $noreg, $noreg
+
+  bb.2.for.cond1.preheader:
+    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+
+    %2:gprlr = PHI %1, %bb.1, %3, %bb.2
+    ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+    $r0 = COPY %7
+    $r1 = COPY %8
+    tBL 14 /* CC::al */, $noreg, &__aeabi_memclr4, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+    %9:gprlr = t2LoopDec %2, 1
+    %3:gpr = COPY %9
+    t2LoopEnd %9, %bb.2, implicit-def dead $cpsr
+    t2B %bb.3, 14 /* CC::al */, $noreg
+
+  bb.3.for.cond.for.end9_crit_edge:
+    successors: %bb.4(0x80000000)
+
+    %12:rgpr = t2MOVi -1, 14 /* CC::al */, $noreg, $noreg
+    t2STRi12 killed %12, %4, 0, 14 /* CC::al */, $noreg :: (store 4 into @d)
+
+  bb.4.for.end9:
+    %13:gpr = IMPLICIT_DEF
+    $r0 = COPY %13
+    tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+
+...
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir
@@ -113,7 +113,7 @@
   ; CHECK:   renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2 /* CC::hs */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
   ; CHECK:   early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep4)
   ; CHECK:   renamable $lr = tMOVr killed $lr, 14 /* CC::al */, $noreg
-  ; CHECK:   t2CMPri $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2CMPri renamable $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   tBcc %bb.1, 1 /* CC::ne */, killed $cpsr
   ; CHECK:   tB %bb.2, 14 /* CC::al */, $noreg
   ; CHECK: bb.2.while.end: