diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -678,14 +678,14 @@
   assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
          "Expected promotable zero stores.");
 
-  MachineBasicBlock::iterator NextI = I;
-  ++NextI;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
   // If NextI is the second of the two instructions to be merged, we need
   // to skip one further. Either way we merge will invalidate the iterator,
   // and we don't need to scan the new instruction, as it's a pairwise
   // instruction, which we're not considering for further action anyway.
   if (NextI == MergeMI)
-    ++NextI;
+    NextI = next_nodbg(NextI, E);
 
   unsigned Opc = I->getOpcode();
   bool IsScaled = !TII->isUnscaledLdSt(Opc);
@@ -748,18 +748,17 @@
                               const TargetRegisterInfo *TRI, unsigned Limit,
                               std::function<bool(MachineInstr &, bool)> &Fn) {
   auto MBB = MI.getParent();
-  for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(),
-                                           E = MBB->rend();
-       I != E; I++) {
+  for (MachineInstr &I :
+       instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
     if (!Limit)
       return false;
     --Limit;
 
-    bool isDef = any_of(I->operands(), [DefReg, TRI](MachineOperand &MOP) {
+    bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
       return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
              TRI->regsOverlap(MOP.getReg(), DefReg);
     });
-    if (!Fn(*I, isDef))
+    if (!Fn(I, isDef))
       return false;
     if (isDef)
       break;
@@ -783,14 +782,14 @@
 AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                       MachineBasicBlock::iterator Paired,
                                       const LdStPairFlags &Flags) {
-  MachineBasicBlock::iterator NextI = I;
-  ++NextI;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
   // If NextI is the second of the two instructions to be merged, we need
   // to skip one further. Either way we merge will invalidate the iterator,
   // and we don't need to scan the new instruction, as it's a pairwise
   // instruction, which we're not considering for further action anyway.
   if (NextI == Paired)
-    ++NextI;
+    NextI = next_nodbg(NextI, E);
 
   int SExtIdx = Flags.getSExtIdx();
   unsigned Opc =
@@ -1009,8 +1008,8 @@
 MachineBasicBlock::iterator
 AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                           MachineBasicBlock::iterator StoreI) {
-  MachineBasicBlock::iterator NextI = LoadI;
-  ++NextI;
+  MachineBasicBlock::iterator NextI =
+      next_nodbg(LoadI, LoadI->getParent()->end());
 
   int LoadSize = TII->getMemScale(*LoadI);
   int StoreSize = TII->getMemScale(*StoreI);
@@ -1188,7 +1187,7 @@
 
   unsigned Count = 0;
   do {
-    --MBBI;
+    MBBI = prev_nodbg(MBBI, B);
     MachineInstr &MI = *MBBI;
 
     // Don't count transient instructions towards the search limit since there
@@ -1440,7 +1439,7 @@
   MachineBasicBlock::iterator MBBI = I;
   MachineBasicBlock::iterator MBBIWithRenameReg;
   MachineInstr &FirstMI = *I;
-  ++MBBI;
+  MBBI = next_nodbg(MBBI, E);
 
   bool MayLoad = FirstMI.mayLoad();
   bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
@@ -1468,7 +1467,8 @@
   // Remember any instructions that read/write memory between FirstMI and MI.
   SmallVector<MachineInstr *, 4> MemInsns;
 
-  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+  for (unsigned Count = 0; MBBI != E && Count < Limit;
+       MBBI = next_nodbg(MBBI, E)) {
     MachineInstr &MI = *MBBI;
 
     UsedInBetween.accumulate(MI);
@@ -1637,12 +1637,13 @@
   assert((Update->getOpcode() == AArch64::ADDXri ||
           Update->getOpcode() == AArch64::SUBXri) &&
          "Unexpected base register update instruction to merge!");
-  MachineBasicBlock::iterator NextI = I;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
   // Return the instruction following the merged instruction, which is
   // the instruction following our unmerged load. Unless that's the add/sub
   // instruction we're merging, in which case it's the one after that.
-  if (++NextI == Update)
-    ++NextI;
+  if (NextI == Update)
+    NextI = next_nodbg(NextI, E);
 
   int Value = Update->getOperand(2).getImm();
   assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
@@ -1780,7 +1781,7 @@
   // insn (inclusive) and the second insn.
   ModifiedRegUnits.clear();
   UsedRegUnits.clear();
-  ++MBBI;
+  MBBI = next_nodbg(MBBI, E);
 
   // We can't post-increment the stack pointer if any instruction between
   // the memory access (I) and the increment (MBBI) can access the memory
@@ -1796,7 +1797,8 @@
     return E;
   }
 
-  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+  for (unsigned Count = 0; MBBI != E && Count < Limit;
+       MBBI = next_nodbg(MBBI, E)) {
     MachineInstr &MI = *MBBI;
 
     // Don't count transient instructions towards the search limit since there
@@ -1854,7 +1856,7 @@
   UsedRegUnits.clear();
   unsigned Count = 0;
   do {
-    --MBBI;
+    MBBI = prev_nodbg(MBBI, B);
     MachineInstr &MI = *MBBI;
 
     // Don't count transient instructions towards the search limit since there
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir b/llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
copy from llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
copy to llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
--- a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
+++ b/llvm/test/CodeGen/AArch64/ldst-opt-mte-with-dbg.mir
@@ -1,4 +1,8 @@
-# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
+# Strip out debug info, then run ldst-opt with limit=1.
+# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass mir-strip-debug,aarch64-ldst-opt -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
+#
+# Run ldst-opt with limit=1, then strip out debug info.
+# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt,mir-strip-debug -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
 ---
 ### STG and its offset limits
 
@@ -11,7 +15,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -23,7 +31,11 @@
     liveins: $x0, $x1
 
     STGOffset $x1, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -36,7 +48,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 8, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -48,7 +64,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 4096, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -61,7 +81,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 4112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -73,7 +97,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 4080, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -86,7 +114,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 4096, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -100,7 +132,11 @@
     liveins: $x0
 
     STZGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -112,7 +148,11 @@
     liveins: $x0
 
     ST2GOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -124,7 +164,11 @@
     liveins: $x0
 
     STZ2GOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -138,7 +182,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -150,7 +198,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 1024, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -163,7 +215,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 1040, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -175,7 +231,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 1008, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -188,7 +248,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 1024, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -202,7 +266,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 10
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 160, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -214,7 +282,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 10
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 160, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -228,7 +300,11 @@
     liveins: $x0
 
     $x0 = ADDXri $x0, 32, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -240,7 +316,11 @@
     liveins: $x0, $x1, $x2
 
     $x0 = SUBXri $x0, 48, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -254,7 +334,11 @@
     liveins: $x0
 
     STGPi $x0, $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -266,7 +350,11 @@
     liveins: $x0
 
     STGPi $x0, $x0, $x0, 7
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
 
@@ -280,6 +368,10 @@
     liveins: $x0
 
     $x0 = SUBXri $x0, 48, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     STGPi $x0, $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir b/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
--- a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
+++ b/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
 ---
 ### STG and its offset limits
 