diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -678,14 +678,14 @@
   assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
          "Expected promotable zero stores.");
 
-  MachineBasicBlock::iterator NextI = I;
-  ++NextI;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
   // If NextI is the second of the two instructions to be merged, we need
   // to skip one further. Either way we merge will invalidate the iterator,
   // and we don't need to scan the new instruction, as it's a pairwise
   // instruction, which we're not considering for further action anyway.
   if (NextI == MergeMI)
-    ++NextI;
+    NextI = next_nodbg(NextI, E);
 
   unsigned Opc = I->getOpcode();
   bool IsScaled = !TII->isUnscaledLdSt(Opc);
@@ -750,7 +750,7 @@
   auto MBB = MI.getParent();
   for (MachineBasicBlock::reverse_iterator I = MI.getReverseIterator(),
                                            E = MBB->rend();
-       I != E; I++) {
+       I != E; I = next_nodbg(I, E)) {
     if (!Limit)
       return false;
     --Limit;
@@ -783,14 +783,14 @@
 AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                                       MachineBasicBlock::iterator Paired,
                                       const LdStPairFlags &Flags) {
-  MachineBasicBlock::iterator NextI = I;
-  ++NextI;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
   // If NextI is the second of the two instructions to be merged, we need
   // to skip one further. Either way we merge will invalidate the iterator,
   // and we don't need to scan the new instruction, as it's a pairwise
   // instruction, which we're not considering for further action anyway.
   if (NextI == Paired)
-    ++NextI;
+    NextI = next_nodbg(NextI, E);
 
   int SExtIdx = Flags.getSExtIdx();
   unsigned Opc =
@@ -1009,8 +1009,8 @@
 MachineBasicBlock::iterator
 AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
                                           MachineBasicBlock::iterator StoreI) {
-  MachineBasicBlock::iterator NextI = LoadI;
-  ++NextI;
+  MachineBasicBlock::iterator E = LoadI->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(LoadI, E);
 
   int LoadSize = TII->getMemScale(*LoadI);
   int StoreSize = TII->getMemScale(*StoreI);
@@ -1188,7 +1188,7 @@
 
   unsigned Count = 0;
   do {
-    --MBBI;
+    MBBI = prev_nodbg(MBBI, B);
     MachineInstr &MI = *MBBI;
 
     // Don't count transient instructions towards the search limit since there
@@ -1440,7 +1440,7 @@
   MachineBasicBlock::iterator MBBI = I;
   MachineBasicBlock::iterator MBBIWithRenameReg;
   MachineInstr &FirstMI = *I;
-  ++MBBI;
+  MBBI = next_nodbg(MBBI, E);
 
   bool MayLoad = FirstMI.mayLoad();
   bool IsUnscaled = TII->isUnscaledLdSt(FirstMI);
@@ -1468,7 +1468,8 @@
   // Remember any instructions that read/write memory between FirstMI and MI.
   SmallVector<MachineInstr *, 4> MemInsns;
 
-  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+  for (unsigned Count = 0; MBBI != E && Count < Limit;
+       MBBI = next_nodbg(MBBI, E)) {
     MachineInstr &MI = *MBBI;
 
     UsedInBetween.accumulate(MI);
@@ -1637,12 +1638,13 @@
   assert((Update->getOpcode() == AArch64::ADDXri ||
           Update->getOpcode() == AArch64::SUBXri) &&
          "Unexpected base register update instruction to merge!");
-  MachineBasicBlock::iterator NextI = I;
+  MachineBasicBlock::iterator E = I->getParent()->end();
+  MachineBasicBlock::iterator NextI = next_nodbg(I, E);
   // Return the instruction following the merged instruction, which is
   // the instruction following our unmerged load. Unless that's the add/sub
   // instruction we're merging, in which case it's the one after that.
-  if (++NextI == Update)
-    ++NextI;
+  if (NextI == Update)
+    NextI = next_nodbg(NextI, E);
 
   int Value = Update->getOperand(2).getImm();
   assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
@@ -1780,7 +1782,7 @@
   // insn (inclusive) and the second insn.
   ModifiedRegUnits.clear();
   UsedRegUnits.clear();
-  ++MBBI;
+  MBBI = next_nodbg(MBBI, E);
 
   // We can't post-increment the stack pointer if any instruction between
   // the memory access (I) and the increment (MBBI) can access the memory
@@ -1796,7 +1798,8 @@
         return E;
     }
 
-  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
+  for (unsigned Count = 0; MBBI != E && Count < Limit;
+       MBBI = next_nodbg(MBBI, E)) {
     MachineInstr &MI = *MBBI;
 
     // Don't count transient instructions towards the search limit since there
@@ -1854,7 +1857,7 @@
   UsedRegUnits.clear();
   unsigned Count = 0;
   do {
-    --MBBI;
+    MBBI = prev_nodbg(MBBI, B);
     MachineInstr &MI = *MBBI;
 
     // Don't count transient instructions towards the search limit since there
@@ -1932,6 +1935,7 @@
 // instruction.
 bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
+  MachineBasicBlock::iterator B = MI.getParent()->begin();
   MachineBasicBlock::iterator E = MI.getParent()->end();
 
   if (!TII->isCandidateToMergeOrPair(MI))
@@ -1959,11 +1963,11 @@
       ++NumUnscaledPairCreated;
     // Keeping the iterator straight is a pain, so we let the merge routine tell
     // us what the next instruction is after it's done mucking about.
-    auto Prev = std::prev(MBBI);
+    auto Prev = prev_nodbg(MBBI, B);
     MBBI = mergePairedInsns(MBBI, Paired, Flags);
     // Collect liveness info for instructions between Prev and the new position
     // MBBI.
-    for (auto I = std::next(Prev); I != MBBI; I++)
+    for (auto I = next_nodbg(Prev, E); I != MBBI; I++)
       updateDefinedRegisters(*I, DefinedInBB, TRI);
 
     return true;
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -224,7 +224,8 @@
   // Strip out the module-level Debug Info Version metadata.
   // FIXME: There must be an easier way to remove an operand from a NamedMDNode.
   NamedMDNode *NMD = M.getModuleFlagsMetadata();
-  assert(NMD && "debugify metadata present without Debug Info Version set?");
+  if (!NMD)
+    return Changed;
   SmallVector<MDNode *, 4> Flags;
   for (MDNode *Flag : NMD->operands())
     Flags.push_back(Flag);
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir b/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
--- a/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
+++ b/llvm/test/CodeGen/AArch64/ldst-opt-mte.mir
@@ -1,4 +1,11 @@
-# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
+# Strip out debug info, then run ldst-opt with limit=1.
+# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass mir-strip-debug,aarch64-ldst-opt -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
+#
+# Run ldst-opt with limit=1, then strip out debug info.
+# RUN: llc -aarch64-load-store-scan-limit=1 -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt,mir-strip-debug -mir-strip-debugify-only=0 -verify-machineinstrs -o - %s | FileCheck %s
+#
+# Run ldst-opt with limit=1 under mir-debugify.
+# RUN: llc -aarch64-load-store-scan-limit=1 -debugify-and-strip-all-safe -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
 ---
 ### STG and its offset limits
 
@@ -11,7 +18,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -23,7 +34,11 @@
     liveins: $x0, $x1
 
     STGOffset $x1, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -36,7 +51,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 8, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -48,7 +67,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 4096, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -61,7 +84,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 4112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -73,7 +100,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 4080, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -86,7 +117,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 4096, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -100,7 +135,11 @@
     liveins: $x0
 
     STZGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -112,7 +151,11 @@
     liveins: $x0
 
     ST2GOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -124,7 +167,11 @@
     liveins: $x0
 
     STZ2GOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -138,7 +185,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -150,7 +201,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 1024, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -163,7 +218,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = SUBXri $x0, 1040, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -175,7 +234,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 1008, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -188,7 +251,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 1024, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -202,7 +269,11 @@
     liveins: $x0
 
     STGOffset $x0, $x0, 10
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 160, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -214,7 +285,11 @@
     liveins: $x0, $x1, $x2
 
     STGPi $x1, $x2, $x0, 10
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 160, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -228,7 +303,11 @@
     liveins: $x0
 
     $x0 = ADDXri $x0, 32, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     STGOffset $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -240,7 +319,11 @@
     liveins: $x0, $x1, $x2
 
     $x0 = SUBXri $x0, 48, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     STGPi $x1, $x2, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -254,7 +337,11 @@
     liveins: $x0
 
     STGPi $x0, $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -266,7 +353,11 @@
     liveins: $x0
 
    STGPi $x0, $x0, $x0, 7
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     $x0 = ADDXri $x0, 112, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 
 ...
@@ -280,6 +371,10 @@
     liveins: $x0
 
     $x0 = SUBXri $x0, 48, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     STGPi $x0, $x0, $x0, 0
+    DBG_VALUE $x0, 0
+    DBG_VALUE $x0, 0
     RET_ReallyLR implicit $x0
 ...
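
Note on the pattern (illustration only, not part of the patch): the functional change above is that the load/store optimizer now advances its iterators with next_nodbg/prev_nodbg instead of raw ++/--, so DBG_VALUE instructions are never inspected and do not count against the scan limit, and the new RUN lines re-run the pass with -aarch64-load-store-scan-limit=1 with debug info stripped before the pass, stripped after it, or injected and stripped by debugify, all checked against the same FileCheck output. The self-contained C++ sketch below only models that skip-debug stepping idea; Inst, nextNoDebug, and findWithinLimit are hypothetical stand-ins, not LLVM's MachineInstr API or the real next_nodbg from MachineBasicBlock.h.

// Standalone sketch (not LLVM code): advance a scan with a step that skips
// "debug" records, so interleaved debug records neither consume the scan
// limit nor change which instruction the scan finds.
#include <cassert>
#include <vector>

struct Inst {
  int Opcode;
  bool IsDebug; // stand-in for MachineInstr::isDebugInstr()
};

using Iter = std::vector<Inst>::const_iterator;

// Step forward once, then keep stepping while the iterator points at a debug
// record (rough analogue of next_nodbg(It, End)).
static Iter nextNoDebug(Iter It, Iter End) {
  ++It;
  while (It != End && It->IsDebug)
    ++It;
  return It;
}

// Find the first instruction with the given opcode within Limit non-debug
// steps, or return End if the limit runs out first.
static Iter findWithinLimit(Iter It, Iter End, int Opcode, unsigned Limit) {
  for (; It != End && Limit > 0; It = nextNoDebug(It, End), --Limit)
    if (!It->IsDebug && It->Opcode == Opcode)
      return It;
  return End;
}

int main() {
  // The same instruction stream with and without interleaved debug records
  // (opcode 9 with IsDebug set plays the role of DBG_VALUE).
  std::vector<Inst> Plain = {{1, false}, {2, false}, {3, false}};
  std::vector<Inst> WithDbg = {{1, false}, {9, true}, {9, true},
                               {2, false}, {9, true}, {3, false}};
  // With a scan limit of 2, both variants still find opcode 2; counting the
  // debug records against the limit would make the second scan give up early.
  assert(findWithinLimit(Plain.begin(), Plain.end(), 2, 2) != Plain.end());
  assert(findWithinLimit(WithDbg.begin(), WithDbg.end(), 2, 2) != WithDbg.end());
  return 0;
}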