diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -302,114 +302,56 @@
   return HashMachineInstr(*I);
 }
 
-///  Whether MI should be counted as an instruction when calculating common tail.
+/// Whether MI counts towards the common tail length (not debug/CFI pseudos).
 static bool countsAsInstruction(const MachineInstr &MI) {
   return !(MI.isDebugInstr() || MI.isCFIInstruction());
 }
 
-/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
-/// of instructions they actually have in common together at their end.  Return
-/// iterators for the first shared instruction in each block.
+/// Step backwards from \p I (exclusive) towards the beginning of \p MBB and
+/// return an iterator to the first MI for which 'countsAsInstruction' holds.
+/// If the block start is reached without finding one, return MBB->end().
+static MachineBasicBlock::iterator
+skipBackwardPastNonInstructions(MachineBasicBlock::iterator I,
+                                MachineBasicBlock *MBB) {
+  while (I != MBB->begin()) {
+    --I;
+    if (countsAsInstruction(*I))
+      return I;
+  }
+  return MBB->end();
+}
+
+/// Given two machine basic blocks, return the number of instructions they
+/// actually have in common together at their end. If a common tail of at least
+/// one instruction is found, \p I1 and \p I2 are set to the first shared
+/// instruction in each block; otherwise they are left unmodified.
+///
+/// Non-instructions according to countsAsInstruction are ignored.
 static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
                                         MachineBasicBlock *MBB2,
                                         MachineBasicBlock::iterator &I1,
                                         MachineBasicBlock::iterator &I2) {
-  I1 = MBB1->end();
-  I2 = MBB2->end();
+  MachineBasicBlock::iterator MBBI1 = MBB1->end();
+  MachineBasicBlock::iterator MBBI2 = MBB2->end();
 
   unsigned TailLen = 0;
-  while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
-    --I1; --I2;
-    // Skip debugging pseudos; necessary to avoid changing the code.
-    while (!countsAsInstruction(*I1)) {
-      if (I1==MBB1->begin()) {
-        while (!countsAsInstruction(*I2)) {
-          if (I2==MBB2->begin()) {
-            // I1==DBG at begin; I2==DBG at begin
-            goto SkipTopCFIAndReturn;
-          }
-          --I2;
-        }
-        ++I2;
-        // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
-        goto SkipTopCFIAndReturn;
-      }
-      --I1;
-    }
-    // I1==first (untested) non-DBG preceding known match
-    while (!countsAsInstruction(*I2)) {
-      if (I2==MBB2->begin()) {
-        ++I1;
-        // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
-        goto SkipTopCFIAndReturn;
-      }
-      --I2;
-    }
-    // I1, I2==first (untested) non-DBGs preceding known match
-    if (!I1->isIdenticalTo(*I2) ||
+  while (true) {
+    MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1);
+    MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2);
+    if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end())
+      break;
+    if (!MBBI1->isIdenticalTo(*MBBI2) ||
         // FIXME: This check is dubious. It's used to get around a problem where
         // people incorrectly expect inline asm directives to remain in the same
         // relative order. This is untenable because normal compiler
         // optimizations (like this one) may reorder and/or merge these
         // directives.
-        I1->isInlineAsm()) {
-      ++I1; ++I2;
+        MBBI1->isInlineAsm()) {
       break;
     }
     ++TailLen;
-  }
-  // Back past possible debugging pseudos at beginning of block.  This matters
-  // when one block differs from the other only by whether debugging pseudos
-  // are present at the beginning. (This way, the various checks later for
-  // I1==MBB1->begin() work as expected.)
-  if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
-    --I2;
-    while (I2->isDebugInstr()) {
-      if (I2 == MBB2->begin())
-        return TailLen;
-      --I2;
-    }
-    ++I2;
-  }
-  if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
-    --I1;
-    while (I1->isDebugInstr()) {
-      if (I1 == MBB1->begin())
-        return TailLen;
-      --I1;
-    }
-    ++I1;
-  }
-
-SkipTopCFIAndReturn:
-  // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if
-  // I1 and I2 are non-identical when compared and then one or both of them ends
-  // up pointing to a CFI instruction after being incremented. For example:
-  /*
-    BB1:
-    ...
-    INSTRUCTION_A
-    ADD32ri8  <- last common instruction
-    ...
-    BB2:
-    ...
-    INSTRUCTION_B
-    CFI_INSTRUCTION
-    ADD32ri8  <- last common instruction
-    ...
-  */
-  // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after
-  // incrementing the iterators, I1 will point to ADD, however I2 will point to
-  // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the
-  // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI
-  // instruction.
-  // Skip CFI_INSTRUCTION and debugging instruction; necessary to avoid changing the code.
-  while (I1 != MBB1->end() && !countsAsInstruction(*I1)) {
-    ++I1;
-  }
-
-  while (I2 != MBB2->end() && !countsAsInstruction(*I2)) {
-    ++I2;
+    I1 = MBBI1;
+    I2 = MBBI2;
   }
 
   return TailLen;
@@ -661,6 +603,17 @@
                     << " and " << printMBBReference(*MBB2) << " is "
                     << CommonTailLen << '\n');
 
+  // If only debug instructions precede the common tail, move the iterator to
+  // the beginning of the MBB so that the tail counts as the full block. This
+  // avoids splitting a block differently with and without -g.
+  if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end()) == I1)
+    I1 = MBB1->begin();
+  if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end()) == I2)
+    I2 = MBB2->begin();
+
+  bool FullBlockTail1 = I1 == MBB1->begin();
+  bool FullBlockTail2 = I2 == MBB2->begin();
+
   // It's almost always profitable to merge any number of non-terminator
   // instructions with the block that falls through into the common successor.
   // This is true only for a single successor. For multiple successors, we are
@@ -679,7 +632,7 @@
   // are unlikely to become a fallthrough target after machine block placement.
   // Tail merging these blocks is unlikely to create additional unconditional
   // branches, and will reduce the size of this cold code.
-  if (I1 == MBB1->begin() && I2 == MBB2->begin() &&
+  if (FullBlockTail1 && FullBlockTail2 &&
       blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
     return true;
 
@@ -687,16 +640,16 @@
   // a position where the other could fall through into it, merge any number
   // of instructions, because it can be done without a branch.
   // TODO: If the blocks are not adjacent, move one of them so that they are?
-  if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+  if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2)
     return true;
-  if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+  if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1)
     return true;
 
   // If both blocks are identical and end in a branch, merge them unless they
   // both have a fallthrough predecessor and successor.
   // We can only do this after block placement because it depends on whether
   // there are fallthroughs, and we don't know until after layout.
-  if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) {
+  if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {
     auto BothFallThrough = [](MachineBasicBlock *MBB) {
       if (MBB->succ_size() != 0 && !MBB->canFallThrough())
         return false;
@@ -730,7 +683,7 @@
   // instructions that would be deleted in the merge.
   MachineFunction *MF = MBB1->getParent();
   return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
-         (I1 == MBB1->begin() || I2 == MBB2->begin());
+         (FullBlockTail1 || FullBlockTail2);
 }
 
 unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
diff --git a/llvm/test/CodeGen/X86/branchfolding-debug-invariant.mir b/llvm/test/CodeGen/X86/branchfolding-debug-invariant.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/branchfolding-debug-invariant.mir
@@ -0,0 +1,135 @@
+# RUN: llc -mtriple=x86_64-- -run-pass branch-folder -O3 -o - %s | FileCheck %s
+
+---
+name:            test1a
+body:             |
+  ; CHECK-LABEL: name: test1a
+  ; CHECK: bb.0:
+  ; CHECK:   TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+  ; CHECK:   JCC_1 %bb.2, 5, implicit $eflags
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+  ; CHECK-NOT: RET
+  ; CHECK: bb.2:
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  bb.0:
+    TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+    JCC_1 %bb.2, 5, implicit killed $eflags
+
+  bb.1:
+    MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+
+  bb.2:
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+...
+
+---
+name:            test1b
+body:             |
+
+  ; Verify that we get the same rewrites as in test1a when adding some
+  ; DBG_VALUE instructions in the mix.
+  ;
+  ; CHECK-LABEL: name: test1b
+  ; CHECK: bb.0:
+  ; CHECK:   TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+  ; CHECK:   JCC_1 %bb.2, 5, implicit $eflags
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+  ; CHECK-NOT: RET
+  ; CHECK: bb.2:
+  ; CHECK:   DBG_VALUE
+  ; CHECK:   DBG_VALUE
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  bb.0:
+    TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+    JCC_1 %bb.2, 5, implicit killed $eflags
+
+  bb.1:
+    MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+
+  bb.2:
+    DBG_VALUE
+    DBG_VALUE
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+...
+
+---
+name:            test2a
+body:             |
+  ; The CFI instruction currently prevents the rewrite here (although
+  ; technically branch folding could let bb.1 fall through into bb.2).
+  ;
+  ; CHECK-LABEL: name: test2a
+  ; CHECK: bb.0:
+  ; CHECK:   TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+  ; CHECK:   JCC_1 %bb.2, 5, implicit killed $eflags
+  ; CHECK: bb.1:
+  ; CHECK:   MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  ; CHECK: bb.2:
+  ; CHECK:   CFI_INSTRUCTION def_cfa_offset 8
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  bb.0:
+    TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+    JCC_1 %bb.2, 5, implicit killed $eflags
+
+  bb.1:
+    MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+
+  bb.2:
+    CFI_INSTRUCTION def_cfa_offset 8
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+...
+
+---
+name:            test2b
+body:             |
+  ; Verify that we get the same result as in test2a (i.e. no rewrite) when
+  ; adding some DBG_VALUE instructions in the mix.
+  ;
+  ; CHECK-LABEL: name: test2b
+  ; CHECK: bb.0:
+  ; CHECK:   TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+  ; CHECK:   JCC_1 %bb.2, 5, implicit killed $eflags
+  ; CHECK: bb.1:
+  ; CHECK:   MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  ; CHECK: bb.2:
+  ; CHECK:   DBG_VALUE
+  ; CHECK:   CFI_INSTRUCTION def_cfa_offset 8
+  ; CHECK:   DBG_VALUE
+  ; CHECK:   MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+  ; CHECK:   RET 0
+  bb.0:
+    TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+    JCC_1 %bb.2, 5, implicit killed $eflags
+
+  bb.1:
+    MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+
+  bb.2:
+    DBG_VALUE
+    CFI_INSTRUCTION def_cfa_offset 8
+    DBG_VALUE
+    MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+    RET 0
+...