diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -301,114 +301,58 @@
   return HashMachineInstr(*I);
 }
 
-///  Whether MI should be counted as an instruction when calculating common tail.
+/// Whether MI should be counted as an instruction when calculating common tail.
 static bool countsAsInstruction(const MachineInstr &MI) {
   return !(MI.isDebugInstr() || MI.isCFIInstruction());
 }
 
-/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
-/// of instructions they actually have in common together at their end.  Return
-/// iterators for the first shared instruction in each block.
+/// Iterate backwards from the given iterator \p I, towards the beginning of the
+/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator
+/// pointing to that MI. If no such MI is found, return the end iterator.
+static MachineBasicBlock::iterator
+skipBackwardPastNonInstructions(MachineBasicBlock::iterator I,
+                                MachineBasicBlock *MBB) {
+  while (I != MBB->begin()) {
+    --I;
+    if (countsAsInstruction(*I))
+      return I;
+  }
+  return MBB->end();
+}
+
+/// Given two machine basic blocks, return the number of instructions they
+/// actually have in common together at their end. If a common tail is found (at
+/// least by one instruction), then iterators for the first shared instruction
+/// in each block are returned as well.
+///
+/// Non-instructions according to countsAsInstruction are ignored.
 static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
                                         MachineBasicBlock *MBB2,
                                         MachineBasicBlock::iterator &I1,
                                         MachineBasicBlock::iterator &I2) {
-  I1 = MBB1->end();
-  I2 = MBB2->end();
+  MachineBasicBlock::iterator MBBI1 = MBB1->end();
+  MachineBasicBlock::iterator MBBI2 = MBB2->end();
 
   unsigned TailLen = 0;
-  while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
-    --I1; --I2;
-    // Skip debugging pseudos; necessary to avoid changing the code.
-    while (!countsAsInstruction(*I1)) {
-      if (I1==MBB1->begin()) {
-        while (!countsAsInstruction(*I2)) {
-          if (I2==MBB2->begin()) {
-            // I1==DBG at begin; I2==DBG at begin
-            goto SkipTopCFIAndReturn;
-          }
-          --I2;
-        }
-        ++I2;
-        // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
-        goto SkipTopCFIAndReturn;
-      }
-      --I1;
-    }
-    // I1==first (untested) non-DBG preceding known match
-    while (!countsAsInstruction(*I2)) {
-      if (I2==MBB2->begin()) {
-        ++I1;
-        // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
-        goto SkipTopCFIAndReturn;
-      }
-      --I2;
-    }
-    // I1, I2==first (untested) non-DBGs preceding known match
-    if (!I1->isIdenticalTo(*I2) ||
+  for (;;) {
+    MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1);
+    MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2);
+    if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end())
+      break;
+    if (!MBBI1->isIdenticalTo(*MBBI2) ||
         // FIXME: This check is dubious. It's used to get around a problem where
         // people incorrectly expect inline asm directives to remain in the same
         // relative order. This is untenable because normal compiler
         // optimizations (like this one) may reorder and/or merge these
         // directives.
-        I1->isInlineAsm()) {
-      ++I1; ++I2;
+        // FIXME: If inline asm check really is needed, then why don't we also
+        // check MBBI2?
+        MBBI1->isInlineAsm()) {
       break;
     }
     ++TailLen;
-  }
-  // Back past possible debugging pseudos at beginning of block.  This matters
-  // when one block differs from the other only by whether debugging pseudos
-  // are present at the beginning. (This way, the various checks later for
-  // I1==MBB1->begin() work as expected.)
-  if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
-    --I2;
-    while (I2->isDebugInstr()) {
-      if (I2 == MBB2->begin())
-        return TailLen;
-      --I2;
-    }
-    ++I2;
-  }
-  if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
-    --I1;
-    while (I1->isDebugInstr()) {
-      if (I1 == MBB1->begin())
-        return TailLen;
-      --I1;
-    }
-    ++I1;
-  }
-
-SkipTopCFIAndReturn:
-  // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if
-  // I1 and I2 are non-identical when compared and then one or both of them ends
-  // up pointing to a CFI instruction after being incremented. For example:
-  /*
-    BB1:
-    ...
-    INSTRUCTION_A
-    ADD32ri8  <- last common instruction
-    ...
-    BB2:
-    ...
-    INSTRUCTION_B
-    CFI_INSTRUCTION
-    ADD32ri8  <- last common instruction
-    ...
-  */
-  // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after
-  // incrementing the iterators, I1 will point to ADD, however I2 will point to
-  // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the
-  // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI
-  // instruction.
-  // Skip CFI_INSTRUCTION and debugging instruction; necessary to avoid changing the code.
-  while (I1 != MBB1->end() && !countsAsInstruction(*I1)) {
-    ++I1;
-  }
-
-  while (I2 != MBB2->end() && !countsAsInstruction(*I2)) {
-    ++I2;
+    I1 = MBBI1;
+    I2 = MBBI2;
   }
 
   return TailLen;
@@ -660,6 +604,24 @@
                     << " and " << printMBBReference(*MBB2) << " is "
                     << CommonTailLen << '\n');
 
+  // Make sure that any non-instruction instructions (debuginfo/cfi directives)
+  // at the beginning of the MBB do not impact if we answer true/false here. I1
+  // and I2 always points at the first real instruction after
+  // ComputeCommonTailLength.
+  //
+  // Exactly what happens when we say that it is profitable to merge and there
+  // are non-instructions present in the code is not really considered in this
+  // function. Goal here is to answer the question if it is profitable or not to
+  // merge tails. We might for example end up splitting the block just above
+  // I1/I2 due to CFI directives being present at the beginning of a MBB,
+  // creating a block with only debuginfo and CFI directives. As long as
+  // subsequent optimizations handles such "empty" blocks the codegen should be
+  // invariant.
+  bool OnlyNonInstrBeforeI1 =
+    skipBackwardPastNonInstructions(I1, MBB1) == MBB1->end();
+  bool OnlyNonInstrBeforeI2 =
+    skipBackwardPastNonInstructions(I2, MBB2) == MBB2->end();
+
   // It's almost always profitable to merge any number of non-terminator
   // instructions with the block that falls through into the common successor.
   // This is true only for a single successor. For multiple successors, we are
@@ -678,7 +640,7 @@
   // are unlikely to become a fallthrough target after machine block placement.
   // Tail merging these blocks is unlikely to create additional unconditional
   // branches, and will reduce the size of this cold code.
-  if (I1 == MBB1->begin() && I2 == MBB2->begin() &&
+  if (OnlyNonInstrBeforeI1 && OnlyNonInstrBeforeI2 &&
       blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
     return true;
 
@@ -686,16 +648,16 @@
   // a position where the other could fall through into it, merge any number
   // of instructions, because it can be done without a branch.
   // TODO: If the blocks are not adjacent, move one of them so that they are?
-  if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+  if (MBB1->isLayoutSuccessor(MBB2) && OnlyNonInstrBeforeI2)
     return true;
-  if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+  if (MBB2->isLayoutSuccessor(MBB1) && OnlyNonInstrBeforeI1)
     return true;
 
   // If both blocks are identical and end in a branch, merge them unless they
   // both have a fallthrough predecessor and successor.
   // We can only do this after block placement because it depends on whether
   // there are fallthroughs, and we don't know until after layout.
-  if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) {
+  if (AfterPlacement && OnlyNonInstrBeforeI1 && OnlyNonInstrBeforeI2) {
     auto BothFallThrough = [](MachineBasicBlock *MBB) {
       if (MBB->succ_size() != 0 && !MBB->canFallThrough())
         return false;
@@ -729,7 +691,7 @@
   // instructions that would be deleted in the merge.
   MachineFunction *MF = MBB1->getParent();
   return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
-         (I1 == MBB1->begin() || I2 == MBB2->begin());
+         (OnlyNonInstrBeforeI1 || OnlyNonInstrBeforeI2);
 }
 
 unsigned BranchFolder::ComputeSameTails(unsigned CurHash,