diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -301,114 +301,58 @@ return HashMachineInstr(*I); } -/// Whether MI should be counted as an instruction when calculating common tail. +/// Whether MI should be counted as an instruction when calculating common tail. static bool countsAsInstruction(const MachineInstr &MI) { return !(MI.isDebugInstr() || MI.isCFIInstruction()); } -/// ComputeCommonTailLength - Given two machine basic blocks, compute the number -/// of instructions they actually have in common together at their end. Return -/// iterators for the first shared instruction in each block. +/// Iterate backwards from the given iterator \p I, towards the beginning of the +/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator +/// pointing to that MI. If no such MI is found, return the end iterator. +static MachineBasicBlock::iterator +skipBackwardPastNonInstructions(MachineBasicBlock::iterator I, + MachineBasicBlock *MBB) { + while (I != MBB->begin()) { + --I; + if (countsAsInstruction(*I)) + return I; + } + return MBB->end(); +} + +/// Given two machine basic blocks, return the number of instructions they +/// actually have in common together at their end. If a common tail is found (at +/// least by one instruction), then iterators for the first shared instruction +/// in each block are returned as well. +/// +/// Non-instructions according to countsAsInstruction are ignored. static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineBasicBlock::iterator &I1, MachineBasicBlock::iterator &I2) { - I1 = MBB1->end(); - I2 = MBB2->end(); + MachineBasicBlock::iterator MBBI1 = MBB1->end(); + MachineBasicBlock::iterator MBBI2 = MBB2->end(); unsigned TailLen = 0; - while (I1 != MBB1->begin() && I2 != MBB2->begin()) { - --I1; --I2; - // Skip debugging pseudos; necessary to avoid changing the code. - while (!countsAsInstruction(*I1)) { - if (I1==MBB1->begin()) { - while (!countsAsInstruction(*I2)) { - if (I2==MBB2->begin()) { - // I1==DBG at begin; I2==DBG at begin - goto SkipTopCFIAndReturn; - } - --I2; - } - ++I2; - // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin - goto SkipTopCFIAndReturn; - } - --I1; - } - // I1==first (untested) non-DBG preceding known match - while (!countsAsInstruction(*I2)) { - if (I2==MBB2->begin()) { - ++I1; - // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin - goto SkipTopCFIAndReturn; - } - --I2; - } - // I1, I2==first (untested) non-DBGs preceding known match - if (!I1->isIdenticalTo(*I2) || + for (;;) { + MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1); + MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2); + if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end()) + break; + if (!MBBI1->isIdenticalTo(*MBBI2) || // FIXME: This check is dubious. It's used to get around a problem where // people incorrectly expect inline asm directives to remain in the same // relative order. This is untenable because normal compiler // optimizations (like this one) may reorder and/or merge these // directives. - I1->isInlineAsm()) { - ++I1; ++I2; + // FIXME: If inline asm check really is needed, then why don't we also + // check MBBI2? + MBBI1->isInlineAsm()) { break; } ++TailLen; - } - // Back past possible debugging pseudos at beginning of block. This matters - // when one block differs from the other only by whether debugging pseudos - // are present at the beginning. (This way, the various checks later for - // I1==MBB1->begin() work as expected.) - if (I1 == MBB1->begin() && I2 != MBB2->begin()) { - --I2; - while (I2->isDebugInstr()) { - if (I2 == MBB2->begin()) - return TailLen; - --I2; - } - ++I2; - } - if (I2 == MBB2->begin() && I1 != MBB1->begin()) { - --I1; - while (I1->isDebugInstr()) { - if (I1 == MBB1->begin()) - return TailLen; - --I1; - } - ++I1; - } - -SkipTopCFIAndReturn: - // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if - // I1 and I2 are non-identical when compared and then one or both of them ends - // up pointing to a CFI instruction after being incremented. For example: - /* - BB1: - ... - INSTRUCTION_A - ADD32ri8 <- last common instruction - ... - BB2: - ... - INSTRUCTION_B - CFI_INSTRUCTION - ADD32ri8 <- last common instruction - ... - */ - // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after - // incrementing the iterators, I1 will point to ADD, however I2 will point to - // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the - // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI - // instruction. - // Skip CFI_INSTRUCTION and debugging instruction; necessary to avoid changing the code. - while (I1 != MBB1->end() && !countsAsInstruction(*I1)) { - ++I1; - } - - while (I2 != MBB2->end() && !countsAsInstruction(*I2)) { - ++I2; + I1 = MBBI1; + I2 = MBBI2; } return TailLen; @@ -660,6 +604,24 @@ << " and " << printMBBReference(*MBB2) << " is " << CommonTailLen << '\n'); + // Make sure that any non-instruction instructions (debuginfo/cfi directives) + // at the beginning of the MBB do not impact if we answer true/false here. I1 + // and I2 always points at the first real instruction after + // ComputeCommonTailLength. + // + // Exactly what happens when we say that it is profitable to merge and there + // are non-instructions present in the code is not really considered in this + // function. Goal here is to answer the question if it is profitable or not to + // merge tails. We might for example end up splitting the block just above + // I1/I2 due to CFI directives being present at the beginning of a MBB, + // creating a block with only debuginfo and CFI directives. As long as + // subsequent optimizations handles such "empty" blocks the codegen should be + // invariant. + bool OnlyNonInstrBeforeI1 = + skipBackwardPastNonInstructions(I1, MBB1) == MBB1->end(); + bool OnlyNonInstrBeforeI2 = + skipBackwardPastNonInstructions(I2, MBB2) == MBB2->end(); + // It's almost always profitable to merge any number of non-terminator // instructions with the block that falls through into the common successor. // This is true only for a single successor. For multiple successors, we are @@ -678,7 +640,7 @@ // are unlikely to become a fallthrough target after machine block placement. // Tail merging these blocks is unlikely to create additional unconditional // branches, and will reduce the size of this cold code. - if (I1 == MBB1->begin() && I2 == MBB2->begin() && + if (OnlyNonInstrBeforeI1 && OnlyNonInstrBeforeI2 && blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2)) return true; @@ -686,16 +648,16 @@ // a position where the other could fall through into it, merge any number // of instructions, because it can be done without a branch. // TODO: If the blocks are not adjacent, move one of them so that they are? - if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin()) + if (MBB1->isLayoutSuccessor(MBB2) && OnlyNonInstrBeforeI2) return true; - if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) + if (MBB2->isLayoutSuccessor(MBB1) && OnlyNonInstrBeforeI1) return true; // If both blocks are identical and end in a branch, merge them unless they // both have a fallthrough predecessor and successor. // We can only do this after block placement because it depends on whether // there are fallthroughs, and we don't know until after layout. - if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) { + if (AfterPlacement && OnlyNonInstrBeforeI1 && OnlyNonInstrBeforeI2) { auto BothFallThrough = [](MachineBasicBlock *MBB) { if (MBB->succ_size() != 0 && !MBB->canFallThrough()) return false; @@ -729,7 +691,7 @@ // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() && - (I1 == MBB1->begin() || I2 == MBB2->begin()); + (OnlyNonInstrBeforeI1 || OnlyNonInstrBeforeI2); } unsigned BranchFolder::ComputeSameTails(unsigned CurHash,