diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -302,114 +302,56 @@ return HashMachineInstr(*I); } -/// Whether MI should be counted as an instruction when calculating common tail. +/// Whether MI should be counted as an instruction when calculating common tail. static bool countsAsInstruction(const MachineInstr &MI) { return !(MI.isDebugInstr() || MI.isCFIInstruction()); } -/// ComputeCommonTailLength - Given two machine basic blocks, compute the number -/// of instructions they actually have in common together at their end. Return -/// iterators for the first shared instruction in each block. +/// Iterate backwards from the given iterator \p I, towards the beginning of the +/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator +/// pointing to that MI. If no such MI is found, return the end iterator. +static MachineBasicBlock::iterator +skipBackwardPastNonInstructions(MachineBasicBlock::iterator I, + MachineBasicBlock *MBB) { + while (I != MBB->begin()) { + --I; + if (countsAsInstruction(*I)) + return I; + } + return MBB->end(); +} + +/// Given two machine basic blocks, return the number of instructions they +/// actually have in common together at their end. If a common tail is found (of +/// at least one instruction), then iterators for the first shared instruction +/// in each block are returned as well. +/// +/// Non-instructions according to countsAsInstruction are ignored. 
static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineBasicBlock::iterator &I1, MachineBasicBlock::iterator &I2) { - I1 = MBB1->end(); - I2 = MBB2->end(); + MachineBasicBlock::iterator MBBI1 = MBB1->end(); + MachineBasicBlock::iterator MBBI2 = MBB2->end(); unsigned TailLen = 0; - while (I1 != MBB1->begin() && I2 != MBB2->begin()) { - --I1; --I2; - // Skip debugging pseudos; necessary to avoid changing the code. - while (!countsAsInstruction(*I1)) { - if (I1==MBB1->begin()) { - while (!countsAsInstruction(*I2)) { - if (I2==MBB2->begin()) { - // I1==DBG at begin; I2==DBG at begin - goto SkipTopCFIAndReturn; - } - --I2; - } - ++I2; - // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin - goto SkipTopCFIAndReturn; - } - --I1; - } - // I1==first (untested) non-DBG preceding known match - while (!countsAsInstruction(*I2)) { - if (I2==MBB2->begin()) { - ++I1; - // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin - goto SkipTopCFIAndReturn; - } - --I2; - } - // I1, I2==first (untested) non-DBGs preceding known match - if (!I1->isIdenticalTo(*I2) || + while (true) { + MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1); + MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2); + if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end()) + break; + if (!MBBI1->isIdenticalTo(*MBBI2) || // FIXME: This check is dubious. It's used to get around a problem where // people incorrectly expect inline asm directives to remain in the same // relative order. This is untenable because normal compiler // optimizations (like this one) may reorder and/or merge these // directives. - I1->isInlineAsm()) { - ++I1; ++I2; + MBBI1->isInlineAsm()) { break; } ++TailLen; - } - // Back past possible debugging pseudos at beginning of block. This matters - // when one block differs from the other only by whether debugging pseudos - // are present at the beginning. (This way, the various checks later for - // I1==MBB1->begin() work as expected.) 
- if (I1 == MBB1->begin() && I2 != MBB2->begin()) { - --I2; - while (I2->isDebugInstr()) { - if (I2 == MBB2->begin()) - return TailLen; - --I2; - } - ++I2; - } - if (I2 == MBB2->begin() && I1 != MBB1->begin()) { - --I1; - while (I1->isDebugInstr()) { - if (I1 == MBB1->begin()) - return TailLen; - --I1; - } - ++I1; - } - -SkipTopCFIAndReturn: - // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if - // I1 and I2 are non-identical when compared and then one or both of them ends - // up pointing to a CFI instruction after being incremented. For example: - /* - BB1: - ... - INSTRUCTION_A - ADD32ri8 <- last common instruction - ... - BB2: - ... - INSTRUCTION_B - CFI_INSTRUCTION - ADD32ri8 <- last common instruction - ... - */ - // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after - // incrementing the iterators, I1 will point to ADD, however I2 will point to - // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the - // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI - // instruction. - // Skip CFI_INSTRUCTION and debugging instruction; necessary to avoid changing the code. - while (I1 != MBB1->end() && !countsAsInstruction(*I1)) { - ++I1; - } - - while (I2 != MBB2->end() && !countsAsInstruction(*I2)) { - ++I2; + I1 = MBBI1; + I2 = MBBI2; } return TailLen; @@ -661,6 +603,17 @@ << " and " << printMBBReference(*MBB2) << " is " << CommonTailLen << '\n'); + // Move the iterators to the beginning of the MBB if we only got debug + // instructions before the tail. This is to avoid splitting a block when we + // only got debug instructions before the tail (to be invariant on -g). 
+ if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end()) == I1) + I1 = MBB1->begin(); + if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end()) == I2) + I2 = MBB2->begin(); + + bool FullBlockTail1 = I1 == MBB1->begin(); + bool FullBlockTail2 = I2 == MBB2->begin(); + // It's almost always profitable to merge any number of non-terminator // instructions with the block that falls through into the common successor. // This is true only for a single successor. For multiple successors, we are @@ -679,7 +632,7 @@ // are unlikely to become a fallthrough target after machine block placement. // Tail merging these blocks is unlikely to create additional unconditional // branches, and will reduce the size of this cold code. - if (I1 == MBB1->begin() && I2 == MBB2->begin() && + if (FullBlockTail1 && FullBlockTail2 && blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2)) return true; @@ -687,16 +640,16 @@ // a position where the other could fall through into it, merge any number // of instructions, because it can be done without a branch. // TODO: If the blocks are not adjacent, move one of them so that they are? - if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin()) + if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2) return true; - if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) + if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1) return true; // If both blocks are identical and end in a branch, merge them unless they // both have a fallthrough predecessor and successor. // We can only do this after block placement because it depends on whether // there are fallthroughs, and we don't know until after layout. - if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) { + if (AfterPlacement && FullBlockTail1 && FullBlockTail2) { auto BothFallThrough = [](MachineBasicBlock *MBB) { if (MBB->succ_size() != 0 && !MBB->canFallThrough()) return false; @@ -730,7 +683,7 @@ // instructions that would be deleted in the merge. 
MachineFunction *MF = MBB1->getParent(); return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() && - (I1 == MBB1->begin() || I2 == MBB2->begin()); + (FullBlockTail1 || FullBlockTail2); } unsigned BranchFolder::ComputeSameTails(unsigned CurHash, diff --git a/llvm/test/CodeGen/X86/branchfolding-debug-invariant.mir b/llvm/test/CodeGen/X86/branchfolding-debug-invariant.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/branchfolding-debug-invariant.mir @@ -0,0 +1,135 @@ +# RUN: llc -mtriple=x86_64-- -run-pass branch-folder -O3 -o - %s | FileCheck %s + +--- +name: test1a +body: | + ; CHECK-LABEL: name: test1a + ; CHECK: bb.0: + ; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags + ; CHECK: JCC_1 %bb.2, 5, implicit $eflags + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0 + ; CHECK-NOT: RET + ; CHECK: bb.2: + ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + ; CHECK: RET 0 + bb.0: + TEST8rr killed renamable $al, renamable $al, implicit-def $eflags + JCC_1 %bb.2, 5, implicit killed $eflags + + bb.1: + MOV8mi $r12, 1, $noreg, 0, $noreg, 0 + MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + RET 0 + + bb.2: + MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + RET 0 +... + +--- +name: test1b +body: | + + ; Verify that we get the same rewrites as in test1a when adding some + ; DBG_VALUE instructions in the mix. 
+ ; + ; CHECK-LABEL: name: test1b + ; CHECK: bb.0: + ; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags + ; CHECK: JCC_1 %bb.2, 5, implicit $eflags + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0 + ; CHECK-NOT: RET + ; CHECK: bb.2: + ; CHECK: DBG_VALUE + ; CHECK: DBG_VALUE + ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + ; CHECK: RET 0 + bb.0: + TEST8rr killed renamable $al, renamable $al, implicit-def $eflags + JCC_1 %bb.2, 5, implicit killed $eflags + + bb.1: + MOV8mi $r12, 1, $noreg, 0, $noreg, 0 + MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + RET 0 + + bb.2: + DBG_VALUE + DBG_VALUE + MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + RET 0 +... + +--- +name: test2a +body: | + ; CFI instruction currently prevents the rewrite here (although technically + ; I suppose that branch folding could let bb.1 fallthrough into bb.2 here). + ; + ; CHECK-LABEL: name: test2a + ; CHECK: bb.0: + ; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags + ; CHECK: JCC_1 %bb.2, 5, implicit killed $eflags + ; CHECK: bb.1: + ; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0 + ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + ; CHECK: RET 0 + ; CHECK: bb.2: + ; CHECK: CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + ; CHECK: RET 0 + bb.0: + TEST8rr killed renamable $al, renamable $al, implicit-def $eflags + JCC_1 %bb.2, 5, implicit killed $eflags + + bb.1: + MOV8mi $r12, 1, $noreg, 0, $noreg, 0 + MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + RET 0 + + bb.2: + CFI_INSTRUCTION def_cfa_offset 8 + MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + RET 0 +... + +--- +name: test2b +body: | + ; Verify that we get the same rewrites as in test2a when adding some + ; DBG_VALUE instructions in the mix. 
+ ; + ; CHECK-LABEL: name: test2b + ; CHECK: bb.0: + ; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags + ; CHECK: JCC_1 %bb.2, 5, implicit killed $eflags + ; CHECK: bb.1: + ; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0 + ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + ; CHECK: RET 0 + ; CHECK: bb.2: + ; CHECK: DBG_VALUE + ; CHECK: CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: DBG_VALUE + ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + ; CHECK: RET 0 + bb.0: + TEST8rr killed renamable $al, renamable $al, implicit-def $eflags + JCC_1 %bb.2, 5, implicit killed $eflags + + bb.1: + MOV8mi $r12, 1, $noreg, 0, $noreg, 0 + MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + RET 0 + + bb.2: + DBG_VALUE + CFI_INSTRUCTION def_cfa_offset 8 + DBG_VALUE + MOV8mi $r13, 1, $noreg, 0, $noreg, 0 + RET 0 +...