Index: lib/CodeGen/BranchFolding.cpp =================================================================== --- lib/CodeGen/BranchFolding.cpp +++ lib/CodeGen/BranchFolding.cpp @@ -996,6 +996,24 @@ MachineBasicBlock *IBB = &*I; MachineBasicBlock *PredBB = &*std::prev(I); MergePotentials.clear(); + MachineLoop *ML; + + // Bail out if IBB is the loop header after the block placement because + // -- If merging predecessors that belong to the same loop as IBB, the + // common tail of merged predecessors may become the loop top if block + // placement is called again and the predecessors may branch to this common + // tail and require more branches. This can be relaxed if + // MachineBlockPlacement::findBestLoopTop is more flexible. + // --If merging predecessors that do not belong to the same loop as IBB, the + // loop info of IBB's loop and the other loops may be affected. Calling the + // block placement again may make big change to the layout and eliminate the + // reason to do tail merging here. + if (AfterBlockPlacement && MLI) { + ML = MLI->getLoopFor(IBB); + if (ML && IBB == ML->getHeader()) + continue; + } + for (MachineBasicBlock *PBB : I->predecessors()) { if (MergePotentials.size() == TailMergeThreshold) break; @@ -1015,16 +1033,12 @@ if (PBB->hasEHPadSuccessor()) continue; - // Bail out if the loop header (IBB) is not the top of the loop chain - // after the block placement. Otherwise, the common tail of IBB's - // predecessors may become the loop top if block placement is called again - // and the predecessors may branch to this common tail. - // FIXME: Relaxed this check if the algorithm of finding loop top is - // changed in MBP. + // After block placement, only consider predecessors that belong to the + // same loop as IBB. The reason is the same as above when skipping loop + // header. 
if (AfterBlockPlacement && MLI) - if (MachineLoop *ML = MLI->getLoopFor(IBB)) - if (IBB == ML->getHeader() && ML == MLI->getLoopFor(PBB)) - continue; + if (ML != MLI->getLoopFor(PBB)) + continue; MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; Index: test/CodeGen/ARM/arm-and-tst-peephole.ll =================================================================== --- test/CodeGen/ARM/arm-and-tst-peephole.ll +++ test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -49,7 +49,7 @@ ; V8-NEXT: beq ; V8-NEXT: %tailrecurse.switch ; V8: cmp -; V8-NEXT: beq +; V8-NEXT: bne ; V8-NEXT: b ; The trailing space in the last line checks that the branch is unconditional switch i32 %and, label %sw.epilog [ Index: test/CodeGen/X86/tail-merge-after-mbp.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/tail-merge-after-mbp.ll @@ -0,0 +1,91 @@ +; RUN: llc -o - %s | FileCheck %s + +%0 = type { %1, %3* } +%1 = type { %2* } +%2 = type { %2*, i8* } +%3 = type { i32, i32 (i32, i32)* } + + +declare i32 @Up(...) local_unnamed_addr + +; check loop block LBB0_8 is not merged with BB#6 +; check loop block BB#9 is not merged with BB#5, BB#7 +define i32 @foo(%0* nocapture readonly, i32) local_unnamed_addr { +; CHECK-LABEL: foo: +; CHECK:# BB#5: +; CHECK-NEXT: movq (%rax), %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: je .LBB0_12 +; CHECK-NEXT:# BB#6: +; CHECK-NEXT: cmpq $0, 8(%rax) +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT:# BB#7: +; CHECK-NEXT: movq (%rax), %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: je .LBB0_12 +; CHECK-NEXT:.LBB0_8: +; CHECK-NEXT: cmpq $0, 8(%rax) +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT:# BB#9: +; CHECK-NEXT: movq (%rax), %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: jne .LBB0_8 + br i1 undef, label %34, label %3 + +;