diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -550,6 +550,17 @@
   }
 }
 
+/// Return true if more than two successors of \p A are also predecessors of
+/// \p A. The caller uses this to recognise a loop header that branches
+/// directly to several of its own latch blocks.
+static bool hasMultipSuccsAsLatch(const MachineBasicBlock *A) {
+  const SmallPtrSet<const MachineBasicBlock *, 8> Preds(A->pred_begin(),
+                                                        A->pred_end());
+  return llvm::count_if(A->successors(), [&](const MachineBasicBlock *SuccBB) {
+           return Preds.count(SuccBB);
+         }) > 2;
+}
+
 /// Determine if it is profitable to duplicate this block.
 bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
                                          MachineBasicBlock &TailBB) {
@@ -563,6 +574,10 @@
   if (TailBB.isSuccessor(&TailBB))
     return false;
 
+  // Don't tail-duplicate a loop header with multiple successors as latches.
+  if (hasMultipSuccsAsLatch(&TailBB))
+    return false;
+
   // Set the limit on the cost to duplicate. When optimizing for size,
   // duplicate only one, because one branch instruction can be eliminated to
   // compensate for the duplication.
diff --git a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+define i8* @large_loop_switch(i8* %p) {
+; CHECK-LABEL: large_loop_switch:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movl $6, %ebx
+; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    jmpq *.LJTI0_0(,%rax,8)
+; CHECK-NEXT:  .LBB0_2: # %sw.bb1
+; CHECK-NEXT:    movl $531, %edi # imm = 0x213
+; CHECK-NEXT:  .LBB0_3: # %for.body
+; CHECK-NEXT:    callq ccc@PLT
+; CHECK-NEXT:  .LBB0_4: # %for.body
+; CHECK-NEXT:    movq %rax, %rsi
+; CHECK-NEXT:    decl %ebx
+; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    jmpq *.LJTI0_0(,%rax,8)
+; CHECK-NEXT:  .LBB0_5: # %sw.bb3
+; CHECK-NEXT:    movl $532, %edi # imm = 0x214
+; CHECK-NEXT:    callq bbb@PLT
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_7: # %sw.bb5
+; CHECK-NEXT:    movl $533, %edi # imm = 0x215
+; CHECK-NEXT:    callq bbb@PLT
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_8: # %sw.bb7
+; CHECK-NEXT:    movl $535, %edi # imm = 0x217
+; CHECK-NEXT:    callq bbb@PLT
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_9: # %sw.bb9
+; CHECK-NEXT:    movl $536, %edi # imm = 0x218
+; CHECK-NEXT:    jmp .LBB0_3
+; CHECK-NEXT:  .LBB0_10: # %sw.bb11
+; CHECK-NEXT:    movl $658, %edi # imm = 0x292
+; CHECK-NEXT:    callq bbb@PLT
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_11: # %for.cond.cleanup
+; CHECK-NEXT:    movl $530, %edi # imm = 0x212
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    jmp ccc@PLT # TAILCALL
+entry:
+  br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+  %call = tail call i8* @ccc(i32 signext 530, i8* %p.addr.03006)
+  ret i8* %call
+
+for.body: ; preds = %for.inc, %entry
+  %i.03007 = phi i32 [ 6, %entry ], [ %dec, %for.inc ]
+  %p.addr.03006 = phi i8* [ %p, %entry ], [ %p.addr.1, %for.inc ]
+  switch i32 %i.03007, label %for.body.unreachabledefault [
+    i32 0, label %for.cond.cleanup
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb3
+    i32 3, label %sw.bb5
+    i32 4, label %sw.bb7
+    i32 5, label %sw.bb9
+    i32 6, label %sw.bb11
+  ]
+
+sw.bb1: ; preds = %for.body
+  %call2 = tail call i8* @ccc(i32 signext 531, i8* %p.addr.03006)
+  br label %for.inc
+
+sw.bb3: ; preds = %for.body
+  %call4 = tail call i8* @bbb(i32 signext 532, i8* %p.addr.03006)
+  br label %for.inc
+
+sw.bb5: ; preds = %for.body
+  %call6 = tail call i8* @bbb(i32 signext 533, i8* %p.addr.03006)
+  br label %for.inc
+
+sw.bb7: ; preds = %for.body
+  %call8 = tail call i8* @bbb(i32 signext 535, i8* %p.addr.03006)
+  br label %for.inc
+
+sw.bb9: ; preds = %for.body
+  %call10 = tail call i8* @ccc(i32 signext 536, i8* %p.addr.03006)
+  br label %for.inc
+
+sw.bb11: ; preds = %for.body
+  %call12 = tail call i8* @bbb(i32 signext 658, i8* %p.addr.03006)
+  br label %for.inc
+
+for.body.unreachabledefault: ; preds = %for.body
+  unreachable
+
+for.inc: ; preds = %sw.bb1, %sw.bb3, %sw.bb5, %sw.bb7, %sw.bb9, %sw.bb11
+  %p.addr.1 = phi i8* [ %call12, %sw.bb11 ], [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], [ %call6, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb1 ]
+  %dec = add nsw i32 %i.03007, -1
+  br label %for.body
+}
+
+declare i8* @bbb(i32 signext, i8*)
+declare i8* @ccc(i32 signext, i8*)