diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -70,6 +70,12 @@
              "end with indirect branches."), cl::init(20),
     cl::Hidden);
 
+static cl::opt<unsigned> TailDupJmpTableLoopSize(
+    "tail-dup-jmptable-loop-size",
+    cl::desc("Maximum loop latches to consider tail duplication that are "
+             "successors of loop header."),
+    cl::init(128), cl::Hidden);
+
 static cl::opt<bool>
     TailDupVerify("tail-dup-verify",
                   cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -550,6 +556,24 @@
   }
 }
 
+/// Return true if \p A is the header of a loop whose body is one large jump
+/// table: more than TailDupJmpTableLoopSize successors of \p A are also its
+/// predecessors and branch back to it as their only successor.
+static bool isLargeJumpTableLoop(const MachineBasicBlock *A) {
+  // Cheap reject: with too few successors the count below can never exceed
+  // the threshold, so skip building the predecessor set.
+  if (A->succ_size() <= TailDupJmpTableLoopSize)
+    return false;
+
+  const SmallPtrSet<const MachineBasicBlock *, 8> Preds(A->pred_begin(),
+                                                        A->pred_end());
+  // Count the latches: successors of A that are also predecessors of A and
+  // whose only successor is therefore A itself.
+  return llvm::count_if(A->successors(), [&](const MachineBasicBlock *SuccBB) {
+           return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
+         }) > TailDupJmpTableLoopSize;
+}
+
 /// Determine if it is profitable to duplicate this block.
 bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
                                          MachineBasicBlock &TailBB) {
@@ -563,6 +587,10 @@
   if (TailBB.isSuccessor(&TailBB))
     return false;
 
+  // Don't tail-duplicate loops whose bodies are large jump tables.
+  if (isLargeJumpTableLoop(&TailBB))
+    return false;
+
   // Set the limit on the cost to duplicate. When optimizing for size,
   // duplicate only one, because one branch instruction can be eliminated to
   // compensate for the duplication.
diff --git a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll --- a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll +++ b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll @@ -1,76 +1,48 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -tail-dup-jmptable-loop-size=5 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s define i8* @large_loop_switch(i8* %p) { ; CHECK-LABEL: large_loop_switch: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: movq %rdi, %rsi ; CHECK-NEXT: movl $6, %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .LBB0_1: # %for.cond.cleanup -; CHECK-NEXT: movl $530, %edi # imm = 0x212 -; CHECK-NEXT: movq %rax, %rsi -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: jmp ccc@PLT # TAILCALL -; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) ; CHECK-NEXT: .LBB0_2: # %sw.bb1 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movl $531, %edi # imm = 0x213 -; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: .LBB0_3: # %for.body ; CHECK-NEXT: callq ccc@PLT +; CHECK-NEXT: .LBB0_4: # %for.body +; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_3: # %sw.bb3 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) +; CHECK-NEXT: .LBB0_5: # %sw.bb3 ; CHECK-NEXT: movl $532, %edi # imm = 0x214 -; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: 
jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_4: # %sw.bb5 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_7: # %sw.bb5 ; CHECK-NEXT: movl $533, %edi # imm = 0x215 -; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_5: # %sw.bb7 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_8: # %sw.bb7 ; CHECK-NEXT: movl $535, %edi # imm = 0x217 -; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_6: # %sw.bb9 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_9: # %sw.bb9 ; CHECK-NEXT: movl $536, %edi # imm = 0x218 -; CHECK-NEXT: movq %rax, %rsi -; CHECK-NEXT: callq ccc@PLT -; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_7: # %sw.bb11 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: .LBB0_10: # %sw.bb11 ; CHECK-NEXT: movl $658, %edi # imm = 0x292 -; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_11: # %for.cond.cleanup +; CHECK-NEXT: movl $530, %edi # imm = 0x212 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: jmp ccc@PLT # TAILCALL entry: br label %for.body