diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -653,6 +653,10 @@ /// gen prepare. virtual bool preferZeroCompareBranch() const { return false; } + /// Return true if code gen prepare should rewrite a branch compare into an + /// icmp zero of a matching add/sub for any predicate, not only equality. + virtual bool preferAnyZeroCompareBranch() const { return false; } + /// Return true if it is cheaper to split the store of a merged int val /// from a pair of smaller values into multiple stores. virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -7966,7 +7966,8 @@ // br %c, bla, blb // Creating the cmp to zero can be better for the backend, especially if the // lshr produces flags that can be used automatically. 
- if (!TLI.preferZeroCompareBranch() || !Branch->isConditional()) + if ((!TLI.preferZeroCompareBranch() && !TLI.preferAnyZeroCompareBranch()) || + !Branch->isConditional()) return false; ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition()); @@ -7999,7 +8000,7 @@ replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc); return true; } - if (Cmp->isEquality() && + if ((Cmp->isEquality() || TLI.preferAnyZeroCompareBranch()) && (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) || match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) { IRBuilder<> Builder(Branch); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1097,6 +1097,8 @@ bool isCtlzFast() const override; + bool preferAnyZeroCompareBranch() const override { return true; } + bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { // If the pair to store is a mixture of float and int values, we will // save two bitwise instructions and one float-to-int instruction and diff --git a/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll b/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll --- a/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,6 +1,7 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; REQUIRES: asserts ; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats 2>&1 | \ -; RUN: grep "asm-printer" | grep 33 +; RUN: grep "asm-printer" | grep 32 target datalayout = "e-p:32:32" define void @foo(ptr %mc, ptr %bp, ptr %ms, ptr %xmb, ptr %mpp, ptr %tpmm, ptr %ip, ptr %tpim, ptr %dpp, ptr %tpdm, ptr %bpi, i32 %M) nounwind { diff --git a/llvm/test/CodeGen/X86/branch-on-zero.ll b/llvm/test/CodeGen/X86/branch-on-zero.ll --- a/llvm/test/CodeGen/X86/branch-on-zero.ll +++ b/llvm/test/CodeGen/X86/branch-on-zero.ll @@ -6,12 +6,9 @@ define void @test1_slt(i32 %0) { ; CHECK-LABEL: 
test1_slt: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $2, %edi -; CHECK-NEXT: jg .LBB0_1 -; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: addl $-3, %edi -; CHECK-NEXT: jmp foo@PLT # TAILCALL -; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: js foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: retq %2 = icmp slt i32 %0, 3 br i1 %2, label %3, label %5 @@ -28,12 +25,9 @@ define void @test2_sle(i32 %0) { ; CHECK-LABEL: test2_sle: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $3, %edi -; CHECK-NEXT: jg .LBB1_1 -; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: addl $-3, %edi -; CHECK-NEXT: jmp foo@PLT # TAILCALL -; CHECK-NEXT: .LBB1_1: +; CHECK-NEXT: jle foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: retq %2 = icmp sle i32 %0, 3 br i1 %2, label %3, label %5 @@ -50,12 +44,9 @@ define void @test3_ugt(i32 %0) { ; CHECK-LABEL: test3_ugt: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpl $4, %edi -; CHECK-NEXT: jb .LBB2_1 -; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: addl $-3, %edi -; CHECK-NEXT: jmp foo@PLT # TAILCALL -; CHECK-NEXT: .LBB2_1: +; CHECK-NEXT: jne foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: retq %2 = icmp ugt i32 %0, 3 br i1 %2, label %3, label %5 diff --git a/llvm/test/CodeGen/X86/peep-test-5.ll b/llvm/test/CodeGen/X86/peep-test-5.ll --- a/llvm/test/CodeGen/X86/peep-test-5.ll +++ b/llvm/test/CodeGen/X86/peep-test-5.ll @@ -15,20 +15,20 @@ ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.1: # %bb_free -; CHECK-NEXT: callq free_object@PLT -; CHECK-NEXT: .LBB0_4: # %end +; CHECK-NEXT: decl %eax +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.1: # %bb2 +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: jle .LBB0_4 +; CHECK-NEXT: # %bb.2: # %bb_dec +; CHECK-NEXT: movl %eax, (%rdi) ; CHECK-NEXT: popq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB0_2: # %bb2 +; CHECK-NEXT: .LBB0_3: # %bb_free ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: jle .LBB0_4 -; 
CHECK-NEXT: # %bb.3: # %bb_dec -; CHECK-NEXT: decl %eax -; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: callq free_object@PLT +; CHECK-NEXT: .LBB0_4: # %end ; CHECK-NEXT: popq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq