diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3972,8 +3972,10 @@
 /// Check whether the definition can be converted
 /// to remove a comparison against zero.
-inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
+inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
+                                    bool &ClearsOverflowFlag) {
   NoSignFlag = false;
+  ClearsOverflowFlag = false;
   switch (MI.getOpcode()) {
   default: return false;
@@ -4039,12 +4041,6 @@
   case X86::SHL8r1:   case X86::SHL16r1:  case X86::SHL32r1:case X86::SHL64r1:
   case X86::ANDN32rr:  case X86::ANDN32rm:
   case X86::ANDN64rr:  case X86::ANDN64rm:
-  case X86::BLSI32rr:  case X86::BLSI32rm:
-  case X86::BLSI64rr:  case X86::BLSI64rm:
-  case X86::BLSMSK32rr:case X86::BLSMSK32rm:
-  case X86::BLSMSK64rr:case X86::BLSMSK64rm:
-  case X86::BLSR32rr:  case X86::BLSR32rm:
-  case X86::BLSR64rr:  case X86::BLSR64rm:
   case X86::BZHI32rr:  case X86::BZHI32rm:
   case X86::BZHI64rr:  case X86::BZHI64rm:
   case X86::LZCNT16rr: case X86::LZCNT16rm:
@@ -4056,6 +4052,13 @@
   case X86::TZCNT16rr: case X86::TZCNT16rm:
   case X86::TZCNT32rr: case X86::TZCNT32rm:
   case X86::TZCNT64rr: case X86::TZCNT64rm:
+    return true;
+  case X86::BLSI32rr:    case X86::BLSI32rm:
+  case X86::BLSI64rr:    case X86::BLSI64rm:
+  case X86::BLSMSK32rr:  case X86::BLSMSK32rm:
+  case X86::BLSMSK64rr:  case X86::BLSMSK64rm:
+  case X86::BLSR32rr:    case X86::BLSR32rm:
+  case X86::BLSR64rr:    case X86::BLSR64rm:
   case X86::BLCFILL32rr: case X86::BLCFILL32rm:
   case X86::BLCFILL64rr: case X86::BLCFILL64rm:
   case X86::BLCI32rr:    case X86::BLCI32rm:
@@ -4074,12 +4077,17 @@
   case X86::T1MSKC64rr:  case X86::T1MSKC64rm:
   case X86::TZMSK32rr:   case X86::TZMSK32rm:
   case X86::TZMSK64rr:   case X86::TZMSK64rm:
+    // These instructions clear the overflow flag just like TEST.
+    // FIXME: These are not the only instructions in this switch that clear the
+    // overflow flag.
+    ClearsOverflowFlag = true;
     return true;
   case X86::BEXTR32rr:   case X86::BEXTR64rr:
   case X86::BEXTR32rm:   case X86::BEXTR64rm:
   case X86::BEXTRI32ri:  case X86::BEXTRI32mi:
   case X86::BEXTRI64ri:  case X86::BEXTRI64mi:
-    // BEXTR doesn't update the sign flag so we can't use it.
+    // BEXTR doesn't update the sign flag so we can't use it. It does clear
+    // the overflow flag, but that's not useful without the sign flag.
     NoSignFlag = true;
     return true;
   }
@@ -4199,8 +4207,9 @@
   // right way.
   bool ShouldUpdateCC = false;
   bool NoSignFlag = false;
+  bool ClearsOverflowFlag = false;
   X86::CondCode NewCC = X86::COND_INVALID;
-  if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) {
+  if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag, ClearsOverflowFlag)) {
     // Scan forward from the use until we hit the use we're looking for or the
     // compare instruction.
     for (MachineBasicBlock::iterator J = MI;; ++J) {
@@ -4312,11 +4321,15 @@
     default: break;
     case X86::COND_A: case X86::COND_AE:
     case X86::COND_B: case X86::COND_BE:
+      // CF is used, we can't perform this optimization.
+      return false;
     case X86::COND_G: case X86::COND_GE:
     case X86::COND_L: case X86::COND_LE:
     case X86::COND_O: case X86::COND_NO:
-      // CF and OF are used, we can't perform this optimization.
-      return false;
+      // If OF is used, the instruction needs to clear it like CmpZero does.
+      if (!ClearsOverflowFlag)
+        return false;
+      break;
     case X86::COND_S: case X86::COND_NS:
       // If SF is used, but the instruction doesn't update the SF, then we
       // can't do the optimization.
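Context, not part of the patch: the signed conditions G/GE/L/LE and O/NO become legal here because TEST against zero always clears OF, and the BLS/TBM instructions listed above likewise leave OF cleared while setting ZF and SF from their result, so a signed compare-with-zero reads identical flags with or without the TEST. A minimal IR sketch of the pattern this enables, with illustrative names modeled on the blsi32_sle test updated below:

; blsi(a) = a & -a isolates the lowest set bit; after this patch the signed
; compare against zero can consume the flags produced by blsil directly,
; with no intervening testl.
define i32 @blsi_select(i32 %a, i32 %b, i32 %c) nounwind {
  %neg = sub i32 0, %a
  %blsi = and i32 %neg, %a
  %cmp = icmp sgt i32 %blsi, 0
  %sel = select i1 %cmp, i32 %c, i32 %b
  ret i32 %sel
}

The unsigned conditions (A/AE/B/BE) still bail out because they read CF, which these instructions do not leave in the all-clear state that TEST produces.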
diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -539,11 +539,12 @@
   ret i32 %t3
 }
 
+; Inspired by PR48768, but using cmovcc instead of setcc. There should be
+; no test instruction.
 define i32 @blsi32_sle(i32 %a, i32 %b, i32 %c) nounwind {
 ; X86-LABEL: blsi32_sle:
 ; X86:       # %bb.0:
 ; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    cmovlel %eax, %ecx
@@ -554,7 +555,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    blsil %edi, %ecx
-; X64-NEXT:    testl %ecx, %ecx
 ; X64-NEXT:    cmovgl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 0, %a
@@ -685,7 +685,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    blsiq %rdi, %rcx
-; X64-NEXT:    testq %rcx, %rcx
 ; X64-NEXT:    cmovgq %rdx, %rax
 ; X64-NEXT:    retq
   %t0 = sub i64 0, %a
@@ -776,7 +775,6 @@
 ; X86-LABEL: blsmsk32_sle:
 ; X86:       # %bb.0:
 ; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    cmovlel %eax, %ecx
@@ -787,7 +785,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    blsmskl %edi, %ecx
-; X64-NEXT:    testl %ecx, %ecx
 ; X64-NEXT:    cmovgl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, 1
@@ -918,7 +915,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    blsmskq %rdi, %rcx
-; X64-NEXT:    testq %rcx, %rcx
 ; X64-NEXT:    cmovgq %rdx, %rax
 ; X64-NEXT:    retq
   %t0 = sub i64 %a, 1
@@ -1009,7 +1005,6 @@
 ; X86-LABEL: blsr32_sle:
 ; X86:       # %bb.0:
 ; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    testl %eax, %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    cmovlel %eax, %ecx
@@ -1020,7 +1015,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    blsrl %edi, %ecx
-; X64-NEXT:    testl %ecx, %ecx
 ; X64-NEXT:    cmovgl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, 1
@@ -1151,7 +1145,6 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    blsrq %rdi, %rcx
-; X64-NEXT:    testq %rcx, %rcx
 ; X64-NEXT:    cmovgq %rdx, %rax
 ; X64-NEXT:    retq
   %t0 = sub i64 %a, 1
diff --git a/llvm/test/CodeGen/X86/tbm_patterns.ll b/llvm/test/CodeGen/X86/tbm_patterns.ll
--- a/llvm/test/CodeGen/X86/tbm_patterns.ll
+++ b/llvm/test/CodeGen/X86/tbm_patterns.ll
@@ -193,7 +193,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    blcfilll %edi, %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
 ; CHECK-NEXT:    cmovgl %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, 1
@@ -245,7 +244,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    blcfillq %rdi, %rcx
-; CHECK-NEXT:    testq %rcx, %rcx
 ; CHECK-NEXT:    cmovgq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, 1
@@ -300,7 +298,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    blcil %edi, %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
 ; CHECK-NEXT:    cmovgl %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 1, %a
@@ -356,7 +353,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    blciq %rdi, %rcx
-; CHECK-NEXT:    testq %rcx, %rcx
 ; CHECK-NEXT:    cmovgq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 1, %a
@@ -432,7 +428,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    blcicl %edi, %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
 ; CHECK-NEXT:    cmovgl %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
@@ -488,7 +483,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    blcicq %rdi, %rcx
-; CHECK-NEXT:    testq %rcx, %rcx
 ; CHECK-NEXT:    cmovgq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1
@@ -541,7 +535,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    blcmskl %edi, %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
 ; CHECK-NEXT:    cmovgl %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, 1
@@ -593,7 +586,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    blcmskq %rdi, %rcx
-; CHECK-NEXT:    testq %rcx, %rcx
 ; CHECK-NEXT:    cmovgq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, 1
@@ -645,7 +637,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    blcsl %edi, %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
 ; CHECK-NEXT:    cmovgl %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, 1
@@ -697,7 +688,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    blcsq %rdi, %rcx
-; CHECK-NEXT:    testq %rcx, %rcx
 ; CHECK-NEXT:    cmovgq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, 1
@@ -749,7 +739,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    blsfilll %edi, %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
 ; CHECK-NEXT:    cmovgl %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, -1
@@ -801,7 +790,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    blsfillq %rdi, %rcx
-; CHECK-NEXT:    testq %rcx, %rcx
 ; CHECK-NEXT:    cmovgq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, -1
@@ -856,7 +844,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    blsicl %edi, %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
 ; CHECK-NEXT:    cmovgl %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
@@ -912,7 +899,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    blsicq %rdi, %rcx
-; CHECK-NEXT:    testq %rcx, %rcx
 ; CHECK-NEXT:    cmovgq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1
@@ -968,7 +954,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    t1mskcl %edi, %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
 ; CHECK-NEXT:    cmovgl %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
@@ -1024,7 +1009,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    t1mskcq %rdi, %rcx
-; CHECK-NEXT:    testq %rcx, %rcx
 ; CHECK-NEXT:    cmovgq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1
@@ -1080,7 +1064,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    tzmskl %edi, %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
 ; CHECK-NEXT:    cmovgl %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
@@ -1136,7 +1119,6 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    tzmskq %rdi, %rcx
-; CHECK-NEXT:    testq %rcx, %rcx
 ; CHECK-NEXT:    cmovgq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1
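For reference, every tbm_patterns.ll update above follows the same shape. Reassembled from the hunk fragments, the 32-bit BLCFILL case looks roughly like this; it is a sketch only, and the exact function name and select arms in the file may differ:

; blcfill(a) = a & (a + 1) clears the trailing one bits. The signed
; sle-zero compare reads ZF/SF/OF, all of which blcfilll leaves in the
; same state a testl would, so no test appears in the expected output.
define i32 @test_x86_tbm_blcfill_u32_sle(i32 %a, i32 %b, i32 %c) nounwind {
  %t0 = add i32 %a, 1
  %t1 = and i32 %t0, %a
  %t2 = icmp sle i32 %t1, 0
  %t3 = select i1 %t2, i32 %b, i32 %c
  ret i32 %t3
}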