diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3905,6 +3905,18 @@ if (SDValue V = optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) return V; + + // If we take and produce non-i1 type, and the input has only the 0'th bit, + // and we are only checking whether the input is not empty, + // then just use the casted operand. + if (OpVT.getScalarSizeInBits() > 1 && VT.getScalarSizeInBits() > 1 && + Cond == ISD::SETNE && C1.isNullValue() && + (getBooleanContents(OpVT) == UndefinedBooleanContent || + getBooleanContents(OpVT) == ZeroOrOneBooleanContent)) { + KnownBits N0Known = DAG.computeKnownBits(N0); + if (N0Known.getMaxValue().isOneValue()) + return DAG.getBoolExtOrTrunc(N0, dl, VT, OpVT); + } } // These simplifications apply to splat vectors as well. diff --git a/llvm/test/CodeGen/X86/lzcnt-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-cmp.ll --- a/llvm/test/CodeGen/X86/lzcnt-cmp.ll +++ b/llvm/test/CodeGen/X86/lzcnt-cmp.ll @@ -50,8 +50,8 @@ ; X86-BSR-NEXT: xorl $31, %eax ; X86-BSR-NEXT: addl $32, %eax ; X86-BSR-NEXT: .LBB1_2: -; X86-BSR-NEXT: testl $-64, %eax -; X86-BSR-NEXT: setne %al +; X86-BSR-NEXT: shrl $6, %eax +; X86-BSR-NEXT: # kill: def $al killed $al killed $eax ; X86-BSR-NEXT: retl ; ; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64: @@ -63,8 +63,8 @@ ; X86-LZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax ; X86-LZCNT-NEXT: addl $32, %eax ; X86-LZCNT-NEXT: .LBB1_2: -; X86-LZCNT-NEXT: testb $64, %al -; X86-LZCNT-NEXT: setne %al +; X86-LZCNT-NEXT: shrl $6, %eax +; X86-LZCNT-NEXT: # kill: def $al killed $al killed $eax ; X86-LZCNT-NEXT: retl ; ; X64-BSR-LABEL: lshr_ctlz_undef_cmpeq_one_i64: @@ -133,8 +133,8 @@ ; X86-BSR-NEXT: xorl $31, %eax ; X86-BSR-NEXT: addl $32, %eax ; X86-BSR-NEXT: .LBB3_2: -; X86-BSR-NEXT: testl $-64, %eax -; X86-BSR-NEXT: setne %al +; X86-BSR-NEXT: shrl $6, %eax +; X86-BSR-NEXT: # kill: def $al killed $al killed $eax ; X86-BSR-NEXT: retl ; ; X86-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64: @@ -146,22 +146,22 @@ ; X86-LZCNT-NEXT: lzcntl {{[0-9]+}}(%esp), %eax ; X86-LZCNT-NEXT: addl $32, %eax ; X86-LZCNT-NEXT: .LBB3_2: -; X86-LZCNT-NEXT: testb $64, %al -; X86-LZCNT-NEXT: setne %al +; X86-LZCNT-NEXT: shrl $6, %eax +; X86-LZCNT-NEXT: # kill: def $al killed $al killed $eax ; X86-LZCNT-NEXT: retl ; ; X64-BSR-LABEL: lshr_ctlz_undef_cmpne_zero_i64: ; X64-BSR: # %bb.0: ; X64-BSR-NEXT: bsrq %rdi, %rax -; X64-BSR-NEXT: testq $-64, %rax -; X64-BSR-NEXT: setne %al +; X64-BSR-NEXT: shrq $6, %rax +; X64-BSR-NEXT: # kill: def $al killed $al killed $rax ; X64-BSR-NEXT: retq ; ; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpne_zero_i64: ; X64-LZCNT: # %bb.0: ; X64-LZCNT-NEXT: lzcntq %rdi, %rax -; X64-LZCNT-NEXT: testb $64, %al -; X64-LZCNT-NEXT: setne %al +; X64-LZCNT-NEXT: shrq $6, %rax +; X64-LZCNT-NEXT: # kill: def $al killed $al killed $rax ; X64-LZCNT-NEXT: retq %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1) %lshr = lshr i64 %ctlz, 6 diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll --- a/llvm/test/CodeGen/X86/parity-vec.ll +++ b/llvm/test/CodeGen/X86/parity-vec.ll @@ -19,8 +19,8 @@ ; CHECK-NEXT: psllw $7, %xmm0 ; CHECK-NEXT: pmovmskb %xmm0, %eax ; CHECK-NEXT: popcntw %ax, %ax -; CHECK-NEXT: testb $1, %al -; CHECK-NEXT: setne %al +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %i1 = bitcast <16 x i1> %x to i16 %i2 = call i16 @llvm.ctpop.i16(i16 %i1) diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll --- a/llvm/test/CodeGen/X86/umul_fix_sat.ll +++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll @@ -501,36 +501,32 @@ ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull %esi +; X86-NEXT: mull %ebp ; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: addl %ebx, %ebp +; X86-NEXT: mull %esi +; X86-NEXT: addl %edx, %ebx ; X86-NEXT: adcl $0, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %esi +; X86-NEXT: mull %ebp ; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %eax, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %ebp, %eax +; X86-NEXT: mull %esi +; X86-NEXT: addl %ebx, %eax ; X86-NEXT: adcl %edi, %edx ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: addl %ebx, %edx +; X86-NEXT: addl %ebp, %edx ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: shrdl $31, %edx, %eax -; X86-NEXT: movl %edx, %esi -; X86-NEXT: shrl $31, %esi -; X86-NEXT: xorl %edi, %edi -; X86-NEXT: cmpl $1, %esi -; X86-NEXT: sbbl %edi, %edi -; X86-NEXT: notl %edi -; X86-NEXT: orl %edi, %eax ; X86-NEXT: shldl $1, %edx, %ecx -; X86-NEXT: orl %edi, %ecx +; X86-NEXT: shrdl $31, %edx, %eax +; X86-NEXT: testl $-2147483648, %edx # imm = 0x80000000 +; X86-NEXT: movl $-1, %edx +; X86-NEXT: cmovnel %edx, %eax +; X86-NEXT: cmovnel %edx, %ecx ; X86-NEXT: movl %ecx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi