diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3254,17 +3254,29 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, DAGCombinerInfo &DCI) const { - // Match these patterns in any of their permutations: - // (X & Y) == Y - // (X & Y) != Y if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND) std::swap(N0, N1); + SelectionDAG &DAG = DCI.DAG; EVT OpVT = N0.getValueType(); if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() || (Cond != ISD::SETEQ && Cond != ISD::SETNE)) return SDValue(); + // (X & Y) != 0 --> zextOrTrunc(X & Y) + // iff everything but LSB is known zero: + if (isNullConstant(N1) && Cond == ISD::SETNE && + (getBooleanContents(VT) == TargetLowering::UndefinedBooleanContent || + getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent)) { + unsigned NumEltBits = OpVT.getScalarSizeInBits(); + APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1); + if (DAG.MaskedValueIsZero(N0, UpperBits)) + return DAG.getZExtOrTrunc(N0, DL, VT); + } + + // Match these patterns in any of their permutations: + // (X & Y) == Y + // (X & Y) != Y SDValue X, Y; if (N0.getOperand(0) == N1) { X = N0.getOperand(1); @@ -3276,7 +3288,6 @@ return SDValue(); } - SelectionDAG &DAG = DCI.DAG; SDValue Zero = DAG.getConstant(0, DL, OpVT); if (DAG.isKnownToBeAPowerOfTwo(Y)) { // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll --- a/llvm/test/CodeGen/X86/parity-vec.ll +++ b/llvm/test/CodeGen/X86/parity-vec.ll @@ -36,8 +36,8 @@ ; POPCNT-NEXT: psllw $7, %xmm0 ; POPCNT-NEXT: pmovmskb %xmm0, %eax ; POPCNT-NEXT: popcntl %eax, %eax -; POPCNT-NEXT: testb $1, %al -; POPCNT-NEXT: setne %al +; POPCNT-NEXT: andl $1, %eax +; POPCNT-NEXT: # kill: def $al killed $al killed $eax ; POPCNT-NEXT: retq %i1 = bitcast <16 x i1> %x to i16 %i2 = call i16 @llvm.ctpop.i16(i16 %i1)