diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -5300,6 +5300,11 @@ // combiner can fold the new nodes. SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; + // Return true if `X & Y eq/ne 0` is preferable to `X & Y ne/eq Y` + virtual bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode, EVT) const { + return true; + } + private: SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, DAGCombinerInfo &DCI) const; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3823,8 +3823,12 @@ return SDValue(); } + // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if + // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as + // it's liable to create an infinite loop. SDValue Zero = DAG.getConstant(0, DL, OpVT); - if (DAG.isKnownToBeAPowerOfTwo(Y)) { + if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) && + DAG.isKnownToBeAPowerOfTwo(Y)) { // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. // Note that where Y is variable and is known to have at most one bit set // (for example, if it is Z & 1) we cannot do this; the expressions are not diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1054,6 +1054,9 @@ bool preferABDSToABSWithNSW(EVT VT) const override; + bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond, + EVT VT) const override; + /// Return true if the target has native support for /// the specified value type and it is 'desirable' to use the type for the /// given node type. e.g. 
On x86 i16 is legal, but undesirable since i16 diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -22317,6 +22317,11 @@ return Sub.getValue(1); } +bool X86TargetLowering::isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond, + EVT VT) const { + return !VT.isVector() || Cond != ISD::CondCode::SETEQ; +} + /// Check if replacement of SQRT with RSQRT should be disabled. bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll --- a/llvm/test/CodeGen/X86/known-pow2.ll +++ b/llvm/test/CodeGen/X86/known-pow2.ll @@ -619,11 +619,8 @@ ; CHECK-NEXT: pand %xmm0, %xmm2 ; CHECK-NEXT: pandn %xmm7, %xmm0 ; CHECK-NEXT: por %xmm2, %xmm0 -; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-NEXT: pand %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: pand %xmm0, %xmm1 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm2, %xmm0 ; CHECK-NEXT: retq %yy = shl <4 x i32> , %y %zz = lshr <4 x i32> , %z