Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -334,6 +334,22 @@
     return MaskAndBranchFoldingIsLegal;
   }
 
+  /// Return true if the target should transform:
+  /// (X & Y) == Y ---> (~X & Y) == 0
+  /// (X & Y) != Y ---> (~X & Y) != 0
+  ///
+  /// This may be profitable if the target has a bitwise and-not operation that
+  /// sets comparison flags. A target may want to limit the transformation based
+  /// on the type of Y or if Y is a constant.
+  ///
+  /// Note that the transform will not occur if Y is known to be a power-of-2
+  /// because a mask and compare of a single bit can be handled by inverting the
+  /// predicate, for example:
+  /// (X & 8) == 8 ---> (X & 8) != 0
+  virtual bool hasAndNotCompare(SDValue Y) const {
+    return false;
+  }
+
   /// \brief Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into
   /// promotedInst1(...(promotedInstN(ext(load)))).
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1304,6 +1304,52 @@
   llvm_unreachable("Unexpected enumeration.");
 }
 
+/// If the target supports an 'and-not' or 'and-complement' logic operation,
+/// try to use that to make a comparison operation more efficient.
+static SDValue createAndNotSetCC(EVT VT, SDValue N0, SDValue N1,
+                                 ISD::CondCode Cond, SelectionDAG &DAG,
+                                 SDLoc dl) {
+  // Match these patterns in any of their permutations:
+  // (X & Y) == Y
+  // (X & Y) != Y
+  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
+    std::swap(N0, N1);
+
+  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
+    return SDValue();
+
+  SDValue X, Y;
+  if (N0.getOperand(0) == N1) {
+    X = N0.getOperand(1);
+    Y = N0.getOperand(0);
+  } else if (N0.getOperand(1) == N1) {
+    X = N0.getOperand(0);
+    Y = N0.getOperand(1);
+  } else {
+    return SDValue();
+  }
+
+  // Bail out if the compare operand that we want to turn into a zero is already
+  // a zero (otherwise, infinite loop).
+  auto *YConst = dyn_cast<ConstantSDNode>(Y);
+  if (YConst && YConst->isNullValue())
+    return SDValue();
+
+  // We don't want to do this transform if the mask is a single bit because
+  // there are more efficient ways to deal with that case (for example, 'bt' on
+  // x86 or 'rlwinm' on PPC).
+  if (!DAG.getTargetLoweringInfo().hasAndNotCompare(Y) ||
+      valueHasExactlyOneBitSet(Y, DAG))
+    return SDValue();
+
+  // Transform this into: ~X & Y == 0.
+  EVT OpVT = X.getValueType();
+  SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
+  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
+  return DAG.getSetCC(dl, VT, NewAnd, DAG.getConstant(0, dl, OpVT), Cond);
+}
+
 /// Try to simplify a setcc built with the specified operands and cc. If it is
 /// unable to simplify it, return a null SDValue.
 SDValue
@@ -2166,6 +2212,9 @@
     return N0;
   }
 
+  if (SDValue AndNotCC = createAndNotSetCC(VT, N0, N1, Cond, DAG, dl))
+    return AndNotCC;
+
   // Could not fold it.
   return SDValue();
 }
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -751,6 +751,8 @@
 
     bool isCheapToSpeculateCtlz() const override;
 
+    bool hasAndNotCompare(SDValue Y) const override;
+
     /// Return the value type to use for ISD::SETCC.
     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                            EVT VT) const override;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -4122,6 +4122,18 @@
   return Subtarget.hasLZCNT();
 }
 
+bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
+  if (!Subtarget.hasBMI())
+    return false;
+
+  // There are only 32-bit and 64-bit forms for 'andn'.
+  EVT VT = Y.getValueType();
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return false;
+
+  return true;
+}
+
 /// Return true if every element in Mask, beginning
 /// from position Pos and ending in Pos+Size is undef.
 static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
Index: test/CodeGen/X86/bmi.ll
===================================================================
--- test/CodeGen/X86/bmi.ll
+++ test/CodeGen/X86/bmi.ll
@@ -135,12 +135,11 @@
   ret i1 %cmp
 }
 
-; TODO: Recognize a disguised andn in the following 4 tests.
+; Recognize a disguised andn in the following 4 tests.
 define i1 @and_cmp1(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %and = and i32 %x, %y
@@ -151,8 +150,7 @@
 define i1 @and_cmp2(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp2:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %and = and i32 %y, %x
@@ -163,8 +161,7 @@
 define i1 @and_cmp3(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp3:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %and = and i32 %x, %y
@@ -175,8 +172,7 @@
 define i1 @and_cmp4(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp4:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %and = and i32 %y, %x
@@ -189,8 +185,8 @@
 define i1 @and_cmp_const(i32 %x) {
 ; CHECK-LABEL: and_cmp_const:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl $43, %edi
-; CHECK-NEXT:    cmpl $43, %edi
+; CHECK-NEXT:    movl $43, %eax
+; CHECK-NEXT:    andnl %eax, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %and = and i32 %x, 43
@@ -198,6 +194,63 @@
   ret i1 %cmp
 }
 
+; But don't use 'andn' if the mask is a power-of-two.
+define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
+; CHECK-LABEL: and_cmp_const_power_of_two:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    btl %esi, %edi
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    retq
+;
+  %shl = shl i32 1, %y
+  %and = and i32 %x, %shl
+  %cmp = icmp ne i32 %and, %shl
+  ret i1 %cmp
+}
+
+; Don't transform to 'andn' if there's another use of the 'and'.
+define i32 @and_cmp_not_one_use(i32 %x) {
+; CHECK-LABEL: and_cmp_not_one_use:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl $37, %edi
+; CHECK-NEXT:    cmpl $37, %edi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    addl %edi, %eax
+; CHECK-NEXT:    retq
+;
+  %and = and i32 %x, 37
+  %cmp = icmp eq i32 %and, 37
+  %ext = zext i1 %cmp to i32
+  %add = add i32 %and, %ext
+  ret i32 %add
+}
+
+; Verify that we're not transforming invalid comparison predicates.
+define i1 @not_an_andn1(i32 %x, i32 %y) {
+; CHECK-LABEL: not_an_andn1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl %esi, %edi
+; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    setg %al
+; CHECK-NEXT:    retq
+  %and = and i32 %x, %y
+  %cmp = icmp sgt i32 %y, %and
+  ret i1 %cmp
+}
+
+define i1 @not_an_andn2(i32 %x, i32 %y) {
+; CHECK-LABEL: not_an_andn2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl %esi, %edi
+; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    retq
+  %and = and i32 %y, %x
+  %cmp = icmp ule i32 %y, %and
+  ret i1 %cmp
+}
+
 ; Don't choose a 'test' if an 'andn' can be used.
 define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
 ; CHECK-LABEL: andn_cmp_swap_ops:
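Usage note (illustrative sketch, not part of this patch): any target with a flag-setting and-not instruction can opt in to the new combine by overriding the TLI hook, mirroring the X86 change above. The sketch below assumes an AArch64-style target whose 'bics' (and-not, setting flags) instruction operates on 32-bit and 64-bit GPRs; the override and its placement are hypothetical and only show how hasAndNotCompare is meant to be used.

  // Hypothetical override in AArch64ISelLowering.cpp (with the matching
  // declaration added to the header, as done for X86 above). 'bics' only
  // exists for 32-bit and 64-bit general-purpose registers, so limit the
  // transform to those scalar types.
  bool AArch64TargetLowering::hasAndNotCompare(SDValue Y) const {
    EVT VT = Y.getValueType();
    return VT == MVT::i32 || VT == MVT::i64;
  }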