diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4005,6 +4005,23 @@
     return true;
   }
 
+  // Return true if it's desirable to try and optimize LogicOp(SETCC0, SETCC1).
+  // An example (what is implemented as of this writing) is:
+  //    With C as a power of 2 and C != 0 and C != INT_MIN:
+  //    (icmp eq A, C) | (icmp eq A, -C)
+  //        -> (icmp eq and(add(A, C), ~(C + C)), 0)
+  //    (icmp ne A, C) & (icmp ne A, -C)
+  //        -> (icmp ne and(add(A, C), ~(C + C)), 0)
+  //
+  // @param LogicOp the logic op
+  // @param SETCC0 the first of the SETCC nodes
+  // @param SETCC1 the second of the SETCC nodes
+  virtual bool isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
+                                                  const SDNode *SETCC0,
+                                                  const SDNode *SETCC1) const {
+    return false;
+  }
+
   /// Return true if it is profitable to combine an XOR of a logical shift
   /// to create a logical shift of NOT. This transformation may not be desirable
   /// if it disrupts a particularly auspicious target-specific tree (e.g.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5629,6 +5629,65 @@
   return SDValue();
 }
 
+static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
+  assert(
+      (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
+      "Invalid Op to combine SETCC with");
+
+  // TODO: Search past casts/truncates.
+  SDValue LHS = LogicOp->getOperand(0);
+  SDValue RHS = LogicOp->getOperand(1);
+  if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isDesirableToCombineLogicOpOfSETCC(LogicOp, LHS.getNode(),
+                                              RHS.getNode()))
+    return SDValue();
+
+  ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+  ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
+
+  SDValue LHS0 = LHS->getOperand(0);
+  SDValue RHS0 = RHS->getOperand(0);
+  SDValue LHS1 = LHS->getOperand(1);
+  SDValue RHS1 = RHS->getOperand(1);
+
+  auto *LHS1C = dyn_cast<ConstantSDNode>(LHS1);
+  auto *RHS1C = dyn_cast<ConstantSDNode>(RHS1);
+  EVT VT = LogicOp->getValueType(0);
+  EVT OpVT = LHS0.getValueType();
+  SDLoc DL(LogicOp);
+
+  // With C as a power of 2 and C != 0 and C != INT_MIN:
+  //    (icmp eq A, C) | (icmp eq A, -C)
+  //        -> (icmp eq and(add(A, C), ~(C + C)), 0)
+  //    (icmp ne A, C) & (icmp ne A, -C)
+  //        -> (icmp ne and(add(A, C), ~(C + C)), 0)
+  if (CCL == CCR &&
+      CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
+      LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() &&
+      RHS.hasOneUse() && LHS1C->getAPIntValue() == (-RHS1C->getAPIntValue())) {
+    const ConstantSDNode *Pow2 = nullptr;
+    if (LHS1C->getAPIntValue().isPowerOf2())
+      Pow2 = LHS1C;
+    else if (RHS1C->getAPIntValue().isPowerOf2())
+      Pow2 = RHS1C;
+    // isPowerOf2() is false for zero, so a non-null Pow2 also covers C != 0.
+    if (Pow2 != nullptr && !Pow2->getAPIntValue().isMinSignedValue()) {
+      const APInt &C = Pow2->getAPIntValue();
+      SDValue AddOp =
+          DAG.getNode(ISD::ADD, DL, OpVT, LHS0, DAG.getConstant(C, DL, OpVT));
+      SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
+                                  DAG.getConstant(~(C + C), DL, OpVT));
+      return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
+                         DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+    }
+  }
+
+  return SDValue();
+}
+
 /// This contains all DAGCombine rules which reduce two values combined by
 /// an And operation to a single value. This makes them reusable in the context
 /// of visitSELECT(). Rules involving constants are not included as
@@ -6330,6 +6389,9 @@
   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
     return DAG.getConstant(0, SDLoc(N), VT);
 
+  if (SDValue R = foldAndOrOfSETCC(N, DAG))
+    return R;
+
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
@@ -7215,6 +7277,9 @@
   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
     return N1;
 
+  if (SDValue R = foldAndOrOfSETCC(N, DAG))
+    return R;
+
   if (SDValue Combined = visitORLike(N0, N1, N))
     return Combined;
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1052,6 +1052,12 @@
     /// and some i16 instructions are slow.
     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
 
+    /// Return true if the LogicOp's result type is a scalar integer.
+    bool
+    isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
+                                       const SDNode *SETCC0,
+                                       const SDNode *SETCC1) const override;
+
     /// Return the newly negated expression if the cost is not expensive and
     /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
     /// do the negation.
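
The fold above hinges on the identity (A == C || A == -C) <=> ((A + C) & ~(C + C)) == 0 for C a non-zero power of two with -C representable (C != INT_MIN): A == -C makes the sum zero, A == C makes it C + C, and masking with ~(C + C) rejects every other value. A minimal standalone sanity check of that identity (not part of the patch; written here purely for illustration) that brute-forces every i8 value A against every valid power-of-two C:

// Standalone sanity check of the identity behind foldAndOrOfSETCC (not part
// of the patch): for C a non-zero power of two with -C representable,
//   (A == C || A == -C)  <=>  ((A + C) & ~(C + C)) == 0.
#include <cstdint>
#include <cstdio>

int main() {
  // Stop at 0x40: C = 0x80 would be INT8_MIN, which the fold excludes.
  for (int c = 1; c <= 0x40; c <<= 1) {
    for (int a = 0; a < 256; ++a) {
      int8_t A = (int8_t)a, C = (int8_t)c;
      bool Cmp = (A == C) || (A == (int8_t)-C);
      // Fixed-width wrap-around arithmetic, as the DAG nodes perform it.
      bool Fold = ((uint8_t)(A + C) & (uint8_t)~(uint8_t)(C + C)) == 0;
      if (Cmp != Fold) {
        printf("mismatch: A=%d C=%d\n", (int)A, (int)C);
        return 1;
      }
    }
  }
  puts("identity holds for all i8 values");
  return 0;
}
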
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -56593,6 +56593,12 @@
   return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG);
 }
 
+bool X86TargetLowering::isDesirableToCombineLogicOpOfSETCC(
+    const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
+  EVT VT = LogicOp->getValueType(0);
+  return VT.isScalarInteger();
+}
+
 bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
   EVT VT = Op.getValueType();
   bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
diff --git a/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll b/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll
--- a/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll
+++ b/llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll
@@ -12,20 +12,16 @@
 ; X86-LABEL: eq_pow_or:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl $32, %eax
-; X86-NEXT:    sete %cl
-; X86-NEXT:    cmpl $-32, %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    testl $-65, %eax
 ; X86-NEXT:    sete %al
-; X86-NEXT:    orb %cl, %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: eq_pow_or:
 ; X64:       # %bb.0:
-; X64-NEXT:    cmpl $32, %edi
-; X64-NEXT:    sete %cl
-; X64-NEXT:    cmpl $-32, %edi
+; X64-NEXT:    addl $32, %edi
+; X64-NEXT:    testl $-65, %edi
 ; X64-NEXT:    sete %al
-; X64-NEXT:    orb %cl, %al
 ; X64-NEXT:    retq
   %2 = icmp eq i32 %0, 32
   %3 = icmp eq i32 %0, -32
@@ -37,20 +33,16 @@
 ; X86-LABEL: ne_pow_and:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpb $16, %al
-; X86-NEXT:    setne %cl
-; X86-NEXT:    cmpb $-16, %al
+; X86-NEXT:    addb $16, %al
+; X86-NEXT:    testb $-33, %al
 ; X86-NEXT:    setne %al
-; X86-NEXT:    andb %cl, %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: ne_pow_and:
 ; X64:       # %bb.0:
-; X64-NEXT:    cmpb $16, %dil
-; X64-NEXT:    setne %cl
-; X64-NEXT:    cmpb $-16, %dil
+; X64-NEXT:    addb $16, %dil
+; X64-NEXT:    testb $-33, %dil
 ; X64-NEXT:    setne %al
-; X64-NEXT:    andb %cl, %al
 ; X64-NEXT:    retq
   %2 = icmp ne i8 %0, 16
   %3 = icmp ne i8 %0, -16
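
Reading the updated checks against the fold: eq_pow_or instantiates it with C = 32, so add(A, C) becomes addl $32 and ~(C + C) = ~64 = -65 becomes the testl $-65 mask; ne_pow_and uses C = 16 on i8, giving addb $16 and ~32 = -33 for testb $-33. A hypothetical scalar C++ rendering of the eq_pow_or before/after (function names are illustrative, not from the patch):

// Hypothetical scalar equivalents of the eq_pow_or test (illustrative only).
#include <cstdint>

// Before the fold: two compares and an OR of the flag results
// (cmpl $32; sete; cmpl $-32; sete; orb).
bool eq_pow_or_before(int32_t a) { return a == 32 || a == -32; }

// After the fold with C = 32: one add and one test against
// ~(C + C) = ~64 = -65 (addl $32; testl $-65; sete).
bool eq_pow_or_after(int32_t a) {
  return (((uint32_t)a + 32u) & ~64u) == 0;
}
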