Index: llvm/trunk/include/llvm/Target/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h +++ llvm/trunk/include/llvm/Target/TargetLowering.h @@ -322,6 +322,11 @@ return false; } + /// \brief Return true if ctlz instruction is fast. + virtual bool isCtlzFast() const { + return false; + } + /// Return true if it is safe to transform an integer-domain bitwise operation /// into the equivalent floating-point operation. This should be set to true /// if the target has IEEE-754-compliant fabs/fneg operations for the input @@ -3053,6 +3058,12 @@ virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const; + // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) + // If we're comparing for equality to zero and isCtlzFast is true, expose the + // fact that this can be implemented as a ctlz/srl pair, so that the dag + // combiner can fold the new nodes. + SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; + private: SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI, Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3562,3 +3562,28 @@ "Emulated TLS must have zero offset in GlobalAddressSDNode"); return CallResult.first; } + +SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, + SelectionDAG &DAG) const { + assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node."); + if (!isCtlzFast()) + return SDValue(); + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + SDLoc dl(Op); + if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { + if (C->isNullValue() && CC == ISD::SETEQ) { + EVT VT = Op.getOperand(0).getValueType(); + SDValue Zext = Op.getOperand(0); + if (VT.bitsLT(MVT::i32)) { + VT = MVT::i32; + Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); + } + unsigned Log2b = Log2_32(VT.getSizeInBits()); + SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); + SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, + DAG.getConstant(Log2b, dl, MVT::i32)); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); + } + } + return SDValue(); +} Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h @@ -492,6 +492,10 @@ return true; } + bool isCtlzFast() const override { + return true; + } + bool supportSplitCSR(MachineFunction *MF) const override { return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2362,20 +2362,10 @@ // If we're comparing for equality to zero, expose the fact that this is // implemented as a ctlz/srl pair on ppc, so that the dag combiner can // fold the new nodes. + if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG)) + return V; + if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { - if (C->isNullValue() && CC == ISD::SETEQ) { - EVT VT = Op.getOperand(0).getValueType(); - SDValue Zext = Op.getOperand(0); - if (VT.bitsLT(MVT::i32)) { - VT = MVT::i32; - Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); - } - unsigned Log2b = Log2_32(VT.getSizeInBits()); - SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); - SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, - DAG.getConstant(Log2b, dl, MVT::i32)); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); - } // Leave comparisons against 0 and -1 alone for now, since they're usually // optimized. FIXME: revisit this when we can custom lower all setcc // optimizations.