diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -605,6 +605,12 @@ return false; } + /// Return the maximum number of "x & (x - 1)" steps that a (ctpop x) compare + /// may be expanded into instead of deferring to a custom CTPOP. + virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const { + return 1; + } + /// Return true if instruction generated for equality comparison is folded /// with instruction generated for signed comparison. virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3403,15 +3403,26 @@ // (ctpop x) u< 2 -> (x & x-1) == 0 // (ctpop x) u> 1 -> (x & x-1) != 0 - if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)) { + if (Cond == ISD::SETULT || Cond == ISD::SETUGT) { // This based on X86's custom lowering for vector CTPOP which produces more // instructions than the expansion here. + unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond); + if (C1.ugt(CostLimit + (Cond == ISD::SETULT))) + return SDValue(); + if (C1 == 0 && (Cond == ISD::SETULT)) + return SDValue(); // This is handled elsewhere. + + unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT); + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); - SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); + SDValue Result = CTOp; + for (unsigned i = 0; i < Passes; i++) { + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne); + Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add); + } ISD::CondCode CC = Cond == ISD::SETULT ? 
ISD::SETEQ : ISD::SETNE; - return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); + return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC); } // If ctpop is not supported, expand a power-of-2 comparison based on it.