Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -502,6 +502,7 @@
     bool shrinkAndImmediate(SDNode *N);
     bool isMaskZeroExtended(SDNode *N) const;
     bool tryShiftAmountMod(SDNode *N);
+    bool combineIncDecVector(SDNode *Node);
     bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
 
     MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
@@ -3428,6 +3429,59 @@
   return true;
 }
 
+/// Convert vector increment or decrement to sub/add with an all-ones constant:
+/// add X, <1, 1...> --> sub X, <-1, -1...>
+/// sub X, <1, 1...> --> add X, <-1, -1...>
+/// The all-ones vector constant can be materialized using a pcmpeq instruction
+/// that is commonly recognized as an idiom (has no register dependency), so
+/// that's better/smaller than loading a splat 1 constant.
+///
+/// Returns true if \p Node was replaced and the replacement was selected, or
+/// false if operand 1 of \p Node is not a constant splat of 1.
+bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) {
+  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) &&
+         "Unexpected opcode for increment/decrement transform");
+
+  EVT VT = Node->getValueType(0);
+  assert(VT.isVector() && "Should only be called for vectors.");
+
+  SDValue X = Node->getOperand(0);
+  SDValue OneVec = Node->getOperand(1);
+
+  APInt SplatVal;
+  if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue())
+    return false;
+
+  SDLoc DL(Node);
+
+  // Build the all-ones value as a vector of i32 and bitcast it to VT. The
+  // scalar constant and the splat are created separately so that every new
+  // node is handed to insertDAGNode(); getConstant() with a vector type
+  // builds the scalar element internally, leaving no handle to position it.
+  assert(VT.getSizeInBits() % 32 == 0 &&
+         "Expected bit count to be a multiple of 32");
+  unsigned NumElts = VT.getSizeInBits() / 32;
+  assert(NumElts > 0 && "Expected to get non-empty vector.");
+
+  SDValue OneConstant =
+      CurDAG->getConstant(APInt::getAllOnesValue(32), DL, MVT::i32);
+  insertDAGNode(*CurDAG, X, OneConstant);
+
+  SDValue AllOnesVec = CurDAG->getSplatBuildVector(
+      MVT::getVectorVT(MVT::i32, NumElts), DL, OneConstant);
+  insertDAGNode(*CurDAG, X, AllOnesVec);
+
+  AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec);
+  insertDAGNode(*CurDAG, X, AllOnesVec);
+
+  unsigned NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
+  SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec);
+
+  ReplaceNode(Node, NewNode.getNode());
+  SelectCode(NewNode.getNode());
+  return true;
+}
+
 /// If the high bits of an 'and' operand are known zero, try setting the
 /// high bits of an 'and' constant operand to produce a smaller encoding by
 /// creating a small, sign-extended negative immediate rather than a large
@@ -3967,6 +4021,12 @@
       return;
     break;
 
+  case ISD::ADD:
+  case ISD::SUB:
+    if (NVT.isVector() && combineIncDecVector(Node))
+      return;
+    break;
+
   case ISD::AND:
     if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
       // Try to form a masked VPTESTM. Operands can be in either order.
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -675,6 +675,7 @@
     bool isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
 
+    bool isConstantSplat(SDValue Op, APInt &SplatVal);
   } // end namespace X86
 
   //===--------------------------------------------------------------------===//
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -6034,7 +6034,9 @@
   return false;
 }
 
-static bool isConstantSplat(SDValue Op, APInt &SplatVal) {
+namespace llvm {
+namespace X86 {
+bool isConstantSplat(SDValue Op, APInt &SplatVal) {
   APInt UndefElts;
   SmallVector EltBits;
   if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(),
@@ -6057,6 +6059,8 @@
 
   return false;
 }
+} // namespace X86
+} // namespace llvm
 
 static bool getTargetShuffleMaskIndices(SDValue MaskNode,
                                         unsigned MaskEltSizeInBits,
@@ -17712,7 +17716,7 @@
       std::swap(Op0, Op1);
 
     APInt APIntShiftAmt;
-    if (isConstantSplat(Amt, APIntShiftAmt)) {
+    if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
       uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
       return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
                          Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
@@ -24743,7 +24747,7 @@
 
   // Optimize shl/srl/sra with constant shift amount.
   APInt APIntShiftAmt;
-  if (!isConstantSplat(Amt, APIntShiftAmt))
+  if (!X86::isConstantSplat(Amt, APIntShiftAmt))
     return SDValue();
 
   uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
@@ -42320,31 +42324,6 @@
   return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
 }
 
-/// Convert vector increment or decrement to sub/add with an all-ones constant:
-/// add X, <1, 1...> --> sub X, <-1, -1...>
-/// sub X, <1, 1...> --> add X, <-1, -1...>
-/// The all-ones vector constant can be materialized using a pcmpeq instruction
-/// that is commonly recognized as an idiom (has no register dependency), so
-/// that's better/smaller than loading a splat 1 constant.
-static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) {
-  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
-         "Unexpected opcode for increment/decrement transform");
-
-  // Pseudo-legality check: getOnesVector() expects one of these types, so bail
-  // out and wait for legalization if we have an unsupported vector length.
-  EVT VT = N->getValueType(0);
-  if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
-    return SDValue();
-
-  APInt SplatVal;
-  if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue())
-    return SDValue();
-
-  SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
-  unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
-  return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec);
-}
-
 static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
                             const SDLoc &DL, EVT VT,
                             const X86Subtarget &Subtarget) {
@@ -42606,9 +42585,6 @@
                             HADDBuilder);
   }
 
-  if (SDValue V = combineIncDecVector(N, DAG))
-    return V;
-
   return combineAddOrSubToADCOrSBB(N, DAG);
 }
 
@@ -42737,9 +42713,6 @@
                             HSUBBuilder);
   }
 
-  if (SDValue V = combineIncDecVector(N, DAG))
-    return V;
-
   // Try to create PSUBUS if SUB's argument is max/min
   if (SDValue V = combineSubToSubus(N, DAG, Subtarget))
     return V;