Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19474,6 +19474,38 @@ } } + // Make sure all but the first op are undef. + auto ConcatWithUndef = [](SDValue Concat) { + assert(Concat.getOpcode() == ISD::CONCAT_VECTORS && "Expected concat!"); + return std::all_of(std::next(Concat->op_begin()), Concat->op_end(), + [](const SDValue &Op) { + return Op.isUndef(); + }); + }; + + // The following pattern is likely to emerge with vector reduction ops. Moving + // the binary operation ahead of the concat may allow using a narrower vector + // instruction that has better performance than the wide version of the op: + // VBinOp (concat X, undef), (concat Y, undef) --> concat (VBinOp X, Y), VecC + if (LHS.getOpcode() == ISD::CONCAT_VECTORS && ConcatWithUndef(LHS) && + RHS.getOpcode() == ISD::CONCAT_VECTORS && ConcatWithUndef(RHS) && + (LHS.hasOneUse() || RHS.hasOneUse())) { + SDValue X = LHS.getOperand(0); + SDValue Y = RHS.getOperand(0); + EVT NarrowVT = X.getValueType(); + if (NarrowVT == Y.getValueType() && + TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { + // (binop undef, undef) may not return undef, so compute that result. 
+ SDLoc DL(N); + SDValue VecC = + DAG.getNode(Opcode, DL, NarrowVT, DAG.getUNDEF(NarrowVT), + DAG.getUNDEF(NarrowVT)); + SmallVector<SDValue, 4> Ops(LHS.getNumOperands(), VecC); + Ops[0] = DAG.getNode(Opcode, DL, NarrowVT, X, Y); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops); + } + } + if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) return V; Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -15013,7 +15013,7 @@ static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2, ArrayRef<int> HalfMask, int HalfIdx1, int HalfIdx2, bool UndefLower, - SelectionDAG &DAG) { + SelectionDAG &DAG, bool UseConcat = false) { assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?"); assert(V1.getValueType().isSimple() && "Expecting only simple types"); @@ -15034,6 +15034,14 @@ SDValue Half1 = getHalfVector(HalfIdx1); SDValue Half2 = getHalfVector(HalfIdx2); SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask); + if (UseConcat) { + SDValue Op0 = V; + SDValue Op1 = DAG.getUNDEF(HalfVT); + if (UndefLower) + std::swap(Op0, Op1); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1); + } + unsigned Offset = UndefLower ? HalfNumElts : 0; return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, DAG.getIntPtrConstant(Offset, DL)); @@ -33974,7 +33982,7 @@ // the wide shuffle that we started with. return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0), Shuf->getOperand(1), HalfMask, HalfIdx1, - HalfIdx2, false, DAG); + HalfIdx2, false, DAG, /*UseConcat*/true); } static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,