diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21299,10 +21299,11 @@ } } - // See if we can replace a shuffle with an insert_subvector. + // See if we can replace a shuffle with an insert_subvector sequence. // e.g. v2i32 into v8i32: - // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7). - // --> insert_subvector(lhs,rhs1,4). + // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),12,13,2,3,10,11,6,7). + // --> + // insert_subvector(insert_subvector(lhs, rhs2, 0), rhs1, 4). if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) && TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) { auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef Mask) { @@ -21315,41 +21316,65 @@ if (!TLI.isTypeLegal(SubVT)) return SDValue(); - // Don't bother if we have an unary shuffle (matches undef + LHS elts). - if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; })) - return SDValue(); + SmallVector, 4> InsertSubvecSequence; - // Search [NumSubElts] spans for RHS sequence. + // Search [NumSubElts] spans in mask for subvector insertions. // TODO: Can we avoid nested loops to increase performance? - SmallVector InsertionMask(NumElts); - for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) { - for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) { - // Reset mask to identity. - std::iota(InsertionMask.begin(), InsertionMask.end(), 0); + for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) { + ArrayRef ActualSubmask = + Mask.drop_front(SubIdx).take_front(NumSubElts); + auto IdentitySubmask = seq(SubIdx, SubIdx + NumSubElts); + assert(ActualSubmask.size() == IdentitySubmask.size() && + ActualSubmask.size() == (unsigned)NumSubElts && + "Miscalculated mask subsamples?"); - // Add subvector insertion. 
- std::iota(InsertionMask.begin() + SubIdx, - InsertionMask.begin() + SubIdx + NumSubElts, - NumElts + (SubVec * NumSubElts)); + // If all of the actual shuffle mask elements in this subsection + // are undef/identity then skip this subsection - keep LHS elements. + if (all_of(zip(ActualSubmask, IdentitySubmask), [](auto I) { + int ActualIdx, IdentityIdx; + std::tie(ActualIdx, IdentityIdx) = I; + return ActualIdx < 0 || ActualIdx == IdentityIdx; + })) + continue; - // See if the shuffle mask matches the reference insertion mask. - bool MatchingShuffle = true; - for (int i = 0; i != (int)NumElts; ++i) { - int ExpectIdx = InsertionMask[i]; - int ActualIdx = Mask[i]; - if (0 <= ActualIdx && ExpectIdx != ActualIdx) { - MatchingShuffle = false; - break; - } - } + // Otherwise, does this subsection insert a subvector from RHS? + Optional> Step; + for (int CandidateSubVec : seq(0, NumSubVecs)) { + auto CandidateSubmask = + seq(NumElts + (CandidateSubVec * NumSubElts), + NumElts + (CandidateSubVec * NumSubElts) + NumSubElts); + assert(ActualSubmask.size() == CandidateSubmask.size() && + ActualSubmask.size() == (unsigned)NumSubElts && + "Miscalculated mask subsamples?"); - if (MatchingShuffle) - return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS, - RHS.getOperand(SubVec), - DAG.getVectorIdxConstant(SubIdx, SDLoc(N))); + // Does the shuffle insert subvector \p CandidateSubVec at position \p SubIdx? + if (!all_of(zip(ActualSubmask, CandidateSubmask), [](auto I) { + int ActualIdx, ExpectIdx; + std::tie(ActualIdx, ExpectIdx) = I; + return ActualIdx < 0 || ActualIdx == ExpectIdx; + })) + continue; // Maybe it inserts some other subvector? + + Step = {RHS.getOperand(CandidateSubVec), SubIdx}; + break; } + // Okay, so did we find a subvector that this submask inserts? + if (!Step) + return SDValue(); // We did not. Shuffle stays - abort. 
+ + InsertSubvecSequence.emplace_back(*Step); } - return SDValue(); + assert( + !InsertSubvecSequence.empty() && + "Did not discover a sequence of insertions yet didn't early-return?"); + + SDLoc DL(N); + SDValue Res = LHS; + for (std::pair Step : + InsertSubvecSequence) + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Res, Step.first, + DAG.getVectorIdxConstant(Step.second, DL)); + return Res; }; ArrayRef Mask = SVN->getMask(); if (N1.getOpcode() == ISD::CONCAT_VECTORS)