This is an archive of the discontinued LLVM Phabricator instance.

[DAG] DAGCombiner::visitVECTOR_SHUFFLE - recognise chain of INSERT_SUBVECTOR patterns
AbandonedPublic

Authored by lebedev.ri on Aug 5 2021, 8:00 AM.

Download Raw Diff

Details

Reviewers

RKSimon
craig.topper
spatel
efriedma
dmgreen
t.p.northover

Summary

A logical extension to D107068 - we don't strictly need to have
a single insertion into an otherwise-identity LHS.

While the general case of iteratively chopping off bits of a mask,
but keeping the shuffle, results in obviously-bad codegen regressions,
the case where we reduce the entire shuffle into a sequence of
subvector insertions, and drop said shuffle, seems somewhat promising.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

lebedev.ri created this revision.Aug 5 2021, 8:00 AM

Herald added subscribers: ecnelises, pengfei, hiraditya. · View Herald TranscriptAug 5 2021, 8:00 AM

lebedev.ri requested review of this revision.Aug 5 2021, 8:00 AM

lebedev.ri edited the summary of this revision. (Show Details)

lebedev.ri planned changes to this revision.Aug 5 2021, 8:13 AM

I'm working on a concat(concat,concat) fold that should help a lot more than this (although I might not get it finished before going on holiday next week) - what could be done as an extension of D107068 is handle the case where the insert_subvector straddles multiple concat entries (i.e. slice the concat ops into a smaller concat and insert that as a subvector).

Harbormaster completed remote builds in B118165: Diff 364477.Aug 5 2021, 8:37 AM

In D107572#2928704, @RKSimon wrote:

I'm working on a concat(concat,concat) fold that should help a lot more than this (although I might not get it finished before going on holiday next week) - what could be done as an extension of D107068 is handle the case where the insert_subvector straddles multiple concat entries (i.e. slice the concat ops into a smaller concat and insert that as a subvector).

Hmm yeah, now that I have fixed the code to actually work as planned, it currently does not trigger on any of the existing tests.

lebedev.ri planned changes to this revision.Aug 5 2021, 9:58 AM

Harbormaster completed remote builds in B118198: Diff 364523.Aug 5 2021, 10:26 AM

lebedev.ri abandoned this revision.Jan 17 2022, 2:35 PM

Revision Contents

Path

Size

llvm/

lib/

CodeGen/

SelectionDAG/

DAGCombiner.cpp

87 lines

Diff 364523

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 21,293 Lines • ▼ Show 20 Lines	if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
if (TLI.isShuffleMaskLegal(NewMask, VT)) {		if (TLI.isShuffleMaskLegal(NewMask, VT)) {
SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());		SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,		SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
N0.getOperand(0), UndefVec);		N0.getOperand(0), UndefVec);
return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);		return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
}		}
}		}

// See if we can replace a shuffle with an insert_subvector.		// See if we can replace a shuffle with an insert_subvector sequence.
// e.g. v2i32 into v8i32:		// e.g. v2i32 into v8i32:
// shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).		// shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),12,13,2,3,10,11,6,7).
// --> insert_subvector(lhs,rhs1,4).		// -->
		// insert_subvector(insert_subvector(lhs, rhs2, 0), rhs1, 4).
if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&		if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {		TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {		auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
// Ensure RHS subvectors are legal.		// Ensure RHS subvectors are legal.
assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");		assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
EVT SubVT = RHS.getOperand(0).getValueType();		EVT SubVT = RHS.getOperand(0).getValueType();
int NumSubVecs = RHS.getNumOperands();		int NumSubVecs = RHS.getNumOperands();
int NumSubElts = SubVT.getVectorNumElements();		int NumSubElts = SubVT.getVectorNumElements();
assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");		assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
if (!TLI.isTypeLegal(SubVT))		if (!TLI.isTypeLegal(SubVT))
return SDValue();		return SDValue();

// Don't bother if we have an unary shuffle (matches undef + LHS elts).		SmallVector<std::pair<SDValue, int>, 4> InsertSubvecSequence;
if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
return SDValue();

// Search [NumSubElts] spans for RHS sequence.		// Search [NumSubElts] spans in mask for subvector insertions.
// TODO: Can we avoid nested loops to increase performance?		// TODO: Can we avoid nested loops to increase performance?
SmallVector<int> InsertionMask(NumElts);
for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {		for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
// Reset mask to identity.		ArrayRef<int> ActualSubmask =
std::iota(InsertionMask.begin(), InsertionMask.end(), 0);		Mask.drop_front(SubIdx).take_front(NumSubElts);
		auto IdentitySubmask = seq<int>(SubIdx, SubIdx + NumSubElts);
		assert(ActualSubmask.size() == IdentitySubmask.size() &&
		ActualSubmask.size() == (unsigned)NumSubElts &&
		"Miscalculated mask subsamples?");

		// If all of the actual shuffle mask elements in this subsection
		// are undef/identity then skip this subsection - keep LHS elements.
		if (all_of(zip(ActualSubmask, IdentitySubmask), [](auto I) {
		int ActualIdx, IdentityIdx;
		std::tie(ActualIdx, IdentityIdx) = I;
		return ActualIdx < 0 || ActualIdx == IdentityIdx;
		}))
		continue;

		// Otherwise, does this subsection insert a subvector from RHS?
		Optional<std::pair<SDValue, int>> Step;
		for (int CandidateSubVec : seq<int>(0, NumSubVecs)) {
		auto CandidateSubmask =
		seq<int>(NumElts + (CandidateSubVec * NumSubElts),
		NumElts + (CandidateSubVec * NumSubElts) + NumSubElts);
		assert(ActualSubmask.size() == CandidateSubmask.size() &&
		ActualSubmask.size() == (unsigned)NumSubElts &&
		"Miscalculated mask subsamples?");

		// Does the shuffle insert subvector \p SubVec at position \p SubIdx?
		if (!all_of(zip(ActualSubmask, CandidateSubmask), [](auto I) {
		int ActualIdx, ExpectIdx;
		std::tie(ActualIdx, ExpectIdx) = I;
		return ActualIdx < 0 || ActualIdx == ExpectIdx;
		}))
		continue; // Maybe it inserts some other subvector?

// Add subvector insertion.		Step = {RHS.getOperand(CandidateSubVec), SubIdx};
std::iota(InsertionMask.begin() + SubIdx,
InsertionMask.begin() + SubIdx + NumSubElts,
NumElts + (SubVec * NumSubElts));

// See if the shuffle mask matches the reference insertion mask.
bool MatchingShuffle = true;
for (int i = 0; i != (int)NumElts; ++i) {
int ExpectIdx = InsertionMask[i];
int ActualIdx = Mask[i];
if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
MatchingShuffle = false;
break;		break;
}		}
}		// Okay, so did we find an subvector that this submask inserts?
		if (!Step)
		return SDValue(); // We did not. Shuffle stays - abort.

if (MatchingShuffle)		InsertSubvecSequence.emplace_back(*Step);
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
RHS.getOperand(SubVec),
DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
}		}
}		assert(
return SDValue();		!InsertSubvecSequence.empty() &&
		"Did not discover a sequence of insertions yet didn't early-return?");

		SDLoc DL(N);
		SDValue Res = LHS;
		for (std::pair<SDValue /*SubVec*/, int /*SubIdx*/> Step :
		InsertSubvecSequence)
		Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Res, Step.first,
		DAG.getVectorIdxConstant(Step.second, DL));
		return Res;
};		};
ArrayRef<int> Mask = SVN->getMask();		ArrayRef<int> Mask = SVN->getMask();
if (N1.getOpcode() == ISD::CONCAT_VECTORS)		if (N1.getOpcode() == ISD::CONCAT_VECTORS)
if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))		if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
return InsertN1;		return InsertN1;
if (N0.getOpcode() == ISD::CONCAT_VECTORS) {		if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
SmallVector<int> CommuteMask(Mask.begin(), Mask.end());		SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
ShuffleVectorSDNode::commuteMask(CommuteMask);		ShuffleVectorSDNode::commuteMask(CommuteMask);
▲ Show 20 Lines • Show All 2,146 Lines • Show Last 20 Lines