This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Don't rely on (zext (trunc x)) pattern to detect zext_inreg MULL patterns - use value tracking directly
ClosedPublic

Authored by RKSimon on Sep 21 2023, 5:55 AM.

Download Raw Diff

Details

Reviewers

dmgreen
jaykang10
david-arm

Commits

rG6d2679992e58: [AArch64] Don't rely on (zext (trunc x)) pattern to detect zext_inreg MULL…

Summary

As explained on D159533, I'm trying to generalize the "(zext (trunc x)) -> x iff the upper bits are known zero" fold in getNode(), and I was hitting assertion failures in the AArch64 MULL matching code because it assumed these 'zero-extend-inreg' patterns would remain from earlier in LowerMUL.

Instead I've updated selectUmullSmull/skipExtensionForVectorMULL to just use value tracking to detect when the upper bits are known zero, and to insert the truncation nodes later if necessary.

I really don't like creating SDValue(N, 0) on the fly from an SDNode value, as technically we could be using any result index from these nodes - so I've ended up cleaning up a lot of the mul code to use SDValue directly instead of peeking through to the SDNode. I'm happy to undo this and just rely on SDValue(N, 0) if there's resistance, but this is much cleaner imo. I'd push this change as a pre-commit NFC.

(Sorry for still using Phab but I'm frantically trying to get my local backlog dealt with before moving over to using github branches).

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

RKSimon created this revision.Sep 21 2023, 5:55 AM

Herald added a project: Restricted Project. · View Herald TranscriptSep 21 2023, 5:55 AM

Herald added subscribers: hiraditya, kristof.beyls. · View Herald Transcript

RKSimon requested review of this revision.Sep 21 2023, 5:55 AM

Herald added a project: Restricted Project. · View Herald TranscriptSep 21 2023, 5:55 AM

RKSimon mentioned this in D159533: [DAG] getNode() - fold (zext (trunc x)) -> x iff the upper bits are known zero - add SRL support.Sep 21 2023, 5:57 AM

Harbormaster completed remote builds in B257492: Diff 557174.Sep 21 2023, 6:56 AM

RKSimon edited the summary of this revision. (Show Details)Sep 21 2023, 7:32 AM

RKSimon edited the summary of this revision. (Show Details)

I think this looks OK. Thanks

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
4446–4447	I believe this should always be a 128bit vector at this point.

This revision is now accepted and ready to land.Sep 21 2023, 10:38 AM

RKSimon added inline comments.Sep 21 2023, 10:39 AM

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
4446–4447	Thanks, I'll make it an assert

RKSimon mentioned this in rG05926a5a5578: [DAG] getNode() - remove oneuse limit from (zext (trunc (assertzext x))) ->….Sep 22 2023, 2:11 AM

Closed by commit rG6d2679992e58: [AArch64] Don't rely on (zext (trunc x)) pattern to detect zext_inreg MULL… (authored by RKSimon). · Explain WhySep 22 2023, 2:45 AM

This revision was automatically updated to reflect the committed changes.

RKSimon mentioned this in rGbba83e20deec: [AArch64] LowerMUL - use SDValue directly instead of SDNode. NFC..

RKSimon added a commit: rG6d2679992e58: [AArch64] Don't rely on (zext (trunc x)) pattern to detect zext_inreg MULL….

RKSimon mentioned this in rGb61b2426aca5: [DAG] getNode() - remove oneuse limit from (zext (trunc (assertzext x))) ->….Sep 22 2023, 3:01 AM

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64ISelLowering.cpp

46 lines

Diff 557233

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,420 Lines • ▼ Show 20 Lines	for (const SDValue &Elt : N->op_values()) {
}		}
return false;		return false;
}		}

return true;		return true;
}		}

static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG) {		static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG) {
		EVT VT = N.getValueType();
		assert(VT.is128BitVector() && "Unexpected vector MULL size");

		unsigned NumElts = VT.getVectorNumElements();
		unsigned OrigEltSize = VT.getScalarSizeInBits();
		unsigned EltSize = OrigEltSize / 2;
		MVT TruncVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);

		APInt HiBits = APInt::getHighBitsSet(OrigEltSize, EltSize);
		if (DAG.MaskedValueIsZero(N, HiBits))
		return DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N);

if (ISD::isExtOpcode(N.getOpcode()))		if (ISD::isExtOpcode(N.getOpcode()))
return addRequiredExtensionForVectorMULL(N.getOperand(0), DAG,		return addRequiredExtensionForVectorMULL(N.getOperand(0), DAG,
N.getOperand(0).getValueType(),		N.getOperand(0).getValueType(), VT,
N.getValueType(),
N.getOpcode());		N.getOpcode());

assert(N.getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");		assert(N.getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N.getValueType();
SDLoc dl(N);		SDLoc dl(N);
		dmgreenUnsubmitted Not Done Reply Inline Actions I believe this should always be a 128bit vector at this point. dmgreen: I believe this should always be a 128bit vector at this point.
		RKSimonAuthorUnsubmitted Not Done Reply Inline Actions Thanks, I'll make it an assert RKSimon: Thanks, I'll make it an assert
unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;		SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElts; ++i) {		for (unsigned i = 0; i != NumElts; ++i) {
ConstantSDNode *C = cast<ConstantSDNode>(N.getOperand(i));		ConstantSDNode *C = cast<ConstantSDNode>(N.getOperand(i));
const APInt &CInt = C->getAPIntValue();		const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.		// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.		// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));		Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}		}
return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);		return DAG.getBuildVector(TruncVT, dl, Ops);
}		}

static bool isSignExtended(SDValue N, SelectionDAG &DAG) {		static bool isSignExtended(SDValue N, SelectionDAG &DAG) {
return N.getOpcode() == ISD::SIGN_EXTEND \|\|		return N.getOpcode() == ISD::SIGN_EXTEND \|\|
N.getOpcode() == ISD::ANY_EXTEND \|\|		N.getOpcode() == ISD::ANY_EXTEND \|\|
isExtendedBUILD_VECTOR(N, DAG, true);		isExtendedBUILD_VECTOR(N, DAG, true);
}		}

▲ Show 20 Lines • Show All 125 Lines • ▼ Show 20 Lines	if (((IsN0SExt && IsN1ZExt) \|\| (IsN0ZExt && IsN1SExt)) &&
}		}
}		}

// Select UMULL if we can replace the other operand with an extend.		// Select UMULL if we can replace the other operand with an extend.
if (IsN0ZExt \|\| IsN1ZExt) {		if (IsN0ZExt \|\| IsN1ZExt) {
EVT VT = N0.getValueType();		EVT VT = N0.getValueType();
APInt Mask = APInt::getHighBitsSet(VT.getScalarSizeInBits(),		APInt Mask = APInt::getHighBitsSet(VT.getScalarSizeInBits(),
VT.getScalarSizeInBits() / 2);		VT.getScalarSizeInBits() / 2);
if (DAG.MaskedValueIsZero(IsN0ZExt ? N1 : N0, Mask)) {		if (DAG.MaskedValueIsZero(IsN0ZExt ? N1 : N0, Mask))
EVT HalfVT;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::v2i64:
HalfVT = MVT::v2i32;
break;
case MVT::v4i32:
HalfVT = MVT::v4i16;
break;
case MVT::v8i16:
HalfVT = MVT::v8i8;
break;
default:
return 0;
}
// Truncate and then extend the result.
SDValue NewExt =
DAG.getNode(ISD::TRUNCATE, DL, HalfVT, IsN0ZExt ? N1 : N0);
NewExt = DAG.getZExtOrTrunc(NewExt, DL, VT);
if (IsN0ZExt)
N1 = NewExt;
else
N0 = NewExt;
return AArch64ISD::UMULL;		return AArch64ISD::UMULL;
}		}
}

if (!IsN1SExt && !IsN1ZExt)		if (!IsN1SExt && !IsN1ZExt)
return 0;		return 0;

// Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these		// Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
// into (s/zext A * s/zext C) + (s/zext B * s/zext C)		// into (s/zext A * s/zext C) + (s/zext B * s/zext C)
if (IsN1SExt && isAddSubSExt(N0, DAG)) {		if (IsN1SExt && isAddSubSExt(N0, DAG)) {
IsMLA = true;		IsMLA = true;
▲ Show 20 Lines • Show All 21,656 Lines • Show Last 20 Lines