Diff 295801

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 12,157 Lines • ▼ Show 20 Lines	Cmp = getAArch64Cmp(
ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,		ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
dl);		dl);

EVT VT = Op->getValueType(0);		EVT VT = Op->getValueType(0);
LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));		LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);		return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}		}

		// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
		//
		// Rewrites the scalar sum of two across-vector add reductions as a single
		// reduction of the element-wise vector sum. The matched pattern is
		//   (add (extract_vector_elt (UADDV a), 0),
		//        (extract_vector_elt (UADDV b), 0))
		// and the replacement is
		//   (extract_vector_elt (UADDV (add a, b)), 0).
		//
		// \param N   Node being combined; anything other than an i32 ISD::ADD is
		//            rejected.
		// \param DCI Combiner state; currently unused by this combine.
		// \param DAG DAG in which the replacement nodes are created.
		// \returns The replacement value, or an empty SDValue when the pattern does
		//          not match.
		static SDValue performUADDVCombine(SDNode *N,
		                                   TargetLowering::DAGCombinerInfo &DCI,
		                                   SelectionDAG &DAG) {
		  // Use EVT rather than MVT: this runs for every ADD/SUB node, and
		  // getSimpleValueType() asserts when the result type is not a simple type.
		  EVT VT = N->getValueType(0);
		  // TODO: Currently handled for 32 bit integer vectors.
		  if (N->getOpcode() != ISD::ADD || VT != MVT::i32)
		    return SDValue();

		  SDValue LHS = N->getOperand(0);
		  SDValue RHS = N->getOperand(1);
		  if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
		      RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
		    return SDValue();

		  // Both extracts must read lane 0 through a constant index.
		  auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
		  auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
		  if (!LHSN1 || !RHSN1 || !LHSN1->isNullValue() || !RHSN1->isNullValue())
		    return SDValue();

		  SDValue Op1 = LHS->getOperand(0);
		  SDValue Op2 = RHS->getOperand(0);
		  if (Op1.getOpcode() != AArch64ISD::UADDV ||
		      Op1.getValueType() != MVT::v4i32 ||
		      Op2.getOpcode() != AArch64ISD::UADDV ||
		      Op2.getValueType() != MVT::v4i32)
		    return SDValue();

		  SDValue Val1 = Op1.getOperand(0);
		  SDValue Val2 = Op2.getOperand(0);
		  EVT ValVT = Val1->getValueType(0);
		  // The vector ADD built below needs operands of one type; bail out instead
		  // of creating a malformed node if the two reduced vectors ever differ.
		  if (Val2.getValueType() != ValVT)
		    return SDValue();

		  // NOTE(review): there is no one-use check on the UADDV nodes; if either
		  // reduction has other users the originals stay live next to the new
		  // nodes -- confirm this is acceptable.
		  SDLoc DL(N);
		  SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
		  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
		                     DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
		                     DAG.getConstant(0, DL, MVT::i64));
		}

// The basic add/sub long vector instructions have variants with "2" on the end		// The basic add/sub long vector instructions have variants with "2" on the end
// which act on the high-half of their inputs. They are normally matched by		// which act on the high-half of their inputs. They are normally matched by
// patterns like:		// patterns like:
//		//
// (add (zeroext (extract_high LHS)),		// (add (zeroext (extract_high LHS)),
// (zeroext (extract_high RHS)))		// (zeroext (extract_high RHS)))
// -> uaddl2 vD, vN, vM		// -> uaddl2 vD, vN, vM
//		//
Show All 11 Lines	if (!VT.is128BitVector()) {
if (N->getOpcode() == ISD::ADD)		if (N->getOpcode() == ISD::ADD)
return performSetccAddFolding(N, DAG);		return performSetccAddFolding(N, DAG);
return SDValue();		return SDValue();
}		}

// Make sure both branches are extended in the same way.		// Make sure both branches are extended in the same way.
SDValue LHS = N->getOperand(0);		SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);		SDValue RHS = N->getOperand(1);
if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&		if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: 'auto LHSN1' can be declared as 'auto *LHSN1' [llvm-qualified-auto] not useful Lint: Pre-merge checks:* clang-tidy: warning: 'auto LHSN1' can be declared as 'auto *LHSN1' [llvm-qualified-auto]…
LHS.getOpcode() != ISD::SIGN_EXTEND) \|\|		LHS.getOpcode() != ISD::SIGN_EXTEND) \|\|
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: 'auto RHSN1' can be declared as 'auto *RHSN1' [llvm-qualified-auto] not useful Lint: Pre-merge checks:* clang-tidy: warning: 'auto RHSN1' can be declared as 'auto *RHSN1' [llvm-qualified-auto]…
LHS.getOpcode() != RHS.getOpcode())		LHS.getOpcode() != RHS.getOpcode())
return SDValue();		return SDValue();

unsigned ExtType = LHS.getOpcode();		unsigned ExtType = LHS.getOpcode();

// It's not worth doing if at least one of the inputs isn't already an		// It's not worth doing if at least one of the inputs isn't already an
// extract, but we don't know which it'll be so we have to try both.		// extract, but we don't know which it'll be so we have to try both.
if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {		if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);		RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
if (!RHS.getNode())		if (!RHS.getNode())
return SDValue();		return SDValue();

RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);		RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
} else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {		} else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);		LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
if (!LHS.getNode())		if (!LHS.getNode())
return SDValue();		return SDValue();

LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);		LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
}		}

return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);		return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}		}

		// Combine entry point for ISD::ADD / ISD::SUB nodes: tries the UADDV
		// reduction merge first and falls back to the add/sub "long" combine when
		// that produces no replacement.
		static SDValue performAddSubCombine(SDNode *N,
		                                    TargetLowering::DAGCombinerInfo &DCI,
		                                    SelectionDAG &DAG) {
		  // A non-empty result means the sum of two reductions was rewritten.
		  if (SDValue Reduced = performUADDVCombine(N, DCI, DAG))
		    return Reduced;

		  return performAddSubLongCombine(N, DCI, DAG);
		}

// Massage DAGs which we can use the high-half "long" operations on into		// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.		// something isel will recognize better. E.g.
//		//
// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->		// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
// (aarch64_neon_umull (extract_high (v2i64 vec)))		// (aarch64_neon_umull (extract_high (v2i64 vec)))
// (extract_high (v2i64 (dup128 scalar)))))		// (extract_high (v2i64 (dup128 scalar)))))
//		//
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,		static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
▲ Show 20 Lines • Show All 2,332 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {		DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;		SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {		switch (N->getOpcode()) {
default:		default:
LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");		LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
break;		break;
case ISD::ADD:		case ISD::ADD:
case ISD::SUB:		case ISD::SUB:
return performAddSubLongCombine(N, DCI, DAG);		return performAddSubCombine(N, DCI, DAG);
case ISD::XOR:		case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);		return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:		case ISD::MUL:
return performMulCombine(N, DAG, DCI, Subtarget);		return performMulCombine(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:		case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:		case ISD::UINT_TO_FP:
return performIntToFpCombine(N, DAG, Subtarget);		return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:		case ISD::FP_TO_SINT:
▲ Show 20 Lines • Show All 1,520 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/aarch64-addv.ll

	Show First 20 Lines • Show All 62 Lines • ▼ Show 20 Lines

	define i32 @oversized_ADDV_512(<16 x i32>* %arr) {			define i32 @oversized_ADDV_512(<16 x i32>* %arr) {
	; CHECK-LABEL: oversized_ADDV_512			; CHECK-LABEL: oversized_ADDV_512
	; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s			; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
	%bin.rdx = load <16 x i32>, <16 x i32>* %arr			%bin.rdx = load <16 x i32>, <16 x i32>* %arr
	%r = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %bin.rdx)			%r = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %bin.rdx)
	ret i32 %r			ret i32 %r
	}			}

				; Adds the results of two <4 x i32> add reductions. The FileCheck
				; patterns below expect a .4s vector add followed by an addv of a .4s
				; vector in the generated code.
				define i32 @addv_combine(i32* nocapture readonly %a1, i32* nocapture readonly %a2) {
				; CHECK-LABEL: addv_combine
				; CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
				; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
				entry:
				%0 = bitcast i32* %a1 to <4 x i32>*
				%1 = load <4 x i32>, <4 x i32>* %0
				%2 = bitcast i32* %a2 to <4 x i32>*
				%3 = load <4 x i32>, <4 x i32>* %2
				%4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %1)
				%5 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %3)
				%6 = add i32 %4, %5
				ret i32 %6
				}

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Combine UADDVs to generate vector add
ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 295801

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/aarch64-addv.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Combine UADDVs to generate vector add (Closed, Public)

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 295801

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/aarch64-addv.ll

[AArch64] Combine UADDVs to generate vector add
ClosedPublic