Diff 302811

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 20,851 Lines • ▼ Show 20 Lines
	}			}

	SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {			SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
	SDValue N0 = N->getOperand(0);			SDValue N0 = N->getOperand(0);
	EVT VT = N0.getValueType();			EVT VT = N0.getValueType();
	unsigned Opcode = N->getOpcode();			unsigned Opcode = N->getOpcode();

	// VECREDUCE over 1-element vector is just an extract.			// VECREDUCE over 1-element vector is just an extract.
	if (VT.getVectorNumElements() == 1) {			if (VT.getVectorElementCount().isScalar()) {
	SDLoc dl(N);			SDLoc dl(N);
	SDValue Res =			SDValue Res =
	DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,			DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
	DAG.getVectorIdxConstant(0, dl));			DAG.getVectorIdxConstant(0, dl));
	if (Res.getValueType() != N->getValueType(0))			if (Res.getValueType() != N->getValueType(0))
	Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);			Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
	return Res;			return Res;
	}			}
	▲ Show 20 Lines • Show All 1,555 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,317 Lines • ▼ Show 20 Lines	case ISD::EXTRACT_ELEMENT: {
// Remove high part of known bit mask		// Remove high part of known bit mask
Known = Known.trunc(EltBitWidth);		Known = Known.trunc(EltBitWidth);
break;		break;
}		}
case ISD::EXTRACT_VECTOR_ELT: {		case ISD::EXTRACT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);		SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);		SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();		EVT VecVT = InVec.getValueType();
		// computeKnownBits not yet implemented for scalable vectors.
		if (VecVT.isScalableVector())
		break;
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();		const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();		const unsigned NumSrcElts = VecVT.getVectorNumElements();
		paulwalker-armUnsubmitted Done Reply Inline Actions I think you want to just reject extracts from scalable vectors here. There's code at the top of this function which normally does the job, but it relies on the result type and so EXTRACT_VECTOR_ELT slips through. FYI: Eli has the same fix in D87651 so I'd match that and crown whoever lands first the winner. paulwalker-arm: I think you want to just reject extracts from scalable vectors here. There's code at the top…

// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know		// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
// anything about the extended bits.		// anything about the extended bits.
if (BitWidth > EltBitWidth)		if (BitWidth > EltBitWidth)
Known = Known.trunc(EltBitWidth);		Known = Known.trunc(EltBitWidth);

// If we know the element index, just demand that vector element, else for		// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.		// an unknown element index, ignore DemandedElts and demand them all.
▲ Show 20 Lines • Show All 1,468 Lines • ▼ Show 20 Lines	case ISD::FNEG:
break;		break;
case ISD::FABS:		case ISD::FABS:
if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)		if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));		return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
break;		break;
case ISD::VSCALE:		case ISD::VSCALE:
assert(VT == Operand.getValueType() && "Unexpected VT!");		assert(VT == Operand.getValueType() && "Unexpected VT!");
break;		break;
		case ISD::VECREDUCE_SMIN:
		case ISD::VECREDUCE_UMAX:
		if (Operand.getValueType().getScalarType() == MVT::i1)
		return getNode(ISD::VECREDUCE_OR, DL, VT, Operand);
		break;
		case ISD::VECREDUCE_SMAX:
		case ISD::VECREDUCE_UMIN:
		if (Operand.getValueType().getScalarType() == MVT::i1)
		return getNode(ISD::VECREDUCE_AND, DL, VT, Operand);
		break;
}		}

SDNode *N;		SDNode *N;
SDVTList VTs = getVTList(VT);		SDVTList VTs = getVTList(VT);
SDValue Ops[] = {Operand};		SDValue Ops[] = {Operand};
if (VT != MVT::Glue) { // Don't CSE flag producing nodes		if (VT != MVT::Glue) { // Don't CSE flag producing nodes
FoldingSetNodeID ID;		FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);		AddNodeIDNode(ID, Opcode, VTs, Ops);
▲ Show 20 Lines • Show All 493 Lines • ▼ Show 20 Lines	case ISD::MUL:
}		}
break;		break;
case ISD::UDIV:		case ISD::UDIV:
case ISD::UREM:		case ISD::UREM:
case ISD::MULHU:		case ISD::MULHU:
case ISD::MULHS:		case ISD::MULHS:
case ISD::SDIV:		case ISD::SDIV:
case ISD::SREM:		case ISD::SREM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
case ISD::SADDSAT:		case ISD::SADDSAT:
case ISD::SSUBSAT:		case ISD::SSUBSAT:
case ISD::UADDSAT:		case ISD::UADDSAT:
case ISD::USUBSAT:		case ISD::USUBSAT:
assert(VT.isInteger() && "This operator does not apply to FP types!");		assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&		assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");		N1.getValueType() == VT && "Binary operator types must match!");
break;		break;
		case ISD::SMIN:
		case ISD::UMAX:
		assert(VT.isInteger() && "This operator does not apply to FP types!");
		assert(N1.getValueType() == N2.getValueType() &&
		N1.getValueType() == VT && "Binary operator types must match!");
		if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
		return getNode(ISD::OR, DL, VT, N1, N2);
		break;
		case ISD::SMAX:
		case ISD::UMIN:
		assert(VT.isInteger() && "This operator does not apply to FP types!");
		assert(N1.getValueType() == N2.getValueType() &&
		N1.getValueType() == VT && "Binary operator types must match!");
		if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
		return getNode(ISD::AND, DL, VT, N1, N2);
		break;
case ISD::FADD:		case ISD::FADD:
case ISD::FSUB:		case ISD::FSUB:
case ISD::FMUL:		case ISD::FMUL:
case ISD::FDIV:		case ISD::FDIV:
case ISD::FREM:		case ISD::FREM:
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");		assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&		assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");		N1.getValueType() == VT && "Binary operator types must match!");
▲ Show 20 Lines • Show All 4,799 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 7,994 Lines • ▼ Show 20 Lines
	}			}

	SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {			SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
	SDLoc dl(Node);			SDLoc dl(Node);
	unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());			unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
	SDValue Op = Node->getOperand(0);			SDValue Op = Node->getOperand(0);
	EVT VT = Op.getValueType();			EVT VT = Op.getValueType();

				if (VT.isScalableVector())
				report_fatal_error(
				"Expanding reductions for scalable vectors is undefined.");

	// Try to use a shuffle reduction for power of two vectors.			// Try to use a shuffle reduction for power of two vectors.
	if (VT.isPow2VectorType()) {			if (VT.isPow2VectorType()) {
	while (VT.getVectorNumElements() > 1) {			while (VT.getVectorNumElements() > 1) {
	EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());			EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
	if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))			if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
	break;			break;

	SDValue Lo, Hi;			SDValue Lo, Hi;
	▲ Show 20 Lines • Show All 66 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 927 Lines • ▼ Show 20 Lines	SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;		EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,		SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
SelectionDAG &DAG) const;		SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,		SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
SelectionDAG &DAG) const;		SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;		SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
SDValue LowerFixedLengthReductionToSVE(unsigned Opcode, SDValue ScalarOp,		SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
		SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
SelectionDAG &DAG) const;		SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,		SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
SelectionDAG &DAG) const;		SelectionDAG &DAG) const;

SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,		SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;		SmallVectorImpl<SDNode *> &Created) const override;
▲ Show 20 Lines • Show All 77 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,007 Lines • ▼ Show 20 Lines	for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::UDIV, VT, Custom);		setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);		setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);		setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);		setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);		setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);		setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);		setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);		setOperationAction(ISD::SRA, VT, Custom);
		setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
		setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
		setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
		setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
		setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
		setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
		setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
		setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
}		}

// Illegal unpacked integer vector types.		// Illegal unpacked integer vector types.
for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {		for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);		setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);		setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}		}

for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {		for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);		setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);		setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);		setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);		setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);		setOperationAction(ISD::TRUNCATE, VT, Custom);
		setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
		setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
		paulwalker-armUnsubmitted Not Done Reply Inline Actions FYI: I guess we'll just promote[1] things like VECREDUCE_ADD but I think [u/s][min/max] are just variants of AND and OR when it comes to i1 types. [1] Depending on the cost of `ptest` and the fact `VECREDUCE_AND` requires the extra xor, it might end up better to just promote all i1 base operations. Either way, I'm happy with your current approach; I'm just making you aware of a potential future change. paulwalker-arm: FYI: I guess we'll just promote[1] things like VECREDUCE_ADD but I think [u/s][min/max] are…
		setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);

// There are no legal MVT::nxv16f## based types.		// There are no legal MVT::nxv16f## based types.
if (VT != MVT::nxv16i1) {		if (VT != MVT::nxv16i1) {
setOperationAction(ISD::SINT_TO_FP, VT, Promote);		setOperationAction(ISD::SINT_TO_FP, VT, Promote);
AddPromotedToType(ISD::SINT_TO_FP, VT, getPromotedVTForPredicate(VT));		AddPromotedToType(ISD::SINT_TO_FP, VT, getPromotedVTForPredicate(VT));
setOperationAction(ISD::UINT_TO_FP, VT, Promote);		setOperationAction(ISD::UINT_TO_FP, VT, Promote);
AddPromotedToType(ISD::UINT_TO_FP, VT, getPromotedVTForPredicate(VT));		AddPromotedToType(ISD::UINT_TO_FP, VT, getPromotedVTForPredicate(VT));
}		}
▲ Show 20 Lines • Show All 2,878 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::EXTRACT_SUBVECTOR:		case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);		return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:		case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);		return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:		case ISD::SDIV:
case ISD::UDIV:		case ISD::UDIV:
return LowerDIV(Op, DAG);		return LowerDIV(Op, DAG);
case ISD::SMIN:		case ISD::SMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
/OverrideNEON=/true);		/OverrideNEON=/true);
		paulwalker-armUnsubmitted Done Reply Inline Actions On reflection and since you're going the extra mile, I'd rather have this transformation (and the matching reduction ones) at the `SelectionDAG::getNode()` level so that we can just force a canonical representation for all targets as early as possible and then it'll never be a concern from a legalisation point of view. paulwalker-arm: On reflection and since you're going the extra mile, I'd rather have this transformation (and…
case ISD::UMIN:		case ISD::UMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
/OverrideNEON=/true);		/OverrideNEON=/true);
case ISD::SMAX:		case ISD::SMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
/OverrideNEON=/true);		/OverrideNEON=/true);
case ISD::UMAX:		case ISD::UMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
▲ Show 20 Lines • Show All 5,876 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
// Try to lower fixed length reductions to SVE.		// Try to lower fixed length reductions to SVE.
EVT SrcVT = Src.getValueType();		EVT SrcVT = Src.getValueType();
bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND \|\|		bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND \|\|
Op.getOpcode() == ISD::VECREDUCE_OR \|\|		Op.getOpcode() == ISD::VECREDUCE_OR \|\|
Op.getOpcode() == ISD::VECREDUCE_XOR \|\|		Op.getOpcode() == ISD::VECREDUCE_XOR \|\|
Op.getOpcode() == ISD::VECREDUCE_FADD \|\|		Op.getOpcode() == ISD::VECREDUCE_FADD \|\|
(Op.getOpcode() != ISD::VECREDUCE_ADD &&		(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64);		SrcVT.getVectorElementType() == MVT::i64);
if (useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {		if (SrcVT.isScalableVector() \|\|
		useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {

		if (SrcVT.getVectorElementType() == MVT::i1)
		return LowerPredReductionToSVE(Op, DAG);

switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:		case ISD::VECREDUCE_ADD:
return LowerFixedLengthReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
case ISD::VECREDUCE_AND:		case ISD::VECREDUCE_AND:
return LowerFixedLengthReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
		paulwalker-armUnsubmitted Done Reply Inline Actions Given LowerPredReductionToSVE has its own switch block, can this be done universally regardless of the opcode? i.e. if (SrcVT.isScalableVector() \|\| useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) { if (SrcVT.getVectorElementType() == MVT::i1) return LowerPredReductionToSVE(Op, DAG); switch (Op.getOpcode()) { paulwalker-arm: Given LowerPredReductionToSVE has its own switch block, can this be done universally regardless…
case ISD::VECREDUCE_OR:		case ISD::VECREDUCE_OR:
return LowerFixedLengthReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
case ISD::VECREDUCE_SMAX:		case ISD::VECREDUCE_SMAX:
return LowerFixedLengthReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_SMIN:		case ISD::VECREDUCE_SMIN:
return LowerFixedLengthReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
case ISD::VECREDUCE_UMAX:		case ISD::VECREDUCE_UMAX:
return LowerFixedLengthReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_UMIN:		case ISD::VECREDUCE_UMIN:
return LowerFixedLengthReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
case ISD::VECREDUCE_XOR:		case ISD::VECREDUCE_XOR:
return LowerFixedLengthReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
case ISD::VECREDUCE_FADD:		case ISD::VECREDUCE_FADD:
return LowerFixedLengthReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
case ISD::VECREDUCE_FMAX:		case ISD::VECREDUCE_FMAX:
return LowerFixedLengthReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
case ISD::VECREDUCE_FMIN:		case ISD::VECREDUCE_FMIN:
return LowerFixedLengthReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
default:		default:
llvm_unreachable("Unhandled fixed length reduction");		llvm_unreachable("Unhandled fixed length reduction");
}		}
}		}

// Lower NEON reductions.		// Lower NEON reductions.
SDLoc dl(Op);		SDLoc dl(Op);
switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
▲ Show 20 Lines • Show All 6,478 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,

// Perform reduction.		// Perform reduction.
SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,		SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
Pg, AccOp, VecOp);		Pg, AccOp, VecOp);

return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);		return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
}		}

SDValue AArch64TargetLowering::LowerFixedLengthReductionToSVE(unsigned Opcode,		SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
SDValue ScalarOp, SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
		SDLoc DL(ReduceOp);
		SDValue Op = ReduceOp.getOperand(0);
		EVT OpVT = Op.getValueType();
		EVT VT = ReduceOp.getValueType();

		paulwalker-armUnsubmitted Done Reply Inline Actions Shouldn't this be `\|\|`? paulwalker-arm: Shouldn't this be `\|\|`?
		if (!OpVT.isScalableVector() \|\| OpVT.getVectorElementType() != MVT::i1)
		return SDValue();

		SDValue Pg = getPredicateForVector(DAG, DL, OpVT);

		switch (ReduceOp.getOpcode()) {
		default:
		return SDValue();
		case ISD::VECREDUCE_OR:
		return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
		case ISD::VECREDUCE_AND: {
		Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
		return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
		}
		case ISD::VECREDUCE_XOR: {
		SDValue ID =
		DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
		paulwalker-armUnsubmitted Done Reply Inline Actions This will cause a compiler warning if anybody adds a case statement after `ISD::VECREDUCE_XOR`. Personally I'd just wrap all the case blocks in `{}` and move AArch64CC::CondCode Cond into the blocks that need it. paulwalker-arm: This will cause a compiler warning if anybody adds a case statement after `ISD::VECREDUCE_XOR`.
		SDValue Cntp =
		DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
		return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
		}
		}

		paulwalker-armUnsubmitted Done Reply Inline Actions The AND is not required because `ISD::VECREDUCE_XOR` only says the bottom N bits are defined, where N is the size of the operand's vector element. The call to `getAnyExtOrTrunc` should be using `ReduceOp.getValueType()` and not hardwiring `MVT::i32`. Given this change `ReduceOp.getValueType()` will be used three times so I'd just have `EVT VT = ReduceOp.getValueType();` at the top of the function, which might have the effect that `AArch64CC::####_ACTIVE` can be used in place and still fit on the line and thus remove the need for `{}` for those entries. paulwalker-arm: The AND is not required because `ISD::VECREDUCE_XOR` only says the bottom N bits are defined…
		return SDValue();
		}

		SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
		SDValue ScalarOp,
		SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);		SDLoc DL(ScalarOp);
SDValue VecOp = ScalarOp.getOperand(0);		SDValue VecOp = ScalarOp.getOperand(0);
EVT SrcVT = VecOp.getValueType();		EVT SrcVT = VecOp.getValueType();

SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);		if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
		paulwalker-armUnsubmitted Done Reply Inline Actions Is OverrideNEON needed here? I ask because when SrcVT is a fixed length vector the value of OverrideNEON seems irrelevant as by calling this function the only safe action is to pack the vector into a scalable type and thus you may as well just always pass in `true`. paulwalker-arm: Is OverrideNEON needed here? I ask because when SrcVT is a fixed length vector the value of…
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);		EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);		VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
		paulwalker-armUnsubmitted Done Reply Inline Actions My preference would be to not have any interdependence between these two lowering functions. If you agree and considering `LowerPredReductionToSVE` has its own switch statement, can this be moved into `LowerVECREDUCE` within the `if (SrcVT.isScalableVector...` block and have `LowerPredReductionToSVE` create its own predicate? paulwalker-arm: My preference would be to not have any interdependence between these two lowering functions.
		}
		paulwalker-armUnsubmitted Done Reply Inline Actions As you're moving this, can you put it immediately before the `SDValue Rdx` line so it has some company? paulwalker-arm: As you're moving this, can you put it immediately before the `SDValue Rdx` line so it has some…

// UADDV always returns an i64 result.		// UADDV always returns an i64 result.
EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :		EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
SrcVT.getVectorElementType();		SrcVT.getVectorElementType();

		SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Rdx = DAG.getNode(Opcode, DL, getPackedSVEVectorVT(ResVT), Pg, VecOp);		SDValue Rdx = DAG.getNode(Opcode, DL, getPackedSVEVectorVT(ResVT), Pg, VecOp);
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,		SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
Rdx, DAG.getConstant(0, DL, MVT::i64));		Rdx, DAG.getConstant(0, DL, MVT::i64));

// The VEC_REDUCE nodes expect an element size result.		// The VEC_REDUCE nodes expect an element size result.
if (ResVT != ScalarOp.getValueType())		if (ResVT != ScalarOp.getValueType())
Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());		Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());

▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t \| FileCheck %s
				; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

				; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
				; WARN-NOT: warning

				; ANDV

				define i1 @reduce_and_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_and_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_and_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_and_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_and_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_and_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_and_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_and_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; ORV

				define i1 @reduce_or_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_or_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.or.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_or_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_or_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_or_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_or_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; XORV

				define i1 @reduce_xor_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_xor_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: cntp x8, p1, p0.b
				; CHECK-NEXT: and w0, w8, #0x1
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.xor.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_xor_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_xor_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: cntp x8, p1, p0.h
				; CHECK-NEXT: and w0, w8, #0x1
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.xor.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_xor_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_xor_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: cntp x8, p1, p0.s
				; CHECK-NEXT: and w0, w8, #0x1
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.xor.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_xor_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_xor_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: cntp x8, p1, p0.d
				; CHECK-NEXT: and w0, w8, #0x1
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.xor.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; SMAXV

				define i1 @reduce_smax_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_smax_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smax.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_smax_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_smax_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smax.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_smax_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_smax_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smax.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_smax_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_smax_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smax.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; SMINV

				; Signed-min reduction of a <vscale x 16 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.b), ptest of the input, then cset on ne.
				define i1 @reduce_smin_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_smin_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smin.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				; Signed-min reduction of a <vscale x 8 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.h), ptest of the input, then cset on ne.
				define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_smin_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				; Signed-min reduction of a <vscale x 4 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.s), ptest of the input, then cset on ne.
				define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_smin_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				; Signed-min reduction of a <vscale x 2 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.d), ptest of the input, then cset on ne.
				define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_smin_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; UMAXV

				; Unsigned-max reduction of a <vscale x 16 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.b), ptest of the input, then cset on ne.
				define i1 @reduce_umax_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_umax_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umax.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				; Unsigned-max reduction of a <vscale x 8 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.h), ptest of the input, then cset on ne.
				define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_umax_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				; Unsigned-max reduction of a <vscale x 4 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.s), ptest of the input, then cset on ne.
				define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_umax_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				; Unsigned-max reduction of a <vscale x 2 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.d), ptest of the input, then cset on ne.
				define i1 @reduce_umax_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_umax_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; UMINV

				; Unsigned-min reduction of a <vscale x 16 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.b), not, ptest, then cset on eq.
				define i1 @reduce_umin_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_umin_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umin.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				; Unsigned-min reduction of a <vscale x 8 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.h), not, ptest, then cset on eq.
				define i1 @reduce_umin_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_umin_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umin.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				; Unsigned-min reduction of a <vscale x 4 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.s), not, ptest, then cset on eq.
				define i1 @reduce_umin_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_umin_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umin.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				; Unsigned-min reduction of a <vscale x 2 x i1> predicate down to one i1.
				; Expected assembly: ptrue (.d), not, ptest, then cset on eq.
				define i1 @reduce_umin_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_umin_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umin.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; Declarations of the i1 reduction intrinsics, grouped by operation
				; (and, or, xor, smin, smax, umin, umax) for nxv16i1 through nxv2i1.
				; NOTE(review): these names carry an extra ".i1" component that the
				; declarations for wider predicates (e.g. @llvm.vector.reduce.and.nxv32i1)
				; do not have - presumably tolerated by intrinsic name remangling; confirm.
				declare i1 @llvm.vector.reduce.and.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.and.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.and.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.and.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.or.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.xor.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.xor.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.xor.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.xor.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.smin.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.smax.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.smax.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.smax.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.smax.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.umin.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.umin.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.umin.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.umin.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.umax.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)

llvm/test/CodeGen/AArch64/sve-int-reduce.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
				; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

				; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
				; WARN-NOT: warning

				; ANDV

				; AND reduction across a <vscale x 16 x i8> vector, returning i8.
				; Expected assembly: ptrue (.b), andv into b0, result moved to w0.
				define i8 @andv_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: andv_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: andv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; AND reduction across a <vscale x 8 x i16> vector, returning i16.
				; Expected assembly: ptrue (.h), andv into h0, result moved to w0.
				define i16 @andv_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: andv_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: andv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; AND reduction across a <vscale x 4 x i32> vector, returning i32.
				; Expected assembly: ptrue (.s), andv into s0, result moved to w0.
				define i32 @andv_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: andv_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: andv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; AND reduction across a <vscale x 2 x i64> vector, returning i64.
				; Expected assembly: ptrue (.d), andv into d0, result moved to x0.
				define i64 @andv_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: andv_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: andv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				paulwalker-armUnsubmitted Done Reply Inline Actions This is personal preference so feel free to ignore but considering they go down different code paths and have structurally different output I'd prefer to have the predicate tests within their own ll file. paulwalker-arm: This is personal preference so feel free to ignore but considering they go down different code…
				}

				; ORV

				; OR reduction across a <vscale x 16 x i8> vector, returning i8.
				; Expected assembly: ptrue (.b), orv into b0, result moved to w0.
				define i8 @orv_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: orv_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: orv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; OR reduction across a <vscale x 8 x i16> vector, returning i16.
				; Expected assembly: ptrue (.h), orv into h0, result moved to w0.
				define i16 @orv_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: orv_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: orv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; OR reduction across a <vscale x 4 x i32> vector, returning i32.
				; Expected assembly: ptrue (.s), orv into s0, result moved to w0.
				define i32 @orv_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: orv_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: orv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; OR reduction across a <vscale x 2 x i64> vector, returning i64.
				; Expected assembly: ptrue (.d), orv into d0, result moved to x0.
				define i64 @orv_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: orv_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: orv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; XORV

				; XOR reduction across a <vscale x 16 x i8> vector, returning i8.
				; Expected assembly: ptrue (.b), eorv into b0, result moved to w0.
				define i8 @xorv_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: xorv_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: eorv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; XOR reduction across a <vscale x 8 x i16> vector, returning i16.
				; Expected assembly: ptrue (.h), eorv into h0, result moved to w0.
				define i16 @xorv_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: xorv_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: eorv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; XOR reduction across a <vscale x 4 x i32> vector, returning i32.
				; Expected assembly: ptrue (.s), eorv into s0, result moved to w0.
				define i32 @xorv_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: xorv_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: eorv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; XOR reduction across a <vscale x 2 x i64> vector, returning i64.
				; Expected assembly: ptrue (.d), eorv into d0, result moved to x0.
				define i64 @xorv_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: xorv_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: eorv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; UADDV

				; ADD reduction across a <vscale x 16 x i8> vector, returning i8.
				; Expected assembly: ptrue (.b), uaddv of the .b lanes into d0, then the
				; result is read back through x0/w0.
				define i8 @uaddv_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: uaddv_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: uaddv d0, p0, z0.b
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; ADD reduction across a <vscale x 8 x i16> vector, returning i16.
				; Expected assembly: ptrue (.h), uaddv of the .h lanes into d0, then the
				; result is read back through x0/w0.
				define i16 @uaddv_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: uaddv_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: uaddv d0, p0, z0.h
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; ADD reduction across a <vscale x 4 x i32> vector, returning i32.
				; Expected assembly: ptrue (.s), uaddv of the .s lanes into d0, then the
				; result is read back through x0/w0.
				define i32 @uaddv_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: uaddv_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: uaddv d0, p0, z0.s
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; ADD reduction across a <vscale x 2 x i64> vector, returning i64.
				; Expected assembly: ptrue (.d), uaddv into d0, result moved to x0.
				define i64 @uaddv_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: uaddv_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: uaddv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; UMINV

				; Unsigned-min reduction across a <vscale x 16 x i8> vector, returning i8.
				; Expected assembly: ptrue (.b), uminv into b0, result moved to w0.
				define i8 @umin_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: umin_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: uminv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; Unsigned-min reduction across a <vscale x 8 x i16> vector, returning i16.
				; Expected assembly: ptrue (.h), uminv into h0, result moved to w0.
				define i16 @umin_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: umin_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: uminv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; Unsigned-min reduction across a <vscale x 4 x i32> vector, returning i32.
				; Expected assembly: ptrue (.s), uminv into s0, result moved to w0.
				define i32 @umin_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: umin_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: uminv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; Unsigned-min reduction across a <vscale x 2 x i64> vector, returning i64.
				; Expected assembly: ptrue (.d), uminv into d0, result moved to x0.
				define i64 @umin_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: umin_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: uminv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; SMINV

				; Signed-min reduction across a <vscale x 16 x i8> vector, returning i8.
				; Expected assembly: ptrue (.b), sminv into b0, result moved to w0.
				define i8 @smin_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: smin_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: sminv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; Signed-min reduction across a <vscale x 8 x i16> vector, returning i16.
				; Expected assembly: ptrue (.h), sminv into h0, result moved to w0.
				define i16 @smin_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: smin_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: sminv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; Signed-min reduction across a <vscale x 4 x i32> vector, returning i32.
				; Expected assembly: ptrue (.s), sminv into s0, result moved to w0.
				define i32 @smin_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: smin_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: sminv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; Signed-min reduction across a <vscale x 2 x i64> vector, returning i64.
				; Expected assembly: ptrue (.d), sminv into d0, result moved to x0.
				define i64 @smin_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: smin_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: sminv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; UMAXV

				; Unsigned-max reduction across a <vscale x 16 x i8> vector, returning i8.
				; Expected assembly: ptrue (.b), umaxv into b0, result moved to w0.
				define i8 @umax_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: umax_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: umaxv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; Unsigned-max reduction across a <vscale x 8 x i16> vector, returning i16.
				; Expected assembly: ptrue (.h), umaxv into h0, result moved to w0.
				define i16 @umax_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: umax_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: umaxv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; Unsigned-max reduction across a <vscale x 4 x i32> vector, returning i32.
				; Expected assembly: ptrue (.s), umaxv into s0, result moved to w0.
				define i32 @umax_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: umax_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: umaxv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; Unsigned-max reduction across a <vscale x 2 x i64> vector, returning i64.
				; Expected assembly: ptrue (.d), umaxv into d0, result moved to x0.
				define i64 @umax_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: umax_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: umaxv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; SMAXV

				; Signed-max reduction across a <vscale x 16 x i8> vector, returning i8.
				; Expected assembly: ptrue (.b), smaxv into b0, result moved to w0.
				define i8 @smax_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: smax_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: smaxv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; Signed-max reduction across a <vscale x 8 x i16> vector, returning i16.
				; Expected assembly: ptrue (.h), smaxv into h0, result moved to w0.
				define i16 @smax_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: smax_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: smaxv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; Signed-max reduction across a <vscale x 4 x i32> vector, returning i32.
				; Expected assembly: ptrue (.s), smaxv into s0, result moved to w0.
				define i32 @smax_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: smax_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: smaxv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; Signed-max reduction across a <vscale x 2 x i64> vector, returning i64.
				; Expected assembly: ptrue (.d), smaxv into d0, result moved to x0.
				define i64 @smax_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: smax_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: smaxv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; Declarations of the integer reduction intrinsics called above, grouped by
				; operation (and, or, xor, add, umin, smin, umax, smax); each group covers
				; nxv16i8, nxv8i16, nxv4i32 and nxv2i64.
				declare i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)

llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
				; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

				; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
				; WARN-NOT: warning

				; ANDV

				; AND reduction of a <vscale x 32 x i1> predicate, which arrives in two
				; predicate registers (p0, p1). Expected assembly: the two halves are
				; combined with a predicate `and`, then not, ptest and cset on eq.
				define i1 @andv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: andv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: not p0.b, p2/z, p0.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; AND reduction of a <vscale x 64 x i1> predicate, which arrives in four
				; predicate registers (p0-p3). Expected assembly: p4 is spilled and reloaded
				; around its use as the ptrue mask, the four inputs are combined with three
				; predicate `and`s, then not, ptest and cset on eq.
				define i1 @andv_nxv64i1(<vscale x 64 x i1> %a) {
				; CHECK-LABEL: andv_nxv64i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
				; CHECK-NEXT: addvl sp, sp, #-1
				; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
				; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
				; CHECK-NEXT: .cfi_offset w29, -16
				; CHECK-NEXT: ptrue p4.b
				; CHECK-NEXT: and p1.b, p4/z, p1.b, p3.b
				; CHECK-NEXT: and p0.b, p4/z, p0.b, p2.b
				; CHECK-NEXT: and p0.b, p4/z, p0.b, p1.b
				; CHECK-NEXT: not p0.b, p4/z, p0.b
				; CHECK-NEXT: ptest p4, p0.b
				; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: addvl sp, sp, #1
				; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %a)
				ret i1 %res
				}

				; ORV

				; OR reduction of a <vscale x 32 x i1> predicate held in p0 and p1.
				; Expected assembly: the halves are combined with a predicate `orr`, then
				; ptest and cset on ne.
				define i1 @orv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: orv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; XORV

				; XOR reduction of a <vscale x 32 x i1> predicate held in p0 and p1.
				; Expected assembly: the halves are combined with a predicate `eor`, the
				; result is counted with cntp, and bit 0 of the count is returned.
				define i1 @xorv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: xorv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: cntp x8, p2, p0.b
				; CHECK-NEXT: and w0, w8, #0x1
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; SMAXV

				; Signed-max reduction of a <vscale x 32 x i1> predicate held in p0 and p1.
				; The expected assembly is the same instruction sequence as andv_nxv32i1:
				; predicate `and`, not, ptest, cset on eq.
				define i1 @smaxv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: smaxv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: not p0.b, p2/z, p0.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; SMINV

				; Signed-min reduction of a <vscale x 32 x i1> predicate held in p0 and p1.
				; The expected assembly is the same instruction sequence as orv_nxv32i1:
				; predicate `orr`, ptest, cset on ne.
				define i1 @sminv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: sminv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; UMAXV

				; Unsigned-max reduction of a <vscale x 32 x i1> predicate held in p0 and
				; p1. The expected assembly is the same instruction sequence as
				; orv_nxv32i1: predicate `orr`, ptest, cset on ne.
				define i1 @umaxv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: umaxv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; UMINV

				; Unsigned-min reduction of a <vscale x 32 x i1> predicate held in p0 and
				; p1. The expected assembly is the same instruction sequence as
				; andv_nxv32i1: predicate `and`, not, ptest, cset on eq.
				define i1 @uminv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: uminv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: not p0.b, p2/z, p0.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; Declarations of the wide-predicate reduction intrinsics called above:
				; nxv32i1 for every operation, plus nxv64i1 for `and`.
				declare i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1>)
				declare i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1>)

				declare i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1>)

				declare i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1>)

				declare i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1>)

				declare i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1>)

				declare i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1>)

				declare i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1>)

llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
				; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

				; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
				; WARN-NOT: warning

				; ANDV

				; AND reduction across a <vscale x 8 x i8> vector, returning i8.
				; The expected assembly reduces the i8 elements as .h lanes (andv h0).
				define i8 @andv_nxv8i8(<vscale x 8 x i8> %a) {
				; CHECK-LABEL: andv_nxv8i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: andv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8> %a)
				ret i8 %res
				}

				; AND reduction across a <vscale x 8 x i32> vector held in z0 and z1.
				; Expected assembly: the two registers are combined with a vector `and`,
				; followed by a single andv over .s lanes.
				define i32 @andv_nxv8i32(<vscale x 8 x i32> %a) {
				; CHECK-LABEL: andv_nxv8i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: and z0.d, z0.d, z1.d
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: andv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %a)
				ret i32 %res
				}

				; ORV

				; OR reduction across a <vscale x 2 x i32> vector, returning i32.
				; The expected assembly reduces the i32 elements as .d lanes (orv d0) and
				; reads the result back through x0/w0.
				define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
				; CHECK-LABEL: orv_nxv2i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: orv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
				ret i32 %res
				}

				; OR reduction across a <vscale x 8 x i64> vector held in z0-z3.
				; Expected assembly: three vector `orr`s fold the four registers into z0,
				; followed by a single orv over .d lanes.
				define i64 @orv_nxv8i64(<vscale x 8 x i64> %a) {
				; CHECK-LABEL: orv_nxv8i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: orr z1.d, z1.d, z3.d
				; CHECK-NEXT: orr z0.d, z0.d, z2.d
				; CHECK-NEXT: orr z0.d, z0.d, z1.d
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: orv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %a)
				ret i64 %res
				}

				; XORV

				; XOR reduction across a <vscale x 2 x i16> vector, returning i16.
				; The expected assembly reduces the i16 elements as .d lanes (eorv d0) and
				; reads the result back through x0/w0.
				define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
				; CHECK-LABEL: xorv_nxv2i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: eorv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
				ret i16 %res
				}

				; XOR reduction across a <vscale x 8 x i32> vector held in z0 and z1.
				; Expected assembly: the two registers are combined with a vector `eor`,
				; followed by a single eorv over .s lanes.
				define i32 @xorv_nxv8i32(<vscale x 8 x i32> %a) {
				; CHECK-LABEL: xorv_nxv8i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: eor z0.d, z0.d, z1.d
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: eorv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %a)
				ret i32 %res
				}

				; UADDV

				; ADD reduction across a <vscale x 4 x i16> vector, returning i16.
				; The expected assembly reduces the i16 elements as .s lanes (uaddv into
				; d0) and reads the result back through x0/w0.
				define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
				; CHECK-LABEL: uaddv_nxv4i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: uaddv d0, p0, z0.s
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
				ret i16 %res
				}

				; ADD reduction across a <vscale x 16 x i16> vector held in z0 and z1.
				; Expected assembly: the two registers are summed lane-wise with `add`,
				; followed by a single uaddv over .h lanes.
				define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
				; CHECK-LABEL: uaddv_nxv16i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: add z0.h, z0.h, z1.h
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: uaddv d0, p0, z0.h
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
				ret i16 %res
				}

				; ADD reduction across a <vscale x 16 x i32> vector held in z0-z3.
				; Expected assembly: three lane-wise `add`s fold the four registers into
				; z0, followed by a single uaddv over .s lanes.
				define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
				; CHECK-LABEL: uaddv_nxv16i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: add z1.s, z1.s, z3.s
				; CHECK-NEXT: add z0.s, z0.s, z2.s
				; CHECK-NEXT: add z0.s, z0.s, z1.s
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: uaddv d0, p0, z0.s
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
				ret i32 %res
				}

				; UMINV

				; Unsigned-min reduction across a <vscale x 2 x i32> vector, returning i32.
				; Expected assembly: each .d lane is first masked to its low 32 bits
				; (and with #0xffffffff), then reduced with uminv over .d lanes.
				define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
				; CHECK-LABEL: umin_nxv2i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
				; CHECK-NEXT: uminv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
				ret i32 %res
				}

				; Unsigned-min reduction across a <vscale x 4 x i64> vector held in z0 and
				; z1. Expected assembly: a predicated lane-wise `umin` combines the two
				; registers, followed by a single uminv over .d lanes.
				define i64 @umin_nxv4i64(<vscale x 4 x i64> %a) {
				; CHECK-LABEL: umin_nxv4i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
				; CHECK-NEXT: uminv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %a)
				ret i64 %res
				}

				; SMINV

				; Signed-min reduction across a <vscale x 4 x i8> vector, returning i8.
				; Expected assembly: each .s lane is sign-extended from a byte (sxtb)
				; before being reduced with sminv over .s lanes.
				define i8 @smin_nxv4i8(<vscale x 4 x i8> %a) {
				; CHECK-LABEL: smin_nxv4i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
				; CHECK-NEXT: sminv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %a)
				ret i8 %res
				}

				; Signed-min reduction across a <vscale x 8 x i32> vector held in z0 and
				; z1. Expected assembly: a predicated lane-wise `smin` combines the two
				; registers, followed by a single sminv over .s lanes.
				define i32 @smin_nxv8i32(<vscale x 8 x i32> %a) {
				; CHECK-LABEL: smin_nxv8i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
				; CHECK-NEXT: sminv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %a)
				ret i32 %res
				}

				; UMAXV

				; Unsigned-max reduction across a <vscale x 16 x i16> vector held in z0 and
				; z1. Expected assembly: a predicated lane-wise `umax` combines the two
				; registers, followed by a single umaxv over .h lanes.
				; The function is named after the umax operation it actually exercises.
				define i16 @umax_nxv16i16(<vscale x 16 x i16> %a) {
				; CHECK-LABEL: umax_nxv16i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
				; CHECK-NEXT: umaxv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16> %a)
				ret i16 %res
				}

				; SMAXV

				; Signed-max reduction across a <vscale x 8 x i64> vector held in z0-z3.
				; Expected assembly: three predicated lane-wise `smax`es fold the four
				; registers into z0, followed by a single smaxv over .d lanes.
				; The function is named after the smax operation it actually exercises.
				define i64 @smax_nxv8i64(<vscale x 8 x i64> %a) {
				; CHECK-LABEL: smax_nxv8i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d
				; CHECK-NEXT: smax z0.d, p0/m, z0.d, z2.d
				; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
				; CHECK-NEXT: smaxv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %a)
				ret i64 %res
				}

				; Declarations of the reduction intrinsics called above, grouped by
				; operation (and, or, xor, add, umin, smin, umax, smax).
				declare i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8>)
				declare i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32>)

				declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
				declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)

				declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
				declare i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32>)

				declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
				declare i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16>)
				declare i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32>)

				declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
				declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)

				declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
				declare i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32>)

				declare i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16>)

				declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

	Show First 20 Lines • Show All 136 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)			%b = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
	ret i32 %b			ret i32 %b
	}			}

	define i1 @test_v4i1(<4 x i1> %a) nounwind {			define i1 @test_v4i1(<4 x i1> %a) nounwind {
	; CHECK-LABEL: test_v4i1:			; CHECK-LABEL: test_v4i1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: movi v1.4h, #1			; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
	; CHECK-NEXT: and v0.8b, v0.8b, v1.8b			; CHECK-NEXT: umov w10, v0.h[1]
	; CHECK-NEXT: umaxv h0, v0.4h			; CHECK-NEXT: umov w11, v0.h[0]
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: umov w9, v0.h[2]
				; CHECK-NEXT: orr w10, w11, w10
				; CHECK-NEXT: umov w8, v0.h[3]
				; CHECK-NEXT: orr w9, w10, w9
				; CHECK-NEXT: orr w8, w9, w8
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
				paulwalker-arm (Unsubmitted, Not Done): Presumably this is down to the canonicalisation? I'm not sure there are any immediate performance concerns here, and I stand behind canonicalisation being the correct solution. I guess we can see if anybody disagrees with this. The worst that can happen is that more custom lowering for NEON is required, but because the existing lowering doesn't do anything special for OR, and MIN/MAX operations on i1 are weird anyway, I'd rather not do that in this patch unless we really have to.
				kmclaughlin (Author; Unsubmitted, Not Done): It is; I needed to change this test after I moved the transformation of vecreduce_umax to getNode().
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a)			%b = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a)
	ret i1 %b			ret i1 %b
	}			}

	define i24 @test_v4i24(<4 x i24> %a) nounwind {			define i24 @test_v4i24(<4 x i24> %a) nounwind {
	; CHECK-LABEL: test_v4i24:			; CHECK-LABEL: test_v4i24:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	Show All 34 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][CodeGen] Lower scalable integer vector reductions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 302811

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll

llvm/test/CodeGen/AArch64/sve-int-reduce.ll

llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll

llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][CodeGen] Lower scalable integer vector reductions — Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 302811

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll

llvm/test/CodeGen/AArch64/sve-int-reduce.ll

llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll

llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

[SVE][CodeGen] Lower scalable integer vector reductions
ClosedPublic