Diff 299397

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 20,851 Lines • ▼ Show 20 Lines
	}			}

	SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {			SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
	SDValue N0 = N->getOperand(0);			SDValue N0 = N->getOperand(0);
	EVT VT = N0.getValueType();			EVT VT = N0.getValueType();
	unsigned Opcode = N->getOpcode();			unsigned Opcode = N->getOpcode();

	// VECREDUCE over 1-element vector is just an extract.			// VECREDUCE over 1-element vector is just an extract.
	if (VT.getVectorNumElements() == 1) {			if (VT.getVectorElementCount().isScalar()) {
	SDLoc dl(N);			SDLoc dl(N);
	SDValue Res =			SDValue Res =
	DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,			DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
	DAG.getVectorIdxConstant(0, dl));			DAG.getVectorIdxConstant(0, dl));
	if (Res.getValueType() != N->getValueType(0))			if (Res.getValueType() != N->getValueType(0))
	Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);			Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
	return Res;			return Res;
	}			}
	▲ Show 20 Lines • Show All 1,555 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,315 Lines • ▼ Show 20 Lines	case ISD::EXTRACT_ELEMENT: {
Known = Known.trunc(EltBitWidth);		Known = Known.trunc(EltBitWidth);
break;		break;
}		}
case ISD::EXTRACT_VECTOR_ELT: {		case ISD::EXTRACT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);		SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);		SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();		EVT VecVT = InVec.getValueType();
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();		const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();		const unsigned NumSrcElts = VecVT.getVectorMinNumElements();
		paulwalker-armUnsubmitted Done Reply Inline Actions I think you want to just reject extracts from scalable vectors here. There's code at the top of this function which normally does the job, but it relies on the result type and so EXTRACT_VECTOR_ELT slips through. FYI: Eli has the same fix in D87651 so I'd match that and crown whoever lands first the winner. paulwalker-arm: I think you want to just reject extracts from scalable vectors here. There's code at the top…

// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know		// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
// anything about the extended bits.		// anything about the extended bits.
if (BitWidth > EltBitWidth)		if (BitWidth > EltBitWidth)
Known = Known.trunc(EltBitWidth);		Known = Known.trunc(EltBitWidth);

// If we know the element index, just demand that vector element, else for		// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.		// an unknown element index, ignore DemandedElts and demand them all.
▲ Show 20 Lines • Show All 6,714 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,008 Lines • ▼ Show 20 Lines	SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;		case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;		case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;		case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
}		}

SDValue Op = Node->getOperand(0);		SDValue Op = Node->getOperand(0);
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();

		if (VT.isScalableVector())
		report_fatal_error(
		"Expanding reductions for scalable vectors is undefined.");

// Try to use a shuffle reduction for power of two vectors.		// Try to use a shuffle reduction for power of two vectors.
if (VT.isPow2VectorType()) {		if (VT.isPow2VectorType()) {
while (VT.getVectorNumElements() > 1) {		while (VT.getVectorNumElements() > 1) {
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());		EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))		if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
break;		break;

SDValue Lo, Hi;		SDValue Lo, Hi;
▲ Show 20 Lines • Show All 44 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 929 Lines • ▼ Show 20 Lines	private:
SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,		SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;		EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,		SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
SelectionDAG &DAG) const;		SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,		SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
SelectionDAG &DAG) const;		SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthReductionToSVE(unsigned Opcode, SDValue ScalarOp,		SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
		SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
SelectionDAG &DAG) const;		SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,		SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
SelectionDAG &DAG) const;		SelectionDAG &DAG) const;

SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,		SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;		SmallVectorImpl<SDNode *> &Created) const override;
▲ Show 20 Lines • Show All 77 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,007 Lines • ▼ Show 20 Lines	for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::UDIV, VT, Custom);		setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);		setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);		setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);		setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);		setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);		setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);		setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);		setOperationAction(ISD::SRA, VT, Custom);
		setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
		setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
		setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
		setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
		setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
		setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
		setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
		setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
}		}

// Illegal unpacked integer vector types.		// Illegal unpacked integer vector types.
for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {		for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);		setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);		setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}		}

for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {		for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);		setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);		setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);		setOperationAction(ISD::SETCC, VT, Custom);
		setOperationAction(ISD::SMIN, VT, Custom);
		setOperationAction(ISD::UMIN, VT, Custom);
		setOperationAction(ISD::SMAX, VT, Custom);
		setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);		setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);		setOperationAction(ISD::TRUNCATE, VT, Custom);
		setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
		setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
		paulwalker-armUnsubmitted Not Done Reply Inline Actions FYI: I guess we'll just promote[1] things like VECREDUCE_ADD but I think [u/s][min/max] are just variants of AND and OR when it comes to i1 types. [1] Depending on the cost of `ptest` and the fact `VECREDUCE_AND` requires the extra xor, it might end up better to just promote all i1 base operations. Either way I'm happy with your current approach; I'm just making you aware of a potential future change. paulwalker-arm: FYI: I guess we'll just promote[1] things like VECREDUCE_ADD but I think [u/s][min/max] are…
		setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
		setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
		setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
		setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
		setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);

// There are no legal MVT::nxv16f## based types.		// There are no legal MVT::nxv16f## based types.
if (VT != MVT::nxv16i1) {		if (VT != MVT::nxv16i1) {
setOperationAction(ISD::SINT_TO_FP, VT, Promote);		setOperationAction(ISD::SINT_TO_FP, VT, Promote);
AddPromotedToType(ISD::SINT_TO_FP, VT, getPromotedVTForPredicate(VT));		AddPromotedToType(ISD::SINT_TO_FP, VT, getPromotedVTForPredicate(VT));
setOperationAction(ISD::UINT_TO_FP, VT, Promote);		setOperationAction(ISD::UINT_TO_FP, VT, Promote);
AddPromotedToType(ISD::UINT_TO_FP, VT, getPromotedVTForPredicate(VT));		AddPromotedToType(ISD::UINT_TO_FP, VT, getPromotedVTForPredicate(VT));
}		}
▲ Show 20 Lines • Show All 2,871 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::EXTRACT_SUBVECTOR:		case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);		return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:		case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);		return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:		case ISD::SDIV:
case ISD::UDIV:		case ISD::UDIV:
return LowerDIV(Op, DAG);		return LowerDIV(Op, DAG);
case ISD::SMIN:		case ISD::SMIN:
		if (Op.getValueType().getVectorElementType() == MVT::i1)
		return DAG.getNode(ISD::OR, SDLoc(Op), Op.getValueType(),
		paulwalker-armUnsubmitted Done Reply Inline Actions On reflection and since you're going the extra mile, I'd rather have this transformation (and the matching reduction ones) at the `SelectionDAG::getNode()` level so that we can just force a canonical representation for all targets as early as possible and then it'll never be a concern from a legalisation point of view. paulwalker-arm: On reflection and since you're going the extra mile, I'd rather have this transformation (and…
		Op.getOperand(0), Op.getOperand(1));
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
/OverrideNEON=/true);		/OverrideNEON=/true);
case ISD::UMIN:		case ISD::UMIN:
		if (Op.getValueType().getVectorElementType() == MVT::i1)
		return DAG.getNode(ISD::AND, SDLoc(Op), Op.getValueType(),
		Op.getOperand(0), Op.getOperand(1));
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
/OverrideNEON=/true);		/OverrideNEON=/true);
case ISD::SMAX:		case ISD::SMAX:
		if (Op.getValueType().getVectorElementType() == MVT::i1)
		return DAG.getNode(ISD::AND, SDLoc(Op), Op.getValueType(),
		Op.getOperand(0), Op.getOperand(1));
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
/OverrideNEON=/true);		/OverrideNEON=/true);
case ISD::UMAX:		case ISD::UMAX:
		if (Op.getValueType().getVectorElementType() == MVT::i1)
		return DAG.getNode(ISD::OR, SDLoc(Op), Op.getValueType(),
		Op.getOperand(0), Op.getOperand(1));
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,		return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
/OverrideNEON=/true);		/OverrideNEON=/true);
case ISD::SRA:		case ISD::SRA:
case ISD::SRL:		case ISD::SRL:
case ISD::SHL:		case ISD::SHL:
return LowerVectorSRA_SRL_SHL(Op, DAG);		return LowerVectorSRA_SRL_SHL(Op, DAG);
case ISD::SHL_PARTS:		case ISD::SHL_PARTS:
return LowerShiftLeftParts(Op, DAG);		return LowerShiftLeftParts(Op, DAG);
▲ Show 20 Lines • Show All 5,828 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
// Try to lower fixed length reductions to SVE.		// Try to lower fixed length reductions to SVE.
EVT SrcVT = Src.getValueType();		EVT SrcVT = Src.getValueType();
bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND \|\|		bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND \|\|
Op.getOpcode() == ISD::VECREDUCE_OR \|\|		Op.getOpcode() == ISD::VECREDUCE_OR \|\|
Op.getOpcode() == ISD::VECREDUCE_XOR \|\|		Op.getOpcode() == ISD::VECREDUCE_XOR \|\|
Op.getOpcode() == ISD::VECREDUCE_FADD \|\|		Op.getOpcode() == ISD::VECREDUCE_FADD \|\|
(Op.getOpcode() != ISD::VECREDUCE_ADD &&		(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64);		SrcVT.getVectorElementType() == MVT::i64);
if (useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {		if (SrcVT.isScalableVector() \|\|
		useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:		case ISD::VECREDUCE_ADD:
return LowerFixedLengthReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
case ISD::VECREDUCE_AND:		case ISD::VECREDUCE_AND:
return LowerFixedLengthReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);		if (SrcVT.getVectorElementType() == MVT::i1)
		return LowerPredReductionToSVE(Op, DAG);
		return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
		paulwalker-armUnsubmitted Done Reply Inline Actions Given LowerPredReductionToSVE has its own switch block, can this be done universally regardless of the opcode? i.e. if (SrcVT.isScalableVector() \|\| useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) { if (SrcVT.getVectorElementType() == MVT::i1) return LowerPredReductionToSVE(Op, DAG); switch (Op.getOpcode()) { paulwalker-arm: Given LowerPredReductionToSVE has its own switch block can this be done universally regardless…
case ISD::VECREDUCE_OR:		case ISD::VECREDUCE_OR:
return LowerFixedLengthReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);		if (SrcVT.getVectorElementType() == MVT::i1)
		return LowerPredReductionToSVE(Op, DAG);
		return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
case ISD::VECREDUCE_SMAX:		case ISD::VECREDUCE_SMAX:
return LowerFixedLengthReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);		if (SrcVT.getVectorElementType() == MVT::i1)
		return LowerPredReductionToSVE(Op, DAG);
		return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_SMIN:		case ISD::VECREDUCE_SMIN:
return LowerFixedLengthReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);		if (SrcVT.getVectorElementType() == MVT::i1)
		return LowerPredReductionToSVE(Op, DAG);
		return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
case ISD::VECREDUCE_UMAX:		case ISD::VECREDUCE_UMAX:
return LowerFixedLengthReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);		if (SrcVT.getVectorElementType() == MVT::i1)
		return LowerPredReductionToSVE(Op, DAG);
		return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_UMIN:		case ISD::VECREDUCE_UMIN:
return LowerFixedLengthReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);		if (SrcVT.getVectorElementType() == MVT::i1)
		return LowerPredReductionToSVE(Op, DAG);
		return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
case ISD::VECREDUCE_XOR:		case ISD::VECREDUCE_XOR:
return LowerFixedLengthReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);		if (SrcVT.getVectorElementType() == MVT::i1)
		return LowerPredReductionToSVE(Op, DAG);
		return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
case ISD::VECREDUCE_FADD:		case ISD::VECREDUCE_FADD:
return LowerFixedLengthReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
case ISD::VECREDUCE_FMAX:		case ISD::VECREDUCE_FMAX:
return LowerFixedLengthReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
case ISD::VECREDUCE_FMIN:		case ISD::VECREDUCE_FMIN:
return LowerFixedLengthReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);		return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
default:		default:
llvm_unreachable("Unhandled fixed length reduction");		llvm_unreachable("Unhandled fixed length reduction");
}		}
}		}

// Lower NEON reductions.		// Lower NEON reductions.
SDLoc dl(Op);		SDLoc dl(Op);
switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
▲ Show 20 Lines • Show All 6,450 Lines • ▼ Show 20 Lines	assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
"Only fixed length vectors are supported!");		"Only fixed length vectors are supported!");
Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));		Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
}		}

auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);		auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
return convertFromScalableVector(DAG, VT, ScalableRes);		return convertFromScalableVector(DAG, VT, ScalableRes);
}		}

SDValue AArch64TargetLowering::LowerFixedLengthReductionToSVE(unsigned Opcode,		SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
SDValue ScalarOp, SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
		SDLoc DL(ReduceOp);
		SDValue Op = ReduceOp.getOperand(0);
		EVT OpVT = Op.getValueType();

		if (!OpVT.isScalableVector() && OpVT.getVectorElementType() != MVT::i1)
		paulwalker-armUnsubmitted Done Reply Inline Actions Shouldn't this be `\|\|`? paulwalker-arm: Shouldn't this be `\|\|`?
		return SDValue();

		SDValue Pg = getPredicateForVector(DAG, DL, OpVT);

		AArch64CC::CondCode Cond;
		switch (ReduceOp.getOpcode()) {
		default:
		return SDValue();
		case ISD::VECREDUCE_OR:
		case ISD::VECREDUCE_UMAX:
		case ISD::VECREDUCE_SMIN:
		Cond = AArch64CC::ANY_ACTIVE;
		return getPTest(DAG, ReduceOp.getValueType(), Pg, Op, Cond);
		case ISD::VECREDUCE_AND:
		case ISD::VECREDUCE_UMIN:
		case ISD::VECREDUCE_SMAX:
		Cond = AArch64CC::NONE_ACTIVE;
		paulwalker-armUnsubmitted Done Reply Inline Actions This will cause a compiler warning if anybody adds a case statement after `ISD::VECREDUCE_XOR`. Personally I'd just wrap all the case blocks in `{}` and move AArch64CC::CondCode Cond into the blocks that need it. paulwalker-arm: This will cause a compiler warning if anybody adds a case statement after `ISD::VECREDUCE_XOR`.
		Op = DAG.getNode(ISD::XOR, SDLoc(ReduceOp), OpVT, Op, Pg);
		return getPTest(DAG, ReduceOp.getValueType(), Pg, Op, Cond);
		case ISD::VECREDUCE_XOR:
		SDValue ID =
		DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
		SDValue Cntp =
		paulwalker-armUnsubmitted Done Reply Inline Actions The AND is not required because `ISD::VECREDUCE_XOR` only says the bottom N bits are defined, where N is the size of the operand's vector element. The call to `getAnyExtOrTrunc` should be using `ReduceOp.getValueType()` and not hardwiring `MVT::i32`. Given this change `ReduceOp.getValueType()` will be used three times so I'd just have `EVT VT = ReduceOp.getValueType();` at the top of the function, which might have the effect that `AArch64CC::####_ACTIVE` can be used in place and still fit on the line and thus remove the need for `{}` for those entries. paulwalker-arm: The AND is not required because `ISD::VECREDUCE_XOR` only says the bottom N bits are defined…
		DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
		SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Cntp,
		DAG.getConstant(1, DL, MVT::i64));
		return DAG.getAnyExtOrTrunc(And, DL, MVT::i32);
		}

		return SDValue();
		}

		SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
		SDValue ScalarOp,
		SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);		SDLoc DL(ScalarOp);
SDValue VecOp = ScalarOp.getOperand(0);		SDValue VecOp = ScalarOp.getOperand(0);
EVT SrcVT = VecOp.getValueType();		EVT SrcVT = VecOp.getValueType();

SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);		if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
		paulwalker-armUnsubmitted Done Reply Inline Actions Is OverrideNEON needed here? I ask because when SrcVT is a fixed length vector the value of OverrideNEON seems irrelevant as by calling this function the only safe action is to pack the vector into a scalable type and thus you may as well just always pass in `true`. paulwalker-arm: Is OverrideNEON needed here? I ask because when SrcVT is a fixed length vector the value of…
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);		EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);		VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
		}

		SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);

		paulwalker-armUnsubmitted Done Reply Inline Actions As you're moving this, can you put it immediately before the `SDValue Rdx` line so it has some company? paulwalker-arm: As you're moving this, can you put it immediately before the `SDValue Rdx` line so it has some…
// UADDV always returns an i64 result.		// UADDV always returns an i64 result.
		paulwalker-armUnsubmitted Done Reply Inline Actions My preference would be to not have any interdependence between these two lowering functions. If you agree and considering `LowerPredReductionToSVE` has its own switch statement, can this be moved into `LowerVECREDUCE` within the `if (SrcVT.isScalableVector...` block and have `LowerPredReductionToSVE` create its own predicate. paulwalker-arm: My preference would be to not have any interdependence between these two lowering functions.
EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :		EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
SrcVT.getVectorElementType();		SrcVT.getVectorElementType();

SDValue Rdx = DAG.getNode(Opcode, DL, getPackedSVEVectorVT(ResVT), Pg, VecOp);		SDValue Rdx = DAG.getNode(Opcode, DL, getPackedSVEVectorVT(ResVT), Pg, VecOp);
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,		SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
Rdx, DAG.getConstant(0, DL, MVT::i64));		Rdx, DAG.getConstant(0, DL, MVT::i64));

// The VEC_REDUCE nodes expect an element size result.		// The VEC_REDUCE nodes expect an element size result.
▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t \| FileCheck %s
				; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

				; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
				; WARN-NOT: warning

				; ANDV

				define i1 @reduce_and_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_and_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_and_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_and_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_and_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_and_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_and_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_and_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; ORV

				define i1 @reduce_or_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_or_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.or.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_or_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_or_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_or_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_or_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; XORV

				define i1 @reduce_xor_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_xor_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: cntp x8, p1, p0.b
				; CHECK-NEXT: and w0, w8, #0x1
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.xor.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_xor_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_xor_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: cntp x8, p1, p0.h
				; CHECK-NEXT: and w0, w8, #0x1
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.xor.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_xor_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_xor_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: cntp x8, p1, p0.s
				; CHECK-NEXT: and w0, w8, #0x1
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.xor.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_xor_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_xor_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: cntp x8, p1, p0.d
				; CHECK-NEXT: and w0, w8, #0x1
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.xor.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; SMAXV

				define i1 @reduce_smax_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_smax_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smax.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_smax_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_smax_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smax.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_smax_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_smax_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smax.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_smax_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_smax_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smax.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; SMINV

				define i1 @reduce_smin_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_smin_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smin.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_smin_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_smin_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_smin_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; UMAXV

				; umax reduction over an nxv16i1 predicate: PTRUE at .b granule, PTEST of p0,
				; CSET on 'ne'.
				define i1 @reduce_umax_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_umax_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umax.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				; umax reduction over an nxv8i1 predicate: PTRUE at .h granule, PTEST of p0,
				; CSET on 'ne'.
				define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_umax_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				; umax reduction over an nxv4i1 predicate: PTRUE at .s granule, PTEST of p0,
				; CSET on 'ne'.
				define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_umax_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				; umax reduction over an nxv2i1 predicate: PTRUE at .d granule, PTEST of p0,
				; CSET on 'ne'.
				define i1 @reduce_umax_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_umax_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				; UMINV

				; umin reduction over an nxv16i1 predicate. The expected code inverts p0 under
				; a .b PTRUE, then uses PTEST and CSET on 'eq'.
				define i1 @reduce_umin_nxv16i1(<vscale x 16 x i1> %vec) {
				; CHECK-LABEL: reduce_umin_nxv16i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.b
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umin.i1.nxv16i1(<vscale x 16 x i1> %vec)
				ret i1 %res
				}

				; umin reduction over an nxv8i1 predicate: NOT of p0 under a .h PTRUE, then
				; PTEST and CSET on 'eq'.
				define i1 @reduce_umin_nxv8i1(<vscale x 8 x i1> %vec) {
				; CHECK-LABEL: reduce_umin_nxv8i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.h
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umin.i1.nxv8i1(<vscale x 8 x i1> %vec)
				ret i1 %res
				}

				; umin reduction over an nxv4i1 predicate: NOT of p0 under a .s PTRUE, then
				; PTEST and CSET on 'eq'.
				define i1 @reduce_umin_nxv4i1(<vscale x 4 x i1> %vec) {
				; CHECK-LABEL: reduce_umin_nxv4i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.s
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umin.i1.nxv4i1(<vscale x 4 x i1> %vec)
				ret i1 %res
				}

				; umin reduction over an nxv2i1 predicate: NOT of p0 under a .d PTRUE, then
				; PTEST and CSET on 'eq'.
				define i1 @reduce_umin_nxv2i1(<vscale x 2 x i1> %vec) {
				; CHECK-LABEL: reduce_umin_nxv2i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p1.d
				; CHECK-NEXT: not p0.b, p1/z, p0.b
				; CHECK-NEXT: ptest p1, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umin.i1.nxv2i1(<vscale x 2 x i1> %vec)
				ret i1 %res
				}

				declare i1 @llvm.vector.reduce.and.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.and.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.and.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.and.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.or.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.xor.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.xor.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.xor.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.xor.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.smin.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.smax.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.smax.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.smax.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.smax.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.umin.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.umin.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.umin.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.umin.i1.nxv2i1(<vscale x 2 x i1> %vec)

				declare i1 @llvm.vector.reduce.umax.i1.nxv16i1(<vscale x 16 x i1> %vec)
				declare i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
				declare i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
				declare i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)

llvm/test/CodeGen/AArch64/sve-int-reduce.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
				; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

				; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
				; WARN-NOT: warning

				; ANDV

				; AND reduction of nxv16i8: expected to select ANDV at .b element size under
				; an all-true predicate, with the scalar result moved to w0 by FMOV.
				define i8 @andv_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: andv_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: andv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; AND reduction of nxv8i16: expected to select ANDV at .h element size under
				; an all-true predicate, with the scalar result moved to w0 by FMOV.
				define i16 @andv_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: andv_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: andv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; AND reduction of nxv4i32: expected to select ANDV at .s element size under
				; an all-true predicate, with the scalar result moved to w0 by FMOV.
				define i32 @andv_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: andv_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: andv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				define i64 @andv_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: andv_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: andv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				[Inline review comment by paulwalker-arm, marked Done] This is personal preference so feel free to ignore, but considering they go down different code paths and have structurally different output, I'd prefer to have the predicate tests within their own ll file.
				}

				; ORV

				; OR reduction of nxv16i8: expected to select ORV at .b element size under an
				; all-true predicate, with the scalar result moved to w0 by FMOV.
				define i8 @orv_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: orv_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: orv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; OR reduction of nxv8i16: expected to select ORV at .h element size under an
				; all-true predicate, with the scalar result moved to w0 by FMOV.
				define i16 @orv_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: orv_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: orv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; OR reduction of nxv4i32: expected to select ORV at .s element size under an
				; all-true predicate, with the scalar result moved to w0 by FMOV.
				define i32 @orv_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: orv_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: orv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; OR reduction of nxv2i64: expected to select ORV at .d element size under an
				; all-true predicate, with the scalar result moved to x0 by FMOV.
				define i64 @orv_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: orv_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: orv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; XORV

				; XOR reduction of nxv16i8: expected to select EORV at .b element size under
				; an all-true predicate, with the scalar result moved to w0 by FMOV.
				define i8 @xorv_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: xorv_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: eorv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; XOR reduction of nxv8i16: expected to select EORV at .h element size under
				; an all-true predicate, with the scalar result moved to w0 by FMOV.
				define i16 @xorv_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: xorv_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: eorv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; XOR reduction of nxv4i32: expected to select EORV at .s element size under
				; an all-true predicate, with the scalar result moved to w0 by FMOV.
				define i32 @xorv_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: xorv_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: eorv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; XOR reduction of nxv2i64: expected to select EORV at .d element size under
				; an all-true predicate, with the scalar result moved to x0 by FMOV.
				define i64 @xorv_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: xorv_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: eorv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; UADDV

				; ADD reduction of nxv16i8: expected to select UADDV over .b elements. The
				; checks show the sum written to d0 and moved to x0, with only w0 returned
				; (see the kill annotation).
				define i8 @uaddv_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: uaddv_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: uaddv d0, p0, z0.b
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; ADD reduction of nxv8i16: expected to select UADDV over .h elements. The
				; checks show the sum written to d0 and moved to x0, with only w0 returned
				; (see the kill annotation).
				define i16 @uaddv_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: uaddv_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: uaddv d0, p0, z0.h
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; ADD reduction of nxv4i32: expected to select UADDV over .s elements. The
				; checks show the sum written to d0 and moved to x0, with only w0 returned
				; (see the kill annotation).
				define i32 @uaddv_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: uaddv_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: uaddv d0, p0, z0.s
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; ADD reduction of nxv2i64: expected to select UADDV over .d elements, with
				; the full 64-bit result moved to x0 (no sub-register kill is expected here).
				define i64 @uaddv_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: uaddv_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: uaddv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; UMINV

				; Unsigned-min reduction of nxv16i8: expected to select UMINV at .b element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i8 @umin_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: umin_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: uminv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; Unsigned-min reduction of nxv8i16: expected to select UMINV at .h element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i16 @umin_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: umin_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: uminv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; Unsigned-min reduction of nxv4i32: expected to select UMINV at .s element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i32 @umin_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: umin_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: uminv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; Unsigned-min reduction of nxv2i64: expected to select UMINV at .d element
				; size under an all-true predicate, with the result moved to x0 by FMOV.
				define i64 @umin_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: umin_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: uminv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; SMINV

				; Signed-min reduction of nxv16i8: expected to select SMINV at .b element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i8 @smin_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: smin_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: sminv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; Signed-min reduction of nxv8i16: expected to select SMINV at .h element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i16 @smin_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: smin_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: sminv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; Signed-min reduction of nxv4i32: expected to select SMINV at .s element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i32 @smin_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: smin_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: sminv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; Signed-min reduction of nxv2i64: expected to select SMINV at .d element
				; size under an all-true predicate, with the result moved to x0 by FMOV.
				define i64 @smin_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: smin_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: sminv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; UMAXV

				; Unsigned-max reduction of nxv16i8: expected to select UMAXV at .b element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i8 @umax_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: umax_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: umaxv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; Unsigned-max reduction of nxv8i16: expected to select UMAXV at .h element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i16 @umax_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: umax_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: umaxv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; Unsigned-max reduction of nxv4i32: expected to select UMAXV at .s element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i32 @umax_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: umax_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: umaxv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; Unsigned-max reduction of nxv2i64: expected to select UMAXV at .d element
				; size under an all-true predicate, with the result moved to x0 by FMOV.
				define i64 @umax_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: umax_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: umaxv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				; SMAXV

				; Signed-max reduction of nxv16i8: expected to select SMAXV at .b element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i8 @smax_nxv16i8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: smax_nxv16i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: smaxv b0, p0, z0.b
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8> %a)
				ret i8 %res
				}

				; Signed-max reduction of nxv8i16: expected to select SMAXV at .h element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i16 @smax_nxv8i16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: smax_nxv8i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: smaxv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16> %a)
				ret i16 %res
				}

				; Signed-max reduction of nxv4i32: expected to select SMAXV at .s element
				; size under an all-true predicate, with the result moved to w0 by FMOV.
				define i32 @smax_nxv4i32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: smax_nxv4i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: smaxv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %a)
				ret i32 %res
				}

				; Signed-max reduction of nxv2i64: expected to select SMAXV at .d element
				; size under an all-true predicate, with the result moved to x0 by FMOV.
				define i64 @smax_nxv2i64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: smax_nxv2i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: smaxv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %a)
				ret i64 %res
				}

				declare i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)

				declare i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8>)
				declare i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16>)
				declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
				declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)

llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
				; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

				; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
				; WARN-NOT: warning

				; ANDV

				; AND reduction of nxv32i1, wider than one predicate register. The checks
				; first AND p0 with p1, then use the NOT + PTEST + CSET 'eq' sequence on the
				; combined predicate.
				define i1 @andv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: andv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: not p0.b, p2/z, p0.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; AND reduction of nxv64i1. The checks combine p0..p3 pairwise with AND
				; before the NOT + PTEST + CSET 'eq' sequence. p4 holds the all-true
				; predicate, so the expected code also spills and reloads p4 around the body
				; and adjusts sp by one vector length (addvl).
				define i1 @andv_nxv64i1(<vscale x 64 x i1> %a) {
				; CHECK-LABEL: andv_nxv64i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
				; CHECK-NEXT: addvl sp, sp, #-1
				; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
				; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
				; CHECK-NEXT: .cfi_offset w29, -16
				; CHECK-NEXT: ptrue p4.b
				; CHECK-NEXT: and p1.b, p4/z, p1.b, p3.b
				; CHECK-NEXT: and p0.b, p4/z, p0.b, p2.b
				; CHECK-NEXT: and p0.b, p4/z, p0.b, p1.b
				; CHECK-NEXT: not p0.b, p4/z, p0.b
				; CHECK-NEXT: ptest p4, p0.b
				; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: addvl sp, sp, #1
				; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %a)
				ret i1 %res
				}

				; ORV

				; OR reduction of nxv32i1: the checks ORR p0 with p1 under an all-true
				; predicate, then PTEST the result and CSET on 'ne'.
				define i1 @orv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: orv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; XORV

				; XOR reduction of nxv32i1: the checks EOR p0 with p1, count the set lanes
				; with CNTP, and keep bit 0 of the count (AND with #0x1) as the result.
				define i1 @xorv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: xorv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: cntp x8, p2, p0.b
				; CHECK-NEXT: and w0, w8, #0x1
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; SMAXV

				; smax reduction of nxv32i1: the checks AND p0 with p1, then use the
				; NOT + PTEST + CSET 'eq' sequence on the combined predicate.
				define i1 @smaxv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: smaxv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: not p0.b, p2/z, p0.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; SMINV

				; smin reduction of nxv32i1: the checks ORR p0 with p1 under an all-true
				; predicate, then PTEST the result and CSET on 'ne'.
				define i1 @sminv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: sminv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; UMAXV

				; umax reduction of nxv32i1: the checks ORR p0 with p1 under an all-true
				; predicate, then PTEST the result and CSET on 'ne'.
				define i1 @umaxv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: umaxv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, ne
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				; UMINV

				; umin reduction of nxv32i1: the checks AND p0 with p1, then use the
				; NOT + PTEST + CSET 'eq' sequence on the combined predicate.
				define i1 @uminv_nxv32i1(<vscale x 32 x i1> %a) {
				; CHECK-LABEL: uminv_nxv32i1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p2.b
				; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
				; CHECK-NEXT: not p0.b, p2/z, p0.b
				; CHECK-NEXT: ptest p2, p0.b
				; CHECK-NEXT: cset w0, eq
				; CHECK-NEXT: ret
				%res = call i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1> %a)
				ret i1 %res
				}

				declare i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1>)
				declare i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1>)

				declare i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1>)

				declare i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1>)

				declare i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1>)

				declare i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1>)

				declare i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1>)

				declare i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1>)

llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
				; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

				; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
				; WARN-NOT: warning

				; ANDV

				; AND reduction of nxv8i8. The checks show the reduction carried out at .h
				; element size (ANDV h0 over z0.h), with no extra extend or mask instruction.
				define i8 @andv_nxv8i8(<vscale x 8 x i8> %a) {
				; CHECK-LABEL: andv_nxv8i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: andv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8> %a)
				ret i8 %res
				}

				; AND reduction of nxv8i32, which spans two vector registers. The checks AND
				; z0 with z1 element-wise first, then reduce the single result with ANDV.
				define i32 @andv_nxv8i32(<vscale x 8 x i32> %a) {
				; CHECK-LABEL: andv_nxv8i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: and z0.d, z0.d, z1.d
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: andv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %a)
				ret i32 %res
				}

				; ORV

				; OR reduction of nxv2i32. The checks show the reduction carried out at .d
				; element size (ORV d0 over z0.d); the result goes to x0 and only w0 is
				; returned (see the kill annotation).
				define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
				; CHECK-LABEL: orv_nxv2i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: orv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
				ret i32 %res
				}

				; OR reduction of nxv8i64, which spans four vector registers. The checks
				; combine z0..z3 pairwise with ORR, then reduce the single result with ORV.
				define i64 @orv_nxv8i64(<vscale x 8 x i64> %a) {
				; CHECK-LABEL: orv_nxv8i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: orr z1.d, z1.d, z3.d
				; CHECK-NEXT: orr z0.d, z0.d, z2.d
				; CHECK-NEXT: orr z0.d, z0.d, z1.d
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: orv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %a)
				ret i64 %res
				}

				; XORV

				; XOR reduction of nxv2i16. The checks show the reduction carried out at .d
				; element size (EORV d0 over z0.d); the result goes to x0 and only w0 is
				; returned (see the kill annotation).
				define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
				; CHECK-LABEL: xorv_nxv2i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: eorv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
				ret i16 %res
				}

				; XOR reduction of nxv8i32, which spans two vector registers. The checks EOR
				; z0 with z1 element-wise first, then reduce the single result with EORV.
				define i32 @xorv_nxv8i32(<vscale x 8 x i32> %a) {
				; CHECK-LABEL: xorv_nxv8i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: eor z0.d, z0.d, z1.d
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: eorv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %a)
				ret i32 %res
				}

				; UADDV

				; ADD reduction of nxv4i16. The checks show UADDV applied at .s element size
				; with no extra extend or mask; the sum goes to x0 and only w0 is returned.
				define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
				; CHECK-LABEL: uaddv_nxv4i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: uaddv d0, p0, z0.s
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
				ret i16 %res
				}

				; ADD reduction of nxv16i16, which spans two vector registers. The checks ADD
				; z0 and z1 element-wise at .h first, then reduce the single result with UADDV.
				define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
				; CHECK-LABEL: uaddv_nxv16i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: add z0.h, z0.h, z1.h
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: uaddv d0, p0, z0.h
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
				ret i16 %res
				}

				; ADD reduction of nxv16i32, which spans four vector registers. The checks
				; combine z0..z3 pairwise with ADD at .s, then reduce the result with UADDV.
				define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
				; CHECK-LABEL: uaddv_nxv16i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: add z1.s, z1.s, z3.s
				; CHECK-NEXT: add z0.s, z0.s, z2.s
				; CHECK-NEXT: add z0.s, z0.s, z1.s
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: uaddv d0, p0, z0.s
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
				ret i32 %res
				}

				; UMINV

				; Unsigned-min reduction of nxv2i32. The checks mask each .d element to its
				; low 32 bits (AND with #0xffffffff) before UMINV at .d element size; the
				; result goes to x0 and only w0 is returned.
				define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
				; CHECK-LABEL: umin_nxv2i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
				; CHECK-NEXT: uminv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
				ret i32 %res
				}

				; Unsigned-min reduction of nxv4i64, which spans two vector registers. The
				; checks take the predicated element-wise UMIN of z0 and z1, then reduce the
				; result with UMINV under the same predicate.
				define i64 @umin_nxv4i64(<vscale x 4 x i64> %a) {
				; CHECK-LABEL: umin_nxv4i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
				; CHECK-NEXT: uminv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %a)
				ret i64 %res
				}

				; SMINV

				; Signed-min reduction of nxv4i8. The checks sign-extend the low byte of each
				; .s element (SXTB) before SMINV at .s element size.
				define i8 @smin_nxv4i8(<vscale x 4 x i8> %a) {
				; CHECK-LABEL: smin_nxv4i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
				; CHECK-NEXT: sminv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %a)
				ret i8 %res
				}

				; Signed-min reduction of nxv8i32, which spans two vector registers. The
				; checks take the predicated element-wise SMIN of z0 and z1, then reduce the
				; result with SMINV under the same predicate.
				define i32 @smin_nxv8i32(<vscale x 8 x i32> %a) {
				; CHECK-LABEL: smin_nxv8i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
				; CHECK-NEXT: sminv s0, p0, z0.s
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %a)
				ret i32 %res
				}

				; UMAXV

				; Unsigned-max reduction of nxv16i16, which spans two vector registers. The
				; checks take the predicated element-wise UMAX of z0 and z1, then reduce the
				; result with UMAXV under the same predicate.
				; Named umax_* to match the intrinsic under test; the function was previously
				; called smin_nxv16i16 although it exercises llvm.vector.reduce.umax.
				define i16 @umax_nxv16i16(<vscale x 16 x i16> %a) {
				; CHECK-LABEL: umax_nxv16i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
				; CHECK-NEXT: umaxv h0, p0, z0.h
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%res = call i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16> %a)
				ret i16 %res
				}

				; SMAXV

				; Signed-max reduction of nxv8i64, which spans four vector registers. The
				; checks combine z0..z3 pairwise with predicated SMAX, then reduce the result
				; with SMAXV under the same predicate.
				; Named smax_* to match the intrinsic under test; the function was previously
				; called smin_nxv8i64 although it exercises llvm.vector.reduce.smax.
				define i64 @smax_nxv8i64(<vscale x 8 x i64> %a) {
				; CHECK-LABEL: smax_nxv8i64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d
				; CHECK-NEXT: smax z0.d, p0/m, z0.d, z2.d
				; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
				; CHECK-NEXT: smaxv d0, p0, z0.d
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%res = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %a)
				ret i64 %res
				}

				declare i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8>)
				declare i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32>)

				declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
				declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)

				declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
				declare i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32>)

				declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
				declare i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16>)
				declare i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32>)

				declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
				declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)

				declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
				declare i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32>)

				declare i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16>)

				declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][CodeGen] Lower scalable integer vector reductions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 299397

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll

llvm/test/CodeGen/AArch64/sve-int-reduce.ll

llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll

llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][CodeGen] Lower scalable integer vector reductions (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 299397

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll

llvm/test/CodeGen/AArch64/sve-int-reduce.ll

llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll

llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll

[SVE][CodeGen] Lower scalable integer vector reductions
ClosedPublic