Diff 513167

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 995 Lines • ▼ Show 20 Lines	#undef LCALLNAME5
setTargetDAGCombine({ISD::MGATHER, ISD::MSCATTER});		setTargetDAGCombine({ISD::MGATHER, ISD::MSCATTER});

setTargetDAGCombine(ISD::FP_EXTEND);		setTargetDAGCombine(ISD::FP_EXTEND);

setTargetDAGCombine(ISD::GlobalAddress);		setTargetDAGCombine(ISD::GlobalAddress);

setTargetDAGCombine(ISD::CTLZ);		setTargetDAGCombine(ISD::CTLZ);

		setTargetDAGCombine(ISD::VECREDUCE_AND);
		setTargetDAGCombine(ISD::VECREDUCE_OR);
		setTargetDAGCombine(ISD::VECREDUCE_XOR);

// In case of strict alignment, avoid an excessive number of byte wide stores.		// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;		MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =		MaxStoresPerMemset =
Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;		Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;

MaxGluedStoresPerMemcpy = 4;		MaxGluedStoresPerMemcpy = 4;
MaxStoresPerMemcpyOptSize = 4;		MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemcpy =		MaxStoresPerMemcpy =
▲ Show 20 Lines • Show All 148 Lines • ▼ Show 20 Lines	if (Subtarget->hasNEON()) {
}		}
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,		for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {		MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);		setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);		setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);		setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);		setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);		setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
		setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
		setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
		setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
}		}
setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);		setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
		setOperationAction(ISD::VECREDUCE_AND, MVT::v2i64, Custom);
		setOperationAction(ISD::VECREDUCE_OR, MVT::v2i64, Custom);
		setOperationAction(ISD::VECREDUCE_XOR, MVT::v2i64, Custom);

setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);		setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);		setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled		// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.		// directly.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {		for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);		setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

▲ Show 20 Lines • Show All 11,993 Lines • ▼ Show 20 Lines
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,		static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
SDValue VecOp = ScalarOp.getOperand(0);		SDValue VecOp = ScalarOp.getOperand(0);
auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);		auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,		return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
DAG.getConstant(0, DL, MVT::i64));		DAG.getConstant(0, DL, MVT::i64));
}		}

		static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT,
		SDLoc DL, SelectionDAG &DAG) {
		unsigned ScalarOpcode;
		switch (Opcode) {
		case ISD::VECREDUCE_AND:
		ScalarOpcode = ISD::AND;
		break;
		case ISD::VECREDUCE_OR:
		ScalarOpcode = ISD::OR;
		break;
		case ISD::VECREDUCE_XOR:
		ScalarOpcode = ISD::XOR;
		break;
		default:
		llvm_unreachable("Expected bitwise vector reduction");
		return SDValue();
		}

		EVT VecVT = Vec.getValueType();
		assert(VecVT.isFixedLengthVector() && VecVT.isPow2VectorType() &&
		"Expected power-of-2 length vector");

		EVT ElemVT = VecVT.getVectorElementType();

		SDValue Result;
		unsigned NumElems = VecVT.getVectorNumElements();

		// special case for boolean reductions
		dmgreenUnsubmitted Done Reply Inline Actions special -> Special dmgreen: special -> Special
		if (ElemVT == MVT::i1) {
		// split large vectors into smaller ones
		dmgreenUnsubmitted Done Reply Inline Actions split -> Split dmgreen: split -> Split
		if (NumElems > 16) {
		SDValue Lo, Hi;
		std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
		return getVectorBitwiseReduce(Opcode, DAG.getNode(ScalarOpcode, DL, Lo.getValueType(), Lo, Hi), VT, DL, DAG);
		dmgreenUnsubmitted Done Reply Inline Actions Formatting - the line is a bot long here. dmgreen: Formatting - the line is a bot long here.
		}

		// Casting to i8 first leads to better codegen.
		SDValue Extended =
		DAG.getAnyExtOrTrunc(Vec, DL, VecVT.changeVectorElementType(MVT::i8));
		switch (ScalarOpcode) {
		case ISD::AND:
		Sp00phAuthorUnsubmitted Done Reply Inline Actions Using either zext or sext here adds a few extra instructions in the generated code. Is it guaranteed that any-extending an i1 vector results in a vector whose elements are all either 0 or -1? It seems reasonable because afaik mask vector elements on AArch64 are always either 0 or -1, but it could also introduce some subtle incorrectness if there is some case where any-extending an i1 vector does not result in such a mask vector. Sp00ph: Using either zext or sext here adds a few extra instructions in the generated code. Is it…
		efriedmaUnsubmitted Done Reply Inline Actions No, no guarantee here. I mean, there are restrictions related to boolean operands certain specific operations (like the condition of a VSELECT), but there isn't any restriction that applies to arithmetic operations. An easy way to get a vector with arbitrary data in the high bits is truncating from nxi8 to nxi1. You could generate a different sequence if the operand is known to be sign-extended (ComputeNumSignBits). efriedma: No, no guarantee here. I mean, there are restrictions related to boolean operands certain…
		Sp00phAuthorUnsubmitted Done Reply Inline Actions `ComputeNumSignBits` doesn't seem to work properly on `<N x i1>` function arguments. So e.g. an `<8 x i1>` gets lowered to an `<8 x i8>` during function argument lowering, and calling `ComputeNumSignBits` on that returns a 1 (even though `<N x i1>` in function arguments seems to always be all zeros or all ones; either that or the current codegen is already incorrect). If I instead sign extend the vector in the `i1` branch it adds 2 redundant instructions to all the codegen tests that take a `<N x i1>` as a function argument. Tests that e.g. reduce a `<N x i1>` obtained from a setcc don't get those extra instructions because there's a `setcc + sext` combine I believe. I guess this could be fixed by somehow convincing `ComputeNumSignBits` that a `<N x i1>` function argument that got lowered to a `<N x iM>` does in fact have M sign bits? Sp00ph: `ComputeNumSignBits` doesn't seem to work properly on `<N x i1>` function arguments. So e.g. an…
		dmgreenUnsubmitted Done Reply Inline Actions I believe there is no requirement that arguments are all-ones. For example https://godbolt.org/z/MYdEh1fET. There is a signext attribute that can be applied to scalars, but not vectors. dmgreen: I believe there is no requirement that arguments are all-ones. For example https://godbolt.
		Sp00phAuthorUnsubmitted Done Reply Inline Actions In that case it looks like the codegen for the boolean vector reductions is already wrong without this patch. For example this: https://llvm.godbolt.org/z/YjE8n7q7s causes calls to `bad` to return a 0, when I believe they should return a 1 instead, because it does umax then truncate instead of truncate then umax, and those don't commute in the general case. Sp00ph: In that case it looks like the codegen for the boolean vector reductions is already wrong…
		Result = DAG.getNode(ISD::VECREDUCE_UMIN, DL, MVT::i8, Extended);
		break;
		case ISD::OR:
		Result = DAG.getNode(ISD::VECREDUCE_UMAX, DL, MVT::i8, Extended);
		break;
		case ISD::XOR:
		Result = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i8, Extended);
		break;
		default:
		llvm_unreachable("Unexpected Opcode");
		}

		Result = DAG.getAnyExtOrTrunc(Result, DL, MVT::i1);
		} else {
		SmallVector<int, 16> ShiftValues(NumElems, -1);

		SDValue Accumulator = Vec;

		// Iteratively apply the bitwise operator to the first and second half
		dmgreenUnsubmitted Done Reply Inline Actions Would it be possible for vector <= 64bits to use the 64bit type sizes? It won't matter in a lot of cases but some cpu's have a higher throughput for 64bit vectors. dmgreen: Would it be possible for vector <= 64bits to use the 64bit type sizes? It won't matter in a lot…
		efriedmaUnsubmitted Done Reply Inline Actions For <=64 bits, don't you want to switch to integer registers? `orr x0, x0, x0, lsr #32` etc. is generally going to be faster than dup+orr. efriedma: For <=64 bits, don't you want to switch to integer registers? `orr x0, x0, x0, lsr #32` etc.
		// of the vector until only one element remains.
		for (unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
		// ShiftValues should contain `<Shift, Shift + 1, ..., 2 * Shift - 1,
		// undef, ..., undef>`
		std::iota(ShiftValues.begin(), ShiftValues.begin() + Shift, Shift);
		std::fill(ShiftValues.begin() + Shift, ShiftValues.end(), -1);
		SDValue ShuffledVector = DAG.getVectorShuffle(
		VecVT, DL, Accumulator, DAG.getUNDEF(VecVT), ShiftValues);
		Accumulator =
		DAG.getNode(ScalarOpcode, DL, VecVT, Accumulator, ShuffledVector);
		}

		Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Accumulator,
		DAG.getConstant(0, DL, MVT::i64));
		}

		return DAG.getAnyExtOrTrunc(Result, DL, VT);
		}

SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,		SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);		SDValue Src = Op.getOperand(0);

// Try to lower fixed length reductions to SVE.		// Try to lower fixed length reductions to SVE.
EVT SrcVT = Src.getValueType();		EVT SrcVT = Src.getValueType();
bool OverrideNEON = Subtarget->forceStreamingCompatibleSVE() \|\|		bool OverrideNEON = Subtarget->forceStreamingCompatibleSVE() \|\|
Op.getOpcode() == ISD::VECREDUCE_AND \|\|
Op.getOpcode() == ISD::VECREDUCE_OR \|\|
Op.getOpcode() == ISD::VECREDUCE_XOR \|\|
dmgreenUnsubmitted Done Reply Inline Actions If SVE is available then the orv/eorv/etc should be preferred. dmgreen: If SVE is available then the orv/eorv/etc should be preferred.
Op.getOpcode() == ISD::VECREDUCE_FADD \|\|		Op.getOpcode() == ISD::VECREDUCE_FADD \|\|
(Op.getOpcode() != ISD::VECREDUCE_ADD &&		(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64);		SrcVT.getVectorElementType() == MVT::i64);
if (SrcVT.isScalableVector() \|\|		if (SrcVT.isScalableVector() \|\|
useSVEForFixedLengthVectorVT(		useSVEForFixedLengthVectorVT(
SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {		SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {

if (SrcVT.getVectorElementType() == MVT::i1)		if (SrcVT.getVectorElementType() == MVT::i1)
Show All 25 Lines	if (SrcVT.isScalableVector() \|\|
default:		default:
llvm_unreachable("Unhandled fixed length reduction");		llvm_unreachable("Unhandled fixed length reduction");
}		}
}		}

// Lower NEON reductions.		// Lower NEON reductions.
SDLoc dl(Op);		SDLoc dl(Op);
switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
		case ISD::VECREDUCE_AND:
		case ISD::VECREDUCE_OR:
		case ISD::VECREDUCE_XOR:
		return getVectorBitwiseReduce(Op.getOpcode(), Op.getOperand(0),
		Op.getValueType(), dl, DAG);
case ISD::VECREDUCE_ADD:		case ISD::VECREDUCE_ADD:
return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);		return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
case ISD::VECREDUCE_SMAX:		case ISD::VECREDUCE_SMAX:
return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);		return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
case ISD::VECREDUCE_SMIN:		case ISD::VECREDUCE_SMIN:
return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);		return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
case ISD::VECREDUCE_UMAX:		case ISD::VECREDUCE_UMAX:
return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);		return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
▲ Show 20 Lines • Show All 7,242 Lines • ▼ Show 20 Lines	if (Op0SExt && (isSignedIntSetCC(CC) \|\| isIntEqualitySetCC(CC))) {
Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));		Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));
} else		} else
return SDValue();		return SDValue();

return DAG.getNode(ISD::SETCC, DL, UseMVT.changeVectorElementType(MVT::i1),		return DAG.getNode(ISD::SETCC, DL, UseMVT.changeVectorElementType(MVT::i1),
Op0ExtV, Op1ExtV, Op->getOperand(2));		Op0ExtV, Op1ExtV, Op->getOperand(2));
}		}

		/// DAG combine for VECREDUCE_AND / VECREDUCE_OR / VECREDUCE_XOR nodes.
		///
		/// Before legalization, a reduction whose operand is a fixed-length,
		/// power-of-2 vector of i1 is replaced by the expansion produced by
		/// getVectorBitwiseReduce. In every other case an empty SDValue is returned
		/// and the node is left alone.
		static SDValue
		performVecReduceBitwiseCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
		                               SelectionDAG &DAG) {
		  // This combine only fires before legalization.
		  if (!DCI.isBeforeLegalize())
		    return SDValue();

		  SDValue Src = N->getOperand(0);
		  EVT SrcVT = Src.getValueType();

		  // Restrict the rewrite to fixed-length boolean vectors with a power-of-2
		  // element count.
		  const bool IsPow2FixedBoolVec = SrcVT.getVectorElementType() == MVT::i1 &&
		                                  SrcVT.isFixedLengthVector() &&
		                                  SrcVT.isPow2VectorType();
		  if (!IsPow2FixedBoolVec)
		    return SDValue();

		  return getVectorBitwiseReduce(N->getOpcode(), Src, N->getValueType(0),
		                                SDLoc(N), DAG);
		}

static SDValue performSETCCCombine(SDNode *N,		static SDValue performSETCCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");		assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
SDValue LHS = N->getOperand(0);		SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);		SDValue RHS = N->getOperand(1);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();		ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDLoc DL(N);		SDLoc DL(N);
▲ Show 20 Lines • Show All 1,152 Lines • ▼ Show 20 Lines

SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,		SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {		DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;		SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {		switch (N->getOpcode()) {
default:		default:
LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");		LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
break;		break;
		case ISD::VECREDUCE_AND:
		case ISD::VECREDUCE_OR:
		case ISD::VECREDUCE_XOR:
		return performVecReduceBitwiseCombine(N, DCI, DAG);
case ISD::ADD:		case ISD::ADD:
case ISD::SUB:		case ISD::SUB:
return performAddSubCombine(N, DCI, DAG);		return performAddSubCombine(N, DCI, DAG);
case ISD::BUILD_VECTOR:		case ISD::BUILD_VECTOR:
return performBuildVectorCombine(N, DCI, DAG);		return performBuildVectorCombine(N, DCI, DAG);
case ISD::TRUNCATE:		case ISD::TRUNCATE:
return performTruncateCombine(N, DAG);		return performTruncateCombine(N, DAG);
case AArch64ISD::ANDS:		case AArch64ISD::ANDS:
▲ Show 20 Lines • Show All 2,892 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/dag-combine-setcc.ll

Show First 20 Lines • Show All 187 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%cmp2 = icmp eq i64 %cast, -1		%cmp2 = icmp eq i64 %cast, -1
ret i1 %cmp2		ret i1 %cmp2
}		}

define i1 @combine_setcc_ne_vecreduce_and_v8i1(<8 x i8> %a) {		define i1 @combine_setcc_ne_vecreduce_and_v8i1(<8 x i8> %a) {
; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v8i1:		; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v8i1:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: cmtst v0.8b, v0.8b, v0.8b		; CHECK-NEXT: cmtst v0.8b, v0.8b, v0.8b
		; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: uminv b0, v0.8b		; CHECK-NEXT: uminv b0, v0.8b
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: tst w8, #0x1		; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%cmp1 = icmp ne <8 x i8> %a, zeroinitializer		%cmp1 = icmp ne <8 x i8> %a, zeroinitializer
%cast = bitcast <8 x i1> %cmp1 to i8		%cast = bitcast <8 x i1> %cmp1 to i8
%cmp2 = icmp ne i8 %cast, -1		%cmp2 = icmp ne i8 %cast, -1
ret i1 %cmp2		ret i1 %cmp2
}		}

define i1 @combine_setcc_ne_vecreduce_and_v16i1(<16 x i8> %a) {		define i1 @combine_setcc_ne_vecreduce_and_v16i1(<16 x i8> %a) {
; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v16i1:		; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v16i1:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b		; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b
		; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: uminv b0, v0.16b		; CHECK-NEXT: uminv b0, v0.16b
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: tst w8, #0x1		; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%cmp1 = icmp ne <16 x i8> %a, zeroinitializer		%cmp1 = icmp ne <16 x i8> %a, zeroinitializer
%cast = bitcast <16 x i1> %cmp1 to i16		%cast = bitcast <16 x i1> %cmp1 to i16
%cmp2 = icmp ne i16 %cast, -1		%cmp2 = icmp ne i16 %cast, -1
ret i1 %cmp2		ret i1 %cmp2
}		}

define i1 @combine_setcc_ne_vecreduce_and_v32i1(<32 x i8> %a) {		define i1 @combine_setcc_ne_vecreduce_and_v32i1(<32 x i8> %a) {
; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v32i1:		; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v32i1:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b		; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b
		; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: cmeq v1.16b, v1.16b, #0		; CHECK-NEXT: cmeq v1.16b, v1.16b, #0
; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b		; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-NEXT: uminv b0, v0.16b		; CHECK-NEXT: uminv b0, v0.16b
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: tst w8, #0x1		; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%cmp1 = icmp ne <32 x i8> %a, zeroinitializer		%cmp1 = icmp ne <32 x i8> %a, zeroinitializer
%cast = bitcast <32 x i1> %cmp1 to i32		%cast = bitcast <32 x i1> %cmp1 to i32
%cmp2 = icmp ne i32 %cast, -1		%cmp2 = icmp ne i32 %cast, -1
ret i1 %cmp2		ret i1 %cmp2
}		}

define i1 @combine_setcc_ne_vecreduce_and_v64i1(<64 x i8> %a) {		define i1 @combine_setcc_ne_vecreduce_and_v64i1(<64 x i8> %a) {
; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v64i1:		; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v64i1:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: cmtst v1.16b, v1.16b, v1.16b		; CHECK-NEXT: cmtst v1.16b, v1.16b, v1.16b
		; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b		; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b
; CHECK-NEXT: cmeq v3.16b, v3.16b, #0		; CHECK-NEXT: cmeq v3.16b, v3.16b, #0
; CHECK-NEXT: cmeq v2.16b, v2.16b, #0		; CHECK-NEXT: cmeq v2.16b, v2.16b, #0
; CHECK-NEXT: bic v1.16b, v1.16b, v3.16b		; CHECK-NEXT: bic v1.16b, v1.16b, v3.16b
; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b		; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: uminv b0, v0.16b		; CHECK-NEXT: uminv b0, v0.16b
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: tst w8, #0x1		; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%cmp1 = icmp ne <64 x i8> %a, zeroinitializer		%cmp1 = icmp ne <64 x i8> %a, zeroinitializer
%cast = bitcast <64 x i1> %cmp1 to i64		%cast = bitcast <64 x i1> %cmp1 to i64
%cmp2 = icmp ne i64 %cast, -1		%cmp2 = icmp ne i64 %cast, -1
ret i1 %cmp2		ret i1 %cmp2
}		}

define i1 @combine_setcc_eq0_conjunction_xor_or(ptr %a, ptr %b) {		define i1 @combine_setcc_eq0_conjunction_xor_or(ptr %a, ptr %b) {
▲ Show 20 Lines • Show All 116 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/double_reduct.ll

Show First 20 Lines • Show All 123 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
ret i32 %r		ret i32 %r
}		}

define i32 @and_i32(<8 x i32> %a, <4 x i32> %b) {		define i32 @and_i32(<8 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: and_i32:		; CHECK-LABEL: and_i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b		; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%r1 = call i32 @llvm.vector.reduce.and.i32.v8i32(<8 x i32> %a)		%r1 = call i32 @llvm.vector.reduce.and.i32.v8i32(<8 x i32> %a)
%r2 = call i32 @llvm.vector.reduce.and.i32.v4i32(<4 x i32> %b)		%r2 = call i32 @llvm.vector.reduce.and.i32.v4i32(<4 x i32> %b)
%r = and i32 %r1, %r2		%r = and i32 %r1, %r2
ret i32 %r		ret i32 %r
}		}

define i32 @or_i32(<8 x i32> %a, <4 x i32> %b) {		define i32 @or_i32(<8 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: or_i32:		; CHECK-LABEL: or_i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b		; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%r1 = call i32 @llvm.vector.reduce.or.i32.v8i32(<8 x i32> %a)		%r1 = call i32 @llvm.vector.reduce.or.i32.v8i32(<8 x i32> %a)
%r2 = call i32 @llvm.vector.reduce.or.i32.v4i32(<4 x i32> %b)		%r2 = call i32 @llvm.vector.reduce.or.i32.v4i32(<4 x i32> %b)
%r = or i32 %r1, %r2		%r = or i32 %r1, %r2
ret i32 %r		ret i32 %r
}		}

define i32 @xor_i32(<8 x i32> %a, <4 x i32> %b) {		define i32 @xor_i32(<8 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: xor_i32:		; CHECK-LABEL: xor_i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b		; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eor w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%r1 = call i32 @llvm.vector.reduce.xor.i32.v8i32(<8 x i32> %a)		%r1 = call i32 @llvm.vector.reduce.xor.i32.v8i32(<8 x i32> %a)
%r2 = call i32 @llvm.vector.reduce.xor.i32.v4i32(<4 x i32> %b)		%r2 = call i32 @llvm.vector.reduce.xor.i32.v4i32(<4 x i32> %b)
%r = xor i32 %r1, %r2		%r = xor i32 %r1, %r2
ret i32 %r		ret i32 %r
}		}

define i32 @umin_i32(<8 x i32> %a, <4 x i32> %b) {		define i32 @umin_i32(<8 x i32> %a, <4 x i32> %b) {
▲ Show 20 Lines • Show All 89 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=aarch64 < %s \| FileCheck %s			; RUN: llc -mtriple=aarch64 < %s \| FileCheck %s

	; All tests are doing unordered vector comparisons on vectors larger than a			; All tests are doing unordered vector comparisons on vectors larger than a
	; Neon vector.			; Neon vector.

	define i1 @unordered_floating_point_compare_on_v8f32(<8 x float> %a_vec) {			define i1 @unordered_floating_point_compare_on_v8f32(<8 x float> %a_vec) {
	; CHECK-LABEL: unordered_floating_point_compare_on_v8f32:			; CHECK-LABEL: unordered_floating_point_compare_on_v8f32:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0			; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0
	; CHECK-NEXT: mov w8, #1			; CHECK-NEXT: mov w8, #1 // =0x1
	; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0			; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0
	; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h			; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
	; CHECK-NEXT: mvn v0.16b, v0.16b			; CHECK-NEXT: mvn v0.16b, v0.16b
	; CHECK-NEXT: xtn v0.8b, v0.8h			; CHECK-NEXT: xtn v0.8b, v0.8h
	; CHECK-NEXT: umaxv b0, v0.8b			; CHECK-NEXT: umaxv b0, v0.8b
	; CHECK-NEXT: fmov w9, s0			; CHECK-NEXT: fmov w9, s0
	; CHECK-NEXT: bic w0, w8, w9			; CHECK-NEXT: bic w0, w8, w9
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%a_cmp = fcmp ule <8 x float> %a_vec, zeroinitializer			%a_cmp = fcmp ule <8 x float> %a_vec, zeroinitializer
	%cmp_result = bitcast <8 x i1> %a_cmp to i8			%cmp_result = bitcast <8 x i1> %a_cmp to i8
	%all_zero = icmp eq i8 %cmp_result, 0			%all_zero = icmp eq i8 %cmp_result, 0
	ret i1 %all_zero			ret i1 %all_zero
	}			}

	define i1 @unordered_floating_point_compare_on_v16f32(<16 x float> %a_vec) {			define i1 @unordered_floating_point_compare_on_v16f32(<16 x float> %a_vec) {
	; CHECK-LABEL: unordered_floating_point_compare_on_v16f32:			; CHECK-LABEL: unordered_floating_point_compare_on_v16f32:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0			; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0
	; CHECK-NEXT: mov w8, #1			; CHECK-NEXT: mov w8, #1 // =0x1
	; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0			; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0
	; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0			; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0
	; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0			; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0
	; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h			; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h
	; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h			; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
	; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b			; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b
	; CHECK-NEXT: mvn v0.16b, v0.16b			; CHECK-NEXT: mvn v0.16b, v0.16b
	; CHECK-NEXT: umaxv b0, v0.16b			; CHECK-NEXT: umaxv b0, v0.16b
	; CHECK-NEXT: fmov w9, s0			; CHECK-NEXT: fmov w9, s0
	; CHECK-NEXT: bic w0, w8, w9			; CHECK-NEXT: bic w0, w8, w9
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%a_cmp = fcmp ule <16 x float> %a_vec, zeroinitializer			%a_cmp = fcmp ule <16 x float> %a_vec, zeroinitializer
	%cmp_result = bitcast <16 x i1> %a_cmp to i16			%cmp_result = bitcast <16 x i1> %a_cmp to i16
	%all_zero = icmp eq i16 %cmp_result, 0			%all_zero = icmp eq i16 %cmp_result, 0
	ret i1 %all_zero			ret i1 %all_zero
	}			}

	define i1 @unordered_floating_point_compare_on_v32f32(<32 x float> %a_vec) {			define i1 @unordered_floating_point_compare_on_v32f32(<32 x float> %a_vec) {
	; CHECK-LABEL: unordered_floating_point_compare_on_v32f32:			; CHECK-LABEL: unordered_floating_point_compare_on_v32f32:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0			; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0
	; CHECK-NEXT: mov w9, #1			; CHECK-NEXT: mov w9, #1 // =0x1
	; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0			; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0
	; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0			; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0
	; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0			; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0
	; CHECK-NEXT: fcmgt v7.4s, v7.4s, #0.0			; CHECK-NEXT: fcmgt v7.4s, v7.4s, #0.0
	; CHECK-NEXT: fcmgt v6.4s, v6.4s, #0.0			; CHECK-NEXT: fcmgt v6.4s, v6.4s, #0.0
	; CHECK-NEXT: fcmgt v5.4s, v5.4s, #0.0			; CHECK-NEXT: fcmgt v5.4s, v5.4s, #0.0
	; CHECK-NEXT: fcmgt v4.4s, v4.4s, #0.0			; CHECK-NEXT: fcmgt v4.4s, v4.4s, #0.0
	; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h			; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h
	; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h			; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
	; CHECK-NEXT: uzp1 v6.8h, v6.8h, v7.8h			; CHECK-NEXT: uzp1 v1.8h, v6.8h, v7.8h
	; CHECK-NEXT: uzp1 v1.8h, v4.8h, v5.8h			; CHECK-NEXT: uzp1 v3.8h, v4.8h, v5.8h
	; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b			; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b
	; CHECK-NEXT: uzp1 v1.16b, v1.16b, v6.16b			; CHECK-NEXT: uzp1 v1.16b, v3.16b, v1.16b
	; CHECK-NEXT: mvn v0.16b, v0.16b			; CHECK-NEXT: mvn v0.16b, v0.16b
	; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b			; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
	; CHECK-NEXT: umaxv b0, v0.16b			; CHECK-NEXT: umaxv b0, v0.16b
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: bic w0, w9, w8			; CHECK-NEXT: bic w0, w9, w8
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%a_cmp = fcmp ule <32 x float> %a_vec, zeroinitializer			%a_cmp = fcmp ule <32 x float> %a_vec, zeroinitializer
	%cmp_result = bitcast <32 x i1> %a_cmp to i32			%cmp_result = bitcast <32 x i1> %a_cmp to i32
	%all_zero = icmp eq i32 %cmp_result, 0			%all_zero = icmp eq i32 %cmp_result, 0
	ret i1 %all_zero			ret i1 %all_zero
	}			}

llvm/test/CodeGen/AArch64/reduce-and.ll

Show All 14 Lines
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)		%or_result = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
ret i1 %or_result		ret i1 %or_result
}		}

define i1 @test_redand_v2i1(<2 x i1> %a) {		define i1 @test_redand_v2i1(<2 x i1> %a) {
; CHECK-LABEL: test_redand_v2i1:		; CHECK-LABEL: test_redand_v2i1:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
		; CHECK-NEXT: movi d1, #0x0000ff000000ff
		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s		; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1		; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v2i1:		; GISEL-LABEL: test_redand_v2i1:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov s1, v0.s[1]		; GISEL-NEXT: mov s1, v0.s[1]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: and w8, w8, w9		; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: and w0, w8, #0x1		; GISEL-NEXT: and w0, w8, #0x1
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)		%or_result = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
ret i1 %or_result		ret i1 %or_result
}		}

define i1 @test_redand_v4i1(<4 x i1> %a) {		define i1 @test_redand_v4i1(<4 x i1> %a) {
; CHECK-LABEL: test_redand_v4i1:		; CHECK-LABEL: test_redand_v4i1:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
		; CHECK-NEXT: bic v0.4h, #255, lsl #8
; CHECK-NEXT: uminv h0, v0.4h		; CHECK-NEXT: uminv h0, v0.4h
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1		; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v4i1:		; GISEL-LABEL: test_redand_v4i1:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
▲ Show 20 Lines • Show All 189 Lines • ▼ Show 20 Lines
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%and_result = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)		%and_result = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
ret i8 %and_result		ret i8 %and_result
}		}

define i8 @test_redand_v3i8(<3 x i8> %a) {		define i8 @test_redand_v3i8(<3 x i8> %a) {
; CHECK-LABEL: test_redand_v3i8:		; CHECK-LABEL: test_redand_v3i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, w1		; CHECK-NEXT: movi d0, #0xff00ff00ff00ff
; CHECK-NEXT: and w8, w8, w2		; CHECK-NEXT: mov v0.h[0], w0
; CHECK-NEXT: and w0, w8, #0xff		; CHECK-NEXT: mov v0.h[1], w1
		; CHECK-NEXT: mov v0.h[2], w2
		; CHECK-NEXT: dup v1.2s, v0.s[1]
		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: dup v1.4h, v0.h[1]
		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: umov w0, v0.h[0]
		dmgreenUnsubmitted Not Done Reply Inline Actions I'm surprised this passes vectors in gpr registers. It would be quite different for values in vector regs. dmgreen: I'm surprised this passes vectors in gpr registers. It would be quite different for values…
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v3i8:		; GISEL-LABEL: test_redand_v3i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: and w8, w0, w1		; GISEL-NEXT: and w8, w0, w1
; GISEL-NEXT: and w0, w8, w2		; GISEL-NEXT: and w0, w8, w2
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%and_result = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)		%and_result = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
ret i8 %and_result		ret i8 %and_result
}		}

define i8 @test_redand_v4i8(<4 x i8> %a) {		define i8 @test_redand_v4i8(<4 x i8> %a) {
; CHECK-LABEL: test_redand_v4i8:		; CHECK-LABEL: test_redand_v4i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.h[3]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[2]		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w10, v0.h[1]		; CHECK-NEXT: dup v1.4h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[0]		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: and w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: and w10, w11, w10
; CHECK-NEXT: and w0, w10, w8
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v4i8:		; GISEL-LABEL: test_redand_v4i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
; GISEL-NEXT: mov h2, v0.h[2]		; GISEL-NEXT: mov h2, v0.h[2]
; GISEL-NEXT: mov h3, v0.h[3]		; GISEL-NEXT: mov h3, v0.h[3]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: fmov w10, s2		; GISEL-NEXT: fmov w10, s2
; GISEL-NEXT: fmov w11, s3		; GISEL-NEXT: fmov w11, s3
; GISEL-NEXT: and w8, w8, w9		; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: and w9, w10, w11		; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w0, w8, w9		; GISEL-NEXT: and w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%and_result = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a)		%and_result = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a)
ret i8 %and_result		ret i8 %and_result
}		}

define i8 @test_redand_v8i8(<8 x i8> %a) {		define i8 @test_redand_v8i8(<8 x i8> %a) {
; CHECK-LABEL: test_redand_v8i8:		; CHECK-LABEL: test_redand_v8i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.b[5]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: umov w9, v0.b[4]		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w10, v0.b[1]		; CHECK-NEXT: dup v1.4h, v0.h[1]
; CHECK-NEXT: umov w11, v0.b[0]		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w12, v0.b[3]		; CHECK-NEXT: dup v1.8b, v0.b[1]
; CHECK-NEXT: umov w13, v0.b[2]		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w14, v0.b[6]		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: umov w15, v0.b[7]
; CHECK-NEXT: and w8, w9, w8
; CHECK-NEXT: and w10, w11, w10
; CHECK-NEXT: and w11, w13, w12
; CHECK-NEXT: and w9, w10, w11
; CHECK-NEXT: and w8, w8, w14
; CHECK-NEXT: and w8, w9, w8
; CHECK-NEXT: and w0, w8, w15
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v8i8:		; GISEL-LABEL: test_redand_v8i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov b1, v0.b[1]		; GISEL-NEXT: mov b1, v0.b[1]
; GISEL-NEXT: mov b2, v0.b[2]		; GISEL-NEXT: mov b2, v0.b[2]
; GISEL-NEXT: mov b3, v0.b[3]		; GISEL-NEXT: mov b3, v0.b[3]
Show All 19 Lines
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%and_result = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a)		%and_result = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a)
ret i8 %and_result		ret i8 %and_result
}		}

define i8 @test_redand_v16i8(<16 x i8> %a) {		define i8 @test_redand_v16i8(<16 x i8> %a) {
; CHECK-LABEL: test_redand_v16i8:		; CHECK-LABEL: test_redand_v16i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.b[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.b[0]		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.b[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.b[3]		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w12, v0.b[4]		; CHECK-NEXT: dup v1.16b, v0.b[1]
; CHECK-NEXT: umov w13, v0.b[5]		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w14, v0.b[6]		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: and w8, w9, w8
; CHECK-NEXT: umov w9, v0.b[7]
; CHECK-NEXT: and w10, w10, w11
; CHECK-NEXT: and w11, w12, w13
; CHECK-NEXT: and w8, w8, w10
; CHECK-NEXT: and w10, w11, w14
; CHECK-NEXT: and w8, w8, w10
; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v16i8:		; GISEL-LABEL: test_redand_v16i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: and v0.8b, v0.8b, v1.8b		; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov b1, v0.b[1]		; GISEL-NEXT: mov b1, v0.b[1]
; GISEL-NEXT: mov b2, v0.b[2]		; GISEL-NEXT: mov b2, v0.b[2]
Show All 21 Lines	; GISEL-NEXT: ret
%and_result = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a)		%and_result = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a)
ret i8 %and_result		ret i8 %and_result
}		}

define i8 @test_redand_v32i8(<32 x i8> %a) {		define i8 @test_redand_v32i8(<32 x i8> %a) {
; CHECK-LABEL: test_redand_v32i8:		; CHECK-LABEL: test_redand_v32i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.b[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.b[0]		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.b[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.b[3]		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w12, v0.b[4]		; CHECK-NEXT: dup v1.16b, v0.b[1]
; CHECK-NEXT: umov w13, v0.b[5]		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w14, v0.b[6]		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: and w8, w9, w8
; CHECK-NEXT: umov w9, v0.b[7]
; CHECK-NEXT: and w10, w10, w11
; CHECK-NEXT: and w11, w12, w13
; CHECK-NEXT: and w8, w8, w10
; CHECK-NEXT: and w10, w11, w14
; CHECK-NEXT: and w8, w8, w10
; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v32i8:		; GISEL-LABEL: test_redand_v32i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: and v0.16b, v0.16b, v1.16b		; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: and v0.8b, v0.8b, v1.8b		; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov b1, v0.b[1]		; GISEL-NEXT: mov b1, v0.b[1]
Show All 22 Lines	; GISEL-NEXT: ret
%and_result = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a)		%and_result = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a)
ret i8 %and_result		ret i8 %and_result
}		}

define i16 @test_redand_v4i16(<4 x i16> %a) {		define i16 @test_redand_v4i16(<4 x i16> %a) {
; CHECK-LABEL: test_redand_v4i16:		; CHECK-LABEL: test_redand_v4i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.h[3]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[2]		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w10, v0.h[1]		; CHECK-NEXT: dup v1.4h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[0]		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: and w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: and w10, w11, w10
; CHECK-NEXT: and w0, w10, w8
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v4i16:		; GISEL-LABEL: test_redand_v4i16:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
; GISEL-NEXT: mov h2, v0.h[2]		; GISEL-NEXT: mov h2, v0.h[2]
; GISEL-NEXT: mov h3, v0.h[3]		; GISEL-NEXT: mov h3, v0.h[3]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: fmov w10, s2		; GISEL-NEXT: fmov w10, s2
; GISEL-NEXT: fmov w11, s3		; GISEL-NEXT: fmov w11, s3
; GISEL-NEXT: and w8, w8, w9		; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: and w9, w10, w11		; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w0, w8, w9		; GISEL-NEXT: and w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%and_result = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a)		%and_result = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a)
ret i16 %and_result		ret i16 %and_result
}		}

define i16 @test_redand_v8i16(<8 x i16> %a) {		define i16 @test_redand_v8i16(<8 x i16> %a) {
; CHECK-LABEL: test_redand_v8i16:		; CHECK-LABEL: test_redand_v8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.h[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[0]		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.h[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[3]		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: and w9, w10, w11
; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v8i16:		; GISEL-LABEL: test_redand_v8i16:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: and v0.8b, v0.8b, v1.8b		; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
; GISEL-NEXT: mov h2, v0.h[2]		; GISEL-NEXT: mov h2, v0.h[2]
Show All 9 Lines	; GISEL-NEXT: ret
%and_result = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a)		%and_result = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a)
ret i16 %and_result		ret i16 %and_result
}		}

define i16 @test_redand_v16i16(<16 x i16> %a) {		define i16 @test_redand_v16i16(<16 x i16> %a) {
; CHECK-LABEL: test_redand_v16i16:		; CHECK-LABEL: test_redand_v16i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.h[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[0]		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.h[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[3]		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: and w9, w10, w11
; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v16i16:		; GISEL-LABEL: test_redand_v16i16:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: and v0.16b, v0.16b, v1.16b		; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: and v0.8b, v0.8b, v1.8b		; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
Show All 10 Lines	; GISEL-NEXT: ret
%and_result = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %a)		%and_result = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %a)
ret i16 %and_result		ret i16 %and_result
}		}

define i32 @test_redand_v2i32(<2 x i32> %a) {		define i32 @test_redand_v2i32(<2 x i32> %a) {
; CHECK-LABEL: test_redand_v2i32:		; CHECK-LABEL: test_redand_v2i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: and w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v2i32:		; GISEL-LABEL: test_redand_v2i32:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov s1, v0.s[1]		; GISEL-NEXT: mov s1, v0.s[1]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: and w0, w8, w9		; GISEL-NEXT: and w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%and_result = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a)		%and_result = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a)
ret i32 %and_result		ret i32 %and_result
}		}

define i32 @test_redand_v4i32(<4 x i32> %a) {		define i32 @test_redand_v4i32(<4 x i32> %a) {
; CHECK-LABEL: test_redand_v4i32:		; CHECK-LABEL: test_redand_v4i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v4i32:		; GISEL-LABEL: test_redand_v4i32:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: and v0.8b, v0.8b, v1.8b		; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov s1, v0.s[1]		; GISEL-NEXT: mov s1, v0.s[1]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: and w0, w8, w9		; GISEL-NEXT: and w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%and_result = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)		%and_result = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
ret i32 %and_result		ret i32 %and_result
}		}

define i32 @test_redand_v8i32(<8 x i32> %a) {		define i32 @test_redand_v8i32(<8 x i32> %a) {
; CHECK-LABEL: test_redand_v8i32:		; CHECK-LABEL: test_redand_v8i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v8i32:		; GISEL-LABEL: test_redand_v8i32:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: and v0.16b, v0.16b, v1.16b		; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: and v0.8b, v0.8b, v1.8b		; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov s1, v0.s[1]		; GISEL-NEXT: mov s1, v0.s[1]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: and w0, w8, w9		; GISEL-NEXT: and w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%and_result = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a)		%and_result = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a)
ret i32 %and_result		ret i32 %and_result
}		}

define i64 @test_redand_v2i64(<2 x i64> %a) {		define i64 @test_redand_v2i64(<2 x i64> %a) {
; CHECK-LABEL: test_redand_v2i64:		; CHECK-LABEL: test_redand_v2i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: fmov x0, d0		; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v2i64:		; GISEL-LABEL: test_redand_v2i64:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: fmov x8, d0		; GISEL-NEXT: fmov x8, d0
; GISEL-NEXT: fmov x9, d1		; GISEL-NEXT: fmov x9, d1
; GISEL-NEXT: and x0, x8, x9		; GISEL-NEXT: and x0, x8, x9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%and_result = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a)		%and_result = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a)
ret i64 %and_result		ret i64 %and_result
}		}

define i64 @test_redand_v4i64(<4 x i64> %a) {		define i64 @test_redand_v4i64(<4 x i64> %a) {
; CHECK-LABEL: test_redand_v4i64:		; CHECK-LABEL: test_redand_v4i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: fmov x0, d0		; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redand_v4i64:		; GISEL-LABEL: test_redand_v4i64:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: and v0.16b, v0.16b, v1.16b		; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: fmov x8, d0		; GISEL-NEXT: fmov x8, d0
Show All 26 Lines

llvm/test/CodeGen/AArch64/reduce-or.ll

Show All 14 Lines
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %a)		%or_result = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %a)
ret i1 %or_result		ret i1 %or_result
}		}

define i1 @test_redor_v2i1(<2 x i1> %a) {		define i1 @test_redor_v2i1(<2 x i1> %a) {
; CHECK-LABEL: test_redor_v2i1:		; CHECK-LABEL: test_redor_v2i1:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
		; CHECK-NEXT: movi d1, #0x0000ff000000ff
		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s		; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1		; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v2i1:		; GISEL-LABEL: test_redor_v2i1:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov s1, v0.s[1]		; GISEL-NEXT: mov s1, v0.s[1]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: orr w8, w8, w9		; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: and w0, w8, #0x1		; GISEL-NEXT: and w0, w8, #0x1
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)		%or_result = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
ret i1 %or_result		ret i1 %or_result
}		}

define i1 @test_redor_v4i1(<4 x i1> %a) {		define i1 @test_redor_v4i1(<4 x i1> %a) {
; CHECK-LABEL: test_redor_v4i1:		; CHECK-LABEL: test_redor_v4i1:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
		; CHECK-NEXT: bic v0.4h, #255, lsl #8
; CHECK-NEXT: umaxv h0, v0.4h		; CHECK-NEXT: umaxv h0, v0.4h
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1		; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v4i1:		; GISEL-LABEL: test_redor_v4i1:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
▲ Show 20 Lines • Show All 189 Lines • ▼ Show 20 Lines
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> %a)		%or_result = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> %a)
ret i8 %or_result		ret i8 %or_result
}		}

define i8 @test_redor_v3i8(<3 x i8> %a) {		define i8 @test_redor_v3i8(<3 x i8> %a) {
; CHECK-LABEL: test_redor_v3i8:		; CHECK-LABEL: test_redor_v3i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr w8, w0, w1		; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: orr w0, w8, w2		; CHECK-NEXT: dup v2.4h, w1
		; CHECK-NEXT: mov v0.h[0], w0
		; CHECK-NEXT: mov v1.16b, v0.16b
		; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
		; CHECK-NEXT: mov v1.h[1], w1
		; CHECK-NEXT: mov v1.h[2], w2
		; CHECK-NEXT: dup v1.2s, v1.s[1]
		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v3i8:		; GISEL-LABEL: test_redor_v3i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: orr w8, w0, w1		; GISEL-NEXT: orr w8, w0, w1
; GISEL-NEXT: orr w0, w8, w2		; GISEL-NEXT: orr w0, w8, w2
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> %a)		%or_result = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> %a)
ret i8 %or_result		ret i8 %or_result
}		}

define i8 @test_redor_v4i8(<4 x i8> %a) {		define i8 @test_redor_v4i8(<4 x i8> %a) {
; CHECK-LABEL: test_redor_v4i8:		; CHECK-LABEL: test_redor_v4i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.h[3]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[2]		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w10, v0.h[1]		; CHECK-NEXT: dup v1.4h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[0]		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: orr w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: orr w10, w11, w10
; CHECK-NEXT: orr w0, w10, w8
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v4i8:		; GISEL-LABEL: test_redor_v4i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
; GISEL-NEXT: mov h2, v0.h[2]		; GISEL-NEXT: mov h2, v0.h[2]
; GISEL-NEXT: mov h3, v0.h[3]		; GISEL-NEXT: mov h3, v0.h[3]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: fmov w10, s2		; GISEL-NEXT: fmov w10, s2
; GISEL-NEXT: fmov w11, s3		; GISEL-NEXT: fmov w11, s3
; GISEL-NEXT: orr w8, w8, w9		; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: orr w9, w10, w11		; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w0, w8, w9		; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a)		%or_result = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a)
ret i8 %or_result		ret i8 %or_result
}		}

define i8 @test_redor_v8i8(<8 x i8> %a) {		define i8 @test_redor_v8i8(<8 x i8> %a) {
; CHECK-LABEL: test_redor_v8i8:		; CHECK-LABEL: test_redor_v8i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.b[5]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: umov w9, v0.b[4]		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w10, v0.b[1]		; CHECK-NEXT: dup v1.4h, v0.h[1]
; CHECK-NEXT: umov w11, v0.b[0]		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w12, v0.b[3]		; CHECK-NEXT: dup v1.8b, v0.b[1]
; CHECK-NEXT: umov w13, v0.b[2]		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w14, v0.b[6]		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: umov w15, v0.b[7]
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: orr w10, w11, w10
; CHECK-NEXT: orr w11, w13, w12
; CHECK-NEXT: orr w9, w10, w11
; CHECK-NEXT: orr w8, w8, w14
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: orr w0, w8, w15
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v8i8:		; GISEL-LABEL: test_redor_v8i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov b1, v0.b[1]		; GISEL-NEXT: mov b1, v0.b[1]
; GISEL-NEXT: mov b2, v0.b[2]		; GISEL-NEXT: mov b2, v0.b[2]
; GISEL-NEXT: mov b3, v0.b[3]		; GISEL-NEXT: mov b3, v0.b[3]
Show All 19 Lines
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a)		%or_result = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a)
ret i8 %or_result		ret i8 %or_result
}		}

define i8 @test_redor_v16i8(<16 x i8> %a) {		define i8 @test_redor_v16i8(<16 x i8> %a) {
; CHECK-LABEL: test_redor_v16i8:		; CHECK-LABEL: test_redor_v16i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.b[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.b[0]		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.b[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.b[3]		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w12, v0.b[4]		; CHECK-NEXT: dup v1.16b, v0.b[1]
; CHECK-NEXT: umov w13, v0.b[5]		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w14, v0.b[6]		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: umov w9, v0.b[7]
; CHECK-NEXT: orr w10, w10, w11
; CHECK-NEXT: orr w11, w12, w13
; CHECK-NEXT: orr w8, w8, w10
; CHECK-NEXT: orr w10, w11, w14
; CHECK-NEXT: orr w8, w8, w10
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v16i8:		; GISEL-LABEL: test_redor_v16i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b		; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov b1, v0.b[1]		; GISEL-NEXT: mov b1, v0.b[1]
; GISEL-NEXT: mov b2, v0.b[2]		; GISEL-NEXT: mov b2, v0.b[2]
Show All 21 Lines	; GISEL-NEXT: ret
%or_result = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a)		%or_result = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a)
ret i8 %or_result		ret i8 %or_result
}		}

define i8 @test_redor_v32i8(<32 x i8> %a) {		define i8 @test_redor_v32i8(<32 x i8> %a) {
; CHECK-LABEL: test_redor_v32i8:		; CHECK-LABEL: test_redor_v32i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.b[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.b[0]		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.b[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.b[3]		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w12, v0.b[4]		; CHECK-NEXT: dup v1.16b, v0.b[1]
; CHECK-NEXT: umov w13, v0.b[5]		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w14, v0.b[6]		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: umov w9, v0.b[7]
; CHECK-NEXT: orr w10, w10, w11
; CHECK-NEXT: orr w11, w12, w13
; CHECK-NEXT: orr w8, w8, w10
; CHECK-NEXT: orr w10, w11, w14
; CHECK-NEXT: orr w8, w8, w10
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v32i8:		; GISEL-LABEL: test_redor_v32i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b		; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b		; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov b1, v0.b[1]		; GISEL-NEXT: mov b1, v0.b[1]
Show All 22 Lines	; GISEL-NEXT: ret
%or_result = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a)		%or_result = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a)
ret i8 %or_result		ret i8 %or_result
}		}

define i16 @test_redor_v4i16(<4 x i16> %a) {		define i16 @test_redor_v4i16(<4 x i16> %a) {
; CHECK-LABEL: test_redor_v4i16:		; CHECK-LABEL: test_redor_v4i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.h[3]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[2]		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w10, v0.h[1]		; CHECK-NEXT: dup v1.4h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[0]		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: orr w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: orr w10, w11, w10
; CHECK-NEXT: orr w0, w10, w8
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v4i16:		; GISEL-LABEL: test_redor_v4i16:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
; GISEL-NEXT: mov h2, v0.h[2]		; GISEL-NEXT: mov h2, v0.h[2]
; GISEL-NEXT: mov h3, v0.h[3]		; GISEL-NEXT: mov h3, v0.h[3]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: fmov w10, s2		; GISEL-NEXT: fmov w10, s2
; GISEL-NEXT: fmov w11, s3		; GISEL-NEXT: fmov w11, s3
; GISEL-NEXT: orr w8, w8, w9		; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: orr w9, w10, w11		; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w0, w8, w9		; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a)		%or_result = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a)
ret i16 %or_result		ret i16 %or_result
}		}

define i16 @test_redor_v8i16(<8 x i16> %a) {		define i16 @test_redor_v8i16(<8 x i16> %a) {
; CHECK-LABEL: test_redor_v8i16:		; CHECK-LABEL: test_redor_v8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.h[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[0]		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.h[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[3]		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: orr w9, w10, w11
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v8i16:		; GISEL-LABEL: test_redor_v8i16:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b		; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
; GISEL-NEXT: mov h2, v0.h[2]		; GISEL-NEXT: mov h2, v0.h[2]
Show All 9 Lines	; GISEL-NEXT: ret
%or_result = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a)		%or_result = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a)
ret i16 %or_result		ret i16 %or_result
}		}

define i16 @test_redor_v16i16(<16 x i16> %a) {		define i16 @test_redor_v16i16(<16 x i16> %a) {
; CHECK-LABEL: test_redor_v16i16:		; CHECK-LABEL: test_redor_v16i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.h[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[0]		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.h[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[3]		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: orr w9, w10, w11
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v16i16:		; GISEL-LABEL: test_redor_v16i16:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b		; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b		; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
Show All 10 Lines	; GISEL-NEXT: ret
%or_result = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %a)		%or_result = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %a)
ret i16 %or_result		ret i16 %or_result
}		}

define i32 @test_redor_v2i32(<2 x i32> %a) {		define i32 @test_redor_v2i32(<2 x i32> %a) {
; CHECK-LABEL: test_redor_v2i32:		; CHECK-LABEL: test_redor_v2i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: orr w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v2i32:		; GISEL-LABEL: test_redor_v2i32:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov s1, v0.s[1]		; GISEL-NEXT: mov s1, v0.s[1]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: orr w0, w8, w9		; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a)		%or_result = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a)
ret i32 %or_result		ret i32 %or_result
}		}

define i32 @test_redor_v4i32(<4 x i32> %a) {		define i32 @test_redor_v4i32(<4 x i32> %a) {
; CHECK-LABEL: test_redor_v4i32:		; CHECK-LABEL: test_redor_v4i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v4i32:		; GISEL-LABEL: test_redor_v4i32:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b		; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov s1, v0.s[1]		; GISEL-NEXT: mov s1, v0.s[1]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: orr w0, w8, w9		; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)		%or_result = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
ret i32 %or_result		ret i32 %or_result
}		}

define i32 @test_redor_v8i32(<8 x i32> %a) {		define i32 @test_redor_v8i32(<8 x i32> %a) {
; CHECK-LABEL: test_redor_v8i32:		; CHECK-LABEL: test_redor_v8i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orr w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v8i32:		; GISEL-LABEL: test_redor_v8i32:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b		; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b		; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov s1, v0.s[1]		; GISEL-NEXT: mov s1, v0.s[1]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: orr w0, w8, w9		; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a)		%or_result = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a)
ret i32 %or_result		ret i32 %or_result
}		}

define i64 @test_redor_v2i64(<2 x i64> %a) {		define i64 @test_redor_v2i64(<2 x i64> %a) {
; CHECK-LABEL: test_redor_v2i64:		; CHECK-LABEL: test_redor_v2i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: fmov x0, d0		; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v2i64:		; GISEL-LABEL: test_redor_v2i64:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: fmov x8, d0		; GISEL-NEXT: fmov x8, d0
; GISEL-NEXT: fmov x9, d1		; GISEL-NEXT: fmov x9, d1
; GISEL-NEXT: orr x0, x8, x9		; GISEL-NEXT: orr x0, x8, x9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%or_result = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a)		%or_result = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a)
ret i64 %or_result		ret i64 %or_result
}		}

define i64 @test_redor_v4i64(<4 x i64> %a) {		define i64 @test_redor_v4i64(<4 x i64> %a) {
; CHECK-LABEL: test_redor_v4i64:		; CHECK-LABEL: test_redor_v4i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: fmov x0, d0		; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redor_v4i64:		; GISEL-LABEL: test_redor_v4i64:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b		; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: fmov x8, d0		; GISEL-NEXT: fmov x8, d0
Show All 26 Lines

llvm/test/CodeGen/AArch64/reduce-xor.ll

Show First 20 Lines • Show All 239 Lines • ▼ Show 20 Lines
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%xor_result = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> %a)		%xor_result = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> %a)
ret i8 %xor_result		ret i8 %xor_result
}		}

define i8 @test_redxor_v3i8(<3 x i8> %a) {		define i8 @test_redxor_v3i8(<3 x i8> %a) {
; CHECK-LABEL: test_redxor_v3i8:		; CHECK-LABEL: test_redxor_v3i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w0, w1		; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: eor w0, w8, w2		; CHECK-NEXT: dup v2.4h, w1
		; CHECK-NEXT: mov v0.h[0], w0
		; CHECK-NEXT: mov v1.16b, v0.16b
		; CHECK-NEXT: eor v0.8b, v0.8b, v2.8b
		; CHECK-NEXT: mov v1.h[1], w1
		; CHECK-NEXT: mov v1.h[2], w2
		; CHECK-NEXT: dup v1.2s, v1.s[1]
		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v3i8:		; GISEL-LABEL: test_redxor_v3i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: eor w8, w0, w1		; GISEL-NEXT: eor w8, w0, w1
; GISEL-NEXT: eor w0, w8, w2		; GISEL-NEXT: eor w0, w8, w2
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%xor_result = call i8 @llvm.vector.reduce.xor.v3i8(<3 x i8> %a)		%xor_result = call i8 @llvm.vector.reduce.xor.v3i8(<3 x i8> %a)
ret i8 %xor_result		ret i8 %xor_result
}		}

define i8 @test_redxor_v4i8(<4 x i8> %a) {		define i8 @test_redxor_v4i8(<4 x i8> %a) {
; CHECK-LABEL: test_redxor_v4i8:		; CHECK-LABEL: test_redxor_v4i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.h[3]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[2]		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w10, v0.h[1]		; CHECK-NEXT: dup v1.4h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[0]		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: eor w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: eor w10, w11, w10
; CHECK-NEXT: eor w0, w10, w8
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v4i8:		; GISEL-LABEL: test_redxor_v4i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
; GISEL-NEXT: mov h2, v0.h[2]		; GISEL-NEXT: mov h2, v0.h[2]
; GISEL-NEXT: mov h3, v0.h[3]		; GISEL-NEXT: mov h3, v0.h[3]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: fmov w10, s2		; GISEL-NEXT: fmov w10, s2
; GISEL-NEXT: fmov w11, s3		; GISEL-NEXT: fmov w11, s3
; GISEL-NEXT: eor w8, w8, w9		; GISEL-NEXT: eor w8, w8, w9
; GISEL-NEXT: eor w9, w10, w11		; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w0, w8, w9		; GISEL-NEXT: eor w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%xor_result = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> %a)		%xor_result = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> %a)
ret i8 %xor_result		ret i8 %xor_result
}		}

define i8 @test_redxor_v8i8(<8 x i8> %a) {		define i8 @test_redxor_v8i8(<8 x i8> %a) {
; CHECK-LABEL: test_redxor_v8i8:		; CHECK-LABEL: test_redxor_v8i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.b[5]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: umov w9, v0.b[4]		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w10, v0.b[1]		; CHECK-NEXT: dup v1.4h, v0.h[1]
; CHECK-NEXT: umov w11, v0.b[0]		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w12, v0.b[3]		; CHECK-NEXT: dup v1.8b, v0.b[1]
; CHECK-NEXT: umov w13, v0.b[2]		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w14, v0.b[6]		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: umov w15, v0.b[7]
; CHECK-NEXT: eor w8, w9, w8
; CHECK-NEXT: eor w10, w11, w10
; CHECK-NEXT: eor w11, w13, w12
; CHECK-NEXT: eor w9, w10, w11
; CHECK-NEXT: eor w8, w8, w14
; CHECK-NEXT: eor w8, w9, w8
; CHECK-NEXT: eor w0, w8, w15
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v8i8:		; GISEL-LABEL: test_redxor_v8i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov b1, v0.b[1]		; GISEL-NEXT: mov b1, v0.b[1]
; GISEL-NEXT: mov b2, v0.b[2]		; GISEL-NEXT: mov b2, v0.b[2]
; GISEL-NEXT: mov b3, v0.b[3]		; GISEL-NEXT: mov b3, v0.b[3]
Show All 19 Lines
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%xor_result = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a)		%xor_result = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a)
ret i8 %xor_result		ret i8 %xor_result
}		}

define i8 @test_redxor_v16i8(<16 x i8> %a) {		define i8 @test_redxor_v16i8(<16 x i8> %a) {
; CHECK-LABEL: test_redxor_v16i8:		; CHECK-LABEL: test_redxor_v16i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.b[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.b[0]		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.b[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.b[3]		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w12, v0.b[4]		; CHECK-NEXT: dup v1.16b, v0.b[1]
; CHECK-NEXT: umov w13, v0.b[5]		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w14, v0.b[6]		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: eor w8, w9, w8
; CHECK-NEXT: umov w9, v0.b[7]
; CHECK-NEXT: eor w10, w10, w11
; CHECK-NEXT: eor w11, w12, w13
; CHECK-NEXT: eor w8, w8, w10
; CHECK-NEXT: eor w10, w11, w14
; CHECK-NEXT: eor w8, w8, w10
; CHECK-NEXT: eor w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v16i8:		; GISEL-LABEL: test_redxor_v16i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b		; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov b1, v0.b[1]		; GISEL-NEXT: mov b1, v0.b[1]
; GISEL-NEXT: mov b2, v0.b[2]		; GISEL-NEXT: mov b2, v0.b[2]
Show All 21 Lines	; GISEL-NEXT: ret
%xor_result = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a)		%xor_result = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a)
ret i8 %xor_result		ret i8 %xor_result
}		}

define i8 @test_redxor_v32i8(<32 x i8> %a) {		define i8 @test_redxor_v32i8(<32 x i8> %a) {
; CHECK-LABEL: test_redxor_v32i8:		; CHECK-LABEL: test_redxor_v32i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.b[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.b[0]		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.b[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.b[3]		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w12, v0.b[4]		; CHECK-NEXT: dup v1.16b, v0.b[1]
; CHECK-NEXT: umov w13, v0.b[5]		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w14, v0.b[6]		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: eor w8, w9, w8
; CHECK-NEXT: umov w9, v0.b[7]
; CHECK-NEXT: eor w10, w10, w11
; CHECK-NEXT: eor w11, w12, w13
; CHECK-NEXT: eor w8, w8, w10
; CHECK-NEXT: eor w10, w11, w14
; CHECK-NEXT: eor w8, w8, w10
; CHECK-NEXT: eor w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v32i8:		; GISEL-LABEL: test_redxor_v32i8:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: eor v0.16b, v0.16b, v1.16b		; GISEL-NEXT: eor v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b		; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov b1, v0.b[1]		; GISEL-NEXT: mov b1, v0.b[1]
Show All 22 Lines	; GISEL-NEXT: ret
%xor_result = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %a)		%xor_result = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %a)
ret i8 %xor_result		ret i8 %xor_result
}		}

define i16 @test_redxor_v4i16(<4 x i16> %a) {		define i16 @test_redxor_v4i16(<4 x i16> %a) {
; CHECK-LABEL: test_redxor_v4i16:		; CHECK-LABEL: test_redxor_v4i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.h[3]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[2]		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umov w10, v0.h[1]		; CHECK-NEXT: dup v1.4h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[0]		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: eor w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: eor w10, w11, w10
; CHECK-NEXT: eor w0, w10, w8
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v4i16:		; GISEL-LABEL: test_redxor_v4i16:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
; GISEL-NEXT: mov h2, v0.h[2]		; GISEL-NEXT: mov h2, v0.h[2]
; GISEL-NEXT: mov h3, v0.h[3]		; GISEL-NEXT: mov h3, v0.h[3]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: fmov w10, s2		; GISEL-NEXT: fmov w10, s2
; GISEL-NEXT: fmov w11, s3		; GISEL-NEXT: fmov w11, s3
; GISEL-NEXT: eor w8, w8, w9		; GISEL-NEXT: eor w8, w8, w9
; GISEL-NEXT: eor w9, w10, w11		; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w0, w8, w9		; GISEL-NEXT: eor w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%xor_result = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a)		%xor_result = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a)
ret i16 %xor_result		ret i16 %xor_result
}		}

define i16 @test_redxor_v8i16(<8 x i16> %a) {		define i16 @test_redxor_v8i16(<8 x i16> %a) {
; CHECK-LABEL: test_redxor_v8i16:		; CHECK-LABEL: test_redxor_v8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.h[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[0]		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.h[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[3]		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eor w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: eor w9, w10, w11
; CHECK-NEXT: eor w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v8i16:		; GISEL-LABEL: test_redxor_v8i16:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b		; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
; GISEL-NEXT: mov h2, v0.h[2]		; GISEL-NEXT: mov h2, v0.h[2]
Show All 9 Lines	; GISEL-NEXT: ret
%xor_result = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a)		%xor_result = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a)
ret i16 %xor_result		ret i16 %xor_result
}		}

define i16 @test_redxor_v16i16(<16 x i16> %a) {		define i16 @test_redxor_v16i16(<16 x i16> %a) {
; CHECK-LABEL: test_redxor_v16i16:		; CHECK-LABEL: test_redxor_v16i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w8, v0.h[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: umov w9, v0.h[0]		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umov w10, v0.h[2]		; CHECK-NEXT: dup v1.8h, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[3]		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eor w8, w9, w8		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: eor w9, w10, w11
; CHECK-NEXT: eor w0, w8, w9
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v16i16:		; GISEL-LABEL: test_redxor_v16i16:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: eor v0.16b, v0.16b, v1.16b		; GISEL-NEXT: eor v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b		; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov h1, v0.h[1]		; GISEL-NEXT: mov h1, v0.h[1]
Show All 10 Lines	; GISEL-NEXT: ret
%xor_result = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %a)		%xor_result = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %a)
ret i16 %xor_result		ret i16 %xor_result
}		}

define i32 @test_redxor_v2i32(<2 x i32> %a) {		define i32 @test_redxor_v2i32(<2 x i32> %a) {
; CHECK-LABEL: test_redxor_v2i32:		; CHECK-LABEL: test_redxor_v2i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: eor w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v2i32:		; GISEL-LABEL: test_redxor_v2i32:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0		; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; GISEL-NEXT: mov s1, v0.s[1]		; GISEL-NEXT: mov s1, v0.s[1]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: eor w0, w8, w9		; GISEL-NEXT: eor w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%xor_result = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a)		%xor_result = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a)
ret i32 %xor_result		ret i32 %xor_result
}		}

define i32 @test_redxor_v4i32(<4 x i32> %a) {		define i32 @test_redxor_v4i32(<4 x i32> %a) {
; CHECK-LABEL: test_redxor_v4i32:		; CHECK-LABEL: test_redxor_v4i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eor w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v4i32:		; GISEL-LABEL: test_redxor_v4i32:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b		; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov s1, v0.s[1]		; GISEL-NEXT: mov s1, v0.s[1]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: eor w0, w8, w9		; GISEL-NEXT: eor w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%xor_result = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)		%xor_result = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
ret i32 %xor_result		ret i32 %xor_result
}		}

define i32 @test_redxor_v8i32(<8 x i32> %a) {		define i32 @test_redxor_v8i32(<8 x i32> %a) {
; CHECK-LABEL: test_redxor_v8i32:		; CHECK-LABEL: test_redxor_v8i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: mov w8, v0.s[1]		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w9, s0		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eor w0, w9, w8		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v8i32:		; GISEL-LABEL: test_redxor_v8i32:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: eor v0.16b, v0.16b, v1.16b		; GISEL-NEXT: eor v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b		; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b
; GISEL-NEXT: mov s1, v0.s[1]		; GISEL-NEXT: mov s1, v0.s[1]
; GISEL-NEXT: fmov w8, s0		; GISEL-NEXT: fmov w8, s0
; GISEL-NEXT: fmov w9, s1		; GISEL-NEXT: fmov w9, s1
; GISEL-NEXT: eor w0, w8, w9		; GISEL-NEXT: eor w0, w8, w9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%xor_result = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %a)		%xor_result = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %a)
ret i32 %xor_result		ret i32 %xor_result
}		}

define i64 @test_redxor_v2i64(<2 x i64> %a) {		define i64 @test_redxor_v2i64(<2 x i64> %a) {
; CHECK-LABEL: test_redxor_v2i64:		; CHECK-LABEL: test_redxor_v2i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: fmov x0, d0		; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v2i64:		; GISEL-LABEL: test_redxor_v2i64:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: fmov x8, d0		; GISEL-NEXT: fmov x8, d0
; GISEL-NEXT: fmov x9, d1		; GISEL-NEXT: fmov x9, d1
; GISEL-NEXT: eor x0, x8, x9		; GISEL-NEXT: eor x0, x8, x9
; GISEL-NEXT: ret		; GISEL-NEXT: ret
%xor_result = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a)		%xor_result = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a)
ret i64 %xor_result		ret i64 %xor_result
}		}

define i64 @test_redxor_v4i64(<4 x i64> %a) {		define i64 @test_redxor_v4i64(<4 x i64> %a) {
; CHECK-LABEL: test_redxor_v4i64:		; CHECK-LABEL: test_redxor_v4i64:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: fmov x0, d0		; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
;		;
; GISEL-LABEL: test_redxor_v4i64:		; GISEL-LABEL: test_redxor_v4i64:
; GISEL: // %bb.0:		; GISEL: // %bb.0:
; GISEL-NEXT: eor v0.16b, v0.16b, v1.16b		; GISEL-NEXT: eor v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]		; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: fmov x8, d0		; GISEL-NEXT: fmov x8, d0
Show All 26 Lines

llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s \| FileCheck %s -check-prefixes=CHECK,VBITS_GE_256		; RUN: llc -aarch64-sve-vector-bits-min=256 < %s \| FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s \| FileCheck %s -check-prefixes=CHECK,VBITS_GE_512		; RUN: llc -aarch64-sve-vector-bits-min=512 < %s \| FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s \| FileCheck %s -check-prefixes=CHECK,VBITS_GE_512		; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s \| FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"		target triple = "aarch64-unknown-linux-gnu"

;		;
; ANDV		; ANDV
;		;

; No single instruction NEON ANDV support. Use SVE.		; No single instruction NEON ANDV support. Use SVE.
define i8 @andv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {		define i8 @andv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v8i8:		; CHECK-LABEL: andv_v8i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ptrue p0.b, vl8		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: andv b0, p0, z0.b		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: dup v1.4h, v0.h[1]
		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: dup v1.8b, v0.b[1]
		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a)		%res = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a)
ret i8 %res		ret i8 %res
}		}

; No single instruction NEON ANDV support. Use SVE.		; No single instruction NEON ANDV support. Use SVE.
define i8 @andv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {		define i8 @andv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v16i8:		; CHECK-LABEL: andv_v16i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: ptrue p0.b, vl16		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: andv b0, p0, z0.b		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: dup v1.8h, v0.h[1]
		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: dup v1.16b, v0.b[1]
		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a)		%res = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a)
ret i8 %res		ret i8 %res
}		}

define i8 @andv_v32i8(ptr %a) vscale_range(2,0) #0 {		define i8 @andv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v32i8:		; CHECK-LABEL: andv_v32i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32		; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]		; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: andv b0, p0, z0.b		; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%op = load <32 x i8>, ptr %a		%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %op)		%res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %op)
ret i8 %res		ret i8 %res
}		}

define i8 @andv_v64i8(ptr %a) #0 {		define i8 @andv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v64i8:		; VBITS_GE_256-LABEL: andv_v64i8:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32		; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
; VBITS_GE_256-NEXT: ptrue p0.b, vl32		; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]		; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: andv b0, p0, z0.b		; VBITS_GE_256-NEXT: andv b0, p0, z0.b
; VBITS_GE_256-NEXT: fmov w0, s0		; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
Show All 34 Lines	; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.and.v256i8(<256 x i8> %op)		%res = call i8 @llvm.vector.reduce.and.v256i8(<256 x i8> %op)
ret i8 %res		ret i8 %res
}		}

; No single instruction NEON ANDV support. Use SVE.		; No single instruction NEON ANDV support. Use SVE.
define i16 @andv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {		define i16 @andv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v4i16:		; CHECK-LABEL: andv_v4i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ptrue p0.h, vl4		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: andv h0, p0, z0.h		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: dup v1.4h, v0.h[1]
		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a)		%res = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a)
ret i16 %res		ret i16 %res
}		}

; No single instruction NEON ANDV support. Use SVE.		; No single instruction NEON ANDV support. Use SVE.
define i16 @andv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {		define i16 @andv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v8i16:		; CHECK-LABEL: andv_v8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: ptrue p0.h, vl8		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: andv h0, p0, z0.h		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: dup v1.8h, v0.h[1]
		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a)		%res = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a)
ret i16 %res		ret i16 %res
}		}

define i16 @andv_v16i16(ptr %a) vscale_range(2,0) #0 {		define i16 @andv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v16i16:		; CHECK-LABEL: andv_v16i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16		; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]		; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: andv h0, p0, z0.h		; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%op = load <16 x i16>, ptr %a		%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %op)		%res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %op)
ret i16 %res		ret i16 %res
}		}

define i16 @andv_v32i16(ptr %a) #0 {		define i16 @andv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v32i16:		; VBITS_GE_256-LABEL: andv_v32i16:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16		; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ptrue p0.h, vl16		; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]		; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: andv h0, p0, z0.h		; VBITS_GE_256-NEXT: andv h0, p0, z0.h
; VBITS_GE_256-NEXT: fmov w0, s0		; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
Show All 34 Lines	; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> %op)		%res = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> %op)
ret i16 %res		ret i16 %res
}		}

; No single instruction NEON ANDV support. Use SVE.		; No single instruction NEON ANDV support. Expect a NEON DUP/AND sequence, not SVE.
define i32 @andv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {		define i32 @andv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v2i32:		; CHECK-LABEL: andv_v2i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ptrue p0.s, vl2		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: andv s0, p0, z0.s		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a)		%res = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a)
ret i32 %res		ret i32 %res
}		}

; No single instruction NEON ANDV support. Use SVE.		; No single instruction NEON ANDV support. Expect a NEON DUP/AND sequence, not SVE.
define i32 @andv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {		define i32 @andv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v4i32:		; CHECK-LABEL: andv_v4i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: ptrue p0.s, vl4		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: andv s0, p0, z0.s		; CHECK-NEXT: dup v1.4s, v0.s[1]
		; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)		%res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
ret i32 %res		ret i32 %res
}		}

define i32 @andv_v8i32(ptr %a) vscale_range(2,0) #0 {		define i32 @andv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: andv_v8i32:		; CHECK-LABEL: andv_v8i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8		; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]		; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: andv s0, p0, z0.s		; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%op = load <8 x i32>, ptr %a		%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %op)		%res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %op)
ret i32 %res		ret i32 %res
}		}

define i32 @andv_v16i32(ptr %a) #0 {		define i32 @andv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v16i32:		; VBITS_GE_256-LABEL: andv_v16i32:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8		; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ptrue p0.s, vl8		; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]		; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: andv s0, p0, z0.s		; VBITS_GE_256-NEXT: andv s0, p0, z0.s
; VBITS_GE_256-NEXT: fmov w0, s0		; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%op = load <4 x i64>, ptr %a		%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %op)		%res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %op)
ret i64 %res		ret i64 %res
}		}

define i64 @andv_v8i64(ptr %a) #0 {		define i64 @andv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: andv_v8i64:		; VBITS_GE_256-LABEL: andv_v8i64:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4		; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ptrue p0.d, vl4		; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]		; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: andv d0, p0, z0.d		; VBITS_GE_256-NEXT: andv d0, p0, z0.d
; VBITS_GE_256-NEXT: fmov x0, d0		; VBITS_GE_256-NEXT: fmov x0, d0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
Show All 38 Lines
;		;
; EORV		; EORV
;		;

; No single instruction NEON EORV support. Use SVE.		; No single instruction NEON EORV support. Expect a NEON DUP/EOR sequence, not SVE.
define i8 @eorv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {		define i8 @eorv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v8i8:		; CHECK-LABEL: eorv_v8i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ptrue p0.b, vl8		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: eorv b0, p0, z0.b		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: dup v1.4h, v0.h[1]
		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: dup v1.8b, v0.b[1]
		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a)		%res = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a)
ret i8 %res		ret i8 %res
}		}

; No single instruction NEON EORV support. Use SVE.		; No single instruction NEON EORV support. Expect a NEON DUP/EOR sequence, not SVE.
define i8 @eorv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {		define i8 @eorv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v16i8:		; CHECK-LABEL: eorv_v16i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: ptrue p0.b, vl16		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eorv b0, p0, z0.b		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: dup v1.8h, v0.h[1]
		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: dup v1.16b, v0.b[1]
		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a)		%res = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a)
ret i8 %res		ret i8 %res
}		}

define i8 @eorv_v32i8(ptr %a) vscale_range(2,0) #0 {		define i8 @eorv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v32i8:		; CHECK-LABEL: eorv_v32i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32		; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]		; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: eorv b0, p0, z0.b		; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%op = load <32 x i8>, ptr %a		%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %op)		%res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %op)
ret i8 %res		ret i8 %res
}		}

define i8 @eorv_v64i8(ptr %a) #0 {		define i8 @eorv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v64i8:		; VBITS_GE_256-LABEL: eorv_v64i8:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32		; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
; VBITS_GE_256-NEXT: ptrue p0.b, vl32		; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]		; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: eorv b0, p0, z0.b		; VBITS_GE_256-NEXT: eorv b0, p0, z0.b
; VBITS_GE_256-NEXT: fmov w0, s0		; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
Show All 34 Lines	; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.xor.v256i8(<256 x i8> %op)		%res = call i8 @llvm.vector.reduce.xor.v256i8(<256 x i8> %op)
ret i8 %res		ret i8 %res
}		}

; No single instruction NEON EORV support. Use SVE.		; No single instruction NEON EORV support. Expect a NEON DUP/EOR sequence, not SVE.
define i16 @eorv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {		define i16 @eorv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v4i16:		; CHECK-LABEL: eorv_v4i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ptrue p0.h, vl4		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: eorv h0, p0, z0.h		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: dup v1.4h, v0.h[1]
		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a)		%res = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a)
ret i16 %res		ret i16 %res
}		}

; No single instruction NEON EORV support. Use SVE.		; No single instruction NEON EORV support. Expect a NEON DUP/EOR sequence, not SVE.
define i16 @eorv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {		define i16 @eorv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v8i16:		; CHECK-LABEL: eorv_v8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: ptrue p0.h, vl8		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eorv h0, p0, z0.h		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: dup v1.8h, v0.h[1]
		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a)		%res = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a)
ret i16 %res		ret i16 %res
}		}

define i16 @eorv_v16i16(ptr %a) vscale_range(2,0) #0 {		define i16 @eorv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v16i16:		; CHECK-LABEL: eorv_v16i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16		; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]		; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: eorv h0, p0, z0.h		; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%op = load <16 x i16>, ptr %a		%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %op)		%res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %op)
ret i16 %res		ret i16 %res
}		}

define i16 @eorv_v32i16(ptr %a) #0 {		define i16 @eorv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v32i16:		; VBITS_GE_256-LABEL: eorv_v32i16:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16		; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ptrue p0.h, vl16		; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]		; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: eorv h0, p0, z0.h		; VBITS_GE_256-NEXT: eorv h0, p0, z0.h
; VBITS_GE_256-NEXT: fmov w0, s0		; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
Show All 34 Lines	; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> %op)		%res = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> %op)
ret i16 %res		ret i16 %res
}		}

; No single instruction NEON EORV support. Use SVE.		; No single instruction NEON EORV support. Expect a NEON DUP/EOR sequence, not SVE.
define i32 @eorv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {		define i32 @eorv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v2i32:		; CHECK-LABEL: eorv_v2i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ptrue p0.s, vl2		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: eorv s0, p0, z0.s		; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a)		%res = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a)
ret i32 %res		ret i32 %res
}		}

; No single instruction NEON EORV support. Use SVE.		; No single instruction NEON EORV support. Expect a NEON DUP/EOR sequence, not SVE.
define i32 @eorv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {		define i32 @eorv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v4i32:		; CHECK-LABEL: eorv_v4i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: ptrue p0.s, vl4		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eorv s0, p0, z0.s		; CHECK-NEXT: dup v1.4s, v0.s[1]
		; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)		%res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
ret i32 %res		ret i32 %res
}		}

define i32 @eorv_v8i32(ptr %a) vscale_range(2,0) #0 {		define i32 @eorv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: eorv_v8i32:		; CHECK-LABEL: eorv_v8i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8		; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]		; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: eorv s0, p0, z0.s		; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%op = load <8 x i32>, ptr %a		%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %op)		%res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %op)
ret i32 %res		ret i32 %res
}		}

define i32 @eorv_v16i32(ptr %a) #0 {		define i32 @eorv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v16i32:		; VBITS_GE_256-LABEL: eorv_v16i32:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8		; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ptrue p0.s, vl8		; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]		; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: eorv s0, p0, z0.s		; VBITS_GE_256-NEXT: eorv s0, p0, z0.s
; VBITS_GE_256-NEXT: fmov w0, s0		; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%op = load <4 x i64>, ptr %a		%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %op)		%res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %op)
ret i64 %res		ret i64 %res
}		}

define i64 @eorv_v8i64(ptr %a) #0 {		define i64 @eorv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: eorv_v8i64:		; VBITS_GE_256-LABEL: eorv_v8i64:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4		; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ptrue p0.d, vl4		; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]		; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: eorv d0, p0, z0.d		; VBITS_GE_256-NEXT: eorv d0, p0, z0.d
; VBITS_GE_256-NEXT: fmov x0, d0		; VBITS_GE_256-NEXT: fmov x0, d0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
Show All 38 Lines
;		;
; ORV		; ORV
;		;

; No single instruction NEON ORV support. Use SVE.		; No single instruction NEON ORV support. Expect a NEON DUP/ORR sequence, not SVE.
define i8 @orv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {		define i8 @orv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v8i8:		; CHECK-LABEL: orv_v8i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ptrue p0.b, vl8		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: orv b0, p0, z0.b		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: dup v1.4h, v0.h[1]
		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: dup v1.8b, v0.b[1]
		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a)		%res = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a)
ret i8 %res		ret i8 %res
}		}

; No single instruction NEON ORV support. Use SVE.		; No single instruction NEON ORV support. Expect a NEON DUP/ORR sequence, not SVE.
define i8 @orv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {		define i8 @orv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v16i8:		; CHECK-LABEL: orv_v16i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: ptrue p0.b, vl16		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orv b0, p0, z0.b		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: dup v1.8h, v0.h[1]
		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: dup v1.16b, v0.b[1]
		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a)		%res = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a)
ret i8 %res		ret i8 %res
}		}

define i8 @orv_v32i8(ptr %a) vscale_range(2,0) #0 {		define i8 @orv_v32i8(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v32i8:		; CHECK-LABEL: orv_v32i8:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl32		; CHECK-NEXT: ptrue p0.b, vl32
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]		; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: orv b0, p0, z0.b		; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%op = load <32 x i8>, ptr %a		%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %op)		%res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %op)
ret i8 %res		ret i8 %res
}		}

define i8 @orv_v64i8(ptr %a) #0 {		define i8 @orv_v64i8(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v64i8:		; VBITS_GE_256-LABEL: orv_v64i8:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32		; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
; VBITS_GE_256-NEXT: ptrue p0.b, vl32		; VBITS_GE_256-NEXT: ptrue p0.b, vl32
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]		; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: orv b0, p0, z0.b		; VBITS_GE_256-NEXT: orv b0, p0, z0.b
; VBITS_GE_256-NEXT: fmov w0, s0		; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
Show All 34 Lines	; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.or.v256i8(<256 x i8> %op)		%res = call i8 @llvm.vector.reduce.or.v256i8(<256 x i8> %op)
ret i8 %res		ret i8 %res
}		}

; No single instruction NEON ORV support. Use SVE.		; No single instruction NEON ORV support. Expect a NEON DUP/ORR sequence, not SVE.
define i16 @orv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {		define i16 @orv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v4i16:		; CHECK-LABEL: orv_v4i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ptrue p0.h, vl4		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: orv h0, p0, z0.h		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: dup v1.4h, v0.h[1]
		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a)		%res = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a)
ret i16 %res		ret i16 %res
}		}

; No single instruction NEON ORV support. Use SVE.		; No single instruction NEON ORV support. Expect a NEON DUP/ORR sequence, not SVE.
define i16 @orv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {		define i16 @orv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v8i16:		; CHECK-LABEL: orv_v8i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: ptrue p0.h, vl8		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orv h0, p0, z0.h		; CHECK-NEXT: dup v1.4s, v0.s[1]
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: dup v1.8h, v0.h[1]
		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
		; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a)		%res = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a)
ret i16 %res		ret i16 %res
}		}

define i16 @orv_v16i16(ptr %a) vscale_range(2,0) #0 {		define i16 @orv_v16i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v16i16:		; CHECK-LABEL: orv_v16i16:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16		; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]		; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: orv h0, p0, z0.h		; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%op = load <16 x i16>, ptr %a		%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %op)		%res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %op)
ret i16 %res		ret i16 %res
}		}

define i16 @orv_v32i16(ptr %a) #0 {		define i16 @orv_v32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v32i16:		; VBITS_GE_256-LABEL: orv_v32i16:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16		; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ptrue p0.h, vl16		; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]		; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: orv h0, p0, z0.h		; VBITS_GE_256-NEXT: orv h0, p0, z0.h
; VBITS_GE_256-NEXT: fmov w0, s0		; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
Show All 34 Lines	; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> %op)		%res = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> %op)
ret i16 %res		ret i16 %res
}		}

; No single instruction NEON ORV support. Use SVE.		; No single instruction NEON ORV support. Expect a NEON DUP/ORR sequence, not SVE.
define i32 @orv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {		define i32 @orv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v2i32:		; CHECK-LABEL: orv_v2i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0		; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ptrue p0.s, vl2		; CHECK-NEXT: dup v1.2s, v0.s[1]
; CHECK-NEXT: orv s0, p0, z0.s		; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a)		%res = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a)
ret i32 %res		ret i32 %res
}		}

; No single instruction NEON ORV support. Use SVE.		; No single instruction NEON ORV support. Expect a NEON DUP/ORR sequence, not SVE.
define i32 @orv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {		define i32 @orv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v4i32:		; CHECK-LABEL: orv_v4i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0		; CHECK-NEXT: dup v1.2d, v0.d[1]
; CHECK-NEXT: ptrue p0.s, vl4		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: orv s0, p0, z0.s		; CHECK-NEXT: dup v1.4s, v0.s[1]
		; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)		%res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
ret i32 %res		ret i32 %res
}		}

define i32 @orv_v8i32(ptr %a) vscale_range(2,0) #0 {		define i32 @orv_v8i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: orv_v8i32:		; CHECK-LABEL: orv_v8i32:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8		; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]		; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: orv s0, p0, z0.s		; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0		; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%op = load <8 x i32>, ptr %a		%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %op)		%res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %op)
ret i32 %res		ret i32 %res
}		}

define i32 @orv_v16i32(ptr %a) #0 {		define i32 @orv_v16i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v16i32:		; VBITS_GE_256-LABEL: orv_v16i32:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8		; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ptrue p0.s, vl8		; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]		; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: orv s0, p0, z0.s		; VBITS_GE_256-NEXT: orv s0, p0, z0.s
; VBITS_GE_256-NEXT: fmov w0, s0		; VBITS_GE_256-NEXT: fmov w0, s0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%op = load <4 x i64>, ptr %a		%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %op)		%res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %op)
ret i64 %res		ret i64 %res
}		}

define i64 @orv_v8i64(ptr %a) #0 {		define i64 @orv_v8i64(ptr %a) #0 {
; VBITS_GE_256-LABEL: orv_v8i64:		; VBITS_GE_256-LABEL: orv_v8i64:
; VBITS_GE_256: // %bb.0:		; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4		; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ptrue p0.d, vl4		; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]		; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]		; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d		; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d
; VBITS_GE_256-NEXT: orv d0, p0, z0.d		; VBITS_GE_256-NEXT: orv d0, p0, z0.d
; VBITS_GE_256-NEXT: fmov x0, d0		; VBITS_GE_256-NEXT: fmov x0, d0
; VBITS_GE_256-NEXT: ret		; VBITS_GE_256-NEXT: ret
;		;
▲ Show 20 Lines • Show All 123 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=aarch64 -mattr=+sve < %s \| FileCheck %s			; RUN: llc -mtriple=aarch64 -mattr=+sve < %s \| FileCheck %s

	define i1 @ptest_v16i1_256bit_min_sve(ptr %a, ptr %b) vscale_range(2, 0) {			define i1 @ptest_v16i1_256bit_min_sve(ptr %a, ptr %b) vscale_range(2, 0) {
	; CHECK-LABEL: ptest_v16i1_256bit_min_sve:			; CHECK-LABEL: ptest_v16i1_256bit_min_sve:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: mov x8, #8			; CHECK-NEXT: mov x8, #8 // =0x8
	; CHECK-NEXT: ptrue p0.s, vl8			; CHECK-NEXT: ptrue p0.s, vl8
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]			; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
	; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]			; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
	; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0			; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
	; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0			; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0
	; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff			; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
	; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff			; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
	; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h			; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
	; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h			; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
	; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b			; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
	; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b			; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
	; CHECK-NEXT: mov v1.d[1], v0.d[0]			; CHECK-NEXT: mov v1.d[1], v0.d[0]
	; CHECK-NEXT: ptrue p0.b, vl16			; CHECK-NEXT: umaxv b0, v1.16b
	; CHECK-NEXT: orv b0, p0, z1.b
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v1 = load <16 x float>, ptr %a, align 4			%v1 = load <16 x float>, ptr %a, align 4
	%v2 = fcmp une <16 x float> %v1, zeroinitializer			%v2 = fcmp une <16 x float> %v1, zeroinitializer
	%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)			%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
	ret i1 %v3			ret i1 %v3
	}			}

	define i1 @ptest_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {			define i1 @ptest_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
	; CHECK-LABEL: ptest_v16i1_512bit_min_sve:			; CHECK-LABEL: ptest_v16i1_512bit_min_sve:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s, vl16			; CHECK-NEXT: ptrue p0.s, vl16
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]			; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
	; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0			; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
	; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff			; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
	; CHECK-NEXT: ptrue p0.b, vl16
	; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h			; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
	; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b			; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
	; CHECK-NEXT: orv b0, p0, z0.b			; CHECK-NEXT: umaxv b0, v0.16b
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v1 = load <16 x float>, ptr %a, align 4			%v1 = load <16 x float>, ptr %a, align 4
	%v2 = fcmp une <16 x float> %v1, zeroinitializer			%v2 = fcmp une <16 x float> %v1, zeroinitializer
	%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)			%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
	ret i1 %v3			ret i1 %v3
	}			}

	define i1 @ptest_v16i1_512bit_sve(ptr %a, ptr %b) vscale_range(4, 4) {			define i1 @ptest_v16i1_512bit_sve(ptr %a, ptr %b) vscale_range(4, 4) {
	; CHECK-LABEL: ptest_v16i1_512bit_sve:			; CHECK-LABEL: ptest_v16i1_512bit_sve:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s			; CHECK-NEXT: ptrue p0.s
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]			; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
	; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0			; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
	; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff			; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
	; CHECK-NEXT: ptrue p0.b, vl16
	; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h			; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
	; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b			; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
	; CHECK-NEXT: orv b0, p0, z0.b			; CHECK-NEXT: umaxv b0, v0.16b
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v1 = load <16 x float>, ptr %a, align 4			%v1 = load <16 x float>, ptr %a, align 4
	%v2 = fcmp une <16 x float> %v1, zeroinitializer			%v2 = fcmp une <16 x float> %v1, zeroinitializer
	%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)			%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
	ret i1 %v3			ret i1 %v3
	}			}

	define i1 @ptest_or_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {			define i1 @ptest_or_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
	; CHECK-LABEL: ptest_or_v16i1_512bit_min_sve:			; CHECK-LABEL: ptest_or_v16i1_512bit_min_sve:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s, vl16			; CHECK-NEXT: ptrue p0.s, vl16
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]			; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
	; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]			; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
	; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0			; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
	; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0			; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0
	; CHECK-NEXT: mov p0.b, p1/m, p1.b			; CHECK-NEXT: mov p0.b, p1/m, p1.b
	; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff			; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
	; CHECK-NEXT: ptrue p0.b, vl16
	; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h			; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
	; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b			; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
	; CHECK-NEXT: orv b0, p0, z0.b			; CHECK-NEXT: umaxv b0, v0.16b
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v1 = load <16 x float>, ptr %a, align 4			%v1 = load <16 x float>, ptr %a, align 4
	%v2 = fcmp une <16 x float> %v1, zeroinitializer			%v2 = fcmp une <16 x float> %v1, zeroinitializer
	%v4 = load <16 x float>, ptr %b, align 4			%v4 = load <16 x float>, ptr %b, align 4
	%v5 = fcmp une <16 x float> %v4, zeroinitializer			%v5 = fcmp une <16 x float> %v4, zeroinitializer
	%v6 = or <16 x i1> %v2, %v5			%v6 = or <16 x i1> %v2, %v5
	Show All 11 Lines
	; CHECK-LABEL: ptest_and_v16i1_512bit_sve:			; CHECK-LABEL: ptest_and_v16i1_512bit_sve:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s			; CHECK-NEXT: ptrue p0.s
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]			; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
	; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]			; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
	; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0			; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
	; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0			; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0
	; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff			; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
	; CHECK-NEXT: ptrue p0.b, vl16
	; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h			; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
	; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b			; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
	; CHECK-NEXT: andv b0, p0, z0.b			; CHECK-NEXT: uminv b0, v0.16b
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v1 = load <16 x float>, ptr %a, align 4			%v1 = load <16 x float>, ptr %a, align 4
	%v2 = fcmp une <16 x float> %v1, zeroinitializer			%v2 = fcmp une <16 x float> %v1, zeroinitializer
	%v4 = load <16 x float>, ptr %b, align 4			%v4 = load <16 x float>, ptr %b, align 4
	%v5 = fcmp une <16 x float> %v4, zeroinitializer			%v5 = fcmp une <16 x float> %v4, zeroinitializer
	%v6 = and <16 x i1> %v2, %v5			%v6 = and <16 x i1> %v2, %v5
	%v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)			%v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)
	ret i1 %v7			ret i1 %v7
	}			}

	define i1 @ptest_and_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {			define i1 @ptest_and_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
	; CHECK-LABEL: ptest_and_v16i1_512bit_min_sve:			; CHECK-LABEL: ptest_and_v16i1_512bit_min_sve:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s, vl16			; CHECK-NEXT: ptrue p0.s, vl16
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]			; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
	; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]			; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
	; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0			; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
	; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0			; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0
	; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b			; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
	; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff			; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
	; CHECK-NEXT: ptrue p0.b, vl16
	; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h			; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
	; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b			; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
	; CHECK-NEXT: andv b0, p0, z0.b			; CHECK-NEXT: uminv b0, v0.16b
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v1 = load <16 x float>, ptr %a, align 4			%v1 = load <16 x float>, ptr %a, align 4
	%v2 = fcmp une <16 x float> %v1, zeroinitializer			%v2 = fcmp une <16 x float> %v1, zeroinitializer
	%v4 = load <16 x float>, ptr %b, align 4			%v4 = load <16 x float>, ptr %b, align 4
	%v5 = fcmp une <16 x float> %v4, zeroinitializer			%v5 = fcmp une <16 x float> %v4, zeroinitializer
	%v6 = and <16 x i1> %v2, %v5			%v6 = and <16 x i1> %v2, %v5
	%v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)			%v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)
	ret i1 %v7			ret i1 %v7
	}			}

	declare i1 @llvm.vector.reduce.and.i1.v16i1(<16 x i1>)			declare i1 @llvm.vector.reduce.and.i1.v16i1(<16 x i1>)

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll

	Show All 23 Lines
	; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h			; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
	; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h			; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
	; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h			; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h
	; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b			; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
	; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b			; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
	; CHECK-NEXT: ptrue p0.b, vl8			; CHECK-NEXT: ptrue p0.b, vl8
	; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b			; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
	; CHECK-NEXT: ptrue p0.b, vl16			; CHECK-NEXT: ptrue p0.b, vl16
	; CHECK-NEXT: orv b0, p0, z1.b			; CHECK-NEXT: umaxv b0, p0, z1.b
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v0 = bitcast ptr %a to <16 x float>*			%v0 = bitcast ptr %a to <16 x float>*
	%v1 = load <16 x float>, <16 x float>* %v0, align 4			%v1 = load <16 x float>, <16 x float>* %v0, align 4
	%v2 = fcmp une <16 x float> %v1, zeroinitializer			%v2 = fcmp une <16 x float> %v1, zeroinitializer
	%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)			%v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
	ret i1 %v3			ret i1 %v3
	Show All 40 Lines
	; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h			; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h
	; CHECK-NEXT: ptrue p0.b, vl8			; CHECK-NEXT: ptrue p0.b, vl8
	; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b			; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
	; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b			; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
	; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b			; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
	; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b			; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b
	; CHECK-NEXT: orr z0.d, z1.d, z3.d			; CHECK-NEXT: orr z0.d, z1.d, z3.d
	; CHECK-NEXT: ptrue p0.b, vl16			; CHECK-NEXT: ptrue p0.b, vl16
	; CHECK-NEXT: orv b0, p0, z0.b			; CHECK-NEXT: umaxv b0, p0, z0.b
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v0 = bitcast ptr %a to <16 x float>*			%v0 = bitcast ptr %a to <16 x float>*
	%v1 = load <16 x float>, <16 x float>* %v0, align 4			%v1 = load <16 x float>, <16 x float>* %v0, align 4
	%v2 = fcmp une <16 x float> %v1, zeroinitializer			%v2 = fcmp une <16 x float> %v1, zeroinitializer
	%v3 = bitcast float* %b to <16 x float>*			%v3 = bitcast float* %b to <16 x float>*
	%v4 = load <16 x float>, <16 x float>* %v3, align 4			%v4 = load <16 x float>, <16 x float>* %v3, align 4
	▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h			; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h
	; CHECK-NEXT: ptrue p0.b, vl8			; CHECK-NEXT: ptrue p0.b, vl8
	; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b			; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
	; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b			; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
	; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b			; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
	; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b			; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b
	; CHECK-NEXT: and z0.d, z1.d, z3.d			; CHECK-NEXT: and z0.d, z1.d, z3.d
	; CHECK-NEXT: ptrue p0.b, vl16			; CHECK-NEXT: ptrue p0.b, vl16
	; CHECK-NEXT: andv b0, p0, z0.b			; CHECK-NEXT: uminv b0, p0, z0.b
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%v0 = bitcast ptr %a to <16 x float>*			%v0 = bitcast ptr %a to <16 x float>*
	%v1 = load <16 x float>, <16 x float>* %v0, align 4			%v1 = load <16 x float>, <16 x float>* %v0, align 4
	%v2 = fcmp une <16 x float> %v1, zeroinitializer			%v2 = fcmp une <16 x float> %v1, zeroinitializer
	%v3 = bitcast float* %b to <16 x float>*			%v3 = bitcast float* %b to <16 x float>*
	%v4 = load <16 x float>, <16 x float>* %v3, align 4			%v4 = load <16 x float>, <16 x float>* %v3, align 4
	Show All 9 Lines

llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

	Show First 20 Lines • Show All 79 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i128 @llvm.vector.reduce.and.v1i128(<1 x i128> %a)			%b = call i128 @llvm.vector.reduce.and.v1i128(<1 x i128> %a)
	ret i128 %b			ret i128 %b
	}			}

	define i8 @test_v3i8(<3 x i8> %a) nounwind {			define i8 @test_v3i8(<3 x i8> %a) nounwind {
	; CHECK-LABEL: test_v3i8:			; CHECK-LABEL: test_v3i8:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: and w8, w0, w1			; CHECK-NEXT: movi d0, #0xff00ff00ff00ff
	; CHECK-NEXT: and w8, w8, w2			; CHECK-NEXT: mov v0.h[0], w0
	; CHECK-NEXT: and w0, w8, #0xff			; CHECK-NEXT: mov v0.h[1], w1
				; CHECK-NEXT: mov v0.h[2], w2
				; CHECK-NEXT: dup v1.2s, v0.s[1]
				; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
				; CHECK-NEXT: dup v1.4h, v0.h[1]
				; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
				; CHECK-NEXT: umov w0, v0.h[0]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)			%b = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
	ret i8 %b			ret i8 %b
	}			}

	define i8 @test_v9i8(<9 x i8> %a) nounwind {			define i8 @test_v9i8(<9 x i8> %a) nounwind {
	; CHECK-LABEL: test_v9i8:			; CHECK-LABEL: test_v9i8:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: mov w8, #-1			; CHECK-NEXT: mov w8, #-1 // =0xffffffff
	; CHECK-NEXT: umov w14, v0.b[6]			; CHECK-NEXT: mov v0.b[9], w8
	; CHECK-NEXT: mov v1.16b, v0.16b			; CHECK-NEXT: mov v0.b[10], w8
	; CHECK-NEXT: mov v1.b[9], w8			; CHECK-NEXT: mov v0.b[11], w8
	; CHECK-NEXT: mov v1.b[10], w8			; CHECK-NEXT: mov v0.b[12], w8
	; CHECK-NEXT: mov v1.b[11], w8			; CHECK-NEXT: mov v0.b[13], w8
	; CHECK-NEXT: mov v1.b[13], w8			; CHECK-NEXT: mov v0.b[14], w8
	; CHECK-NEXT: umov w8, v0.b[4]			; CHECK-NEXT: mov v0.b[15], w8
	; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8			; CHECK-NEXT: dup v1.2d, v0.d[1]
	; CHECK-NEXT: and v1.8b, v0.8b, v1.8b			; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
	; CHECK-NEXT: umov w9, v1.b[1]			; CHECK-NEXT: dup v1.4s, v0.s[1]
	; CHECK-NEXT: umov w10, v1.b[0]			; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
	; CHECK-NEXT: umov w11, v1.b[2]			; CHECK-NEXT: dup v1.8h, v0.h[1]
	; CHECK-NEXT: umov w12, v1.b[3]			; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
	; CHECK-NEXT: umov w13, v1.b[5]			; CHECK-NEXT: dup v1.16b, v0.b[1]
	; CHECK-NEXT: and w9, w10, w9			; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
	; CHECK-NEXT: umov w10, v0.b[7]			; CHECK-NEXT: umov w0, v0.b[0]
	; CHECK-NEXT: and w11, w11, w12
	; CHECK-NEXT: and w8, w8, w13
	; CHECK-NEXT: and w9, w9, w11
	; CHECK-NEXT: and w8, w8, w14
	; CHECK-NEXT: and w8, w9, w8
	; CHECK-NEXT: and w0, w8, w10
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a)			%b = call i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a)
	ret i8 %b			ret i8 %b
	}			}

	define i32 @test_v3i32(<3 x i32> %a) nounwind {			define i32 @test_v3i32(<3 x i32> %a) nounwind {
	; CHECK-LABEL: test_v3i32:			; CHECK-LABEL: test_v3i32:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8			; CHECK-NEXT: dup v1.2d, v0.d[1]
	; CHECK-NEXT: mov w8, v0.s[1]			; CHECK-NEXT: dup v2.4s, v0.s[1]
	; CHECK-NEXT: and v0.8b, v0.8b, v1.8b			; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
	; CHECK-NEXT: fmov w9, s0			; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
	; CHECK-NEXT: and w0, w9, w8			; CHECK-NEXT: fmov w0, s0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a)			%b = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a)
	ret i32 %b			ret i32 %b
	}			}

	define i1 @test_v4i1(<4 x i1> %a) nounwind {			define i1 @test_v4i1(<4 x i1> %a) nounwind {
	; CHECK-LABEL: test_v4i1:			; CHECK-LABEL: test_v4i1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
				; CHECK-NEXT: bic v0.4h, #255, lsl #8
	; CHECK-NEXT: uminv h0, v0.4h			; CHECK-NEXT: uminv h0, v0.4h
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)			%b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
	ret i1 %b			ret i1 %b
	}			}

	define i24 @test_v4i24(<4 x i24> %a) nounwind {			define i24 @test_v4i24(<4 x i24> %a) nounwind {
	; CHECK-LABEL: test_v4i24:			; CHECK-LABEL: test_v4i24:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8			; CHECK-NEXT: dup v1.2d, v0.d[1]
	; CHECK-NEXT: and v0.8b, v0.8b, v1.8b			; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
	; CHECK-NEXT: mov w8, v0.s[1]			; CHECK-NEXT: dup v1.4s, v0.s[1]
	; CHECK-NEXT: fmov w9, s0			; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
	; CHECK-NEXT: and w0, w9, w8			; CHECK-NEXT: fmov w0, s0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i24 @llvm.vector.reduce.and.v4i24(<4 x i24> %a)			%b = call i24 @llvm.vector.reduce.and.v4i24(<4 x i24> %a)
	ret i24 %b			ret i24 %b
	}			}

	define i128 @test_v2i128(<2 x i128> %a) nounwind {			define i128 @test_v2i128(<2 x i128> %a) nounwind {
	; CHECK-LABEL: test_v2i128:			; CHECK-LABEL: test_v2i128:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: and x0, x0, x2			; CHECK-NEXT: and x0, x0, x2
	; CHECK-NEXT: and x1, x1, x3			; CHECK-NEXT: and x1, x1, x3
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i128 @llvm.vector.reduce.and.v2i128(<2 x i128> %a)			%b = call i128 @llvm.vector.reduce.and.v2i128(<2 x i128> %a)
	ret i128 %b			ret i128 %b
	}			}

	define i32 @test_v16i32(<16 x i32> %a) nounwind {			define i32 @test_v16i32(<16 x i32> %a) nounwind {
	; CHECK-LABEL: test_v16i32:			; CHECK-LABEL: test_v16i32:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: and v1.16b, v1.16b, v3.16b			; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
	; CHECK-NEXT: and v0.16b, v0.16b, v2.16b			; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
	; CHECK-NEXT: and v0.16b, v0.16b, v1.16b			; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
	; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8			; CHECK-NEXT: dup v1.2d, v0.d[1]
	; CHECK-NEXT: and v0.8b, v0.8b, v1.8b			; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
	; CHECK-NEXT: mov w8, v0.s[1]			; CHECK-NEXT: dup v1.4s, v0.s[1]
	; CHECK-NEXT: fmov w9, s0			; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
	; CHECK-NEXT: and w0, w9, w8			; CHECK-NEXT: fmov w0, s0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a)			%b = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a)
	ret i32 %b			ret i32 %b
	}			}

llvm/test/CodeGen/AArch64/vecreduce-bool.ll

Show All 27 Lines	; CHECK-NEXT: ret
%z = select i1 %y, i32 %a1, i32 %a2		%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z		ret i32 %z
}		}

define i32 @reduce_and_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {		define i32 @reduce_and_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v2:		; CHECK-LABEL: reduce_and_v2:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.2s, v0.2s, #24		; CHECK-NEXT: shl v0.2s, v0.2s, #24
		; CHECK-NEXT: movi d1, #0x0000ff000000ff
; CHECK-NEXT: sshr v0.2s, v0.2s, #24		; CHECK-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-NEXT: cmlt v0.2s, v0.2s, #0		; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s		; CHECK-NEXT: uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tst w8, #0x1		; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: csel w0, w0, w1, ne		; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%x = icmp slt <2 x i8> %a0, zeroinitializer		%x = icmp slt <2 x i8> %a0, zeroinitializer
%y = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %x)		%y = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2		%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z		ret i32 %z
}		}

define i32 @reduce_and_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {		define i32 @reduce_and_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v4:		; CHECK-LABEL: reduce_and_v4:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.4h, v0.4h, #8		; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: sshr v0.4h, v0.4h, #8		; CHECK-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-NEXT: cmlt v0.4h, v0.4h, #0		; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
		; CHECK-NEXT: bic v0.4h, #255, lsl #8
; CHECK-NEXT: uminv h0, v0.4h		; CHECK-NEXT: uminv h0, v0.4h
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tst w8, #0x1		; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: csel w0, w0, w1, ne		; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%x = icmp slt <4 x i8> %a0, zeroinitializer		%x = icmp slt <4 x i8> %a0, zeroinitializer
%y = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x)		%y = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2		%z = select i1 %y, i32 %a1, i32 %a2
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines	; CHECK-NEXT: ret
%z = select i1 %y, i32 %a1, i32 %a2		%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z		ret i32 %z
}		}

define i32 @reduce_or_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {		define i32 @reduce_or_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v2:		; CHECK-LABEL: reduce_or_v2:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.2s, v0.2s, #24		; CHECK-NEXT: shl v0.2s, v0.2s, #24
		; CHECK-NEXT: movi d1, #0x0000ff000000ff
; CHECK-NEXT: sshr v0.2s, v0.2s, #24		; CHECK-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-NEXT: cmlt v0.2s, v0.2s, #0		; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
		; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s		; CHECK-NEXT: umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tst w8, #0x1		; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: csel w0, w0, w1, ne		; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%x = icmp slt <2 x i8> %a0, zeroinitializer		%x = icmp slt <2 x i8> %a0, zeroinitializer
%y = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %x)		%y = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2		%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z		ret i32 %z
}		}

define i32 @reduce_or_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {		define i32 @reduce_or_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v4:		; CHECK-LABEL: reduce_or_v4:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.4h, v0.4h, #8		; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: sshr v0.4h, v0.4h, #8		; CHECK-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-NEXT: cmlt v0.4h, v0.4h, #0		; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
		; CHECK-NEXT: bic v0.4h, #255, lsl #8
; CHECK-NEXT: umaxv h0, v0.4h		; CHECK-NEXT: umaxv h0, v0.4h
; CHECK-NEXT: fmov w8, s0		; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tst w8, #0x1		; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: csel w0, w0, w1, ne		; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%x = icmp slt <4 x i8> %a0, zeroinitializer		%x = icmp slt <4 x i8> %a0, zeroinitializer
%y = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x)		%y = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2		%z = select i1 %y, i32 %a1, i32 %a2
▲ Show 20 Lines • Show All 48 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

	Show First 20 Lines • Show All 132 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)			%b = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
	ret i32 %b			ret i32 %b
	}			}

	define i1 @test_v4i1(<4 x i1> %a) nounwind {			define i1 @test_v4i1(<4 x i1> %a) nounwind {
	; CHECK-LABEL: test_v4i1:			; CHECK-LABEL: test_v4i1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
				; CHECK-NEXT: bic v0.4h, #255, lsl #8
	; CHECK-NEXT: umaxv h0, v0.4h			; CHECK-NEXT: umaxv h0, v0.4h
	; CHECK-NEXT: fmov w8, s0			; CHECK-NEXT: fmov w8, s0
	; CHECK-NEXT: and w0, w8, #0x1			; CHECK-NEXT: and w0, w8, #0x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%b = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a)			%b = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a)
	ret i1 %b			ret i1 %b
	}			}

	Show All 37 Lines

This is an archive of the discontinued LLVM Phabricator instance.

Add more efficient bitwise vector reductions on AArch64
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 513167

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/dag-combine-setcc.ll

llvm/test/CodeGen/AArch64/double_reduct.ll

llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll

llvm/test/CodeGen/AArch64/reduce-and.ll

llvm/test/CodeGen/AArch64/reduce-or.ll

llvm/test/CodeGen/AArch64/reduce-xor.ll

llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll

llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll

llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

llvm/test/CodeGen/AArch64/vecreduce-bool.ll

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

This is an archive of the discontinued LLVM Phabricator instance.

Add more efficient bitwise vector reductions on AArch64
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 513167

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/dag-combine-setcc.ll

llvm/test/CodeGen/AArch64/double_reduct.ll

llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll

llvm/test/CodeGen/AArch64/reduce-and.ll

llvm/test/CodeGen/AArch64/reduce-or.ll

llvm/test/CodeGen/AArch64/reduce-xor.ll

llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll

llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll

llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

llvm/test/CodeGen/AArch64/vecreduce-bool.ll

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

Add more efficient bitwise vector reductions on AArch64
ClosedPublic