This is an archive of the discontinued LLVM Phabricator instance.

Add more efficient bitwise vector reductions on AArch64
ClosedPublic

Authored by Sp00ph on Apr 12 2023, 6:22 PM.

Details

Summary

Improves the codegen for VECREDUCE_{AND,OR,XOR} operations on AArch64. Currently, these are fully scalarized, except when the vector is an <N x i1>. For vectors whose elements are not i1, this patch improves the codegen to O(log(N)) operations, where N is the length of the vector, by repeatedly applying the bitwise operation to the two halves of the vector until only one element, containing the final result, is left. <N x i1> bitwise reductions are handled using VECREDUCE_{UMAX,UMIN,ADD} instead.
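The halving strategy described above can be sketched as follows. This is a hypothetical Python model of the lowering, not the actual SelectionDAG code; the function name and list representation are illustrative only:

```python
def vecreduce_or(v):
    """Model of an O(log N) bitwise OR reduction by repeated halving.

    Each step ORs the low half of the vector with the high half, so an
    N-element vector needs only log2(N) vector operations, versus N-1
    dependent scalar operations when the reduction is fully scalarized.
    Assumes len(v) is a power of two.
    """
    while len(v) > 1:
        half = len(v) // 2
        # One vector-wide OR combines the two halves.
        v = [v[i] | v[half + i] for i in range(half)]
    return v[0]
```

The same shape works for AND and XOR by swapping the operator, since all three are associative and commutative.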

I had to update quite a few codegen tests with these changes, with a general downward trend in instruction count. Since the vector reductions already have tests, I haven't added any new tests myself.

This is my first patch submitted to LLVM, so please tell me if I did anything wrong or if I should change anything.

Diff Detail

Event Timeline

Sp00ph created this revision.Apr 12 2023, 6:22 PM
Sp00ph requested review of this revision.Apr 12 2023, 6:22 PM
Herald added a project: Restricted Project.Apr 12 2023, 6:22 PM
Sp00ph updated this revision to Diff 513038.Apr 12 2023, 7:03 PM

Split mask vectors that don't fit in a single register in half. This reverts some slight regressions in the codegen of vectors that are larger than a register.

It looks like the updated patch only includes the changes since the first diff, not a diff against the base. Can you try to squash the two into a single commit?

Sp00ph updated this revision to Diff 513167.Apr 13 2023, 4:12 AM

Squash commits into one patch.

The patch as it is right now has decreased the codegen quality on some of the tests with non-power-of-2 vector reductions. I have a fix for that (padding the vectors to the next larger power of 2), but I'm not sure whether that still fits in the scope of this patch or whether I should put it in a follow-up patch, especially since you currently can't really trust LLVM to generate good code for non-power-of-2 sized vectors anyway.
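The padding fix mentioned above relies on each bitwise operation having an identity element, so the extra lanes cannot affect the result. A hypothetical sketch (the function name and string-keyed dispatch are illustrative, not from the patch):

```python
def pad_to_pow2(v, op):
    """Pad a vector to the next power-of-two length with the identity
    element of the reduction, so a halving reduction stays correct:
    AND -> all-ones, OR/XOR -> zero."""
    identity = {"and": -1, "or": 0, "xor": 0}[op]
    n = 1
    while n < len(v):
        n *= 2
    return v + [identity] * (n - len(v))
```

After padding, the power-of-two halving reduction applies unchanged, and the padded lanes drop out because `x & -1 == x`, `x | 0 == x`, and `x ^ 0 == x`.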

Is it worth splitting this into i1 and non-i1 parts? There are quite a few changes in the test.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
13318

If SVE is available then the orv/eorv/etc should be preferred.

13346

special -> Special

13348

split -> Split

13352

Formatting - the line is a bit long here.

13378

Would it be possible for vectors <= 64 bits to use the 64-bit type sizes? It won't matter in a lot of cases, but some CPUs have higher throughput for 64-bit vectors.

efriedma added inline comments.
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
13378

For <=64 bits, don't you want to switch to integer registers? orr x0, x0, x0, lsr #32 etc. is generally going to be faster than dup+orr.

Is it worth splitting this into i1 and non-i1 parts? There are quite a few changes in the test.

I'm pretty sure removing the i1 optimization would lead to even more test cases being changed, as codegen currently already performs the i1 optimization (presumably in some platform-independent lowering of REDUCE_{AND,OR}). I introduced that i1 optimization mainly to keep the i1 vector reductions from regressing.

Sp00ph updated this revision to Diff 513813.Apr 14 2023, 5:12 PM

Changes the codegen to split the vectors until they're at most 64 bits large and then do the remaining work in an integer register. This reduces instruction count in many cases, as codegen is able to combine the shift and bitwise operation into one instruction. Also reverts to using andv/orv/eorv if SVE is available.
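The integer-register tail can be modeled as follows. This is a hypothetical Python sketch of the idea behind `orr x0, x0, x0, lsr #32` suggested earlier, assuming eight i8 lanes packed into one 64-bit value; the function name is illustrative:

```python
def reduce_or_8xi8(x):
    """Model of the scalar tail of an OR reduction: once the partial
    results fit in a 64-bit integer register, fold halves together
    with shift + OR. On AArch64 each step maps to a single
    'orr x0, x0, x0, lsr #k' instruction."""
    x |= x >> 32  # OR lanes 4..7 into lanes 0..3
    x |= x >> 16  # OR lanes 2..3 into lanes 0..1
    x |= x >> 8   # OR lane 1 into lane 0
    return x & 0xFF  # lane 0 now holds the OR of all eight bytes
```

Because the shift and the OR fuse into one instruction, this tail costs three instructions instead of the dup+orr pairs needed to keep folding in vector registers.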

Sp00ph marked 6 inline comments as done.Apr 14 2023, 5:13 PM
Sp00ph added inline comments.Apr 15 2023, 4:36 AM
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
13359

Using either zext or sext here adds a few extra instructions to the generated code. Is it guaranteed that any-extending an i1 vector results in a vector whose elements are all either 0 or -1? It seems reasonable because, AFAIK, mask vector elements on AArch64 are always either 0 or -1, but it could also introduce subtle incorrectness if there is some case where any-extending an i1 vector does not produce such a mask vector.

Matt added a subscriber: Matt.Apr 15 2023, 10:33 AM
efriedma added inline comments.Apr 17 2023, 9:29 AM
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
13359

No, no guarantee here. I mean, there are restrictions related to boolean operands of certain specific operations (like the condition of a VSELECT), but there isn't any restriction that applies to arithmetic operations. An easy way to get a vector with arbitrary data in the high bits is truncating from nxi8 to nxi1.

You could generate a different sequence if the operand is known to be sign-extended (ComputeNumSignBits).

Sp00ph added inline comments.Apr 17 2023, 3:53 PM
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
13359

ComputeNumSignBits doesn't seem to work properly on <N x i1> function arguments. For example, an <8 x i1> gets lowered to an <8 x i8> during function argument lowering, and calling ComputeNumSignBits on that returns 1 (even though <N x i1> function arguments seem to always be all zeros or all ones; either that, or the current codegen is already incorrect). If I instead sign-extend the vector in the i1 branch, it adds 2 redundant instructions to all the codegen tests that take an <N x i1> as a function argument. Tests that e.g. reduce an <N x i1> obtained from a setcc don't get those extra instructions, because there's a setcc + sext combine, I believe. I guess this could be fixed by somehow convincing ComputeNumSignBits that an <N x i1> function argument that got lowered to an <N x iM> does in fact have M sign bits?

dmgreen added inline comments.Apr 18 2023, 3:16 AM
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
13359

I believe there is no requirement that arguments are all-ones. For example https://godbolt.org/z/MYdEh1fET. There is a signext attribute that can be applied to scalars, but not vectors.

Sp00ph added inline comments.Apr 18 2023, 4:46 AM
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
13359

In that case it looks like the codegen for the boolean vector reductions is already wrong without this patch. For example, this: https://llvm.godbolt.org/z/YjE8n7q7s causes calls to bad to return 0, when I believe they should return 1 instead, because it does umax then truncate instead of truncate then umax, and those don't commute in the general case.
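The non-commutativity mentioned above is easy to see with a two-lane example where a high bit is set. A minimal Python sketch (lane values chosen for illustration, not taken from the linked testcase):

```python
def trunc_i1(x):
    """Truncate an integer lane to i1 by keeping only the low bit."""
    return x & 1

# Lanes with arbitrary high bits, as produced e.g. by truncating
# a wider vector without masking.
a, b = 2, 1

# umax first, then truncate: max(2, 1) = 2, whose low bit is 0.
umax_then_trunc = trunc_i1(max(a, b))

# truncate first, then umax: lanes become 0 and 1, so the max is 1.
trunc_then_umax = max(trunc_i1(a), trunc_i1(b))
```

Here `umax_then_trunc` is 0 while `trunc_then_umax` is 1, so an OR reduction lowered as umax over lanes with garbage high bits can produce the wrong answer unless the lanes are extended to a known all-zeros/all-ones form first.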

Sp00ph updated this revision to Diff 514790.Apr 18 2023, 5:19 PM

Use sign_ext before umax/umin. Using any_ext instead could lead to miscompilations similar to this: https://github.com/llvm/llvm-project/issues/62211 . Also, choose the best bit width to extend to for short i1 vectors. This leads to better codegen as it doesn't need to e.g. truncate setcc results as part of the reduction.

Sp00ph marked 2 inline comments as done.Apr 18 2023, 5:20 PM

Can you rebase over D148672, now that that is in?

llvm/test/CodeGen/AArch64/reduce-and.ll
259–266

I'm surprised this passes vectors in GPR registers. It would be quite different for values in vector regs.

Sp00ph updated this revision to Diff 518505.May 1 2023, 11:33 AM

Rebase to the main branch as requested.

Thanks. Can you rebase again after 41549b535097?

I think the remaining regressions are some of the illegal types (which will be different if the value was already in a vector), some cases with Not where the sign bits are not calculated from a constant buildvector properly, and one case where I think it hits the Depth limit. Some of those are a bit of a shame but understandable, and we can fix the buildvector if we have a useful testcase for it. I think we can get this in and address that after.

Sp00ph updated this revision to Diff 518820.May 2 2023, 12:02 PM

Rebase again

dmgreen accepted this revision.May 3 2023, 7:07 AM

Thanks. LGTM

Do you have commit access, or should I submit this one on your behalf?

This revision is now accepted and ready to land.May 3 2023, 7:07 AM
Sp00ph added a comment.May 3 2023, 7:08 AM

I don't have commit access, please commit it as "Sp00ph <markuseverling@gmail.com>". Thank you!

This revision was landed with ongoing or failed builds.May 3 2023, 7:56 AM
This revision was automatically updated to reflect the committed changes.