Diff 34686

lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,579 Lines • ▼ Show 20 Lines	for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result		DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register		DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register

break;		break;
}		}
return SDValue();		return SDValue();
}		}

/// Target-specific DAG combine for the across vector reduction.		/// This function handles the log2-shuffle pattern produced by the
/// This function specifically handles the final clean-up step of a vector		/// LoopVectorizer for the across vector reduction. It consists of
/// reduction produced by the LoopVectorizer. It is the log2-shuffle pattern,		/// log2(NumVectorElements) steps and, in each step, 2^(s) elements
/// consisting of log2(NumVectorElements) steps and, in each step, 2^(s)		/// are reduced, where s is an induction variable from 0 to
/// elements are reduced, where s is an induction variable from 0		/// log2(NumVectorElements).
/// to log2(NumVectorElements).		static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
/// For example,		unsigned Op,
/// %1 = vector_shuffle %0, <2,3,u,u>		SelectionDAG &DAG) {
/// %2 = add %0, %1		EVT VTy = OpV->getOperand(0).getValueType();
/// %3 = vector_shuffle %2, <1,u,u,u>		if (!VTy.isVector())
/// %4 = add %2, %3
/// %5 = extract_vector_elt %4, 0
/// becomes :
/// %0 = uaddv %0
/// %1 = extract_vector_elt %0, 0
///
/// FIXME: Currently this function is implemented and tested specifically
/// for the add reduction. We could also support other types of across lane
/// reduction available in AArch64, including SMAXV, SMINV, UMAXV, UMINV,
/// SADDLV, UADDLV, FMAXNMV, FMAXV, FMINNMV, FMINV.
static SDValue
performAcrossLaneReductionCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);

// Check if the input vector is fed by the operator we want to handle.
// We specifically check only ADD for now.
if (N0->getOpcode() != ISD::ADD)
return SDValue();

// The vector extract idx must constant zero because we only expect the final
// result of the reduction is placed in lane 0.
if (!isa<ConstantSDNode>(N1) \|\| cast<ConstantSDNode>(N1)->getZExtValue())
return SDValue();

EVT EltTy = N0.getValueType().getVectorElementType();
if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
return SDValue();		return SDValue();

int NumVecElts = N0.getValueType().getVectorNumElements();		int NumVecElts = VTy.getVectorNumElements();
if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)		if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
return SDValue();		return SDValue();

int NumExpectedSteps = APInt(8, NumVecElts).logBase2();		int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
SDValue PreOp = N0;		SDValue PreOp = OpV;
// Iterate over each step of the across vector reduction.		// Iterate over each step of the across vector reduction.
for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) {		for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) {
// We specifically check ADD for now.
if (PreOp.getOpcode() != ISD::ADD)
return SDValue();
SDValue CurOp = PreOp.getOperand(0);		SDValue CurOp = PreOp.getOperand(0);
SDValue Shuffle = PreOp.getOperand(1);		SDValue Shuffle = PreOp.getOperand(1);
if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) {		if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) {
// Try to swap the 1st and 2nd operand as add is commutative.		// Try to swap the 1st and 2nd operand as add and min/max instructions
		// are commutative.
CurOp = PreOp.getOperand(1);		CurOp = PreOp.getOperand(1);
Shuffle = PreOp.getOperand(0);		Shuffle = PreOp.getOperand(0);
if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)		if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
		jmolloyUnsubmitted Done Reply Inline Actions Any reduction should be commutative, right? surely at least SMAX and UMAX should be? Not just ADD? jmolloy: Any reduction should be commutative, right? surely at least SMAX and UMAX should be? Not just…
return SDValue();		return SDValue();
}		}

		// Check if the input vector is fed by the operator we want to handle,
		// except the last step; the very first input vector is not necessarily
		jmolloyUnsubmitted Done Reply Inline Actions Braces around the else jmolloy: Braces around the else
		jmolloyUnsubmitted Done Reply Inline Actions necessarily jmolloy: necessarily
		// the same operator we are handling.
		if (CurOp.getOpcode() != Op && (CurStep != (NumExpectedSteps - 1)))
		return SDValue();

// Check if it forms one step of the across vector reduction.		// Check if it forms one step of the across vector reduction.
// E.g.,		// E.g.,
// %cur = add %1, %0		// %cur = add %1, %0
// %shuffle = vector_shuffle %cur, <2, 3, u, u>		// %shuffle = vector_shuffle %cur, <2, 3, u, u>
// %pre = add %cur, %shuffle		// %pre = add %cur, %shuffle
if (Shuffle.getOperand(0) != CurOp)		if (Shuffle.getOperand(0) != CurOp)
return SDValue();		return SDValue();

Show All 11 Lines	for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) {
// step 2 : <4,5,6,7,u,u,u,u>		// step 2 : <4,5,6,7,u,u,u,u>
for (int i = 0; i < NumVecElts; ++i)		for (int i = 0; i < NumVecElts; ++i)
if ((i < NumMaskElts && Mask[i] != (NumMaskElts + i)) \|\|		if ((i < NumMaskElts && Mask[i] != (NumMaskElts + i)) \|\|
(i >= NumMaskElts && !(Mask[i] < 0)))		(i >= NumMaskElts && !(Mask[i] < 0)))
return SDValue();		return SDValue();

PreOp = CurOp;		PreOp = CurOp;
}		}
		unsigned Opcode;
		switch (Op) {
		default:
		llvm_unreachable("Unexpected operator for across vector reduction");
		case ISD::ADD:
		Opcode = AArch64ISD::UADDV;
		break;
		case ISD::SMAX:
		Opcode = AArch64ISD::SMAXV;
		break;
		case ISD::UMAX:
		Opcode = AArch64ISD::UMAXV;
		break;
		case ISD::SMIN:
		Opcode = AArch64ISD::SMINV;
		break;
		case ISD::UMIN:
		Opcode = AArch64ISD::UMINV;
		break;
		}
SDLoc DL(N);		SDLoc DL(N);
return DAG.getNode(		return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),		DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
DAG.getNode(AArch64ISD::UADDV, DL, PreOp.getSimpleValueType(), PreOp),
DAG.getConstant(0, DL, MVT::i64));		DAG.getConstant(0, DL, MVT::i64));
}		}

		/// Target-specific DAG combine for the across vector min/max reductions.
		/// This function specifically handles the final clean-up step of the vector
		/// min/max reductions produced by the LoopVectorizer. It is the log2-shuffle
		/// pattern, which narrows down and finds the final min/max value from all
		/// elements of the vector.
		/// For example, for a <16 x i8> vector :
		/// %svn0 = vector_shuffle %0, undef<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u>
		/// %smax0 = smax %0, %svn0
		/// %svn1 = vector_shuffle %smax0, undef<4,5,6,7,u,u,u,u,u,u,u,u,u,u,u,u>
		/// %smax1 = smax %smax0, %svn1
		/// %svn2 = vector_shuffle %smax1, undef<2,3,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
		/// %smax2 = smax %smax1, %svn2
		/// %svn3 = vector_shuffle %smax2, undef<1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
		/// %sc = setcc %smax2, %svn3, gt
		/// %n0 = extract_vector_elt %sc, #0
		/// %n1 = extract_vector_elt %smax2, #0
		/// %n2 = extract_vector_elt %smax2, #1
		/// %result = select %n0, %n1, %n2
		/// becomes :
		/// %1 = smaxv %0
		/// %result = extract_vector_elt %1, 0
		/// FIXME: Currently this function matches only SMAXV, UMAXV, SMINV, and UMINV.
		/// We could also support other types of across lane reduction available
		/// in AArch64, including FMAXNMV, FMAXV, FMINNMV, and FMINV.
		static SDValue
		performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
		const AArch64Subtarget *Subtarget) {
		if (!Subtarget->hasNEON())
		return SDValue();

		SDValue N0 = N->getOperand(0);
		SDValue IfTrue = N->getOperand(1);
		SDValue IfFalse = N->getOperand(2);

		// Check if the SELECT merges up the final result of the min/max
		// from a vector.
		if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT \|\|
		IfTrue.getOpcode() != ISD::EXTRACT_VECTOR_ELT \|\|
		IfFalse.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
		return SDValue();

		// Expect N0 is fed by SETCC.
		SDValue SetCC = N0.getOperand(0);
		EVT SetCCVT = SetCC.getValueType();
		if (SetCC.getOpcode() != ISD::SETCC \|\| !SetCCVT.isVector() \|\|
		SetCCVT.getVectorElementType() != MVT::i1)
		return SDValue();

		SDValue VectorOp = SetCC.getOperand(0);
		unsigned Op = VectorOp->getOpcode();
		// Check if the input vector is fed by the operator we want to handle.
		if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN && Op != ISD::UMIN)
		return SDValue();

		EVT VTy = VectorOp.getValueType();
		if (!VTy.isVector())
		return SDValue();

		EVT EltTy = VTy.getVectorElementType();
		if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
		return SDValue();

		// Check if extracting from the same vector.
		// For example,
		// %sc = setcc %vector, %svn1, gt
		// %n0 = extract_vector_elt %sc, #0
		// %n1 = extract_vector_elt %vector, #0
		// %n2 = extract_vector_elt %vector, #1
		if (!(VectorOp == IfTrue->getOperand(0) &&
		VectorOp == IfFalse->getOperand(0)))
		return SDValue();

		// Check if the condition code is matched with the operator type.
		ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
		if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) \|\|
		(Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) \|\|
		(Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) \|\|
		(Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE))
		return SDValue();

		// Expect to check only lane 0 from the vector SETCC.
		if (!isa<ConstantSDNode>(N0.getOperand(1)) \|\|
		cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue() != 0)
		mcrosierUnsubmitted Done Reply Inline Actions It might improve readability if we check against != 0. I.e., .. getZExtValue() != 0) mcrosier: It might improve readability if we check against != 0. I.e., .. getZExtValue() != 0)
		return SDValue();

		// Expect to extract the true value from lane 0.
		if (!isa<ConstantSDNode>(IfTrue.getOperand(1)) \|\|
		cast<ConstantSDNode>(IfTrue.getOperand(1))->getZExtValue() != 0)
		mcrosierUnsubmitted Done Reply Inline Actions It might improve readability if we check against != 0. I.e., .. getZExtValue() != 0) mcrosier: It might improve readability if we check against != 0. I.e., .. getZExtValue() != 0)
		return SDValue();

		// Expect to extract the false value from lane 1.
		if (!isa<ConstantSDNode>(IfFalse.getOperand(1)) \|\|
		cast<ConstantSDNode>(IfFalse.getOperand(1))->getZExtValue() != 1)
		return SDValue();

		return tryMatchAcrossLaneShuffleForReduction(N, SetCC, Op, DAG);
		}

		/// Target-specific DAG combine for the across vector add reduction.
		/// This function specifically handles the final clean-up step of the vector
		/// add reduction produced by the LoopVectorizer. It is the log2-shuffle
		/// pattern, which adds all elements of a vector together.
		/// For example, for a <4 x i32> vector :
		/// %1 = vector_shuffle %0, <2,3,u,u>
		/// %2 = add %0, %1
		/// %3 = vector_shuffle %2, <1,u,u,u>
		/// %4 = add %2, %3
		/// %result = extract_vector_elt %4, 0
		/// becomes :
		/// %0 = uaddv %0
		/// %result = extract_vector_elt %0, 0
		static SDValue
		performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
		const AArch64Subtarget *Subtarget) {
		if (!Subtarget->hasNEON())
		return SDValue();
		SDValue N0 = N->getOperand(0);
		SDValue N1 = N->getOperand(1);

		// Check if the input vector is fed by the ADD.
		if (N0->getOpcode() != ISD::ADD)
		return SDValue();

		// The vector extract idx must be constant zero because we expect the final
		// result of the reduction to be placed in lane 0.
		if (!isa<ConstantSDNode>(N1) \|\| cast<ConstantSDNode>(N1)->getZExtValue() != 0)
		return SDValue();

		EVT VTy = N0.getValueType();
		if (!VTy.isVector())
		return SDValue();

		EVT EltTy = VTy.getVectorElementType();
		if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
		return SDValue();

		return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
		}

/// Target-specific DAG combine function for NEON load/store intrinsics		/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.		/// to merge base address updates.
static SDValue performNEONPostLDSTCombine(SDNode *N,		static SDValue performNEONPostLDSTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
if (DCI.isBeforeLegalize() \|\| DCI.isCalledByLegalizer())		if (DCI.isBeforeLegalize() \|\| DCI.isCalledByLegalizer())
return SDValue();		return SDValue();

▲ Show 20 Lines • Show All 562 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND:		case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:		case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:		case ISD::SIGN_EXTEND:
return performExtendCombine(N, DCI, DAG);		return performExtendCombine(N, DCI, DAG);
case ISD::BITCAST:		case ISD::BITCAST:
return performBitcastCombine(N, DCI, DAG);		return performBitcastCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:		case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);		return performConcatVectorsCombine(N, DCI, DAG);
case ISD::SELECT:		case ISD::SELECT: {
return performSelectCombine(N, DCI);		SDValue RV = performSelectCombine(N, DCI);
		if (!RV.getNode())
		RV = performAcrossLaneMinMaxReductionCombine(N, DAG, Subtarget);
		return RV;
		}
case ISD::VSELECT:		case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);		return performVSelectCombine(N, DCI.DAG);
case ISD::STORE:		case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);		return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:		case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);		return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::CSEL:		case AArch64ISD::CSEL:
return performCONDCombine(N, DCI, DAG, 2, 3);		return performCONDCombine(N, DCI, DAG, 2, 3);
case AArch64ISD::DUP:		case AArch64ISD::DUP:
return performPostLD1Combine(N, DCI, false);		return performPostLD1Combine(N, DCI, false);
case AArch64ISD::NVCAST:		case AArch64ISD::NVCAST:
return performNVCASTCombine(N);		return performNVCASTCombine(N);
case ISD::INSERT_VECTOR_ELT:		case ISD::INSERT_VECTOR_ELT:
return performPostLD1Combine(N, DCI, true);		return performPostLD1Combine(N, DCI, true);
case ISD::EXTRACT_VECTOR_ELT:		case ISD::EXTRACT_VECTOR_ELT:
return performAcrossLaneReductionCombine(N, DAG, Subtarget);		return performAcrossLaneAddReductionCombine(N, DAG, Subtarget);
case ISD::INTRINSIC_VOID:		case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:		case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {		switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
case Intrinsic::aarch64_neon_ld2:		case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:		case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:		case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld1x2:		case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:		case Intrinsic::aarch64_neon_ld1x3:
▲ Show 20 Lines • Show All 301 Lines • Show Last 20 Lines

test/CodeGen/AArch64/aarch64-addv.ll

	; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s \| FileCheck %s			; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s \| FileCheck %s
				mcrosierUnsubmitted Done Reply Inline Actions I believe this fix was committed in r246833. You may need to rebase your patch. mcrosier: I believe this fix was committed in r246833. You may need to rebase your patch.

	define i8 @f_v16i8(<16 x i8>* %arr) {			define i8 @add_B(<16 x i8>* %arr) {
	; CHECK-LABEL: f_v16i8			; CHECK-LABEL: add_B
	; CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b			; CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b
	%bin.rdx = load <16 x i8>, <16 x i8>* %arr			%bin.rdx = load <16 x i8>, <16 x i8>* %arr
	%rdx.shuf0 = shufflevector <16 x i8> %bin.rdx, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%rdx.shuf0 = shufflevector <16 x i8> %bin.rdx, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%bin.rdx0 = add <16 x i8> %bin.rdx, %rdx.shuf0			%bin.rdx0 = add <16 x i8> %bin.rdx, %rdx.shuf0
	%rdx.shuf = shufflevector <16 x i8> %bin.rdx0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef >			%rdx.shuf = shufflevector <16 x i8> %bin.rdx0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef >
	%bin.rdx11 = add <16 x i8> %bin.rdx0, %rdx.shuf			%bin.rdx11 = add <16 x i8> %bin.rdx0, %rdx.shuf
	%rdx.shuf12 = shufflevector <16 x i8> %bin.rdx11, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>			%rdx.shuf12 = shufflevector <16 x i8> %bin.rdx11, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
	%bin.rdx13 = add <16 x i8> %bin.rdx11, %rdx.shuf12			%bin.rdx13 = add <16 x i8> %bin.rdx11, %rdx.shuf12
	%rdx.shuf13 = shufflevector <16 x i8> %bin.rdx13, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>			%rdx.shuf13 = shufflevector <16 x i8> %bin.rdx13, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
	%bin.rdx14 = add <16 x i8> %bin.rdx13, %rdx.shuf13			%bin.rdx14 = add <16 x i8> %bin.rdx13, %rdx.shuf13
	%r = extractelement <16 x i8> %bin.rdx14, i32 0			%r = extractelement <16 x i8> %bin.rdx14, i32 0
	ret i8 %r			ret i8 %r
	}			}

	define i16 @f_v8i16(<8 x i16>* %arr) {			define i16 @add_H(<8 x i16>* %arr) {
	; CHECK-LABEL: f_v8i16			; CHECK-LABEL: add_H
	; CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h			; CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h
	%bin.rdx = load <8 x i16>, <8 x i16>* %arr			%bin.rdx = load <8 x i16>, <8 x i16>* %arr
	%rdx.shuf = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef>			%rdx.shuf = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef>
	%bin.rdx11 = add <8 x i16> %bin.rdx, %rdx.shuf			%bin.rdx11 = add <8 x i16> %bin.rdx, %rdx.shuf
	%rdx.shuf12 = shufflevector <8 x i16> %bin.rdx11, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%rdx.shuf12 = shufflevector <8 x i16> %bin.rdx11, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%bin.rdx13 = add <8 x i16> %bin.rdx11, %rdx.shuf12			%bin.rdx13 = add <8 x i16> %bin.rdx11, %rdx.shuf12
	%rdx.shuf13 = shufflevector <8 x i16> %bin.rdx13, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%rdx.shuf13 = shufflevector <8 x i16> %bin.rdx13, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%bin.rdx14 = add <8 x i16> %bin.rdx13, %rdx.shuf13			%bin.rdx14 = add <8 x i16> %bin.rdx13, %rdx.shuf13
	%r = extractelement <8 x i16> %bin.rdx14, i32 0			%r = extractelement <8 x i16> %bin.rdx14, i32 0
	ret i16 %r			ret i16 %r
	}			}

	define i32 @f_v4i32( <4 x i32>* %arr) {			define i32 @add_S( <4 x i32>* %arr) {
	; CHECK-LABEL: f_v4i32			; CHECK-LABEL: add_S
	; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s			; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
	%bin.rdx = load <4 x i32>, <4 x i32>* %arr			%bin.rdx = load <4 x i32>, <4 x i32>* %arr
	%rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>			%rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
	%bin.rdx11 = add <4 x i32> %bin.rdx, %rdx.shuf			%bin.rdx11 = add <4 x i32> %bin.rdx, %rdx.shuf
	%rdx.shuf12 = shufflevector <4 x i32> %bin.rdx11, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>			%rdx.shuf12 = shufflevector <4 x i32> %bin.rdx11, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
	%bin.rdx13 = add <4 x i32> %bin.rdx11, %rdx.shuf12			%bin.rdx13 = add <4 x i32> %bin.rdx11, %rdx.shuf12
	%r = extractelement <4 x i32> %bin.rdx13, i32 0			%r = extractelement <4 x i32> %bin.rdx13, i32 0
	ret i32 %r			ret i32 %r
	}			}

	define i64 @f_v2i64(<2 x i64>* %arr) {			define i64 @add_D(<2 x i64>* %arr) {
	; CHECK-LABEL: f_v2i64			; CHECK-LABEL: add_D
	; CHECK-NOT: addv			; CHECK-NOT: addv
	%bin.rdx = load <2 x i64>, <2 x i64>* %arr			%bin.rdx = load <2 x i64>, <2 x i64>* %arr
	%rdx.shuf0 = shufflevector <2 x i64> %bin.rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>			%rdx.shuf0 = shufflevector <2 x i64> %bin.rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
	%bin.rdx0 = add <2 x i64> %bin.rdx, %rdx.shuf0			%bin.rdx0 = add <2 x i64> %bin.rdx, %rdx.shuf0
	%r = extractelement <2 x i64> %bin.rdx0, i32 0			%r = extractelement <2 x i64> %bin.rdx0, i32 0
	ret i64 %r			ret i64 %r
	}			}

test/CodeGen/AArch64/aarch64-minmaxv.ll

This file was added.

				; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s \| FileCheck %s

				mcrosierUnsubmitted Done Reply Inline Actions Please add -aarch64-neon-syntax=generic, so this doesn't fail on Darwin. mcrosier: Please add -aarch64-neon-syntax=generic, so this doesn't fail on Darwin.
				target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64-linu--gnu"

				; CHECK-LABEL: smax_B
				; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
				define i8 @smax_B(<16 x i8>* nocapture readonly %arr) {
				%arr.load = load <16 x i8>, <16 x i8>* %arr
				%rdx.shuf = shufflevector <16 x i8> %arr.load, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp22 = icmp sgt <16 x i8> %arr.load, %rdx.shuf
				%rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %arr.load, <16 x i8> %rdx.shuf
				%rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp25 = icmp sgt <16 x i8> %rdx.minmax.select23, %rdx.shuf24
				%rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
				%rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp28 = icmp sgt <16 x i8> %rdx.minmax.select26, %rdx.shuf27
				%rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
				%rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp31 = icmp sgt <16 x i8> %rdx.minmax.select29, %rdx.shuf30
				%rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
				%rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
				%rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
				%r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
				ret i8 %r
				}

				; CHECK-LABEL: smax_H
				; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
				define i16 @smax_H(<8 x i16>* nocapture readonly %arr) {
				%rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
				%rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp23 = icmp sgt <8 x i16> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
				%rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp26 = icmp sgt <8 x i16> %rdx.minmax.select24, %rdx.shuf25
				%rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
				%rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp29 = icmp sgt <8 x i16> %rdx.minmax.select27, %rdx.shuf28
				%rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
				%rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
				%rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
				%r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
				ret i16 %r
				}

				; CHECK-LABEL: smax_S
				; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
				define i32 @smax_S(<4 x i32> * nocapture readonly %arr) {
				%rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr
				%rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
				%rdx.minmax.cmp18 = icmp sgt <4 x i32> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
				%rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp21 = icmp sgt <4 x i32> %rdx.minmax.select19, %rdx.shuf20
				%rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
				%rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
				%rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
				%r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
				ret i32 %r
				}

				; CHECK-LABEL: smax_D
				; CHECK-NOT: smaxv
				define i64 @smax_D(<2 x i64>* nocapture readonly %arr) {
				%rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
				%rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
				%rdx.minmax.cmp18 = icmp sgt <2 x i64> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
				%rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
				%rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
				%r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
				ret i64 %r
				}


				; CHECK-LABEL: umax_B
				; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
				define i8 @umax_B(<16 x i8>* nocapture readonly %arr) {
				%rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr
				%rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp22 = icmp ugt <16 x i8> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf
				%rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp25 = icmp ugt <16 x i8> %rdx.minmax.select23, %rdx.shuf24
				%rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
				%rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp28 = icmp ugt <16 x i8> %rdx.minmax.select26, %rdx.shuf27
				%rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
				%rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp31 = icmp ugt <16 x i8> %rdx.minmax.select29, %rdx.shuf30
				%rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
				%rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
				%rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
				%r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
				ret i8 %r
				}

				; CHECK-LABEL: umax_H
				; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
				define i16 @umax_H(<8 x i16>* nocapture readonly %arr) {
				%rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
				%rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp23 = icmp ugt <8 x i16> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
				%rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp26 = icmp ugt <8 x i16> %rdx.minmax.select24, %rdx.shuf25
				%rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
				%rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp29 = icmp ugt <8 x i16> %rdx.minmax.select27, %rdx.shuf28
				%rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
				%rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
				%rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
				%r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
				ret i16 %r
				}

				; umax_S: unsigned-max reduction over the <4 x i32> vector loaded from %arr,
				; written as two shuffle / icmp ugt / select halving steps (4 -> 2 -> 1 live
				; lanes), the last one finishing with a scalar select on extracted lanes.
				; The FileCheck directives below require a umaxv with an s-register
				; destination and a .4s source in the output.
				; CHECK-LABEL: umax_S
				; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
				define i32 @umax_S(<4 x i32>* nocapture readonly %arr) {
				%rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr
				; Step 1: bring lanes 2-3 down to lanes 0-1 and keep the unsigned-greater value.
				%rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
				%rdx.minmax.cmp18 = icmp ugt <4 x i32> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
				; Step 2: bring lane 1 down to lane 0; only lane 0 of this compare is used.
				%rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp21 = icmp ugt <4 x i32> %rdx.minmax.select19, %rdx.shuf20
				; Scalar tail: lane 1 of %rdx.minmax.select19 equals lane 0 of %rdx.shuf20.
				%rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
				%rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
				%rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
				%r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
				ret i32 %r
				}

				; umax_D: negative test. The same unsigned-max reduction shape applied to a
				; <2 x i64> vector (a single 2 -> 1 step). The FileCheck directives below
				; require that no umaxv appears in the output for this function.
				; NOTE(review): presumably because AArch64 has no across-lane umaxv form for
				; 64-bit elements - confirm against the ISA reference.
				; CHECK-LABEL: umax_D
				; CHECK-NOT: umaxv
				define i64 @umax_D(<2 x i64>* nocapture readonly %arr) {
				%rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
				; Single step: bring lane 1 down to lane 0 and compare (unsigned greater-than).
				%rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
				%rdx.minmax.cmp18 = icmp ugt <2 x i64> %rdx.minmax.select, %rdx.shuf
				; Scalar tail: pick between lane 0 and lane 1 of the loaded vector.
				%rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
				%rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
				%rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
				%r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
				ret i64 %r
				}


				; smin_B: signed-min reduction over the <16 x i8> vector loaded from %arr,
				; written as four shuffle / icmp slt / select halving steps (16 -> 8 -> 4 -> 2
				; -> 1 live lanes). The last step compares as a vector but performs the
				; select on scalars extracted from lanes 0 and 1. The FileCheck directives
				; below require a sminv with a b-register destination and a .16b source.
				; CHECK-LABEL: smin_B
				; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
				define i8 @smin_B(<16 x i8>* nocapture readonly %arr) {
				%rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr
				; Step 1: bring lanes 8-15 down to lanes 0-7 and keep the signed-smaller value.
				%rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp22 = icmp slt <16 x i8> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf
				; Step 2: bring lanes 4-7 down to lanes 0-3 and repeat.
				%rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp25 = icmp slt <16 x i8> %rdx.minmax.select23, %rdx.shuf24
				%rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
				; Step 3: bring lanes 2-3 down to lanes 0-1 and repeat.
				%rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp28 = icmp slt <16 x i8> %rdx.minmax.select26, %rdx.shuf27
				%rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
				; Step 4: bring lane 1 down to lane 0; only lane 0 of this compare is used.
				%rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp31 = icmp slt <16 x i8> %rdx.minmax.select29, %rdx.shuf30
				; Scalar tail: lane 1 of %rdx.minmax.select29 equals lane 0 of %rdx.shuf30.
				%rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
				%rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
				%rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
				%r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
				ret i8 %r
				}

				; smin_H: signed-min reduction over the <8 x i16> vector loaded from %arr,
				; written as three shuffle / icmp slt / select halving steps (8 -> 4 -> 2 -> 1
				; live lanes), the last one finishing with a scalar select on extracted lanes.
				; The FileCheck directives below require a sminv with an h-register
				; destination and a .8h source in the output.
				; CHECK-LABEL: smin_H
				; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
				define i16 @smin_H(<8 x i16>* nocapture readonly %arr) {
				%rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
				; Step 1: bring lanes 4-7 down to lanes 0-3 and keep the signed-smaller value.
				%rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp23 = icmp slt <8 x i16> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
				; Step 2: bring lanes 2-3 down to lanes 0-1 and repeat.
				%rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp26 = icmp slt <8 x i16> %rdx.minmax.select24, %rdx.shuf25
				%rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
				; Step 3: bring lane 1 down to lane 0; only lane 0 of this compare is used.
				%rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp29 = icmp slt <8 x i16> %rdx.minmax.select27, %rdx.shuf28
				; Scalar tail: lane 1 of %rdx.minmax.select27 equals lane 0 of %rdx.shuf28.
				%rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
				%rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
				%rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
				%r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
				ret i16 %r
				}

				; smin_S: signed-min reduction over the <4 x i32> vector loaded from %arr,
				; written as two shuffle / icmp slt / select halving steps (4 -> 2 -> 1 live
				; lanes), the last one finishing with a scalar select on extracted lanes.
				; The FileCheck directives below require a sminv with an s-register
				; destination and a .4s source in the output.
				; CHECK-LABEL: smin_S
				; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
				define i32 @smin_S(<4 x i32>* nocapture readonly %arr) {
				%rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr
				; Step 1: bring lanes 2-3 down to lanes 0-1 and keep the signed-smaller value.
				%rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
				%rdx.minmax.cmp18 = icmp slt <4 x i32> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
				; Step 2: bring lane 1 down to lane 0; only lane 0 of this compare is used.
				%rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp21 = icmp slt <4 x i32> %rdx.minmax.select19, %rdx.shuf20
				; Scalar tail: lane 1 of %rdx.minmax.select19 equals lane 0 of %rdx.shuf20.
				%rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
				%rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
				%rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
				%r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
				ret i32 %r
				}

				; smin_D: negative test. The same signed-min reduction shape applied to a
				; <2 x i64> vector (a single 2 -> 1 step). The FileCheck directives below
				; require that no sminv appears in the output for this function.
				; NOTE(review): presumably because AArch64 has no across-lane sminv form for
				; 64-bit elements - confirm against the ISA reference.
				; CHECK-LABEL: smin_D
				; CHECK-NOT: sminv
				define i64 @smin_D(<2 x i64>* nocapture readonly %arr) {
				%rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
				; Single step: bring lane 1 down to lane 0 and compare (signed less-than).
				%rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
				%rdx.minmax.cmp18 = icmp slt <2 x i64> %rdx.minmax.select, %rdx.shuf
				; Scalar tail: pick between lane 0 and lane 1 of the loaded vector.
				%rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
				%rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
				%rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
				%r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
				ret i64 %r
				}


				; umin_B: unsigned-min reduction over the <16 x i8> vector loaded from %arr,
				; written as four shuffle / icmp ult / select halving steps (16 -> 8 -> 4 -> 2
				; -> 1 live lanes). The last step compares as a vector but performs the
				; select on scalars extracted from lanes 0 and 1. The FileCheck directives
				; below require a uminv with a b-register destination and a .16b source.
				; CHECK-LABEL: umin_B
				; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
				define i8 @umin_B(<16 x i8>* nocapture readonly %arr) {
				%rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr
				; Step 1: bring lanes 8-15 down to lanes 0-7 and keep the unsigned-smaller value.
				%rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp22 = icmp ult <16 x i8> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf
				; Step 2: bring lanes 4-7 down to lanes 0-3 and repeat.
				%rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp25 = icmp ult <16 x i8> %rdx.minmax.select23, %rdx.shuf24
				%rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
				; Step 3: bring lanes 2-3 down to lanes 0-1 and repeat.
				%rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp28 = icmp ult <16 x i8> %rdx.minmax.select26, %rdx.shuf27
				%rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
				; Step 4: bring lane 1 down to lane 0; only lane 0 of this compare is used.
				%rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp31 = icmp ult <16 x i8> %rdx.minmax.select29, %rdx.shuf30
				; Scalar tail: lane 1 of %rdx.minmax.select29 equals lane 0 of %rdx.shuf30.
				%rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
				%rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
				%rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
				%r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
				ret i8 %r
				}

				; umin_H: unsigned-min reduction over the <8 x i16> vector loaded from %arr,
				; written as three shuffle / icmp ult / select halving steps (8 -> 4 -> 2 -> 1
				; live lanes), the last one finishing with a scalar select on extracted lanes.
				; The FileCheck directives below require a uminv with an h-register
				; destination and a .8h source in the output.
				; CHECK-LABEL: umin_H
				; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
				define i16 @umin_H(<8 x i16>* nocapture readonly %arr) {
				%rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
				; Step 1: bring lanes 4-7 down to lanes 0-3 and keep the unsigned-smaller value.
				%rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp23 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
				; Step 2: bring lanes 2-3 down to lanes 0-1 and repeat.
				%rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp26 = icmp ult <8 x i16> %rdx.minmax.select24, %rdx.shuf25
				%rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
				; Step 3: bring lane 1 down to lane 0; only lane 0 of this compare is used.
				%rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp29 = icmp ult <8 x i16> %rdx.minmax.select27, %rdx.shuf28
				; Scalar tail: lane 1 of %rdx.minmax.select27 equals lane 0 of %rdx.shuf28.
				%rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
				%rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
				%rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
				%r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
				ret i16 %r
				}

				; umin_S: unsigned-min reduction over the <4 x i32> vector loaded from %arr,
				; written as two shuffle / icmp ult / select halving steps (4 -> 2 -> 1 live
				; lanes), the last one finishing with a scalar select on extracted lanes.
				; The FileCheck directives below require a uminv with an s-register
				; destination and a .4s source in the output.
				; CHECK-LABEL: umin_S
				; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
				define i32 @umin_S(<4 x i32>* nocapture readonly %arr) {
				%rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr
				; Step 1: bring lanes 2-3 down to lanes 0-1 and keep the unsigned-smaller value.
				%rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
				%rdx.minmax.cmp18 = icmp ult <4 x i32> %rdx.minmax.select, %rdx.shuf
				%rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
				; Step 2: bring lane 1 down to lane 0; only lane 0 of this compare is used.
				%rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
				%rdx.minmax.cmp21 = icmp ult <4 x i32> %rdx.minmax.select19, %rdx.shuf20
				; Scalar tail: lane 1 of %rdx.minmax.select19 equals lane 0 of %rdx.shuf20.
				%rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
				%rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
				%rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
				%r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
				ret i32 %r
				}

				; umin_D: negative test. The same unsigned-min reduction shape applied to a
				; <2 x i64> vector (a single 2 -> 1 step). The FileCheck directives below
				; require that no uminv appears in the output for this function.
				; NOTE(review): presumably because AArch64 has no across-lane uminv form for
				; 64-bit elements - confirm against the ISA reference.
				; CHECK-LABEL: umin_D
				; CHECK-NOT: uminv
				define i64 @umin_D(<2 x i64>* nocapture readonly %arr) {
				%rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
				; Single step: bring lane 1 down to lane 0 and compare (unsigned less-than).
				%rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
				%rdx.minmax.cmp18 = icmp ult <2 x i64> %rdx.minmax.select, %rdx.shuf
				; Scalar tail: pick between lane 0 and lane 1 of the loaded vector.
				%rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
				%rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
				%rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
				%r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
				ret i64 %r
				}

This is an archive of the discontinued LLVM Phabricator instance.

Improve ISel using across lane min/max reduction
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 34686

lib/Target/AArch64/AArch64ISelLowering.cpp

test/CodeGen/AArch64/aarch64-addv.ll

test/CodeGen/AArch64/aarch64-minmaxv.ll

This is an archive of the discontinued LLVM Phabricator instance.

Improve ISel using across lane min/max reduction (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 34686

lib/Target/AArch64/AArch64ISelLowering.cpp

test/CodeGen/AArch64/aarch64-addv.ll

test/CodeGen/AArch64/aarch64-minmaxv.ll

Improve ISel using across lane min/max reduction
ClosedPublic