Diff 296132

llvm/lib/Target/ARM/ARMISelLowering.h

Show First 20 Lines • Show All 235 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
VMLALVs, // Same as VMLAV but with i64, returning the low and		VMLALVs, // Same as VMLAV but with i64, returning the low and
VMLALVu, // high 32-bit halves of the sum		VMLALVu, // high 32-bit halves of the sum
VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask		VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask
VMLALVpu,		VMLALVpu,
VMLALVAs, // Same as VMLALV but also add an input accumulator		VMLALVAs, // Same as VMLALV but also add an input accumulator
VMLALVAu, // provided as low and high halves		VMLALVAu, // provided as low and high halves
VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask		VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
VMLALVApu,		VMLALVApu,
		VMINVu, // Find minimum unsigned value of a vector and register
		VMINVs, // Find minimum signed value of a vector and register
		VMAXVu, // Find maximum unsigned value of a vector and register
		VMAXVs, // Find maximum signed value of a vector and register

SMULWB, // Signed multiply word by half word, bottom		SMULWB, // Signed multiply word by half word, bottom
SMULWT, // Signed multiply word by half word, top		SMULWT, // Signed multiply word by half word, top
UMLAL, // 64bit Unsigned Accumulate Multiply		UMLAL, // 64bit Unsigned Accumulate Multiply
SMLAL, // 64bit Signed Accumulate Multiply		SMLAL, // 64bit Signed Accumulate Multiply
UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply		UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16		SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
SMLALBT, // 64-bit signed accumulate multiply bottom, top 16		SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
▲ Show 20 Lines • Show All 494 Lines • ▼ Show 20 Lines	SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
TLSModel::Model model) const;		TLSModel::Model model) const;
SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
		dmgreenUnsubmitted Done Reply Inline Actions This is no longer used? dmgreen: This is no longer used?
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
▲ Show 20 Lines • Show All 168 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 981 Lines • ▼ Show 20 Lines	if (Subtarget->hasNEON() \|\| Subtarget->hasMVEIntegerOps()) {
setTargetDAGCombine(ISD::BITCAST);		setTargetDAGCombine(ISD::BITCAST);
}		}
if (Subtarget->hasMVEIntegerOps()) {		if (Subtarget->hasMVEIntegerOps()) {
setTargetDAGCombine(ISD::SMIN);		setTargetDAGCombine(ISD::SMIN);
setTargetDAGCombine(ISD::UMIN);		setTargetDAGCombine(ISD::UMIN);
setTargetDAGCombine(ISD::SMAX);		setTargetDAGCombine(ISD::SMAX);
setTargetDAGCombine(ISD::UMAX);		setTargetDAGCombine(ISD::UMAX);
setTargetDAGCombine(ISD::FP_EXTEND);		setTargetDAGCombine(ISD::FP_EXTEND);
		setTargetDAGCombine(ISD::SELECT);
		setTargetDAGCombine(ISD::SELECT_CC);
}		}

if (!Subtarget->hasFP64()) {		if (!Subtarget->hasFP64()) {
// When targeting a floating-point unit with only single-precision		// When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which		// operations, f64 is legal for the few double-precision instructions which
// are present However, no double-precision operations other than moves,		// are present However, no double-precision operations other than moves,
// loads and stores are provided by the hardware.		// loads and stores are provided by the hardware.
setOperationAction(ISD::FADD, MVT::f64, Expand);		setOperationAction(ISD::FADD, MVT::f64, Expand);
▲ Show 20 Lines • Show All 737 Lines • ▼ Show 20 Lines	const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VMLALVs: return "ARMISD::VMLALVs";		case ARMISD::VMLALVs: return "ARMISD::VMLALVs";
case ARMISD::VMLALVu: return "ARMISD::VMLALVu";		case ARMISD::VMLALVu: return "ARMISD::VMLALVu";
case ARMISD::VMLALVps: return "ARMISD::VMLALVps";		case ARMISD::VMLALVps: return "ARMISD::VMLALVps";
case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu";		case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu";
case ARMISD::VMLALVAs: return "ARMISD::VMLALVAs";		case ARMISD::VMLALVAs: return "ARMISD::VMLALVAs";
case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu";		case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu";
case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps";		case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps";
case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu";		case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu";
		case ARMISD::VMINVu: return "ARMISD::VMINVu";
		case ARMISD::VMINVs: return "ARMISD::VMINVs";
		case ARMISD::VMAXVu: return "ARMISD::VMAXVu";
		case ARMISD::VMAXVs: return "ARMISD::VMAXVs";
case ARMISD::UMAAL: return "ARMISD::UMAAL";		case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";		case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";		case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::SMLALBB: return "ARMISD::SMLALBB";		case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
case ARMISD::SMLALBT: return "ARMISD::SMLALBT";		case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
case ARMISD::SMLALTB: return "ARMISD::SMLALTB";		case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
case ARMISD::SMLALTT: return "ARMISD::SMLALTT";		case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
case ARMISD::SMULWB: return "ARMISD::SMULWB";		case ARMISD::SMULWB: return "ARMISD::SMULWB";
▲ Show 20 Lines • Show All 3,393 Lines • ▼ Show 20 Lines	if (VT == MVT::f32)
return !Subtarget->hasVFP2Base();		return !Subtarget->hasVFP2Base();
if (VT == MVT::f64)		if (VT == MVT::f64)
return !Subtarget->hasFP64();		return !Subtarget->hasFP64();
if (VT == MVT::f16)		if (VT == MVT::f16)
return !Subtarget->hasFullFP16();		return !Subtarget->hasFullFP16();
return false;		return false;
}		}

SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {		SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
		dmgreenUnsubmitted Done Reply Inline Actions Please clang-format, and possibly just make it a static function? dmgreen: Please clang-format, and possibly just make it a static function?
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();
SDLoc dl(Op);		SDLoc dl(Op);

// Try to convert two saturating conditional selects into a single SSAT		// Try to convert two saturating conditional selects into a single SSAT
SDValue SatValue;		SDValue SatValue;
uint64_t SatConstant;		uint64_t SatConstant;
bool SatUSat;		bool SatUSat;
if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) \|\| Subtarget->isThumb2()) &&		if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) \|\| Subtarget->isThumb2()) &&
isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) {		isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) {
if (SatUSat)		if (SatUSat)
return DAG.getNode(ARMISD::USAT, dl, VT, SatValue,		return DAG.getNode(ARMISD::USAT, dl, VT, SatValue,
DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));		DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
else		else
return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,		return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));		DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
}		}

// Try to convert expressions of the form x < k ? k : x (and similar forms)		// Try to convert expressions of the form x < k ? k : x (and similar forms)
// into more efficient bit operations, which is possible when k is 0 or -1		// into more efficient bit operations, which is possible when k is 0 or -1
// On ARM and Thumb-2 which have flexible operand 2 this will result in		// On ARM and Thumb-2 which have flexible operand 2 this will result in
// single instructions. On Thumb the shift and the bit operation will be two		// single instructions. On Thumb the shift and the bit operation will be two
// instructions.		// instructions.
// Only allow this transformation on full-width (32-bit) operations		// Only allow this transformation on full-width (32-bit) operations
SDValue LowerSatConstant;		SDValue LowerSatConstant;
if (VT == MVT::i32 &&		if (VT == MVT::i32 &&
		dmgreenUnsubmitted Done Reply Inline Actions else return false; Then it doesn't need the extra indenting. dmgreen: else return false; Then it doesn't need the extra indenting.
		samtebbsAuthorUnsubmitted Done Reply Inline Actions Thumbs up samtebbs: Thumbs up
isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {		isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,		SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
DAG.getConstant(31, dl, VT));		DAG.getConstant(31, dl, VT));
if (isNullConstant(LowerSatConstant)) {		if (isNullConstant(LowerSatConstant)) {
SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,		SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
DAG.getAllOnesConstant(dl, VT));		DAG.getAllOnesConstant(dl, VT));
return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);		return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
} else if (isAllOnesConstant(LowerSatConstant))		} else if (isAllOnesConstant(LowerSatConstant))
return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);		return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
}		}

SDValue LHS = Op.getOperand(0);		SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);		SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();		ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);		SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);		SDValue FalseVal = Op.getOperand(3);
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);		ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);		ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
		dmgreenUnsubmitted Done Reply Inline Actions This looks like it needs to be a bit stricter still. The AND needs to go with a umin/umax, and the sign extend with smin/smax (they shouldn't be the other way around). The AND needs a mask of the right size (it'll be 255 for i8 for example, which is the same as a "zero extend inreg"). The sign extend will have a type as the second argument, which should be the same as the vecreduce's scalar type. Some of this might be difficult to get to come up in practice, but we should make sure it won't be subtly wrong and cause bugs. dmgreen: This looks like it needs to be a bit stricter still. The AND needs to go with a umin/umax, and…
		samtebbsAuthorUnsubmitted Done Reply Inline Actions I thought about those two but hoped that the IR type system would make those situations impossible. Will add these, thanks. samtebbs: I thought about those two but hoped that the IR type system would make those situations…

if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&		if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
		dmgreenUnsubmitted Done Reply Inline Actions It might be worth pulling this out into a separate function, similar to isLowerSaturatingConditional or something like PerformSplittingToNarrowingStores where we return the new SDValue. dmgreen: It might be worth pulling this out into a separate function, similar to…
		samtebbsAuthorUnsubmitted Done Reply Inline Actions That's much cleaner! Done. samtebbs: That's much cleaner! Done.
LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {		LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
		dmgreenUnsubmitted Done Reply Inline Actions This could do with a few extra tests to make sure it's a min/max. LHS==TrueVal and RHS==FalseVal for example. Plus it's worth checking if commuted min/max work too. min(vecreduce.min, x) or min(x, vecreduce.min) dmgreen: This could do with a few extra tests to make sure it's a min/max. LHS==TrueVal and…
		samtebbsAuthorUnsubmitted Done Reply Inline Actions Thanks for pointing that out, done now. samtebbs: Thanks for pointing that out, done now.
unsigned TVal = CTVal->getZExtValue();		unsigned TVal = CTVal->getZExtValue();
unsigned FVal = CFVal->getZExtValue();		unsigned FVal = CFVal->getZExtValue();
unsigned Opcode = 0;		unsigned Opcode = 0;

if (TVal == ~FVal) {		if (TVal == ~FVal) {
Opcode = ARMISD::CSINV;		Opcode = ARMISD::CSINV;
		dmgreenUnsubmitted Not Done Reply Inline Actions -> FalseVal->getOperand(0).getValueType().getVectorElementType(); dmgreen: -> FalseVal->getOperand(0).getValueType().getVectorElementType();
} else if (TVal == ~FVal + 1) {		} else if (TVal == ~FVal + 1) {
Opcode = ARMISD::CSNEG;		Opcode = ARMISD::CSNEG;
} else if (TVal + 1 == FVal) {		} else if (TVal + 1 == FVal) {
Opcode = ARMISD::CSINC;		Opcode = ARMISD::CSINC;
} else if (TVal == FVal + 1) {		} else if (TVal == FVal + 1) {
Opcode = ARMISD::CSINC;		Opcode = ARMISD::CSINC;
		dmgreenUnsubmitted Done Reply Inline Actions Hmm. This is getting complicated. I would suggest changing this to something more like: if (Type == i32) return Selected == Compared; else // Check the opcodes are and/signexted/assertzext/assertsext. But I don't think that will work very well. For i8/i16 I think we might actually need to be matching: t7: i32 = AssertZext t5, ValueType:ch:i8 t17: i32 = vecreduce_umin t3 t25: i32 = and t17, Constant:i32<255> t27: i32 = select_cc t25, t7, t17, t7, setult:ch t21: i32 = and t27, Constant:i32<255> Including that root "and" (which is the same as the two TrueVal/FalseVal sides of the select_cc being extended). But one loop I tried had no visible way I could see to prove the value needed to be a i16. It might be easier to do this from a combine, not during lowering. So before we have legalized the types. What do you think about getting i32 working first with this patch, and doing i8/i16 as a followup? That way we can get all the boilerplate out of the way, and it's just this code that produces the ARMISD::VMINVs that we need to work further on. dmgreen: Hmm. This is getting complicated. I would suggest changing this to something more like: if…
std::swap(TrueVal, FalseVal);		std::swap(TrueVal, FalseVal);
std::swap(TVal, FVal);		std::swap(TVal, FVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());		CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}		}

if (Opcode) {		if (Opcode) {
// If one of the constants is cheaper than another, materialise the		// If one of the constants is cheaper than another, materialise the
// cheaper one and let the csel generate the other.		// cheaper one and let the csel generate the other.
if (Opcode != ARMISD::CSINC &&		if (Opcode != ARMISD::CSINC &&
HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {		HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
std::swap(TrueVal, FalseVal);		std::swap(TrueVal, FalseVal);
std::swap(TVal, FVal);		std::swap(TVal, FVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());		CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}		}

// Attempt to use ZR checking TVal is 0, possibly inverting the condition		// Attempt to use ZR checking TVal is 0, possibly inverting the condition
// to get there. CSINC not is invertable like the other two (~(~a) == a,		// to get there. CSINC not is invertable like the other two (~(~a) == a,
// -(-a) == a, but (a+1)+1 != a).		// -(-a) == a, but (a+1)+1 != a).
if (FVal == 0 && Opcode != ARMISD::CSINC) {		if (FVal == 0 && Opcode != ARMISD::CSINC) {
std::swap(TrueVal, FalseVal);		std::swap(TrueVal, FalseVal);
std::swap(TVal, FVal);		std::swap(TVal, FVal);
		dmgreenUnsubmitted Done Reply Inline Actions This could use std::swap? dmgreen: This could use std::swap?
		samtebbsAuthorUnsubmitted Done Reply Inline Actions Ah I didn't know that existed. Done. samtebbs: Ah I didn't know that existed. Done.
CC = ISD::getSetCCInverse(CC, LHS.getValueType());		CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}		}
if (TVal == 0)		if (TVal == 0)
TrueVal = DAG.getRegister(ARM::ZR, MVT::i32);		TrueVal = DAG.getRegister(ARM::ZR, MVT::i32);

// Drops F's value because we can get it by inverting/negating TVal.		// Drops F's value because we can get it by inverting/negating TVal.
FalseVal = TrueVal;		FalseVal = TrueVal;

SDValue ARMcc;		SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);		SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
EVT VT = TrueVal.getValueType();		EVT VT = TrueVal.getValueType();
		dmgreenUnsubmitted Done Reply Inline Actions I think we might need to check these things, otherwise we might be matching things incorrectly. I believe the exact semantics of a vminv.s8 are that it reads the bottom 8 bits of Rn to do the final scalar min. So a 32bit min could actually produce a different value. We should make sure we are getting this correct too. dmgreen: I think we might need to check these things, otherwise we might be matching things incorrectly.
		samtebbsAuthorUnsubmitted Done Reply Inline Actions Thanks. I've added explicit checks for the LHS opcode and have made the lowering use the LHS instead of the TrueVal as the scalar so that we know the top 24 bits aren't set. samtebbs: Thanks. I've added explicit checks for the LHS opcode and have made the lowering use the LHS…
return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);		return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
}		}
}		}

if (isUnsupportedFloatingType(LHS.getValueType())) {		if (isUnsupportedFloatingType(LHS.getValueType())) {
DAG.getTargetLoweringInfo().softenSetCCOperands(		DAG.getTargetLoweringInfo().softenSetCCOperands(
DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);		DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);

▲ Show 20 Lines • Show All 6,837 Lines • ▼ Show 20 Lines	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
}		}
}		}
} else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {		} else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
return AddCombineTo64bitMLAL(N, DCI, Subtarget);		return AddCombineTo64bitMLAL(N, DCI, Subtarget);
}		}
return SDValue();		return SDValue();
}		}

		/// Fold a scalar min/max between a value and a vector min/max reduction into
		/// a single ARMISD::VMINV[us] / ARMISD::VMAXV[us] node.
		///
		/// Two root forms are recognised: an ISD::SELECT whose condition is an
		/// ISD::SETCC, and an ISD::SELECT_CC. One of the selected values must be an
		/// ISD::VECREDUCE_{U,S}{MIN,MAX} of a v16i8, v8i16 or v4i32 vector, the
		/// condition code must be the strict comparison of matching signedness, and
		/// the selected values must be exactly the compared values.
		///
		/// \param N         The SELECT or SELECT_CC node being combined.
		/// \param DCI       Combiner state; only its SelectionDAG is used.
		/// \param Subtarget Used to require MVE integer operations.
		/// \returns The replacement value, or an empty SDValue if N does not match.
		static SDValue PerformSELECTCombine(SDNode *N,
		                                    TargetLowering::DAGCombinerInfo &DCI,
		                                    const ARMSubtarget *Subtarget) {
		  if (!Subtarget->hasMVEIntegerOps())
		    return SDValue();

		  SDLoc dl(N);
		  SDValue LHS;
		  SDValue RHS;
		  SDValue TrueVal;
		  SDValue FalseVal;
		  ISD::CondCode CC;

		  // Pull the compared and the selected values out of either root form.
		  if (N->getOpcode() == ISD::SELECT &&
		      N->getOperand(0).getOpcode() == ISD::SETCC) {
		    SDValue SetCC = N->getOperand(0);
		    LHS = SetCC.getOperand(0);
		    RHS = SetCC.getOperand(1);
		    CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
		    TrueVal = N->getOperand(1);
		    FalseVal = N->getOperand(2);
		  } else if (N->getOpcode() == ISD::SELECT_CC) {
		    LHS = N->getOperand(0);
		    RHS = N->getOperand(1);
		    CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
		    TrueVal = N->getOperand(2);
		    FalseVal = N->getOperand(3);
		  } else {
		    return SDValue();
		  }

		  auto EitherSelectedIs = [&](unsigned Opc) {
		    return TrueVal.getOpcode() == Opc || FalseVal.getOpcode() == Opc;
		  };

		  // Choose the target node from the reduction kind and the condition code.
		  // ReduceOpcode records which reduction justified the choice. When the
		  // condition code is the reversed comparison (e.g. "x > y ? y : x" for a
		  // min) the selected values are swapped so that the select reads as
		  // "LHS <cc> RHS ? LHS : RHS".
		  unsigned Opcode = 0;
		  unsigned ReduceOpcode = 0;
		  if (EitherSelectedIs(ISD::VECREDUCE_UMIN) &&
		      (CC == ISD::SETULT || CC == ISD::SETUGT)) {
		    Opcode = ARMISD::VMINVu;
		    ReduceOpcode = ISD::VECREDUCE_UMIN;
		    if (CC == ISD::SETUGT)
		      std::swap(TrueVal, FalseVal);
		  } else if (EitherSelectedIs(ISD::VECREDUCE_SMIN) &&
		             (CC == ISD::SETLT || CC == ISD::SETGT)) {
		    Opcode = ARMISD::VMINVs;
		    ReduceOpcode = ISD::VECREDUCE_SMIN;
		    if (CC == ISD::SETGT)
		      std::swap(TrueVal, FalseVal);
		  } else if (EitherSelectedIs(ISD::VECREDUCE_UMAX) &&
		             (CC == ISD::SETUGT || CC == ISD::SETULT)) {
		    Opcode = ARMISD::VMAXVu;
		    ReduceOpcode = ISD::VECREDUCE_UMAX;
		    if (CC == ISD::SETULT)
		      std::swap(TrueVal, FalseVal);
		  } else if (EitherSelectedIs(ISD::VECREDUCE_SMAX) &&
		             (CC == ISD::SETGT || CC == ISD::SETLT)) {
		    Opcode = ARMISD::VMAXVs;
		    ReduceOpcode = ISD::VECREDUCE_SMAX;
		    if (CC == ISD::SETLT)
		      std::swap(TrueVal, FalseVal);
		  } else {
		    return SDValue();
		  }

		  // Normalise to the right hand side being the vector reduction. Only the
		  // reduction that selected Opcode may be moved there: if the other selected
		  // value happens to be a different kind of reduction it must stay on the
		  // scalar side, otherwise its vector would be folded with the wrong
		  // operation. After this step FalseVal is guaranteed to be a ReduceOpcode
		  // node, because one of the two selected values was.
		  if (TrueVal.getOpcode() == ReduceOpcode) {
		    std::swap(LHS, RHS);
		    std::swap(TrueVal, FalseVal);
		  }

		  // Query the type through the SDValue so the referenced result number is
		  // honoured, rather than always reading result 0 of the defining node.
		  SDValue Vector = FalseVal.getOperand(0);
		  EVT VectorType = Vector.getValueType();
		  if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
		      VectorType != MVT::v4i32)
		    return SDValue();

		  EVT VectorScalarType = VectorType.getVectorElementType();

		  // The values being selected must also be the ones being compared
		  if (TrueVal != LHS || FalseVal != RHS)
		    return SDValue();

		  // The types must match the reduced type too
		  if (LHS.getValueType() != VectorScalarType ||
		      RHS.getValueType() != VectorScalarType)
		    return SDValue();

		  SelectionDAG &DAG = DCI.DAG;

		  // Legalise the scalar to an i32. ANY_EXTEND is used on the understanding
		  // that the selected instruction does not read the extended bits.
		  if (VectorScalarType != MVT::i32)
		    LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);

		  // Generate the reduction as an i32 for legalisation purposes
		  SDValue Reduction = DAG.getNode(Opcode, dl, MVT::i32, LHS, Vector);

		  // The result isn't actually an i32 so truncate it back to its original type
		  if (VectorScalarType != MVT::i32)
		    Reduction = DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);

		  return Reduction;
		}

static SDValue PerformVSELECTCombine(SDNode *N,		static SDValue PerformVSELECTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {		const ARMSubtarget *Subtarget) {
// Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).		// Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
//		//
// We need to re-implement this optimization here as the implementation in the		// We need to re-implement this optimization here as the implementation in the
// Target-Independent DAGCombiner does not handle the kind of constant we make		// Target-Independent DAGCombiner does not handle the kind of constant we make
// (it calls isConstOrConstSplat with AllowTruncation set to false - and for		// (it calls isConstOrConstSplat with AllowTruncation set to false - and for
▲ Show 20 Lines • Show All 3,940 Lines • ▼ Show 20 Lines	static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,

return SDValue();		return SDValue();
}		}

SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,		SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {		DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {		switch (N->getOpcode()) {
default: break;		default: break;
		case ISD::SELECT_CC:
		case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);		case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
		dmgreenUnsubmitted Done Reply Inline Actions Move the hasMVEIntegerOps check into the function? So it can look like the rest. dmgreen: Move the hasMVEIntegerOps check into the function? So it can look like the rest.
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);		case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);		case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);		case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);		case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);		case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);		case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);		case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);		case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
▲ Show 20 Lines • Show All 3,019 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMInstrMVE.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 938 Lines • ▼ Show 20 Lines	multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> {
defm s8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16s8, intrBaseName>;		defm s8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16s8, intrBaseName>;
defm s16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8s16, intrBaseName>;		defm s16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8s16, intrBaseName>;
defm s32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4s32, intrBaseName>;		defm s32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4s32, intrBaseName>;
defm u8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16u8, intrBaseName>;		defm u8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16u8, intrBaseName>;
defm u16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8u16, intrBaseName>;		defm u16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8u16, intrBaseName>;
defm u32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4u32, intrBaseName>;		defm u32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4u32, intrBaseName>;
}		}

		// Type profile shared by the scalar-and-vector min/max reduction nodes:
		// one result and two operands, where the result and operand 1 are integers
		// and operand 2 is a vector.
		def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer
		SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>
		]>;
		// DAG nodes for the ARMISD min/max reductions of a vector and a scalar
		// register; the u/s suffix selects the unsigned or signed comparison.
		def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>;
		def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>;
		def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>;
		def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>;

defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">;		defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">;
defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">;		defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">;

let Predicates = [HasMVEInt] in {		let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),		def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
(i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>;		(i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>;
def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))),		def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))),
(i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>;		(i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>;
Show All 14 Lines	def : Pat<(i32 (vecreduce_smin (v4i32 MQPR:$src))),
(i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>;		(i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>;
def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))),		def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))),
(i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>;		(i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>;
def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))),		def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))),
(i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>;		(i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>;
def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),		def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),
(i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;		(i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;

		// Select the ARMVMINVu/ARMVMINVs nodes into the MVE_VMINV instruction whose
		// signedness and element size match the vector operand type (v16i8 -> 8,
		// v8i16 -> 16, v4i32 -> 32). The i32 scalar $x is forwarded unchanged as
		// the instruction's first operand, so no constant seed is materialised here.
		def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
		(i32 (MVE_VMINVu8 $x, $src))>;
		def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
		(i32 (MVE_VMINVu16 $x, $src))>;
		def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
		(i32 (MVE_VMINVu32 $x, $src))>;
		def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
		(i32 (MVE_VMINVs8 $x, $src))>;
		def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
		(i32 (MVE_VMINVs16 $x, $src))>;
		def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
		(i32 (MVE_VMINVs32 $x, $src))>;

		// Select the ARMVMAXVu/ARMVMAXVs nodes into the MVE_VMAXV instruction whose
		// signedness and element size match the vector operand type (v16i8 -> 8,
		// v8i16 -> 16, v4i32 -> 32). The i32 scalar $x is forwarded unchanged as
		// the instruction's first operand, so no constant seed is materialised here.
		def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
		(i32 (MVE_VMAXVu8 $x, $src))>;
		def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
		(i32 (MVE_VMAXVu16 $x, $src))>;
		def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
		(i32 (MVE_VMAXVu32 $x, $src))>;
		def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
		(i32 (MVE_VMAXVs8 $x, $src))>;
		def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
		(i32 (MVE_VMAXVs16 $x, $src))>;
		def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
		(i32 (MVE_VMAXVs32 $x, $src))>;

}		}

multiclass MVE_VMINMAXAV_ty<string iname, bit isMin, string intrBaseName> {		multiclass MVE_VMINMAXAV_ty<string iname, bit isMin, string intrBaseName> {
defm s8 : MVE_VMINMAXV_p<iname, 0, isMin, MVE_v16s8, intrBaseName>;		defm s8 : MVE_VMINMAXV_p<iname, 0, isMin, MVE_v16s8, intrBaseName>;
defm s16: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v8s16, intrBaseName>;		defm s16: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v8s16, intrBaseName>;
defm s32: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v4s32, intrBaseName>;		defm s32: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v4s32, intrBaseName>;
}		}

▲ Show 20 Lines • Show All 6,351 Lines • Show Last 20 Lines

llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

				; Basic fold: an unsigned-min reduction of %vec followed by
				; select(%x <u %min, %x, %min), i.e. the unsigned minimum of the reduction
				; and the scalar %min. The expected assembly is a single vminv.u8 on r0
				; with no separate seed constant or compare, then the zeroext of the result.
				define arm_aapcs_vfpcc zeroext i8 @uminv16i8(<16 x i8> %vec, i8 zeroext %min) {
				; CHECK-LABEL: uminv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.u8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec)
				%cmp = icmp ult i8 %x, %min
				%1 = select i1 %cmp, i8 %x, i8 %min
				ret i8 %1
				}

				define arm_aapcs_vfpcc zeroext i16 @uminv8i16(<8 x i16> %vec, i16 zeroext %min) {
				; CHECK-LABEL: uminv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.u16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec)
				%cmp = icmp ult i16 %x, %min
				%1 = select i1 %cmp, i16 %x, i16 %min
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @uminv4i32(<4 x i32> %vec, i32 %min) {
				; CHECK-LABEL: uminv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec)
				%cmp = icmp ult i32 %x, %min
				%1 = select i1 %cmp, i32 %x, i32 %min
				ret i32 %1
				}

				define arm_aapcs_vfpcc signext i8 @sminv16i8(<16 x i8> %vec, i8 signext %min) {
				; CHECK-LABEL: sminv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.s8 r0, q0
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec)
				%cmp = icmp slt i8 %x, %min
				%1 = select i1 %cmp, i8 %x, i8 %min
				ret i8 %1
				}

				define arm_aapcs_vfpcc signext i16 @sminv8i16(<8 x i16> %vec, i16 signext %min) {
				; CHECK-LABEL: sminv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.s16 r0, q0
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec)
				%cmp = icmp slt i16 %x, %min
				%1 = select i1 %cmp, i16 %x, i16 %min
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @sminv4i32(<4 x i32> %vec, i32 %min) {
				; CHECK-LABEL: sminv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec)
				%cmp = icmp slt i32 %x, %min
				%1 = select i1 %cmp, i32 %x, i32 %min
				ret i32 %1
				}

				define arm_aapcs_vfpcc zeroext i8 @umaxv16i8(<16 x i8> %vec, i8 zeroext %max) {
				; CHECK-LABEL: umaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.u8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec)
				%cmp = icmp ugt i8 %x, %max
				%1 = select i1 %cmp, i8 %x, i8 %max
				ret i8 %1
				}

				define arm_aapcs_vfpcc zeroext i16 @umaxv8i16(<8 x i16> %vec, i16 zeroext %max) {
				; CHECK-LABEL: umaxv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.u16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec)
				%cmp = icmp ugt i16 %x, %max
				%1 = select i1 %cmp, i16 %x, i16 %max
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @umaxv4i32(<4 x i32> %vec, i32 %max) {
				; CHECK-LABEL: umaxv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec)
				%cmp = icmp ugt i32 %x, %max
				%1 = select i1 %cmp, i32 %x, i32 %max
				ret i32 %1
				}

				define arm_aapcs_vfpcc signext i8 @smaxv16i8(<16 x i8> %vec, i8 signext %max) {
				; CHECK-LABEL: smaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.s8 r0, q0
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec)
				%cmp = icmp sgt i8 %x, %max
				%1 = select i1 %cmp, i8 %x, i8 %max
				ret i8 %1
				}

				define arm_aapcs_vfpcc signext i16 @smaxv8i16(<8 x i16> %vec, i16 signext %max) {
				; CHECK-LABEL: smaxv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.s16 r0, q0
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec)
				%cmp = icmp sgt i16 %x, %max
				%1 = select i1 %cmp, i16 %x, i16 %max
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @smaxv4i32(<4 x i32> %vec, i32 %max) {
				; CHECK-LABEL: smaxv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec)
				%cmp = icmp sgt i32 %x, %max
				%1 = select i1 %cmp, i32 %x, i32 %max
				ret i32 %1
				}

				; Commuted form of the unsigned-min fold: the scalar %min is the left-hand
				; compare operand and the true value of the select
				; (select(%min <u %x, %min, %x)). This is still the minimum of the two
				; values, and the expected assembly is the same single vminv.u8.
				define arm_aapcs_vfpcc zeroext i8 @commute_uminv16i8(<16 x i8> %vec, i8 zeroext %min) {
				; CHECK-LABEL: commute_uminv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.u8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec)
				%cmp = icmp ult i8 %min, %x
				%1 = select i1 %cmp, i8 %min, i8 %x
				ret i8 %1
				}

				define arm_aapcs_vfpcc zeroext i16 @commute_uminv8i16(<8 x i16> %vec, i16 zeroext %min) {
				; CHECK-LABEL: commute_uminv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.u16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec)
				%cmp = icmp ult i16 %min, %x
				%1 = select i1 %cmp, i16 %min, i16 %x
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @commute_uminv4i32(<4 x i32> %vec, i32 %min) {
				; CHECK-LABEL: commute_uminv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec)
				%cmp = icmp ult i32 %min, %x
				%1 = select i1 %cmp, i32 %min, i32 %x
				ret i32 %1
				}

				define arm_aapcs_vfpcc signext i8 @commute_sminv16i8(<16 x i8> %vec, i8 signext %min) {
				; CHECK-LABEL: commute_sminv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.s8 r0, q0
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec)
				%cmp = icmp slt i8 %min, %x
				%1 = select i1 %cmp, i8 %min, i8 %x
				ret i8 %1
				}

				define arm_aapcs_vfpcc signext i16 @commute_sminv8i16(<8 x i16> %vec, i16 signext %min) {
				; CHECK-LABEL: commute_sminv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.s16 r0, q0
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec)
				%cmp = icmp slt i16 %min, %x
				%1 = select i1 %cmp, i16 %min, i16 %x
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @commute_sminv4i32(<4 x i32> %vec, i32 %min) {
				; CHECK-LABEL: commute_sminv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec)
				%cmp = icmp slt i32 %min, %x
				%1 = select i1 %cmp, i32 %min, i32 %x
				ret i32 %1
				}

				define arm_aapcs_vfpcc zeroext i8 @commute_umaxv16i8(<16 x i8> %vec, i8 zeroext %max) {
				; CHECK-LABEL: commute_umaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.u8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec)
				%cmp = icmp ugt i8 %max, %x
				%1 = select i1 %cmp, i8 %max, i8 %x
				ret i8 %1
				}

				define arm_aapcs_vfpcc zeroext i16 @commute_umaxv8i16(<8 x i16> %vec, i16 zeroext %max) {
				; CHECK-LABEL: commute_umaxv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.u16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec)
				%cmp = icmp ugt i16 %max, %x
				%1 = select i1 %cmp, i16 %max, i16 %x
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @commute_umaxv4i32(<4 x i32> %vec, i32 %max) {
				; CHECK-LABEL: commute_umaxv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec)
				%cmp = icmp ugt i32 %max, %x
				%1 = select i1 %cmp, i32 %max, i32 %x
				ret i32 %1
				}

				define arm_aapcs_vfpcc signext i8 @commute_smaxv16i8(<16 x i8> %vec, i8 signext %max) {
				; CHECK-LABEL: commute_smaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.s8 r0, q0
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec)
				%cmp = icmp sgt i8 %max, %x
				%1 = select i1 %cmp, i8 %max, i8 %x
				ret i8 %1
				}

				define arm_aapcs_vfpcc signext i16 @commute_smaxv8i16(<8 x i16> %vec, i16 signext %max) {
				; CHECK-LABEL: commute_smaxv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.s16 r0, q0
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec)
				%cmp = icmp sgt i16 %max, %x
				%1 = select i1 %cmp, i16 %max, i16 %x
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @commute_smaxv4i32(<4 x i32> %vec, i32 %max) {
				; CHECK-LABEL: commute_smaxv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec)
				%cmp = icmp sgt i32 %max, %x
				%1 = select i1 %cmp, i32 %max, i32 %x
				ret i32 %1
				}

				; Negative test: the reduction is a signed max, but the select returns %max
				; when %x >s %max, which is the smaller of the two values, so the
				; compare/select does not match the reduction kind. The expected assembly
				; keeps a standalone vmaxv.s8 seeded with mvn #127 followed by an explicit
				; cmp/csel rather than folding the scalar into the reduction.
				define arm_aapcs_vfpcc signext i8 @mismatch_smaxv16i8(<16 x i8> %vec, i8 signext %max) {
				; CHECK-LABEL: mismatch_smaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: mvn r1, #127
				; CHECK-NEXT: vmaxv.s8 r1, q0
				; CHECK-NEXT: sxtb r2, r1
				; CHECK-NEXT: cmp r2, r0
				; CHECK-NEXT: csel r0, r0, r1, gt
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec)
				%cmp = icmp sgt i8 %x, %max
				%1 = select i1 %cmp, i8 %max, i8 %x
				ret i8 %1
				}

				; Negative test, commuted variant: the compare is %max >s %x and the select
				; returns %x when it holds, again yielding the smaller value after a
				; signed-max reduction. The expected assembly keeps the separately seeded
				; vmaxv.s8 plus an explicit cmp/csel.
				define arm_aapcs_vfpcc signext i8 @mismatch2_smaxv16i8(<16 x i8> %vec, i8 signext %max) {
				; CHECK-LABEL: mismatch2_smaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: mvn r1, #127
				; CHECK-NEXT: vmaxv.s8 r1, q0
				; CHECK-NEXT: sxtb r2, r1
				; CHECK-NEXT: cmp r0, r2
				; CHECK-NEXT: csel r0, r1, r0, gt
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec)
				%cmp = icmp sgt i8 %max, %x
				%1 = select i1 %cmp, i8 %x, i8 %max
				ret i8 %1
				}

				; Inverted-predicate form of the unsigned-min fold: the compare is
				; %x >u %min and the select returns %min when it holds, otherwise %x. This
				; is still the minimum of the two values, so the expected assembly is the
				; same single vminv.u8 as the direct form.
				define arm_aapcs_vfpcc zeroext i8 @inverted_uminv16i8(<16 x i8> %vec, i8 zeroext %min) {
				; CHECK-LABEL: inverted_uminv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.u8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec)
				%cmp = icmp ugt i8 %x, %min
				%1 = select i1 %cmp, i8 %min, i8 %x
				ret i8 %1
				}

				define arm_aapcs_vfpcc zeroext i16 @inverted_uminv8i16(<8 x i16> %vec, i16 zeroext %min) {
				; CHECK-LABEL: inverted_uminv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.u16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec)
				%cmp = icmp ugt i16 %x, %min
				%1 = select i1 %cmp, i16 %min, i16 %x
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @inverted_uminv4i32(<4 x i32> %vec, i32 %min) {
				; CHECK-LABEL: inverted_uminv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec)
				%cmp = icmp ugt i32 %x, %min
				%1 = select i1 %cmp, i32 %min, i32 %x
				ret i32 %1
				}

				define arm_aapcs_vfpcc signext i8 @inverted_sminv16i8(<16 x i8> %vec, i8 signext %min) {
				; CHECK-LABEL: inverted_sminv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.s8 r0, q0
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec)
				%cmp = icmp sgt i8 %x, %min
				%1 = select i1 %cmp, i8 %min, i8 %x
				ret i8 %1
				}

				define arm_aapcs_vfpcc signext i16 @inverted_sminv8i16(<8 x i16> %vec, i16 signext %min) {
				; CHECK-LABEL: inverted_sminv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.s16 r0, q0
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec)
				%cmp = icmp sgt i16 %x, %min
				%1 = select i1 %cmp, i16 %min, i16 %x
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @inverted_sminv4i32(<4 x i32> %vec, i32 %min) {
				; CHECK-LABEL: inverted_sminv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec)
				%cmp = icmp sgt i32 %x, %min
				%1 = select i1 %cmp, i32 %min, i32 %x
				ret i32 %1
				}

				define arm_aapcs_vfpcc zeroext i8 @inverted_umaxv16i8(<16 x i8> %vec, i8 zeroext %max) {
				; CHECK-LABEL: inverted_umaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.u8 r0, q0
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec)
				%cmp = icmp ult i8 %x, %max
				%1 = select i1 %cmp, i8 %max, i8 %x
				ret i8 %1
				}

				define arm_aapcs_vfpcc zeroext i16 @inverted_umaxv8i16(<8 x i16> %vec, i16 zeroext %max) {
				; CHECK-LABEL: inverted_umaxv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.u16 r0, q0
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec)
				%cmp = icmp ult i16 %x, %max
				%1 = select i1 %cmp, i16 %max, i16 %x
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @inverted_umaxv4i32(<4 x i32> %vec, i32 %max) {
				; CHECK-LABEL: inverted_umaxv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec)
				%cmp = icmp ult i32 %x, %max
				%1 = select i1 %cmp, i32 %max, i32 %x
				ret i32 %1
				}

				define arm_aapcs_vfpcc signext i8 @inverted_smaxv16i8(<16 x i8> %vec, i8 signext %max) {
				; CHECK-LABEL: inverted_smaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.s8 r0, q0
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec)
				%cmp = icmp slt i8 %x, %max
				%1 = select i1 %cmp, i8 %max, i8 %x
				ret i8 %1
				}

				define arm_aapcs_vfpcc signext i16 @inverted_smaxv8i16(<8 x i16> %vec, i16 signext %max) {
				; CHECK-LABEL: inverted_smaxv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.s16 r0, q0
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec)
				%cmp = icmp slt i16 %x, %max
				%1 = select i1 %cmp, i16 %max, i16 %x
				ret i16 %1
				}

				define arm_aapcs_vfpcc i32 @inverted_smaxv4i32(<4 x i32> %vec, i32 %max) {
				; CHECK-LABEL: inverted_smaxv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec)
				%cmp = icmp slt i32 %x, %max
				%1 = select i1 %cmp, i32 %max, i32 %x
				ret i32 %1
				}

				; Negative test: the i16 signed-max reduction is sign-extended and compared
				; against the full i32 %max, while the select operates on the truncated
				; i16 value of %max. The compare and select therefore use different widths,
				; and the expected assembly keeps a separately seeded vmaxv.s16 (seed built
				; with movw/movt) followed by an explicit cmp/csel.
				define arm_aapcs_vfpcc signext i16 @trunc_and_sext(<8 x i16> %vec, i32 %max) #1 {
				; CHECK-LABEL: trunc_and_sext:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: movw r1, #32768
				; CHECK-NEXT: movt r1, #65535
				; CHECK-NEXT: vmaxv.s16 r1, q0
				; CHECK-NEXT: sxth r2, r1
				; CHECK-NEXT: cmp r0, r2
				; CHECK-NEXT: csel r0, r0, r1, gt
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec)
				%xs = sext i16 %x to i32
				%cmp = icmp sgt i32 %max, %xs
				%mt = trunc i32 %max to i16
				%1 = select i1 %cmp, i16 %mt, i16 %x
				ret i16 %1
				}

				; Negative test: the i16 unsigned-max reduction is zero-extended and then
				; compared with a signed i32 predicate against %max, while the select uses
				; the truncated i16 value of %max. The expected assembly keeps a vmaxv.u16
				; seeded with 0 followed by an explicit cmp/csel.
				define arm_aapcs_vfpcc signext i16 @trunc_and_zext(<8 x i16> %vec, i32 %max) #1 {
				; CHECK-LABEL: trunc_and_zext:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: movs r1, #0
				; CHECK-NEXT: vmaxv.u16 r1, q0
				; CHECK-NEXT: uxth r2, r1
				; CHECK-NEXT: cmp r0, r2
				; CHECK-NEXT: csel r0, r0, r1, gt
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec)
				%xs = zext i16 %x to i32
				%cmp = icmp sgt i32 %max, %xs
				%mt = trunc i32 %max to i16
				%1 = select i1 %cmp, i16 %mt, i16 %x
				ret i16 %1
				}

				; 64-bit element case: an unsigned-min reduction of <2 x i64> followed by an
				; unsigned min with the scalar %min. The expected assembly contains no
				; vminv instruction; the lanes are moved to core registers with vmov and
				; the minimum is computed with scalar cmp/csel and subs/sbcs sequences.
				define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) {
				; CHECK-LABEL: uminv2i64:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: .save {r4, r5, r7, lr}
				; CHECK-NEXT: push {r4, r5, r7, lr}
				; CHECK-NEXT: vmov r12, s3
				; CHECK-NEXT: vmov lr, s1
				; CHECK-NEXT: vmov r2, s0
				; CHECK-NEXT: vmov r3, s2
				; CHECK-NEXT: cmp lr, r12
				; CHECK-NEXT: csel r4, r2, r3, lo
				; CHECK-NEXT: cmp r2, r3
				; CHECK-NEXT: csel r2, r2, r3, lo
				; CHECK-NEXT: cmp lr, r12
				; CHECK-NEXT: csel r5, r2, r4, eq
				; CHECK-NEXT: csel r3, lr, r12, lo
				; CHECK-NEXT: subs r2, r5, r0
				; CHECK-NEXT: mov.w r4, #0
				; CHECK-NEXT: sbcs.w r2, r3, r1
				; CHECK-NEXT: it lo
				; CHECK-NEXT: movlo r4, #1
				; CHECK-NEXT: cmp r4, #0
				; CHECK-NEXT: csel r0, r5, r0, ne
				; CHECK-NEXT: csel r1, r3, r1, ne
				; CHECK-NEXT: pop {r4, r5, r7, pc}
				%x = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %vec)
				%cmp = icmp ult i64 %x, %min
				%1 = select i1 %cmp, i64 %x, i64 %min
				ret i64 %1
				}

				define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) {
				; CHECK-LABEL: sminv2i64:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: .save {r4, r5, r7, lr}
				; CHECK-NEXT: push {r4, r5, r7, lr}
				; CHECK-NEXT: vmov r12, s3
				; CHECK-NEXT: vmov lr, s1
				; CHECK-NEXT: vmov r2, s0
				; CHECK-NEXT: vmov r3, s2
				; CHECK-NEXT: cmp lr, r12
				; CHECK-NEXT: csel r4, r2, r3, lt
				; CHECK-NEXT: cmp r2, r3
				; CHECK-NEXT: csel r2, r2, r3, lo
				; CHECK-NEXT: cmp lr, r12
				; CHECK-NEXT: csel r5, r2, r4, eq
				; CHECK-NEXT: csel r3, lr, r12, lt
				; CHECK-NEXT: subs r2, r5, r0
				; CHECK-NEXT: mov.w r4, #0
				; CHECK-NEXT: sbcs.w r2, r3, r1
				; CHECK-NEXT: it lt
				; CHECK-NEXT: movlt r4, #1
				; CHECK-NEXT: cmp r4, #0
				; CHECK-NEXT: csel r0, r5, r0, ne
				; CHECK-NEXT: csel r1, r3, r1, ne
				; CHECK-NEXT: pop {r4, r5, r7, pc}
				%x = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %vec)
				%cmp = icmp slt i64 %x, %min
				%1 = select i1 %cmp, i64 %x, i64 %min
				ret i64 %1
				}

				define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) {
				; CHECK-LABEL: umaxv2i64:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: .save {r4, r5, r7, lr}
				; CHECK-NEXT: push {r4, r5, r7, lr}
				; CHECK-NEXT: vmov r12, s3
				; CHECK-NEXT: vmov lr, s1
				; CHECK-NEXT: vmov r2, s0
				; CHECK-NEXT: vmov r3, s2
				; CHECK-NEXT: cmp lr, r12
				; CHECK-NEXT: csel r4, r2, r3, hi
				; CHECK-NEXT: cmp r2, r3
				; CHECK-NEXT: csel r2, r2, r3, hi
				; CHECK-NEXT: cmp lr, r12
				; CHECK-NEXT: csel r5, r2, r4, eq
				; CHECK-NEXT: csel r3, lr, r12, hi
				; CHECK-NEXT: subs r2, r0, r5
				; CHECK-NEXT: mov.w r4, #0
				; CHECK-NEXT: sbcs.w r2, r1, r3
				; CHECK-NEXT: it lo
				; CHECK-NEXT: movlo r4, #1
				; CHECK-NEXT: cmp r4, #0
				; CHECK-NEXT: csel r0, r5, r0, ne
				; CHECK-NEXT: csel r1, r3, r1, ne
				; CHECK-NEXT: pop {r4, r5, r7, pc}
				%x = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %vec)
				%cmp = icmp ugt i64 %x, %max
				%1 = select i1 %cmp, i64 %x, i64 %max
				ret i64 %1
				}

				define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) {
				; CHECK-LABEL: smaxv2i64:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: .save {r4, r5, r7, lr}
				; CHECK-NEXT: push {r4, r5, r7, lr}
				; CHECK-NEXT: vmov r12, s3
				; CHECK-NEXT: vmov lr, s1
				; CHECK-NEXT: vmov r2, s0
				; CHECK-NEXT: vmov r3, s2
				; CHECK-NEXT: cmp lr, r12
				; CHECK-NEXT: csel r4, r2, r3, gt
				; CHECK-NEXT: cmp r2, r3
				; CHECK-NEXT: csel r2, r2, r3, hi
				; CHECK-NEXT: cmp lr, r12
				; CHECK-NEXT: csel r5, r2, r4, eq
				; CHECK-NEXT: csel r3, lr, r12, gt
				; CHECK-NEXT: subs r2, r0, r5
				; CHECK-NEXT: mov.w r4, #0
				; CHECK-NEXT: sbcs.w r2, r1, r3
				; CHECK-NEXT: it lt
				; CHECK-NEXT: movlt r4, #1
				; CHECK-NEXT: cmp r4, #0
				; CHECK-NEXT: csel r0, r5, r0, ne
				; CHECK-NEXT: csel r1, r3, r1, ne
				; CHECK-NEXT: pop {r4, r5, r7, pc}
				%x = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %vec)
				%cmp = icmp sgt i64 %x, %max
				%1 = select i1 %cmp, i64 %x, i64 %max
				ret i64 %1
				}

				declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)

				declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)

				declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)

				declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)

				declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)

				declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)

				declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)

				declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)

				declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)

				declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)

				declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)

				declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)

				declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)

				declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)

				declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)

				declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Fold select_cc(vecreduce_[u|s][min|max], x) into VMINV or VMAXV
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 296132

llvm/lib/Target/ARM/ARMISelLowering.h

llvm/lib/Target/ARM/ARMISelLowering.cpp

llvm/lib/Target/ARM/ARMInstrMVE.td

llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Fold select_cc(vecreduce_[u|s][min|max], x) into VMINV or VMAXV
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 296132

llvm/lib/Target/ARM/ARMISelLowering.h

llvm/lib/Target/ARM/ARMISelLowering.cpp

llvm/lib/Target/ARM/ARMInstrMVE.td

llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll

[ARM] Fold select_cc(vecreduce_[u|s][min|max], x) into VMINV or VMAXV
ClosedPublic