Diff 293200

llvm/lib/Target/ARM/ARMISelLowering.h

Show First 20 Lines • Show All 235 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
VMLALVs, // Same as VMLAV but with i64, returning the low and		VMLALVs, // Same as VMLAV but with i64, returning the low and
VMLALVu, // high 32-bit halves of the sum		VMLALVu, // high 32-bit halves of the sum
VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask		VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask
VMLALVpu,		VMLALVpu,
VMLALVAs, // Same as VMLALV but also add an input accumulator		VMLALVAs, // Same as VMLALV but also add an input accumulator
VMLALVAu, // provided as low and high halves		VMLALVAu, // provided as low and high halves
VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask		VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
VMLALVApu,		VMLALVApu,
		VMINVu, // Find minimum unsigned value of a vector and register
		VMINVs, // Find minimum signed value of a vector and register
		VMAXVu, // Find maximum unsigned value of a vector and register
		VMAXVs, // Find maximum signed value of a vector and register

SMULWB, // Signed multiply word by half word, bottom		SMULWB, // Signed multiply word by half word, bottom
SMULWT, // Signed multiply word by half word, top		SMULWT, // Signed multiply word by half word, top
UMLAL, // 64bit Unsigned Accumulate Multiply		UMLAL, // 64bit Unsigned Accumulate Multiply
SMLAL, // 64bit Signed Accumulate Multiply		SMLAL, // 64bit Signed Accumulate Multiply
UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply		UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16		SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
SMLALBT, // 64-bit signed accumulate multiply bottom, top 16		SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
▲ Show 20 Lines • Show All 494 Lines • ▼ Show 20 Lines	SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
TLSModel::Model model) const;		TLSModel::Model model) const;
SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
		SDValue LowerSelectCCToVectorReduction(SDValue Op, SelectionDAG &DAG) const;
		dmgreenUnsubmitted Done Reply Inline Actions This is no longer used? dmgreen: This is no longer used?
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;		SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
▲ Show 20 Lines • Show All 169 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,734 Lines • ▼ Show 20 Lines	const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VMLALVs: return "ARMISD::VMLALVs";		case ARMISD::VMLALVs: return "ARMISD::VMLALVs";
case ARMISD::VMLALVu: return "ARMISD::VMLALVu";		case ARMISD::VMLALVu: return "ARMISD::VMLALVu";
case ARMISD::VMLALVps: return "ARMISD::VMLALVps";		case ARMISD::VMLALVps: return "ARMISD::VMLALVps";
case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu";		case ARMISD::VMLALVpu: return "ARMISD::VMLALVpu";
case ARMISD::VMLALVAs: return "ARMISD::VMLALVAs";		case ARMISD::VMLALVAs: return "ARMISD::VMLALVAs";
case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu";		case ARMISD::VMLALVAu: return "ARMISD::VMLALVAu";
case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps";		case ARMISD::VMLALVAps: return "ARMISD::VMLALVAps";
case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu";		case ARMISD::VMLALVApu: return "ARMISD::VMLALVApu";
		case ARMISD::VMINVu: return "ARMISD::VMINVu";
		case ARMISD::VMINVs: return "ARMISD::VMINVs";
		case ARMISD::VMAXVu: return "ARMISD::VMAXVu";
		case ARMISD::VMAXVs: return "ARMISD::VMAXVs";
case ARMISD::UMAAL: return "ARMISD::UMAAL";		case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";		case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";		case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::SMLALBB: return "ARMISD::SMLALBB";		case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
case ARMISD::SMLALBT: return "ARMISD::SMLALBT";		case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
case ARMISD::SMLALTB: return "ARMISD::SMLALTB";		case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
case ARMISD::SMLALTT: return "ARMISD::SMLALTT";		case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
case ARMISD::SMULWB: return "ARMISD::SMULWB";		case ARMISD::SMULWB: return "ARMISD::SMULWB";
▲ Show 20 Lines • Show All 3,393 Lines • ▼ Show 20 Lines	if (VT == MVT::f32)
return !Subtarget->hasVFP2Base();		return !Subtarget->hasVFP2Base();
if (VT == MVT::f64)		if (VT == MVT::f64)
return !Subtarget->hasFP64();		return !Subtarget->hasFP64();
if (VT == MVT::f16)		if (VT == MVT::f16)
return !Subtarget->hasFullFP16();		return !Subtarget->hasFullFP16();
return false;		return false;
}		}

		/// Try to turn a SELECT_CC that computes a scalar min/max against an
		/// ISD::VECREDUCE_[US]MIN / ISD::VECREDUCE_[US]MAX into one
		/// ARMISD::VMINV[us] / ARMISD::VMAXV[us] node whose operands are the scalar
		/// and the vector that was being reduced.
		///
		/// \param Op  The SELECT_CC node. Operands 0 and 1 are the compared values,
		///            operands 2 and 3 the true/false results, operand 4 the
		///            condition code.
		/// \param DAG The SelectionDAG used to create the replacement node.
		/// \returns The new ARMISD node, or an empty SDValue() when the pattern does
		///          not match.
		SDValue ARMTargetLowering::LowerSelectCCToVectorReduction(SDValue Op, SelectionDAG &DAG) const {
		dmgreenUnsubmitted Done Reply Inline Actions Please clang-format, and possible just make it a static function? dmgreen: Please clang-format, and possible just make it a static function?
		EVT VT = Op.getValueType();
		SDLoc dl(Op);
		SDValue LHS = Op.getOperand(0);
		SDValue RHS = Op.getOperand(1);
		ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
		SDValue TrueVal = Op.getOperand(2);
		SDValue FalseVal = Op.getOperand(3);

		// Pick the target opcode from the kind of reduction found on either select
		// arm together with the condition code: ULT/LT pair with the min
		// reductions, UGT/GT with the max reductions. Anything else is rejected.
		// IsUnsigned records which wrapper (AND vs SIGN_EXTEND_INREG) is acceptable
		// around the compared values further down.
		unsigned int Opcode = 0;
		bool IsUnsigned = false;
		if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN \|\|
		FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
		CC == ISD::SETULT) {
		Opcode = ARMISD::VMINVu;
		IsUnsigned = true;
		} else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN \|\|
		FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
		CC == ISD::SETLT)
		Opcode = ARMISD::VMINVs;
		else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX \|\|
		FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
		CC == ISD::SETUGT) {
		Opcode = ARMISD::VMAXVu;
		IsUnsigned = true;
		} else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX \|\|
		dmgreenUnsubmitted Done Reply Inline Actions else return false; Then it doesn't need the extra indenting. dmgreen: else return false; Then it doesn't need the extra indenting.
		samtebbsAuthorUnsubmitted Done Reply Inline Actions Thumbs up samtebbs: Thumbs up
		FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
		CC == ISD::SETGT)
		Opcode = ARMISD::VMAXVs;
		else
		return SDValue();

		// Swapping both the compared pair and the selected pair keeps the select's
		// meaning for a min/max while moving a reduction found in TrueVal over to
		// FalseVal/RHS.
		// NOTE(review): the swap fires for any of the four reduction opcodes and
		// FalseVal's opcode is not re-checked afterwards. If TrueVal is a different
		// kind of reduction than the one matched on FalseVal above (e.g. SMIN in
		// TrueVal, UMIN in FalseVal with SETULT), FalseVal appears to end up as the
		// non-matching reduction and its vector is what gets fed to the new node -
		// confirm whether a guard on FalseVal's opcode is needed here.
		// Normalise to the right hand side being the vector reduction
		switch (TrueVal->getOpcode()) {
		case ISD::VECREDUCE_UMIN:
		case ISD::VECREDUCE_SMIN:
		case ISD::VECREDUCE_UMAX:
		case ISD::VECREDUCE_SMAX:
		std::swap(LHS, RHS);
		std::swap(TrueVal, FalseVal);
		break;
		}

		// Element type of the vector operand of the (normalised) reduction.
		EVT VectorScalarType =
		dmgreenUnsubmitted Done Reply Inline Actions This looks like it need to be a bit stricter still. The AND needs to go with a umin/umax, and the sign extend with smin/smax (they shouldn't be the other way around). The AND needs a mask of the right size (it'll be 255 for i8 for example, which is the same as a "zero extend inreg"). The sign extend will have a type as the second argument, which should be the same as the vecreduce's scalar type. Some of this might be difficult to get to come up in practice, but we should make sure it won't be subtly wrong and cause bugs. dmgreen: This looks like it need to be a bit stricter still. The AND needs to go with a umin/umax, and…
		samtebbsAuthorUnsubmitted Done Reply Inline Actions I thought about those two but hoped that the IR type system would make those situations impossible. Will add these, thanks. samtebbs: I thought about those two but hoped that the IR type system would make those situations…
		FalseVal->getOperand(0)->getValueType(0).getVectorElementType();

		// Returns true when Compared is an acceptable form of Selected:
		//  - an AND of Selected with a constant for which APInt::isMask() holds at
		//    the vector element bit width (unsigned opcodes only),
		//  - a SIGN_EXTEND_INREG of Selected whose VT operand equals the vector
		//    element type (signed opcodes only), or
		//  - Selected itself.
		auto OperandsAreValid = [&](SDValue Compared, SDValue Selected) {
		switch (Compared->getOpcode()) {
		case ISD::AND:
		if (!IsUnsigned)
		return false;
		if (Selected != Compared->getOperand(0))
		return false;
		dmgreenUnsubmitted Not Done Reply Inline Actions -> FalseVal->getOperand(0).getValueType().getVectorElementType(); dmgreen: -> FalseVal->getOperand(0).getValueType().getVectorElementType();
		if (auto Mask = dyn_cast<ConstantSDNode>(Compared->getOperand(1)))
		return Mask->getAPIntValue().isMask(
		VectorScalarType.getScalarSizeInBits());
		return false;
		case ISD::SIGN_EXTEND_INREG:
		if (IsUnsigned)
		dmgreenUnsubmitted Done Reply Inline Actions Hmm. This is getting complicated. I would suggest changing this to something more like: if (Type == i32) return Selected == Compared; else // Check the opcodes are and/signexted/assertzext/assertsext. But I don't think that will work very well. For i8/i16 I think we might actually need to be matching: t7: i32 = AssertZext t5, ValueType:ch:i8 t17: i32 = vecreduce_umin t3 t25: i32 = and t17, Constant:i32<255> t27: i32 = select_cc t25, t7, t17, t7, setult:ch t21: i32 = and t27, Constant:i32<255> Including that root "and" (which is the same as the two TrueVal/FalseVal sides of the select_cc being extended). But one loop I tried had no visible way I could see to prove the value needed to be a i16. It might be easier to do this from a combine, not during lowering. So before we have legalized the types. What do you think about getting i32 working first with this patch, and doing i8/i16 as a followup? That way we can get all the boilerplate out of the way, and it's just this code that produces the ARMISD::VMINVs that we need to work further on. dmgreen: Hmm. This is getting complicated. I would suggest changing this to something more like: if…
		return false;
		if (Selected != Compared->getOperand(0))
		return false;
		EVT ExtendedType = cast<VTSDNode>(Compared->getOperand(1))->getVT();
		return ExtendedType == VectorScalarType;
		}
		return Selected == Compared;
		};

		// Make sure that the values being compared are those being selected,
		// otherwise it isn't a min/max. Sometimes the left and/or right side wrap
		// the scalar reduction in another operation, like an AND or sign-extension,
		// so check the first operand in those cases
		if (!OperandsAreValid(LHS, TrueVal) \|\| !OperandsAreValid(RHS, FalseVal))
		return SDValue();

		// Build the target node from the compared scalar (LHS, not TrueVal) and the
		// vector operand of the reduction.
		return DAG.getNode(Opcode, dl, VT, LHS, FalseVal->getOperand(0));
		}

SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {		SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();
SDLoc dl(Op);		SDLoc dl(Op);

// Try to convert two saturating conditional selects into a single SSAT		// Try to convert two saturating conditional selects into a single SSAT
SDValue SatValue;		SDValue SatValue;
uint64_t SatConstant;		uint64_t SatConstant;
bool SatUSat;		bool SatUSat;
Show All 21 Lines	if (VT == MVT::i32 &&
if (isNullConstant(LowerSatConstant)) {		if (isNullConstant(LowerSatConstant)) {
SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,		SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
DAG.getAllOnesConstant(dl, VT));		DAG.getAllOnesConstant(dl, VT));
return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);		return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
} else if (isAllOnesConstant(LowerSatConstant))		} else if (isAllOnesConstant(LowerSatConstant))
return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);		return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
}		}

		if (Subtarget->hasMVEIntegerOps())
		if (SDValue Reduction = LowerSelectCCToVectorReduction(Op, DAG))
		return Reduction;

SDValue LHS = Op.getOperand(0);		SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);		SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();		ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);		SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);		SDValue FalseVal = Op.getOperand(3);
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);		ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);		ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);

if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&		if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
		dmgreenUnsubmitted Done Reply Inline Actions It might be worth pulling this out into a separate function, similar to isLowerSaturatingConditional or something like PerformSplittingToNarrowingStores where we return the new SDValue. dmgreen: It might be worth pulling this out into a separate function, similar to…
		samtebbsAuthorUnsubmitted Done Reply Inline Actions That's much cleaner! Done. samtebbs: That's much cleaner! Done.
LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {		LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
		dmgreenUnsubmitted Done Reply Inline Actions This could do with a few extra tests to make sure it's a min/max. LHS==TrueVal and RHS==FalseVal for example. Plus it's worth checking if commuted min/max work too. min(vecreduce.min, x) or min(x, vecreduce.min) dmgreen: This could do with a few extra tests to make sure it's a min/max. LHS==TrueVal and…
		samtebbsAuthorUnsubmitted Done Reply Inline Actions Thanks for pointing that out, done now. samtebbs: Thanks for pointing that out, done now.
unsigned TVal = CTVal->getZExtValue();		unsigned TVal = CTVal->getZExtValue();
unsigned FVal = CFVal->getZExtValue();		unsigned FVal = CFVal->getZExtValue();
unsigned Opcode = 0;		unsigned Opcode = 0;

if (TVal == ~FVal) {		if (TVal == ~FVal) {
Opcode = ARMISD::CSINV;		Opcode = ARMISD::CSINV;
} else if (TVal == ~FVal + 1) {		} else if (TVal == ~FVal + 1) {
Opcode = ARMISD::CSNEG;		Opcode = ARMISD::CSNEG;
Show All 14 Lines	if (Opcode) {
std::swap(TrueVal, FalseVal);		std::swap(TrueVal, FalseVal);
std::swap(TVal, FVal);		std::swap(TVal, FVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());		CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}		}

// Attempt to use ZR checking TVal is 0, possibly inverting the condition		// Attempt to use ZR checking TVal is 0, possibly inverting the condition
// to get there. CSINC is not invertible like the other two (~(~a) == a,		// to get there. CSINC is not invertible like the other two (~(~a) == a,
// -(-a) == a, but (a+1)+1 != a).		// -(-a) == a, but (a+1)+1 != a).
if (FVal == 0 && Opcode != ARMISD::CSINC) {		if (FVal == 0 && Opcode != ARMISD::CSINC) {
std::swap(TrueVal, FalseVal);		std::swap(TrueVal, FalseVal);
std::swap(TVal, FVal);		std::swap(TVal, FVal);
		dmgreenUnsubmitted Done Reply Inline Actions This could use std::swap? dmgreen: This could use std::swap?
		samtebbsAuthorUnsubmitted Done Reply Inline Actions Ah I didn't know that existed. Done. samtebbs: Ah I didn't know that existed. Done.
CC = ISD::getSetCCInverse(CC, LHS.getValueType());		CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}		}
if (TVal == 0)		if (TVal == 0)
TrueVal = DAG.getRegister(ARM::ZR, MVT::i32);		TrueVal = DAG.getRegister(ARM::ZR, MVT::i32);

// Drops F's value because we can get it by inverting/negating TVal.		// Drops F's value because we can get it by inverting/negating TVal.
FalseVal = TrueVal;		FalseVal = TrueVal;

SDValue ARMcc;		SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);		SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
EVT VT = TrueVal.getValueType();		EVT VT = TrueVal.getValueType();
		dmgreenUnsubmitted Done Reply Inline Actions I think we might need to check these things, otherwise we might be matching things incorrectly. I believe the exact semantics of a vminv.s8 are that it reads the bottom 8 bits of Rn to do the final scalar min. So a 32bit min could actually produce a different value. We should make sure we are getting this correct too. dmgreen: I think we might need to check these things, otherwise we might be matching things incorrectly.
		samtebbsAuthorUnsubmitted Done Reply Inline Actions Thanks. I've added explicit checks for the LHS opcode and have made the lowering use the LHS instead of the TrueVal as the scalar so that we know the top 24 bits aren't set. samtebbs: Thanks. I've added explicit checks for the LHS opcode and have made the lowering use the LHS…
return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);		return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
}		}
}		}

if (isUnsupportedFloatingType(LHS.getValueType())) {		if (isUnsupportedFloatingType(LHS.getValueType())) {
DAG.getTargetLoweringInfo().softenSetCCOperands(		DAG.getTargetLoweringInfo().softenSetCCOperands(
DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);		DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);

▲ Show 20 Lines • Show All 6,847 Lines • ▼ Show 20 Lines	static SDValue PerformVSELECTCombine(SDNode *N,
const ARMSubtarget *Subtarget) {		const ARMSubtarget *Subtarget) {
// Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).		// Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
//		//
// We need to re-implement this optimization here as the implementation in the		// We need to re-implement this optimization here as the implementation in the
// Target-Independent DAGCombiner does not handle the kind of constant we make		// Target-Independent DAGCombiner does not handle the kind of constant we make
// (it calls isConstOrConstSplat with AllowTruncation set to false - and for		// (it calls isConstOrConstSplat with AllowTruncation set to false - and for
// good reason, allowing truncation there would break other targets).		// good reason, allowing truncation there would break other targets).
//		//
// Currently, this is only done for MVE, as it's the only target that benefits		// Currently, this is only done for MVE, as it's the only target that benefits
		dmgreenUnsubmitted Done Reply Inline Actions I think it needs to check that operand is a Setcc too, to be sure it's not something strange. dmgreen: I think it needs to check that operand is a Setcc too, to be sure it's not something strange.
		samtebbsAuthorUnsubmitted Done Reply Inline Actions Good idea. samtebbs: Good idea.
// from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).		// from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
if (!Subtarget->hasMVEIntegerOps())		if (!Subtarget->hasMVEIntegerOps())
return SDValue();		return SDValue();

if (N->getOperand(0).getOpcode() != ISD::XOR)		if (N->getOperand(0).getOpcode() != ISD::XOR)
return SDValue();		return SDValue();
SDValue XOR = N->getOperand(0);		SDValue XOR = N->getOperand(0);

▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	static SDValue PerformADDECombine(SDNode *N,
if (DCI.isBeforeLegalize()) return SDValue();		if (DCI.isBeforeLegalize()) return SDValue();

return AddCombineTo64bitUMAAL(N, DCI, Subtarget);		return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
}		}

/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with		/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is		/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted		/// called with the default operands, and if that fails, with commuted
/// operands.		/// operands.
		dmgreenUnsubmitted Not Done Reply Inline Actions -> getOperand(0).getValueType() dmgreen: -> getOperand(0).getValueType()
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,		static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget){		const ARMSubtarget *Subtarget){
// Attempt to create vpadd for this add.		// Attempt to create vpadd for this add.
if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))		if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
return Result;		return Result;

// Attempt to create vpaddl for this add.		// Attempt to create vpaddl for this add.
if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))		if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
return Result;		return Result;
if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,		if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
Subtarget))		Subtarget))
		dmgreenUnsubmitted Done Reply Inline Actions The top bits of this are not read by the instruction. Can we change it to an ANY_EXTEND? dmgreen: The top bits of this are not read by the instruction. Can we change it to an ANY_EXTEND?
		samtebbsAuthorUnsubmitted Done Reply Inline Actions Sure. samtebbs: Sure.
return Result;		return Result;
		dmgreenUnsubmitted Done Reply Inline Actions I think the last operand isn't needed here. dmgreen: I think the last operand isn't needed here.
		samtebbsAuthorUnsubmitted Done Reply Inline Actions You are indeed correct. samtebbs: You are indeed correct.

// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))		// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
if (N0.getNode()->hasOneUse())		if (N0.getNode()->hasOneUse())
if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))		if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
return Result;		return Result;
return SDValue();		return SDValue();
}		}

▲ Show 20 Lines • Show All 3,857 Lines • ▼ Show 20 Lines
}		}

SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,		SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {		DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {		switch (N->getOpcode()) {
default: break;		default: break;
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);		case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);		case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);		case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
		dmgreenUnsubmitted Done Reply Inline Actions Move the hasMVEIntegerOps check into the function? So it can look like the rest. dmgreen: Move the hasMVEIntegerOps check into the function? So it can look like the rest.
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);		case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);		case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);		case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);		case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);		case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);		case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);		case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ISD::BRCOND:		case ISD::BRCOND:
▲ Show 20 Lines • Show All 3,017 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMInstrMVE.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 938 Lines • ▼ Show 20 Lines	multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> {
defm s8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16s8, intrBaseName>;		defm s8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16s8, intrBaseName>;
defm s16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8s16, intrBaseName>;		defm s16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8s16, intrBaseName>;
defm s32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4s32, intrBaseName>;		defm s32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4s32, intrBaseName>;
defm u8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16u8, intrBaseName>;		defm u8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16u8, intrBaseName>;
defm u16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8u16, intrBaseName>;		defm u16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8u16, intrBaseName>;
defm u32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4u32, intrBaseName>;		defm u32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4u32, intrBaseName>;
}		}

		// Type profile shared by the scalar-plus-vector min/max reduction nodes:
		// one result and two operands, where the result (index 0) and the first
		// operand (index 1) are integers and the second operand (index 2) is a
		// vector.
		def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer
		SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>
		]>;
		// SelectionDAG node definitions binding the ARMISD::VMINV[us] and
		// ARMISD::VMAXV[us] opcodes to the profile above.
		def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>;
		def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>;
		def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>;
		def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>;

defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">;		defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">;
defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">;		defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">;

let Predicates = [HasMVEInt] in {		let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),		def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
(i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>;		(i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), $src))>;
def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))),		def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))),
(i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>;		(i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>;
Show All 14 Lines	def : Pat<(i32 (vecreduce_smin (v4i32 MQPR:$src))),
(i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>;		(i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>;
def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))),		def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))),
(i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>;		(i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>;
def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))),		def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))),
(i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>;		(i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>;
def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),		def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))),
(i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;		(i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>;

		// Select the ARMVMINVu/ARMVMINVs nodes straight to the matching
		// MVE_VMINV instruction for each vector type (v16i8, v8i16, v4i32).
		// The i32 scalar $x becomes the instruction's first operand and the
		// vector $src its second; no constant seed is materialised here.
		def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
		(i32 (MVE_VMINVu8 $x, $src))>;
		def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
		(i32 (MVE_VMINVu16 $x, $src))>;
		def : Pat<(i32 (ARMVMINVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
		(i32 (MVE_VMINVu32 $x, $src))>;
		def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
		(i32 (MVE_VMINVs8 $x, $src))>;
		def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
		(i32 (MVE_VMINVs16 $x, $src))>;
		def : Pat<(i32 (ARMVMINVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
		(i32 (MVE_VMINVs32 $x, $src))>;

		// Select the ARMVMAXVu/ARMVMAXVs nodes straight to the matching
		// MVE_VMAXV instruction for each vector type (v16i8, v8i16, v4i32).
		// The i32 scalar $x becomes the instruction's first operand and the
		// vector $src its second; no constant seed is materialised here.
		def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v16i8 MQPR:$src))),
		(i32 (MVE_VMAXVu8 $x, $src))>;
		def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v8i16 MQPR:$src))),
		(i32 (MVE_VMAXVu16 $x, $src))>;
		def : Pat<(i32 (ARMVMAXVu (i32 rGPR:$x), (v4i32 MQPR:$src))),
		(i32 (MVE_VMAXVu32 $x, $src))>;
		def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v16i8 MQPR:$src))),
		(i32 (MVE_VMAXVs8 $x, $src))>;
		def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v8i16 MQPR:$src))),
		(i32 (MVE_VMAXVs16 $x, $src))>;
		def : Pat<(i32 (ARMVMAXVs (i32 rGPR:$x), (v4i32 MQPR:$src))),
		(i32 (MVE_VMAXVs32 $x, $src))>;

}		}

multiclass MVE_VMINMAXAV_ty<string iname, bit isMin, string intrBaseName> {		multiclass MVE_VMINMAXAV_ty<string iname, bit isMin, string intrBaseName> {
defm s8 : MVE_VMINMAXV_p<iname, 0, isMin, MVE_v16s8, intrBaseName>;		defm s8 : MVE_VMINMAXV_p<iname, 0, isMin, MVE_v16s8, intrBaseName>;
defm s16: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v8s16, intrBaseName>;		defm s16: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v8s16, intrBaseName>;
defm s32: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v4s32, intrBaseName>;		defm s32: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v4s32, intrBaseName>;
}		}

▲ Show 20 Lines • Show All 6,349 Lines • Show Last 20 Lines

llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll

This file was added.

				; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

				; Unsigned min of an i8 vector reduction and the scalar %min, written as
				; icmp ult + select. Expected code: zero-extend %min (uxtb), then one vminv.u8.
				define arm_aapcs_vfpcc i8 @uminv16i8(<16 x i8> %vec, i8 %min) {
				; CHECK-LABEL: uminv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: vminv.u8 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec)
				%cmp = icmp ult i8 %x, %min
				%1 = select i1 %cmp, i8 %x, i8 %min
				ret i8 %1
				}

				; Unsigned min of an i16 vector reduction and the scalar %min, written as
				; icmp ult + select. Expected code: zero-extend %min (uxth), then one vminv.u16.
				define arm_aapcs_vfpcc i16 @uminv8i16(<8 x i16> %vec, i16 %min) {
				; CHECK-LABEL: uminv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: vminv.u16 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec)
				%cmp = icmp ult i16 %x, %min
				%1 = select i1 %cmp, i16 %x, i16 %min
				ret i16 %1
				}

				; Unsigned min of an i32 vector reduction and the scalar %min, written as
				; icmp ult + select. Expected code: a single vminv.u32 with no extension.
				define arm_aapcs_vfpcc i32 @uminv4i32(<4 x i32> %vec, i32 %min) {
				; CHECK-LABEL: uminv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec)
				%cmp = icmp ult i32 %x, %min
				%1 = select i1 %cmp, i32 %x, i32 %min
				ret i32 %1
				}

				; Signed min of an i8 vector reduction and the scalar %min, written as
				; icmp slt + select. Expected code: sign-extend %min (sxtb), then one vminv.s8.
				define arm_aapcs_vfpcc i8 @sminv16i8(<16 x i8> %vec, i8 %min) {
				; CHECK-LABEL: sminv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: vminv.s8 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec)
				%cmp = icmp slt i8 %x, %min
				%1 = select i1 %cmp, i8 %x, i8 %min
				ret i8 %1
				}

				; Signed min of an i16 vector reduction and the scalar %min, written as
				; icmp slt + select. Expected code: sign-extend %min (sxth), then one vminv.s16.
				define arm_aapcs_vfpcc i16 @sminv8i16(<8 x i16> %vec, i16 %min) {
				; CHECK-LABEL: sminv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: vminv.s16 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec)
				%cmp = icmp slt i16 %x, %min
				%1 = select i1 %cmp, i16 %x, i16 %min
				ret i16 %1
				}

				; Signed min of an i32 vector reduction and the scalar %min, written as
				; icmp slt + select. Expected code: a single vminv.s32 with no extension.
				define arm_aapcs_vfpcc i32 @sminv4i32(<4 x i32> %vec, i32 %min) {
				; CHECK-LABEL: sminv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec)
				%cmp = icmp slt i32 %x, %min
				%1 = select i1 %cmp, i32 %x, i32 %min
				ret i32 %1
				}

				; Unsigned max of an i8 vector reduction and the scalar %max, written as
				; icmp ugt + select. Expected code: zero-extend %max (uxtb), then one vmaxv.u8.
				define arm_aapcs_vfpcc i8 @umaxv16i8(<16 x i8> %vec, i8 %max) {
				; CHECK-LABEL: umaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: vmaxv.u8 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec)
				%cmp = icmp ugt i8 %x, %max
				%1 = select i1 %cmp, i8 %x, i8 %max
				ret i8 %1
				}

				; Unsigned max of an i16 vector reduction and the scalar %max, written as
				; icmp ugt + select. Expected code: zero-extend %max (uxth), then one vmaxv.u16.
				define arm_aapcs_vfpcc i16 @umaxv8i16(<8 x i16> %vec, i16 %max) {
				; CHECK-LABEL: umaxv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: vmaxv.u16 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec)
				%cmp = icmp ugt i16 %x, %max
				%1 = select i1 %cmp, i16 %x, i16 %max
				ret i16 %1
				}

				; Unsigned max of an i32 vector reduction and the scalar %max, written as
				; icmp ugt + select. Expected code: a single vmaxv.u32 with no extension.
				define arm_aapcs_vfpcc i32 @umaxv4i32(<4 x i32> %vec, i32 %max) {
				; CHECK-LABEL: umaxv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec)
				%cmp = icmp ugt i32 %x, %max
				%1 = select i1 %cmp, i32 %x, i32 %max
				ret i32 %1
				}

				; Signed max of an i8 vector reduction and the scalar %max, written as
				; icmp sgt + select. Expected code: sign-extend %max (sxtb), then one vmaxv.s8.
				define arm_aapcs_vfpcc i8 @smaxv16i8(<16 x i8> %vec, i8 %max) {
				; CHECK-LABEL: smaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: vmaxv.s8 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec)
				%cmp = icmp sgt i8 %x, %max
				%1 = select i1 %cmp, i8 %x, i8 %max
				ret i8 %1
				}

				; Signed max of an i16 vector reduction and the scalar %max, written as
				; icmp sgt + select. Expected code: sign-extend %max (sxth), then one vmaxv.s16.
				define arm_aapcs_vfpcc i16 @smaxv8i16(<8 x i16> %vec, i16 %max) {
				; CHECK-LABEL: smaxv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: vmaxv.s16 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec)
				%cmp = icmp sgt i16 %x, %max
				%1 = select i1 %cmp, i16 %x, i16 %max
				ret i16 %1
				}

				; Signed max of an i32 vector reduction and the scalar %max, written as
				; icmp sgt + select. Expected code: a single vmaxv.s32 with no extension.
				define arm_aapcs_vfpcc i32 @smaxv4i32(<4 x i32> %vec, i32 %max) {
				; CHECK-LABEL: smaxv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec)
				%cmp = icmp sgt i32 %x, %max
				%1 = select i1 %cmp, i32 %x, i32 %max
				ret i32 %1
				}

				; Commuted form of the i8 unsigned-min fold: the scalar %min is the first
				; operand of both the icmp ult and the select. Expected code: uxtb + vminv.u8.
				define arm_aapcs_vfpcc i8 @commute_uminv16i8(<16 x i8> %vec, i8 %min) {
				; CHECK-LABEL: commute_uminv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: vminv.u8 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %vec)
				%cmp = icmp ult i8 %min, %x
				%1 = select i1 %cmp, i8 %min, i8 %x
				ret i8 %1
				}

				; Commuted form of the i16 unsigned-min fold: the scalar %min is the first
				; operand of both the icmp ult and the select. Expected code: uxth + vminv.u16.
				define arm_aapcs_vfpcc i16 @commute_uminv8i16(<8 x i16> %vec, i16 %min) {
				; CHECK-LABEL: commute_uminv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: vminv.u16 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %vec)
				%cmp = icmp ult i16 %min, %x
				%1 = select i1 %cmp, i16 %min, i16 %x
				ret i16 %1
				}

				; Commuted form of the i32 unsigned-min fold: the scalar %min is the first
				; operand of both the icmp ult and the select. Expected code: one vminv.u32.
				define arm_aapcs_vfpcc i32 @commute_uminv4i32(<4 x i32> %vec, i32 %min) {
				; CHECK-LABEL: commute_uminv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %vec)
				%cmp = icmp ult i32 %min, %x
				%1 = select i1 %cmp, i32 %min, i32 %x
				ret i32 %1
				}

				; Commuted form of the i8 signed-min fold: the scalar %min is the first
				; operand of both the icmp slt and the select. Expected code: sxtb + vminv.s8.
				define arm_aapcs_vfpcc i8 @commute_sminv16i8(<16 x i8> %vec, i8 %min) {
				; CHECK-LABEL: commute_sminv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: vminv.s8 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %vec)
				%cmp = icmp slt i8 %min, %x
				%1 = select i1 %cmp, i8 %min, i8 %x
				ret i8 %1
				}

				; Commuted form of the i16 signed-min fold: the scalar %min is the first
				; operand of both the icmp slt and the select. Expected code: sxth + vminv.s16.
				define arm_aapcs_vfpcc i16 @commute_sminv8i16(<8 x i16> %vec, i16 %min) {
				; CHECK-LABEL: commute_sminv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: vminv.s16 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %vec)
				%cmp = icmp slt i16 %min, %x
				%1 = select i1 %cmp, i16 %min, i16 %x
				ret i16 %1
				}

				; Commuted form of the i32 signed-min fold: the scalar %min is the first
				; operand of both the icmp slt and the select. Expected code: one vminv.s32.
				define arm_aapcs_vfpcc i32 @commute_sminv4i32(<4 x i32> %vec, i32 %min) {
				; CHECK-LABEL: commute_sminv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vminv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %vec)
				%cmp = icmp slt i32 %min, %x
				%1 = select i1 %cmp, i32 %min, i32 %x
				ret i32 %1
				}

				; Commuted form of the i8 unsigned-max fold: the scalar %max is the first
				; operand of both the icmp ugt and the select. Expected code: uxtb + vmaxv.u8.
				define arm_aapcs_vfpcc i8 @commute_umaxv16i8(<16 x i8> %vec, i8 %max) {
				; CHECK-LABEL: commute_umaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: uxtb r0, r0
				; CHECK-NEXT: vmaxv.u8 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %vec)
				%cmp = icmp ugt i8 %max, %x
				%1 = select i1 %cmp, i8 %max, i8 %x
				ret i8 %1
				}

				; Commuted form of the i16 unsigned-max fold: the scalar %max is the first
				; operand of both the icmp ugt and the select. Expected code: uxth + vmaxv.u16.
				define arm_aapcs_vfpcc i16 @commute_umaxv8i16(<8 x i16> %vec, i16 %max) {
				; CHECK-LABEL: commute_umaxv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: uxth r0, r0
				; CHECK-NEXT: vmaxv.u16 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %vec)
				%cmp = icmp ugt i16 %max, %x
				%1 = select i1 %cmp, i16 %max, i16 %x
				ret i16 %1
				}

				; Commuted form of the i32 unsigned-max fold: the scalar %max is the first
				; operand of both the icmp ugt and the select. Expected code: one vmaxv.u32.
				define arm_aapcs_vfpcc i32 @commute_umaxv4i32(<4 x i32> %vec, i32 %max) {
				; CHECK-LABEL: commute_umaxv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.u32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %vec)
				%cmp = icmp ugt i32 %max, %x
				%1 = select i1 %cmp, i32 %max, i32 %x
				ret i32 %1
				}

				; Commuted form of the i8 signed-max fold: the scalar %max is the first
				; operand of both the icmp sgt and the select. Expected code: sxtb + vmaxv.s8.
				define arm_aapcs_vfpcc i8 @commute_smaxv16i8(<16 x i8> %vec, i8 %max) {
				; CHECK-LABEL: commute_smaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: sxtb r0, r0
				; CHECK-NEXT: vmaxv.s8 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec)
				%cmp = icmp sgt i8 %max, %x
				%1 = select i1 %cmp, i8 %max, i8 %x
				ret i8 %1
				}

				; Commuted form of the i16 signed-max fold: the scalar %max is the first
				; operand of both the icmp sgt and the select. Expected code: sxth + vmaxv.s16.
				define arm_aapcs_vfpcc i16 @commute_smaxv8i16(<8 x i16> %vec, i16 %max) {
				; CHECK-LABEL: commute_smaxv8i16:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: sxth r0, r0
				; CHECK-NEXT: vmaxv.s16 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %vec)
				%cmp = icmp sgt i16 %max, %x
				%1 = select i1 %cmp, i16 %max, i16 %x
				ret i16 %1
				}

				; Commuted form of the i32 signed-max fold: the scalar %max is the first
				; operand of both the icmp sgt and the select. Expected code: one vmaxv.s32.
				define arm_aapcs_vfpcc i32 @commute_smaxv4i32(<4 x i32> %vec, i32 %max) {
				; CHECK-LABEL: commute_smaxv4i32:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: vmaxv.s32 r0, q0
				; CHECK-NEXT: bx lr
				%x = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %vec)
				%cmp = icmp sgt i32 %max, %x
				%1 = select i1 %cmp, i32 %max, i32 %x
				ret i32 %1
				}

				; Negative test: the compare is sgt(%x, %max) but the select returns %max
				; when it is true, so the result is the smaller of the two values rather
				; than a signed max. The scalar must not be folded into the reduction:
				; vmaxv.s8 keeps its constant seed (mvn #127) and a separate cmp/csel remains.
				define arm_aapcs_vfpcc i8 @mismatch_smaxv16i8(<16 x i8> %vec, i8 %max) {
				; CHECK-LABEL: mismatch_smaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: mvn r1, #127
				; CHECK-NEXT: sxtb r3, r0
				; CHECK-NEXT: vmaxv.s8 r1, q0
				; CHECK-NEXT: sxtb r2, r1
				; CHECK-NEXT: cmp r2, r3
				; CHECK-NEXT: csel r0, r0, r1, gt
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec)
				%cmp = icmp sgt i8 %x, %max
				%1 = select i1 %cmp, i8 %max, i8 %x
				ret i8 %1
				}

				; Negative test, commuted: the compare is sgt(%max, %x) but the select
				; returns %x when it is true, again yielding the smaller value rather than
				; a signed max. The scalar must not be folded into the reduction:
				; vmaxv.s8 keeps its constant seed (mvn #127) and a separate cmp/csel remains.
				define arm_aapcs_vfpcc i8 @mismatch2_smaxv16i8(<16 x i8> %vec, i8 %max) {
				; CHECK-LABEL: mismatch2_smaxv16i8:
				; CHECK: @ %bb.0:
				; CHECK-NEXT: mvn r1, #127
				; CHECK-NEXT: sxtb r3, r0
				; CHECK-NEXT: vmaxv.s8 r1, q0
				; CHECK-NEXT: sxtb r2, r1
				; CHECK-NEXT: cmp r3, r2
				; CHECK-NEXT: csel r0, r1, r0, gt
				; CHECK-NEXT: bx lr
				%x = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %vec)
				%cmp = icmp sgt i8 %max, %x
				%1 = select i1 %cmp, i8 %x, i8 %max
				ret i8 %1
				}

				; Declarations of the vector reduction intrinsics called by the tests in
				; this file: umin, smin, umax and smax over v16i8, v8i16 and v4i32.
				declare i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8>)

				declare i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16>)

				declare i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32>)

				declare i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8>)

				declare i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16>)

				declare i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32>)

				declare i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8>)

				declare i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16>)

				declare i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32>)

				declare i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8>)

				declare i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16>)

				declare i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32>)

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Fold select_cc(vecreduce_[u|s][min|max], x) into VMINV or VMAXV
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 293200

llvm/lib/Target/ARM/ARMISelLowering.h

llvm/lib/Target/ARM/ARMISelLowering.cpp

llvm/lib/Target/ARM/ARMInstrMVE.td

llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ARM] Fold select_cc(vecreduce_[u|s][min|max], x) into VMINV or VMAXV
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 293200

llvm/lib/Target/ARM/ARMISelLowering.h

llvm/lib/Target/ARM/ARMISelLowering.cpp

llvm/lib/Target/ARM/ARMInstrMVE.td

llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll

[ARM] Fold select_cc(vecreduce_[u|s][min|max], x) into VMINV or VMAXV
ClosedPublic