Diff 181471

include/llvm/CodeGen/TargetLowering.h

	Show First 20 Lines • Show All 3,835 Lines • ▼ Show 20 Lines
	/// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of			/// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of
	/// bounds the returned pointer is unspecified, but will be within the vector			/// bounds the returned pointer is unspecified, but will be within the vector
	/// bounds.			/// bounds.
	SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,			SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
	SDValue Index) const;			SDValue Index) const;

	/// Method for building the DAG expansion of ISD::[US][ADD\|SUB]SAT. This			/// Method for building the DAG expansion of ISD::[US][ADD\|SUB]SAT. This
	/// method accepts integers as its arguments.			/// method accepts integers as its arguments.
	SDValue getExpandedSaturationAdditionSubtraction(SDNode *Node,			SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
	SelectionDAG &DAG) const;

	/// Method for building the DAG expansion of ISD::SMULFIX. This method accepts			/// Method for building the DAG expansion of ISD::SMULFIX. This method accepts
	/// integers as its arguments.			/// integers as its arguments.
	SDValue getExpandedFixedPointMultiplication(SDNode *Node,			SDValue getExpandedFixedPointMultiplication(SDNode *Node,
	SelectionDAG &DAG) const;			SelectionDAG &DAG) const;

	//===--------------------------------------------------------------------===//			//===--------------------------------------------------------------------===//
	// Instruction Emitting Hooks			// Instruction Emitting Hooks
	▲ Show 20 Lines • Show All 69 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Show First 20 Lines • Show All 3,307 Lines • ▼ Show 20 Lines	bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::ROTL:		case ISD::ROTL:
case ISD::ROTR:		case ISD::ROTR:
if (TLI.expandROT(Node, Tmp1, DAG))		if (TLI.expandROT(Node, Tmp1, DAG))
Results.push_back(Tmp1);		Results.push_back(Tmp1);
break;		break;
case ISD::SADDSAT:		case ISD::SADDSAT:
case ISD::UADDSAT:		case ISD::UADDSAT:
case ISD::SSUBSAT:		case ISD::SSUBSAT:
case ISD::USUBSAT: {		case ISD::USUBSAT:
Results.push_back(TLI.getExpandedSaturationAdditionSubtraction(Node, DAG));		Results.push_back(TLI.expandAddSubSat(Node, DAG));
break;		break;
}
case ISD::SMULFIX: {		case ISD::SMULFIX: {
Results.push_back(TLI.getExpandedFixedPointMultiplication(Node, DAG));		Results.push_back(TLI.getExpandedFixedPointMultiplication(Node, DAG));
break;		break;
}		}
case ISD::SADDO:		case ISD::SADDO:
case ISD::SSUBO: {		case ISD::SSUBO: {
SDValue LHS = Node->getOperand(0);		SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);		SDValue RHS = Node->getOperand(1);
▲ Show 20 Lines • Show All 1,314 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Show First 20 Lines • Show All 2,590 Lines • ▼ Show 20 Lines	void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0));		SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0));
Lo = R.getValue(0);		Lo = R.getValue(0);
Hi = R.getValue(1);		Hi = R.getValue(1);
ReplaceValueWith(SDValue(N, 1), R.getValue(2));		ReplaceValueWith(SDValue(N, 1), R.getValue(2));
}		}

void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,		void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
SDValue &Hi) {		SDValue &Hi) {
SDValue Result = TLI.getExpandedSaturationAdditionSubtraction(N, DAG);		SDValue Result = TLI.expandAddSubSat(N, DAG);
SplitInteger(Result, Lo, Hi);		SplitInteger(Result, Lo, Hi);
}		}

void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,		void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
SDValue &Hi) {		SDValue &Hi) {
SDLoc dl(N);		SDLoc dl(N);
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);		SDValue LHS = N->getOperand(0);
▲ Show 20 Lines • Show All 1,274 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Show First 20 Lines • Show All 129 Lines • ▼ Show 20 Lines	class VectorLegalizer {
SDValue ExpandFSUB(SDValue Op);		SDValue ExpandFSUB(SDValue Op);
SDValue ExpandBITREVERSE(SDValue Op);		SDValue ExpandBITREVERSE(SDValue Op);
SDValue ExpandCTPOP(SDValue Op);		SDValue ExpandCTPOP(SDValue Op);
SDValue ExpandCTLZ(SDValue Op);		SDValue ExpandCTLZ(SDValue Op);
SDValue ExpandCTTZ(SDValue Op);		SDValue ExpandCTTZ(SDValue Op);
SDValue ExpandFunnelShift(SDValue Op);		SDValue ExpandFunnelShift(SDValue Op);
SDValue ExpandROT(SDValue Op);		SDValue ExpandROT(SDValue Op);
SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);		SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
		SDValue ExpandAddSubSat(SDValue Op);
SDValue ExpandStrictFPOp(SDValue Op);		SDValue ExpandStrictFPOp(SDValue Op);

/// Implements vector promotion.		/// Implements vector promotion.
///		///
/// This is essentially just bitcasting the operands to a different type and		/// This is essentially just bitcasting the operands to a different type and
/// bitcasting the result back to the original type.		/// bitcasting the result back to the original type.
SDValue Promote(SDValue Op);		SDValue Promote(SDValue Op);

▲ Show 20 Lines • Show All 179 Lines • ▼ Show 20 Lines	case ISD::STRICT_FTRUNC:
// These pseudo-ops get legalized as if they were their non-strict		// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT		// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does		// is also legal, but if ISD::FSQRT requires expansion then so does
// ISD::STRICT_FSQRT.		// ISD::STRICT_FSQRT.
Action = TLI.getStrictFPOperationAction(Node->getOpcode(),		Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
Node->getValueType(0));		Node->getValueType(0));
break;		break;
case ISD::ADD:		case ISD::ADD:
case ISD::SUB:		case ISD::SUB:
		RKSimonUnsubmitted Done Reply Inline Actions Add ISD::USUBSAT et al here as well? RKSimon: Add ISD::USUBSAT et al here as well?
		nikicAuthorUnsubmitted Done Reply Inline Actions It's already present a bit lower (L413). nikic: It's already present a bit lower (L413).
case ISD::MUL:		case ISD::MUL:
case ISD::MULHS:		case ISD::MULHS:
case ISD::MULHU:		case ISD::MULHU:
case ISD::SDIV:		case ISD::SDIV:
case ISD::UDIV:		case ISD::UDIV:
case ISD::SREM:		case ISD::SREM:
case ISD::UREM:		case ISD::UREM:
case ISD::SDIVREM:		case ISD::SDIVREM:
▲ Show 20 Lines • Show All 421 Lines • ▼ Show 20 Lines	SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::FSHR:		case ISD::FSHR:
return ExpandFunnelShift(Op);		return ExpandFunnelShift(Op);
case ISD::ROTL:		case ISD::ROTL:
case ISD::ROTR:		case ISD::ROTR:
return ExpandROT(Op);		return ExpandROT(Op);
case ISD::FMINNUM:		case ISD::FMINNUM:
case ISD::FMAXNUM:		case ISD::FMAXNUM:
return ExpandFMINNUM_FMAXNUM(Op);		return ExpandFMINNUM_FMAXNUM(Op);
		case ISD::USUBSAT:
		case ISD::SSUBSAT:
		case ISD::UADDSAT:
		case ISD::SADDSAT:
		return ExpandAddSubSat(Op);
case ISD::STRICT_FADD:		case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:		case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:		case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:		case ISD::STRICT_FDIV:
case ISD::STRICT_FREM:		case ISD::STRICT_FREM:
case ISD::STRICT_FSQRT:		case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:		case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:		case ISD::STRICT_FPOW:
▲ Show 20 Lines • Show All 403 Lines • ▼ Show 20 Lines
}		}

SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {		SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {
if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG))		if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG))
return Expanded;		return Expanded;
return DAG.UnrollVectorOp(Op.getNode());		return DAG.UnrollVectorOp(Op.getNode());
}		}

		SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) {
		if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG))
		return Expanded;
		return DAG.UnrollVectorOp(Op.getNode());
		}

SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {		SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();		EVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();		unsigned NumElems = VT.getVectorNumElements();
unsigned NumOpers = Op.getNumOperands();		unsigned NumOpers = Op.getNumOperands();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();		const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT ValueVTs[] = {EltVT, MVT::Other};		EVT ValueVTs[] = {EltVT, MVT::Other};
SDValue Chain = Op.getOperand(0);		SDValue Chain = Op.getOperand(0);
▲ Show 20 Lines • Show All 69 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/TargetLowering.cpp

Show First 20 Lines • Show All 5,297 Lines • ▼ Show 20 Lines	if (C->isNullValue() && CC == ISD::SETEQ) {
SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);		SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,		SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
DAG.getConstant(Log2b, dl, MVT::i32));		DAG.getConstant(Log2b, dl, MVT::i32));
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);		return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
}		}
}		}
return SDValue();		return SDValue();
}		}

SDValue TargetLowering::getExpandedSaturationAdditionSubtraction(		SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
		RKSimonUnsubmitted Done Reply Inline Actions Really pedantic, but this function name is massive - why not just TargetLowering::expandAddSubSat ? RKSimon: Really pedantic, but this function name is massive - why not just TargetLowering…
		nikicAuthorUnsubmitted Done Reply Inline Actions Agreed, I've renamed the method. nikic: Agreed, I've renamed the method.
		RKSimonUnsubmitted Done Reply Inline Actions Thanks - if you can, please pull this out and commit this as an NFC straightaway. RKSimon: Thanks - if you can, please pull this out and commit this as an NFC straightaway.
SDNode *Node, SelectionDAG &DAG) const {
unsigned Opcode = Node->getOpcode();		unsigned Opcode = Node->getOpcode();
		SDValue LHS = Node->getOperand(0);
		SDValue RHS = Node->getOperand(1);
		SDLoc dl(Node);

		EVT VT = LHS.getValueType();
		if (VT.isVector()) {
		// usub.sat(a, b) -> umax(a, b) - b
		if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
		RKSimonUnsubmitted Done Reply Inline Actions isOperationLegalOrCustom? RKSimon: isOperationLegalOrCustom?
		nikicAuthorUnsubmitted Done Reply Inline Actions Extra diff allowing custom: https://gist.github.com/nikic/4c46634cec8f319e687c6b5cb0496648 This is presumably better than scalarizing, but I was wondering if a more explicit expansion would work better? As another variant, this is what happens if I just fall through to the USUBO+SELECT expansion: https://gist.github.com/nikic/d989121f7f9898437a1255548c148904 nikic: Extra diff allowing custom: https://gist.github.com/nikic/4c46634cec8f319e687c6b5cb0496648…
		RKSimonUnsubmitted Done Reply Inline Actions Allowing legalorcustom looks OK to me - we can tweak per-target codegen in future commits if it's useful. Also, should we try to do this on scalars as well before defaulting to add/sub overflow? For instance, AMDGPU i32 should benefit. RKSimon: Allowing legalorcustom looks OK to me - we can tweak per-target codegen in future commits if…
		nikicAuthorUnsubmitted Done Reply Inline Actions Okay, I did both changes. I agree that preferring this expansion makes sense even for scalar. I've also dropped the changes in X86ISelLowering. Now that custom is allowed here, they are no longer necessary. We'll just expand to a wide UMAX here that will get split (rather than the USUBSAT getting split and then each half expanded to UMAX). nikic: Okay, I did both changes. I agree that preferring this expansion makes sense even for scalar.
		SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
		return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
		}

		return SDValue();
		}

unsigned OverflowOp;		unsigned OverflowOp;
switch (Opcode) {		switch (Opcode) {
case ISD::SADDSAT:		case ISD::SADDSAT:
OverflowOp = ISD::SADDO;		OverflowOp = ISD::SADDO;
break;		break;
case ISD::UADDSAT:		case ISD::UADDSAT:
OverflowOp = ISD::UADDO;		OverflowOp = ISD::UADDO;
break;		break;
case ISD::SSUBSAT:		case ISD::SSUBSAT:
OverflowOp = ISD::SSUBO;		OverflowOp = ISD::SSUBO;
break;		break;
case ISD::USUBSAT:		case ISD::USUBSAT:
OverflowOp = ISD::USUBO;		OverflowOp = ISD::USUBO;
break;		break;
default:		default:
llvm_unreachable("Expected method to receive signed or unsigned saturation "		llvm_unreachable("Expected method to receive signed or unsigned saturation "
"addition or subtraction node.");		"addition or subtraction node.");
}		}
assert(Node->getNumOperands() == 2 && "Expected node to have 2 operands.");

SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
assert(LHS.getValueType().isScalarInteger() &&		assert(LHS.getValueType().isScalarInteger() &&
"Expected operands to be integers. Vector of int arguments should "		"Expected operands to be integers. Vector of int arguments should "
"already be unrolled.");		"already be unrolled.");
assert(RHS.getValueType().isScalarInteger() &&		assert(RHS.getValueType().isScalarInteger() &&
"Expected operands to be integers. Vector of int arguments should "		"Expected operands to be integers. Vector of int arguments should "
"already be unrolled.");		"already be unrolled.");
assert(LHS.getValueType() == RHS.getValueType() &&		assert(LHS.getValueType() == RHS.getValueType() &&
"Expected both operands to be the same type");		"Expected both operands to be the same type");
▲ Show 20 Lines • Show All 182 Lines • Show Last 20 Lines

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,214 Lines • ▼ Show 20 Lines	if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);		setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);		setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);		setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);		setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);		setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);		setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);		setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);		setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
		setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);

		RKSimonUnsubmitted Not Done Reply Inline Actions LowerADDSAT_SUBSAT supports AVX1 splitting, so why not: setOperationAction(ISD::USUBSAT, MVT::v8i32, HasInt256 ? Legal : Custom); Also, v4i64 should be ok as well. RKSimon: LowerADDSAT_SUBSAT supports AVX1 splitting, so why not: ``` setOperationAction(ISD::USUBSAT…
		nikicAuthorUnsubmitted Done Reply Inline Actions I've added this to get the splitting, but I don't think v8i32 would be legal, as we need it to expand. Maybe `HasInt256 ? Expand : Custom` would make it more obvious what the intention here is? nikic: I've added this to get the splitting, but I don't think v8i32 would be legal, as we need it to…
		RKSimonUnsubmitted Not Done Reply Inline Actions Better for the existing x86 lowering code to handle it IMO - that's what we do for nearly all 256-bit integer code on AVX1 targets. RKSimon: Better for the existing x86 lowering code to handle it IMO - that's what we do for nearly all…
		nikicAuthorUnsubmitted Done Reply Inline Actions That's what this is intended to do. v8i32 is custom so lowering splits it into two v4i32s, which then get expanded. If I change this to `HasInt256 ? Legal : Custom`, then I get: LLVM ERROR: Cannot select: t5: v8i32 = usubsat t2, t4 Or am I misunderstanding what you have in mind here? nikic: That's what this is intended to do. v8i32 is custom so lowering splits it into two v4i32s…
		RKSimonUnsubmitted Not Done Reply Inline Actions If you look at LowerABS just below LowerADDSAT_SUBSAT you'll see the approach that I took. RKSimon: If you look at LowerABS just below LowerADDSAT_SUBSAT you'll see the approach that I took.
		nikicAuthorUnsubmitted Done Reply Inline Actions I've changed this to always Custom and changed the LowerADDSAT_SUBSAT implementation to look more like LowerABS. Is this right? nikic: I've changed this to always Custom and changed the LowerADDSAT_SUBSAT implementation to look…
		RKSimonUnsubmitted Not Done Reply Inline Actions setOperationAction(ISD::USUBSAT, MVT::v8i32, HasInt256 ? Legal : Custom); RKSimon: ``` setOperationAction(ISD::USUBSAT, MVT::v8i32, HasInt256 ? Legal : Custom); ```
		RKSimonUnsubmitted Not Done Reply Inline Actions Sorry, ignore that! RKSimon: Sorry, ignore that!
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {		for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);		setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);		setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);		setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);		setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);		setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
}		}

▲ Show 20 Lines • Show All 22,178 Lines • ▼ Show 20 Lines	if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::XOR, SDLoc(Op), VT,		return DAG.getNode(ISD::XOR, SDLoc(Op), VT,
Op.getOperand(0), Op.getOperand(1));		Op.getOperand(0), Op.getOperand(1));
assert(Op.getSimpleValueType().is256BitVector() &&		assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&		Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");		"Only handle AVX 256-bit vector integer operation");
return split256IntArith(Op, DAG);		return split256IntArith(Op, DAG);
}		}

static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) {		static SDValue LowerADDSAT_SUBSAT(SDValue Op, const X86Subtarget &Subtraget,
		SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();		MVT VT = Op.getSimpleValueType();
if (VT.getScalarType() == MVT::i1) {		if (VT.getScalarType() == MVT::i1) {
SDLoc dl(Op);		SDLoc dl(Op);
switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
default: llvm_unreachable("Expected saturated arithmetic opcode");		default: llvm_unreachable("Expected saturated arithmetic opcode");
case ISD::UADDSAT:		case ISD::UADDSAT:
case ISD::SADDSAT:		case ISD::SADDSAT:
return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));		return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
case ISD::USUBSAT:		case ISD::USUBSAT:
case ISD::SSUBSAT:		case ISD::SSUBSAT:
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),		return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
DAG.getNOT(dl, Op.getOperand(1), VT));		DAG.getNOT(dl, Op.getOperand(1), VT));
}		}
}		}

assert(Op.getSimpleValueType().is256BitVector() &&		if (VT.is256BitVector() && !Subtraget.hasInt256()) {
Op.getSimpleValueType().isInteger() &&		assert(VT.isInteger() &&
"Only handle AVX 256-bit vector integer operation");		"Only handle AVX 256-bit vector integer operation");
return split256IntArith(Op, DAG);		return split256IntArith(Op, DAG);
}		}

		// Default to expand.
		return SDValue();
		}

static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {		static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();		MVT VT = Op.getSimpleValueType();
if (VT == MVT::i16 \|\| VT == MVT::i32 \|\| VT == MVT::i64) {		if (VT == MVT::i16 \|\| VT == MVT::i32 \|\| VT == MVT::i64) {
// Since X86 does not have CMOV for 8-bit integer, we don't convert		// Since X86 does not have CMOV for 8-bit integer, we don't convert
// 8-bit integer abs to NEG and CMOV.		// 8-bit integer abs to NEG and CMOV.
SDLoc DL(Op);		SDLoc DL(Op);
SDValue N0 = Op.getOperand(0);		SDValue N0 = Op.getOperand(0);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),		SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
▲ Show 20 Lines • Show All 2,681 Lines • ▼ Show 20 Lines	SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);		case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
case ISD::ADDCARRY:		case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);		case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:		case ISD::ADD:
case ISD::SUB: return LowerADD_SUB(Op, DAG);		case ISD::SUB: return LowerADD_SUB(Op, DAG);
case ISD::UADDSAT:		case ISD::UADDSAT:
case ISD::SADDSAT:		case ISD::SADDSAT:
case ISD::USUBSAT:		case ISD::USUBSAT:
case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG);		case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, Subtarget, DAG);
case ISD::SMAX:		case ISD::SMAX:
case ISD::SMIN:		case ISD::SMIN:
case ISD::UMAX:		case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, DAG);		case ISD::UMIN: return LowerMINMAX(Op, DAG);
case ISD::ABS: return LowerABS(Op, DAG);		case ISD::ABS: return LowerABS(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);		case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);		case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);		case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
▲ Show 20 Lines • Show All 16,378 Lines • Show Last 20 Lines

lib/Target/X86/X86TargetTransformInfo.cpp

Show First 20 Lines • Show All 1,766 Lines • ▼ Show 20 Lines	static const CostTblEntry AVX512CostTbl[] = {
{ ISD::BITREVERSE, MVT::v8i64, 36 },		{ ISD::BITREVERSE, MVT::v8i64, 36 },
{ ISD::BITREVERSE, MVT::v16i32, 24 },		{ ISD::BITREVERSE, MVT::v16i32, 24 },
{ ISD::CTLZ, MVT::v8i64, 29 },		{ ISD::CTLZ, MVT::v8i64, 29 },
{ ISD::CTLZ, MVT::v16i32, 35 },		{ ISD::CTLZ, MVT::v16i32, 35 },
{ ISD::CTPOP, MVT::v8i64, 16 },		{ ISD::CTPOP, MVT::v8i64, 16 },
{ ISD::CTPOP, MVT::v16i32, 24 },		{ ISD::CTPOP, MVT::v16i32, 24 },
{ ISD::CTTZ, MVT::v8i64, 20 },		{ ISD::CTTZ, MVT::v8i64, 20 },
{ ISD::CTTZ, MVT::v16i32, 28 },		{ ISD::CTTZ, MVT::v16i32, 28 },
		{ ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
		{ ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq
		{ ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq
		{ ISD::USUBSAT, MVT::v8i64, 2 }, // pmaxuq + psubq
};		};
static const CostTblEntry XOPCostTbl[] = {		static const CostTblEntry XOPCostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, 4 },		{ ISD::BITREVERSE, MVT::v4i64, 4 },
{ ISD::BITREVERSE, MVT::v8i32, 4 },		{ ISD::BITREVERSE, MVT::v8i32, 4 },
{ ISD::BITREVERSE, MVT::v16i16, 4 },		{ ISD::BITREVERSE, MVT::v16i16, 4 },
{ ISD::BITREVERSE, MVT::v32i8, 4 },		{ ISD::BITREVERSE, MVT::v32i8, 4 },
{ ISD::BITREVERSE, MVT::v2i64, 1 },		{ ISD::BITREVERSE, MVT::v2i64, 1 },
{ ISD::BITREVERSE, MVT::v4i32, 1 },		{ ISD::BITREVERSE, MVT::v4i32, 1 },
Show All 27 Lines	static const CostTblEntry AVX2CostTbl[] = {
{ ISD::SADDSAT, MVT::v16i16, 1 },		{ ISD::SADDSAT, MVT::v16i16, 1 },
{ ISD::SADDSAT, MVT::v32i8, 1 },		{ ISD::SADDSAT, MVT::v32i8, 1 },
{ ISD::SSUBSAT, MVT::v16i16, 1 },		{ ISD::SSUBSAT, MVT::v16i16, 1 },
{ ISD::SSUBSAT, MVT::v32i8, 1 },		{ ISD::SSUBSAT, MVT::v32i8, 1 },
{ ISD::UADDSAT, MVT::v16i16, 1 },		{ ISD::UADDSAT, MVT::v16i16, 1 },
{ ISD::UADDSAT, MVT::v32i8, 1 },		{ ISD::UADDSAT, MVT::v32i8, 1 },
{ ISD::USUBSAT, MVT::v16i16, 1 },		{ ISD::USUBSAT, MVT::v16i16, 1 },
{ ISD::USUBSAT, MVT::v32i8, 1 },		{ ISD::USUBSAT, MVT::v32i8, 1 },
		{ ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd
{ ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/		{ ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/		{ ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/		{ ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::f64, 14 }, // Haswell from http://www.agner.org/		{ ISD::FSQRT, MVT::f64, 14 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/		{ ISD::FSQRT, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/		{ ISD::FSQRT, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
};		};
static const CostTblEntry AVX1CostTbl[] = {		static const CostTblEntry AVX1CostTbl[] = {
Show All 19 Lines	static const CostTblEntry AVX1CostTbl[] = {
{ ISD::SADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert		{ ISD::SADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::SADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert		{ ISD::SADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::SSUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert		{ ISD::SSUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::SSUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert		{ ISD::SSUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::UADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert		{ ISD::UADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert		{ ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert		{ ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert		{ ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
		{ ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert
{ ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/		{ ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/		{ ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/		{ ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::f64, 21 }, // SNB from http://www.agner.org/		{ ISD::FSQRT, MVT::f64, 21 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v2f64, 21 }, // SNB from http://www.agner.org/		{ ISD::FSQRT, MVT::v2f64, 21 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/		{ ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/
};		};
static const CostTblEntry GLMCostTbl[] = {		static const CostTblEntry GLMCostTbl[] = {
{ ISD::FSQRT, MVT::f32, 19 }, // sqrtss		{ ISD::FSQRT, MVT::f32, 19 }, // sqrtss
{ ISD::FSQRT, MVT::v4f32, 37 }, // sqrtps		{ ISD::FSQRT, MVT::v4f32, 37 }, // sqrtps
{ ISD::FSQRT, MVT::f64, 34 }, // sqrtsd		{ ISD::FSQRT, MVT::f64, 34 }, // sqrtsd
{ ISD::FSQRT, MVT::v2f64, 67 }, // sqrtpd		{ ISD::FSQRT, MVT::v2f64, 67 }, // sqrtpd
};		};
static const CostTblEntry SLMCostTbl[] = {		static const CostTblEntry SLMCostTbl[] = {
{ ISD::FSQRT, MVT::f32, 20 }, // sqrtss		{ ISD::FSQRT, MVT::f32, 20 }, // sqrtss
{ ISD::FSQRT, MVT::v4f32, 40 }, // sqrtps		{ ISD::FSQRT, MVT::v4f32, 40 }, // sqrtps
{ ISD::FSQRT, MVT::f64, 35 }, // sqrtsd		{ ISD::FSQRT, MVT::f64, 35 }, // sqrtsd
{ ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd		{ ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd
};		};
static const CostTblEntry SSE42CostTbl[] = {		static const CostTblEntry SSE42CostTbl[] = {
		{ ISD::USUBSAT, MVT::v4i32, 2 }, // pmaxud + psubd
{ ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/		{ ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/		{ ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
};		};
static const CostTblEntry SSSE3CostTbl[] = {		static const CostTblEntry SSSE3CostTbl[] = {
{ ISD::BITREVERSE, MVT::v2i64, 5 },		{ ISD::BITREVERSE, MVT::v2i64, 5 },
{ ISD::BITREVERSE, MVT::v4i32, 5 },		{ ISD::BITREVERSE, MVT::v4i32, 5 },
{ ISD::BITREVERSE, MVT::v8i16, 5 },		{ ISD::BITREVERSE, MVT::v8i16, 5 },
{ ISD::BITREVERSE, MVT::v16i8, 5 },		{ ISD::BITREVERSE, MVT::v16i8, 5 },
▲ Show 20 Lines • Show All 1,374 Lines • Show Last 20 Lines

test/Analysis/CostModel/X86/arith-usat.ll

	Show First 20 Lines • Show All 244 Lines • ▼ Show 20 Lines
	declare <32 x i16> @llvm.usub.sat.v32i16(<32 x i16>, <32 x i16>)			declare <32 x i16> @llvm.usub.sat.v32i16(<32 x i16>, <32 x i16>)

	declare i8 @llvm.usub.sat.i8(i8, i8)			declare i8 @llvm.usub.sat.i8(i8, i8)
	declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)			declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
	declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)			declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)
	declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>)			declare <64 x i8> @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>)

	define i32 @sub(i32 %arg) {			define i32 @sub(i32 %arg) {
	; SSE-LABEL: 'sub'			; SSSE3-LABEL: 'sub'
	; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)			; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
	; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				;
				; SSE42-LABEL: 'sub'
				; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
				; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX1-LABEL: 'sub'			; AVX1-LABEL: 'sub'
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)			; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX2-LABEL: 'sub'			; AVX2-LABEL: 'sub'
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512F-LABEL: 'sub'			; AVX512F-LABEL: 'sub'
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512BW-LABEL: 'sub'			; AVX512BW-LABEL: 'sub'
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512DQ-LABEL: 'sub'			; AVX512DQ-LABEL: 'sub'
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; SLM-LABEL: 'sub'			; SLM-LABEL: 'sub'
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)			; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; GLM-LABEL: 'sub'			; GLM-LABEL: 'sub'
	; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)			; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
	; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; BTVER2-LABEL: 'sub'			; BTVER2-LABEL: 'sub'
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)			; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
	Show All 24 Lines

test/CodeGen/X86/usub_sat_vec.ll

	Show First 20 Lines • Show All 688 Lines • ▼ Show 20 Lines
	; SSE41-NEXT: movq %xmm0, %rcx			; SSE41-NEXT: movq %xmm0, %rcx
	; SSE41-NEXT: subq %rax, %rcx			; SSE41-NEXT: subq %rax, %rcx
	; SSE41-NEXT: cmovbq %rdx, %rcx			; SSE41-NEXT: cmovbq %rdx, %rcx
	; SSE41-NEXT: movq %rcx, %xmm0			; SSE41-NEXT: movq %rcx, %xmm0
	; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]			; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
	; SSE41-NEXT: psrlq $32, %xmm0			; SSE41-NEXT: psrlq $32, %xmm0
	; SSE41-NEXT: retq			; SSE41-NEXT: retq
	;			;
	; AVX-LABEL: v2i32:			; AVX1-LABEL: v2i32:
	; AVX: # %bb.0:			; AVX1: # %bb.0:
	; AVX-NEXT: vpsllq $32, %xmm1, %xmm1			; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1
	; AVX-NEXT: vpextrq $1, %xmm1, %rax			; AVX1-NEXT: vpextrq $1, %xmm1, %rax
	; AVX-NEXT: vpsllq $32, %xmm0, %xmm0			; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
	; AVX-NEXT: vpextrq $1, %xmm0, %rcx			; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
	; AVX-NEXT: xorl %edx, %edx			; AVX1-NEXT: xorl %edx, %edx
	; AVX-NEXT: subq %rax, %rcx			; AVX1-NEXT: subq %rax, %rcx
	; AVX-NEXT: cmovbq %rdx, %rcx			; AVX1-NEXT: cmovbq %rdx, %rcx
	; AVX-NEXT: vmovq %rcx, %xmm2			; AVX1-NEXT: vmovq %rcx, %xmm2
	; AVX-NEXT: vmovq %xmm1, %rax			; AVX1-NEXT: vmovq %xmm1, %rax
	; AVX-NEXT: vmovq %xmm0, %rcx			; AVX1-NEXT: vmovq %xmm0, %rcx
	; AVX-NEXT: subq %rax, %rcx			; AVX1-NEXT: subq %rax, %rcx
	; AVX-NEXT: cmovbq %rdx, %rcx			; AVX1-NEXT: cmovbq %rdx, %rcx
	; AVX-NEXT: vmovq %rcx, %xmm0			; AVX1-NEXT: vmovq %rcx, %xmm0
	; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]			; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
	; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0			; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
	; AVX-NEXT: retq			; AVX1-NEXT: retq
				;
				; AVX2-LABEL: v2i32:
				; AVX2: # %bb.0:
				; AVX2-NEXT: vpsllq $32, %xmm1, %xmm1
				; AVX2-NEXT: vpextrq $1, %xmm1, %rax
				; AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
				; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
				; AVX2-NEXT: xorl %edx, %edx
				; AVX2-NEXT: subq %rax, %rcx
				; AVX2-NEXT: cmovbq %rdx, %rcx
				; AVX2-NEXT: vmovq %rcx, %xmm2
				; AVX2-NEXT: vmovq %xmm1, %rax
				; AVX2-NEXT: vmovq %xmm0, %rcx
				; AVX2-NEXT: subq %rax, %rcx
				; AVX2-NEXT: cmovbq %rdx, %rcx
				; AVX2-NEXT: vmovq %rcx, %xmm0
				; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
				; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
				; AVX2-NEXT: retq
				;
				; AVX512-LABEL: v2i32:
				; AVX512: # %bb.0:
				; AVX512-NEXT: vpsllq $32, %xmm1, %xmm1
				; AVX512-NEXT: vpsllq $32, %xmm0, %xmm0
				; AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
				; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0
				; AVX512-NEXT: vpsrlq $32, %xmm0, %xmm0
				; AVX512-NEXT: retq
	%z = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %x, <2 x i32> %y)			%z = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
	ret <2 x i32> %z			ret <2 x i32> %z
	}			}

	define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {			define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
	; SSE2-LABEL: v4i32:			; SSE2-LABEL: v4i32:
	; SSE2: # %bb.0:			; SSE2: # %bb.0:
	; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]			; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
	▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines
	; SSSE3-NEXT: movd %ecx, %xmm0			; SSSE3-NEXT: movd %ecx, %xmm0
	; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]			; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
	; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]			; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
	; SSSE3-NEXT: movdqa %xmm2, %xmm0			; SSSE3-NEXT: movdqa %xmm2, %xmm0
	; SSSE3-NEXT: retq			; SSSE3-NEXT: retq
	;			;
	; SSE41-LABEL: v4i32:			; SSE41-LABEL: v4i32:
	; SSE41: # %bb.0:			; SSE41: # %bb.0:
	; SSE41-NEXT: pextrd $1, %xmm1, %eax			; SSE41-NEXT: pmaxud %xmm1, %xmm0
	; SSE41-NEXT: pextrd $1, %xmm0, %ecx			; SSE41-NEXT: psubd %xmm1, %xmm0
	; SSE41-NEXT: xorl %edx, %edx
	; SSE41-NEXT: subl %eax, %ecx
	; SSE41-NEXT: cmovbl %edx, %ecx
	; SSE41-NEXT: movd %xmm1, %eax
	; SSE41-NEXT: movd %xmm0, %esi
	; SSE41-NEXT: subl %eax, %esi
	; SSE41-NEXT: cmovbl %edx, %esi
	; SSE41-NEXT: movd %esi, %xmm2
	; SSE41-NEXT: pinsrd $1, %ecx, %xmm2
	; SSE41-NEXT: pextrd $2, %xmm1, %eax
	; SSE41-NEXT: pextrd $2, %xmm0, %ecx
	; SSE41-NEXT: subl %eax, %ecx
	; SSE41-NEXT: cmovbl %edx, %ecx
	; SSE41-NEXT: pinsrd $2, %ecx, %xmm2
	; SSE41-NEXT: pextrd $3, %xmm1, %eax
	; SSE41-NEXT: pextrd $3, %xmm0, %ecx
	; SSE41-NEXT: subl %eax, %ecx
	; SSE41-NEXT: cmovbl %edx, %ecx
	; SSE41-NEXT: pinsrd $3, %ecx, %xmm2
	; SSE41-NEXT: movdqa %xmm2, %xmm0
	; SSE41-NEXT: retq			; SSE41-NEXT: retq
	;			;
	; AVX-LABEL: v4i32:			; AVX-LABEL: v4i32:
	; AVX: # %bb.0:			; AVX: # %bb.0:
	; AVX-NEXT: vpextrd $1, %xmm1, %eax			; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
	; AVX-NEXT: vpextrd $1, %xmm0, %ecx			; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
	; AVX-NEXT: xorl %edx, %edx
	; AVX-NEXT: subl %eax, %ecx
	; AVX-NEXT: cmovbl %edx, %ecx
	; AVX-NEXT: vmovd %xmm1, %eax
	; AVX-NEXT: vmovd %xmm0, %esi
	; AVX-NEXT: subl %eax, %esi
	; AVX-NEXT: cmovbl %edx, %esi
	; AVX-NEXT: vmovd %esi, %xmm2
	; AVX-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
	; AVX-NEXT: vpextrd $2, %xmm1, %eax
	; AVX-NEXT: vpextrd $2, %xmm0, %ecx
	; AVX-NEXT: subl %eax, %ecx
	; AVX-NEXT: cmovbl %edx, %ecx
	; AVX-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
	; AVX-NEXT: vpextrd $3, %xmm1, %eax
	; AVX-NEXT: vpextrd $3, %xmm0, %ecx
	; AVX-NEXT: subl %eax, %ecx
	; AVX-NEXT: cmovbl %edx, %ecx
	; AVX-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%z = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)			%z = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
	ret <4 x i32> %z			ret <4 x i32> %z
	}			}

	define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {			define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
	; SSE2-LABEL: v8i32:			; SSE2-LABEL: v8i32:
	; SSE2: # %bb.0:			; SSE2: # %bb.0:
	▲ Show 20 Lines • Show All 122 Lines • ▼ Show 20 Lines
	; SSSE3-NEXT: movd %edx, %xmm1			; SSSE3-NEXT: movd %edx, %xmm1
	; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]			; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
	; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]			; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
	; SSSE3-NEXT: movdqa %xmm2, %xmm1			; SSSE3-NEXT: movdqa %xmm2, %xmm1
	; SSSE3-NEXT: retq			; SSSE3-NEXT: retq
	;			;
	; SSE41-LABEL: v8i32:			; SSE41-LABEL: v8i32:
	; SSE41: # %bb.0:			; SSE41: # %bb.0:
	; SSE41-NEXT: movdqa %xmm0, %xmm4			; SSE41-NEXT: pmaxud %xmm2, %xmm0
	; SSE41-NEXT: pextrd $1, %xmm2, %ecx			; SSE41-NEXT: psubd %xmm2, %xmm0
	; SSE41-NEXT: pextrd $1, %xmm0, %edx			; SSE41-NEXT: pmaxud %xmm3, %xmm1
	; SSE41-NEXT: xorl %eax, %eax			; SSE41-NEXT: psubd %xmm3, %xmm1
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: movd %xmm2, %ecx
	; SSE41-NEXT: movd %xmm0, %esi
	; SSE41-NEXT: subl %ecx, %esi
	; SSE41-NEXT: cmovbl %eax, %esi
	; SSE41-NEXT: movd %esi, %xmm0
	; SSE41-NEXT: pinsrd $1, %edx, %xmm0
	; SSE41-NEXT: pextrd $2, %xmm2, %ecx
	; SSE41-NEXT: pextrd $2, %xmm4, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $2, %edx, %xmm0
	; SSE41-NEXT: pextrd $3, %xmm2, %ecx
	; SSE41-NEXT: pextrd $3, %xmm4, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $3, %edx, %xmm0
	; SSE41-NEXT: pextrd $1, %xmm3, %ecx
	; SSE41-NEXT: pextrd $1, %xmm1, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: movd %xmm3, %ecx
	; SSE41-NEXT: movd %xmm1, %esi
	; SSE41-NEXT: subl %ecx, %esi
	; SSE41-NEXT: cmovbl %eax, %esi
	; SSE41-NEXT: movd %esi, %xmm2
	; SSE41-NEXT: pinsrd $1, %edx, %xmm2
	; SSE41-NEXT: pextrd $2, %xmm3, %ecx
	; SSE41-NEXT: pextrd $2, %xmm1, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $2, %edx, %xmm2
	; SSE41-NEXT: pextrd $3, %xmm3, %ecx
	; SSE41-NEXT: pextrd $3, %xmm1, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $3, %edx, %xmm2
	; SSE41-NEXT: movdqa %xmm2, %xmm1
	; SSE41-NEXT: retq			; SSE41-NEXT: retq
	;			;
	; AVX1-LABEL: v8i32:			; AVX1-LABEL: v8i32:
	; AVX1: # %bb.0:			; AVX1: # %bb.0:
	; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2			; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
	; AVX1-NEXT: vpextrd $1, %xmm2, %ecx
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
	; AVX1-NEXT: vpextrd $1, %xmm3, %edx			; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm3
	; AVX1-NEXT: xorl %eax, %eax			; AVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
	; AVX1-NEXT: subl %ecx, %edx			; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
	; AVX1-NEXT: cmovbl %eax, %edx			; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
	; AVX1-NEXT: vmovd %xmm2, %ecx
	; AVX1-NEXT: vmovd %xmm3, %esi
	; AVX1-NEXT: subl %ecx, %esi
	; AVX1-NEXT: cmovbl %eax, %esi
	; AVX1-NEXT: vmovd %esi, %xmm4
	; AVX1-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
	; AVX1-NEXT: vpextrd $2, %xmm2, %ecx
	; AVX1-NEXT: vpextrd $2, %xmm3, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
	; AVX1-NEXT: vpextrd $3, %xmm2, %ecx
	; AVX1-NEXT: vpextrd $3, %xmm3, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $3, %edx, %xmm4, %xmm2
	; AVX1-NEXT: vpextrd $1, %xmm1, %ecx
	; AVX1-NEXT: vpextrd $1, %xmm0, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vmovd %xmm1, %ecx
	; AVX1-NEXT: vmovd %xmm0, %esi
	; AVX1-NEXT: subl %ecx, %esi
	; AVX1-NEXT: cmovbl %eax, %esi
	; AVX1-NEXT: vmovd %esi, %xmm3
	; AVX1-NEXT: vpinsrd $1, %edx, %xmm3, %xmm3
	; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
	; AVX1-NEXT: vpextrd $2, %xmm0, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $2, %edx, %xmm3, %xmm3
	; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
	; AVX1-NEXT: vpextrd $3, %xmm0, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $3, %edx, %xmm3, %xmm0
	; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: v8i32:			; AVX2-LABEL: v8i32:
	; AVX2: # %bb.0:			; AVX2: # %bb.0:
	; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2			; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
	; AVX2-NEXT: vpextrd $1, %xmm2, %ecx			; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
	; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
	; AVX2-NEXT: vpextrd $1, %xmm3, %edx
	; AVX2-NEXT: xorl %eax, %eax
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vmovd %xmm2, %ecx
	; AVX2-NEXT: vmovd %xmm3, %esi
	; AVX2-NEXT: subl %ecx, %esi
	; AVX2-NEXT: cmovbl %eax, %esi
	; AVX2-NEXT: vmovd %esi, %xmm4
	; AVX2-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
	; AVX2-NEXT: vpextrd $2, %xmm2, %ecx
	; AVX2-NEXT: vpextrd $2, %xmm3, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
	; AVX2-NEXT: vpextrd $3, %xmm2, %ecx
	; AVX2-NEXT: vpextrd $3, %xmm3, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $3, %edx, %xmm4, %xmm2
	; AVX2-NEXT: vpextrd $1, %xmm1, %ecx
	; AVX2-NEXT: vpextrd $1, %xmm0, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vmovd %xmm1, %ecx
	; AVX2-NEXT: vmovd %xmm0, %esi
	; AVX2-NEXT: subl %ecx, %esi
	; AVX2-NEXT: cmovbl %eax, %esi
	; AVX2-NEXT: vmovd %esi, %xmm3
	; AVX2-NEXT: vpinsrd $1, %edx, %xmm3, %xmm3
	; AVX2-NEXT: vpextrd $2, %xmm1, %ecx
	; AVX2-NEXT: vpextrd $2, %xmm0, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $2, %edx, %xmm3, %xmm3
	; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
	; AVX2-NEXT: vpextrd $3, %xmm0, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $3, %edx, %xmm3, %xmm0
	; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512-LABEL: v8i32:			; AVX512-LABEL: v8i32:
	; AVX512: # %bb.0:			; AVX512: # %bb.0:
	; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2			; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
	; AVX512-NEXT: vpextrd $1, %xmm2, %ecx			; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0
	; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm3
	; AVX512-NEXT: vpextrd $1, %xmm3, %edx
	; AVX512-NEXT: xorl %eax, %eax
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vmovd %xmm2, %ecx
	; AVX512-NEXT: vmovd %xmm3, %esi
	; AVX512-NEXT: subl %ecx, %esi
	; AVX512-NEXT: cmovbl %eax, %esi
	; AVX512-NEXT: vmovd %esi, %xmm4
	; AVX512-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
	; AVX512-NEXT: vpextrd $2, %xmm2, %ecx
	; AVX512-NEXT: vpextrd $2, %xmm3, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
	; AVX512-NEXT: vpextrd $3, %xmm2, %ecx
	; AVX512-NEXT: vpextrd $3, %xmm3, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $3, %edx, %xmm4, %xmm2
	; AVX512-NEXT: vpextrd $1, %xmm1, %ecx
	; AVX512-NEXT: vpextrd $1, %xmm0, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vmovd %xmm1, %ecx
	; AVX512-NEXT: vmovd %xmm0, %esi
	; AVX512-NEXT: subl %ecx, %esi
	; AVX512-NEXT: cmovbl %eax, %esi
	; AVX512-NEXT: vmovd %esi, %xmm3
	; AVX512-NEXT: vpinsrd $1, %edx, %xmm3, %xmm3
	; AVX512-NEXT: vpextrd $2, %xmm1, %ecx
	; AVX512-NEXT: vpextrd $2, %xmm0, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $2, %edx, %xmm3, %xmm3
	; AVX512-NEXT: vpextrd $3, %xmm1, %ecx
	; AVX512-NEXT: vpextrd $3, %xmm0, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $3, %edx, %xmm3, %xmm0
	; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
	; AVX512-NEXT: retq			; AVX512-NEXT: retq
	%z = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %x, <8 x i32> %y)			%z = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
	ret <8 x i32> %z			ret <8 x i32> %z
	}			}

	define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {			define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
	; SSE2-LABEL: v16i32:			; SSE2-LABEL: v16i32:
	; SSE2: # %bb.0:			; SSE2: # %bb.0:
	▲ Show 20 Lines • Show All 242 Lines • ▼ Show 20 Lines
	; SSSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1]			; SSSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1]
	; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]			; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
	; SSSE3-NEXT: movdqa %xmm4, %xmm2			; SSSE3-NEXT: movdqa %xmm4, %xmm2
	; SSSE3-NEXT: movdqa %xmm5, %xmm3			; SSSE3-NEXT: movdqa %xmm5, %xmm3
	; SSSE3-NEXT: retq			; SSSE3-NEXT: retq
	;			;
	; SSE41-LABEL: v16i32:			; SSE41-LABEL: v16i32:
	; SSE41: # %bb.0:			; SSE41: # %bb.0:
	; SSE41-NEXT: movdqa %xmm1, %xmm8			; SSE41-NEXT: pmaxud %xmm4, %xmm0
	; SSE41-NEXT: movdqa %xmm0, %xmm1			; SSE41-NEXT: psubd %xmm4, %xmm0
	; SSE41-NEXT: pextrd $1, %xmm4, %ecx			; SSE41-NEXT: pmaxud %xmm5, %xmm1
	; SSE41-NEXT: pextrd $1, %xmm0, %edx			; SSE41-NEXT: psubd %xmm5, %xmm1
	; SSE41-NEXT: xorl %eax, %eax			; SSE41-NEXT: pmaxud %xmm6, %xmm2
	; SSE41-NEXT: subl %ecx, %edx			; SSE41-NEXT: psubd %xmm6, %xmm2
	; SSE41-NEXT: cmovbl %eax, %edx			; SSE41-NEXT: pmaxud %xmm7, %xmm3
	; SSE41-NEXT: movd %xmm4, %ecx			; SSE41-NEXT: psubd %xmm7, %xmm3
	; SSE41-NEXT: movd %xmm0, %esi
	; SSE41-NEXT: subl %ecx, %esi
	; SSE41-NEXT: cmovbl %eax, %esi
	; SSE41-NEXT: movd %esi, %xmm0
	; SSE41-NEXT: pinsrd $1, %edx, %xmm0
	; SSE41-NEXT: pextrd $2, %xmm4, %ecx
	; SSE41-NEXT: pextrd $2, %xmm1, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $2, %edx, %xmm0
	; SSE41-NEXT: pextrd $3, %xmm4, %ecx
	; SSE41-NEXT: pextrd $3, %xmm1, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $3, %edx, %xmm0
	; SSE41-NEXT: pextrd $1, %xmm5, %ecx
	; SSE41-NEXT: pextrd $1, %xmm8, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: movd %xmm5, %ecx
	; SSE41-NEXT: movd %xmm8, %esi
	; SSE41-NEXT: subl %ecx, %esi
	; SSE41-NEXT: cmovbl %eax, %esi
	; SSE41-NEXT: movd %esi, %xmm1
	; SSE41-NEXT: pinsrd $1, %edx, %xmm1
	; SSE41-NEXT: pextrd $2, %xmm5, %ecx
	; SSE41-NEXT: pextrd $2, %xmm8, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $2, %edx, %xmm1
	; SSE41-NEXT: pextrd $3, %xmm5, %ecx
	; SSE41-NEXT: pextrd $3, %xmm8, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $3, %edx, %xmm1
	; SSE41-NEXT: pextrd $1, %xmm6, %ecx
	; SSE41-NEXT: pextrd $1, %xmm2, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: movd %xmm6, %ecx
	; SSE41-NEXT: movd %xmm2, %esi
	; SSE41-NEXT: subl %ecx, %esi
	; SSE41-NEXT: cmovbl %eax, %esi
	; SSE41-NEXT: movd %esi, %xmm4
	; SSE41-NEXT: pinsrd $1, %edx, %xmm4
	; SSE41-NEXT: pextrd $2, %xmm6, %ecx
	; SSE41-NEXT: pextrd $2, %xmm2, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $2, %edx, %xmm4
	; SSE41-NEXT: pextrd $3, %xmm6, %ecx
	; SSE41-NEXT: pextrd $3, %xmm2, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $3, %edx, %xmm4
	; SSE41-NEXT: pextrd $1, %xmm7, %ecx
	; SSE41-NEXT: pextrd $1, %xmm3, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: movd %xmm7, %ecx
	; SSE41-NEXT: movd %xmm3, %esi
	; SSE41-NEXT: subl %ecx, %esi
	; SSE41-NEXT: cmovbl %eax, %esi
	; SSE41-NEXT: movd %esi, %xmm5
	; SSE41-NEXT: pinsrd $1, %edx, %xmm5
	; SSE41-NEXT: pextrd $2, %xmm7, %ecx
	; SSE41-NEXT: pextrd $2, %xmm3, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $2, %edx, %xmm5
	; SSE41-NEXT: pextrd $3, %xmm7, %ecx
	; SSE41-NEXT: pextrd $3, %xmm3, %edx
	; SSE41-NEXT: subl %ecx, %edx
	; SSE41-NEXT: cmovbl %eax, %edx
	; SSE41-NEXT: pinsrd $3, %edx, %xmm5
	; SSE41-NEXT: movdqa %xmm4, %xmm2
	; SSE41-NEXT: movdqa %xmm5, %xmm3
	; SSE41-NEXT: retq			; SSE41-NEXT: retq
	;			;
	; AVX1-LABEL: v16i32:			; AVX1-LABEL: v16i32:
	; AVX1: # %bb.0:			; AVX1: # %bb.0:
	; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4			; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
	; AVX1-NEXT: vpextrd $1, %xmm4, %ecx
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
	; AVX1-NEXT: vpextrd $1, %xmm5, %edx			; AVX1-NEXT: vpmaxud %xmm4, %xmm5, %xmm5
	; AVX1-NEXT: xorl %eax, %eax			; AVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm4
	; AVX1-NEXT: subl %ecx, %edx			; AVX1-NEXT: vpmaxud %xmm2, %xmm0, %xmm0
	; AVX1-NEXT: cmovbl %eax, %edx			; AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm0
	; AVX1-NEXT: vmovd %xmm4, %ecx
	; AVX1-NEXT: vmovd %xmm5, %esi
	; AVX1-NEXT: subl %ecx, %esi
	; AVX1-NEXT: cmovbl %eax, %esi
	; AVX1-NEXT: vmovd %esi, %xmm6
	; AVX1-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6
	; AVX1-NEXT: vpextrd $2, %xmm4, %ecx
	; AVX1-NEXT: vpextrd $2, %xmm5, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6
	; AVX1-NEXT: vpextrd $3, %xmm4, %ecx
	; AVX1-NEXT: vpextrd $3, %xmm5, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4
	; AVX1-NEXT: vpextrd $1, %xmm2, %ecx
	; AVX1-NEXT: vpextrd $1, %xmm0, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vmovd %xmm2, %ecx
	; AVX1-NEXT: vmovd %xmm0, %esi
	; AVX1-NEXT: subl %ecx, %esi
	; AVX1-NEXT: cmovbl %eax, %esi
	; AVX1-NEXT: vmovd %esi, %xmm5
	; AVX1-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
	; AVX1-NEXT: vpextrd $2, %xmm2, %ecx
	; AVX1-NEXT: vpextrd $2, %xmm0, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
	; AVX1-NEXT: vpextrd $3, %xmm2, %ecx
	; AVX1-NEXT: vpextrd $3, %xmm0, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $3, %edx, %xmm5, %xmm0
	; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
	; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2			; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
	; AVX1-NEXT: vpextrd $1, %xmm2, %ecx
	; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4			; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
	; AVX1-NEXT: vpextrd $1, %xmm4, %edx			; AVX1-NEXT: vpmaxud %xmm2, %xmm4, %xmm4
	; AVX1-NEXT: subl %ecx, %edx			; AVX1-NEXT: vpsubd %xmm2, %xmm4, %xmm2
	; AVX1-NEXT: cmovbl %eax, %edx			; AVX1-NEXT: vpmaxud %xmm3, %xmm1, %xmm1
	; AVX1-NEXT: vmovd %xmm2, %ecx			; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm1
	; AVX1-NEXT: vmovd %xmm4, %esi
	; AVX1-NEXT: subl %ecx, %esi
	; AVX1-NEXT: cmovbl %eax, %esi
	; AVX1-NEXT: vmovd %esi, %xmm5
	; AVX1-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
	; AVX1-NEXT: vpextrd $2, %xmm2, %ecx
	; AVX1-NEXT: vpextrd $2, %xmm4, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
	; AVX1-NEXT: vpextrd $3, %xmm2, %ecx
	; AVX1-NEXT: vpextrd $3, %xmm4, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $3, %edx, %xmm5, %xmm2
	; AVX1-NEXT: vpextrd $1, %xmm3, %ecx
	; AVX1-NEXT: vpextrd $1, %xmm1, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vmovd %xmm3, %ecx
	; AVX1-NEXT: vmovd %xmm1, %esi
	; AVX1-NEXT: subl %ecx, %esi
	; AVX1-NEXT: cmovbl %eax, %esi
	; AVX1-NEXT: vmovd %esi, %xmm4
	; AVX1-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
	; AVX1-NEXT: vpextrd $2, %xmm3, %ecx
	; AVX1-NEXT: vpextrd $2, %xmm1, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
	; AVX1-NEXT: vpextrd $3, %xmm3, %ecx
	; AVX1-NEXT: vpextrd $3, %xmm1, %edx
	; AVX1-NEXT: subl %ecx, %edx
	; AVX1-NEXT: cmovbl %eax, %edx
	; AVX1-NEXT: vpinsrd $3, %edx, %xmm4, %xmm1
	; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1			; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: v16i32:			; AVX2-LABEL: v16i32:
	; AVX2: # %bb.0:			; AVX2: # %bb.0:
	; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4			; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm0
	; AVX2-NEXT: vpextrd $1, %xmm4, %ecx			; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
	; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm5			; AVX2-NEXT: vpmaxud %ymm3, %ymm1, %ymm1
	; AVX2-NEXT: vpextrd $1, %xmm5, %edx			; AVX2-NEXT: vpsubd %ymm3, %ymm1, %ymm1
	; AVX2-NEXT: xorl %eax, %eax
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vmovd %xmm4, %ecx
	; AVX2-NEXT: vmovd %xmm5, %esi
	; AVX2-NEXT: subl %ecx, %esi
	; AVX2-NEXT: cmovbl %eax, %esi
	; AVX2-NEXT: vmovd %esi, %xmm6
	; AVX2-NEXT: vpinsrd $1, %edx, %xmm6, %xmm6
	; AVX2-NEXT: vpextrd $2, %xmm4, %ecx
	; AVX2-NEXT: vpextrd $2, %xmm5, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $2, %edx, %xmm6, %xmm6
	; AVX2-NEXT: vpextrd $3, %xmm4, %ecx
	; AVX2-NEXT: vpextrd $3, %xmm5, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $3, %edx, %xmm6, %xmm4
	; AVX2-NEXT: vpextrd $1, %xmm2, %ecx
	; AVX2-NEXT: vpextrd $1, %xmm0, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vmovd %xmm2, %ecx
	; AVX2-NEXT: vmovd %xmm0, %esi
	; AVX2-NEXT: subl %ecx, %esi
	; AVX2-NEXT: cmovbl %eax, %esi
	; AVX2-NEXT: vmovd %esi, %xmm5
	; AVX2-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
	; AVX2-NEXT: vpextrd $2, %xmm2, %ecx
	; AVX2-NEXT: vpextrd $2, %xmm0, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
	; AVX2-NEXT: vpextrd $3, %xmm2, %ecx
	; AVX2-NEXT: vpextrd $3, %xmm0, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $3, %edx, %xmm5, %xmm0
	; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm0
	; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm2
	; AVX2-NEXT: vpextrd $1, %xmm2, %ecx
	; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
	; AVX2-NEXT: vpextrd $1, %xmm4, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vmovd %xmm2, %ecx
	; AVX2-NEXT: vmovd %xmm4, %esi
	; AVX2-NEXT: subl %ecx, %esi
	; AVX2-NEXT: cmovbl %eax, %esi
	; AVX2-NEXT: vmovd %esi, %xmm5
	; AVX2-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
	; AVX2-NEXT: vpextrd $2, %xmm2, %ecx
	; AVX2-NEXT: vpextrd $2, %xmm4, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
	; AVX2-NEXT: vpextrd $3, %xmm2, %ecx
	; AVX2-NEXT: vpextrd $3, %xmm4, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $3, %edx, %xmm5, %xmm2
	; AVX2-NEXT: vpextrd $1, %xmm3, %ecx
	; AVX2-NEXT: vpextrd $1, %xmm1, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vmovd %xmm3, %ecx
	; AVX2-NEXT: vmovd %xmm1, %esi
	; AVX2-NEXT: subl %ecx, %esi
	; AVX2-NEXT: cmovbl %eax, %esi
	; AVX2-NEXT: vmovd %esi, %xmm4
	; AVX2-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
	; AVX2-NEXT: vpextrd $2, %xmm3, %ecx
	; AVX2-NEXT: vpextrd $2, %xmm1, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
	; AVX2-NEXT: vpextrd $3, %xmm3, %ecx
	; AVX2-NEXT: vpextrd $3, %xmm1, %edx
	; AVX2-NEXT: subl %ecx, %edx
	; AVX2-NEXT: cmovbl %eax, %edx
	; AVX2-NEXT: vpinsrd $3, %edx, %xmm4, %xmm1
	; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512-LABEL: v16i32:			; AVX512-LABEL: v16i32:
	; AVX512: # %bb.0:			; AVX512: # %bb.0:
	; AVX512-NEXT: vextracti32x4 $3, %zmm1, %xmm2			; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
	; AVX512-NEXT: vpextrd $1, %xmm2, %ecx			; AVX512-NEXT: vpsubd %zmm1, %zmm0, %zmm0
	; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm3
	; AVX512-NEXT: vpextrd $1, %xmm3, %edx
	; AVX512-NEXT: xorl %eax, %eax
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vmovd %xmm2, %ecx
	; AVX512-NEXT: vmovd %xmm3, %esi
	; AVX512-NEXT: subl %ecx, %esi
	; AVX512-NEXT: cmovbl %eax, %esi
	; AVX512-NEXT: vmovd %esi, %xmm4
	; AVX512-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
	; AVX512-NEXT: vpextrd $2, %xmm2, %ecx
	; AVX512-NEXT: vpextrd $2, %xmm3, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
	; AVX512-NEXT: vpextrd $3, %xmm2, %ecx
	; AVX512-NEXT: vpextrd $3, %xmm3, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $3, %edx, %xmm4, %xmm2
	; AVX512-NEXT: vextracti32x4 $2, %zmm1, %xmm3
	; AVX512-NEXT: vpextrd $1, %xmm3, %ecx
	; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm4
	; AVX512-NEXT: vpextrd $1, %xmm4, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vmovd %xmm3, %ecx
	; AVX512-NEXT: vmovd %xmm4, %esi
	; AVX512-NEXT: subl %ecx, %esi
	; AVX512-NEXT: cmovbl %eax, %esi
	; AVX512-NEXT: vmovd %esi, %xmm5
	; AVX512-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
	; AVX512-NEXT: vpextrd $2, %xmm3, %ecx
	; AVX512-NEXT: vpextrd $2, %xmm4, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
	; AVX512-NEXT: vpextrd $3, %xmm3, %ecx
	; AVX512-NEXT: vpextrd $3, %xmm4, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $3, %edx, %xmm5, %xmm3
	; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
	; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm3
	; AVX512-NEXT: vpextrd $1, %xmm3, %ecx
	; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm4
	; AVX512-NEXT: vpextrd $1, %xmm4, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vmovd %xmm3, %ecx
	; AVX512-NEXT: vmovd %xmm4, %esi
	; AVX512-NEXT: subl %ecx, %esi
	; AVX512-NEXT: cmovbl %eax, %esi
	; AVX512-NEXT: vmovd %esi, %xmm5
	; AVX512-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
	; AVX512-NEXT: vpextrd $2, %xmm3, %ecx
	; AVX512-NEXT: vpextrd $2, %xmm4, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $2, %edx, %xmm5, %xmm5
	; AVX512-NEXT: vpextrd $3, %xmm3, %ecx
	; AVX512-NEXT: vpextrd $3, %xmm4, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $3, %edx, %xmm5, %xmm3
	; AVX512-NEXT: vpextrd $1, %xmm1, %ecx
	; AVX512-NEXT: vpextrd $1, %xmm0, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vmovd %xmm1, %ecx
	; AVX512-NEXT: vmovd %xmm0, %esi
	; AVX512-NEXT: subl %ecx, %esi
	; AVX512-NEXT: cmovbl %eax, %esi
	; AVX512-NEXT: vmovd %esi, %xmm4
	; AVX512-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
	; AVX512-NEXT: vpextrd $2, %xmm1, %ecx
	; AVX512-NEXT: vpextrd $2, %xmm0, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
	; AVX512-NEXT: vpextrd $3, %xmm1, %ecx
	; AVX512-NEXT: vpextrd $3, %xmm0, %edx
	; AVX512-NEXT: subl %ecx, %edx
	; AVX512-NEXT: cmovbl %eax, %edx
	; AVX512-NEXT: vpinsrd $3, %edx, %xmm4, %xmm0
	; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
	; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
	; AVX512-NEXT: retq			; AVX512-NEXT: retq
	%z = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %x, <16 x i32> %y)			%z = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
	ret <16 x i32> %z			ret <16 x i32> %z
	}			}

	define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {			define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
	; SSE2-LABEL: v2i64:			; SSE2-LABEL: v2i64:
	; SSE2: # %bb.0:			; SSE2: # %bb.0:
	▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines
	; SSE41-NEXT: movq %xmm1, %rax			; SSE41-NEXT: movq %xmm1, %rax
	; SSE41-NEXT: movq %xmm0, %rcx			; SSE41-NEXT: movq %xmm0, %rcx
	; SSE41-NEXT: subq %rax, %rcx			; SSE41-NEXT: subq %rax, %rcx
	; SSE41-NEXT: cmovbq %rdx, %rcx			; SSE41-NEXT: cmovbq %rdx, %rcx
	; SSE41-NEXT: movq %rcx, %xmm0			; SSE41-NEXT: movq %rcx, %xmm0
	; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]			; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
	; SSE41-NEXT: retq			; SSE41-NEXT: retq
	;			;
	; AVX-LABEL: v2i64:			; AVX1-LABEL: v2i64:
	; AVX: # %bb.0:			; AVX1: # %bb.0:
	; AVX-NEXT: vpextrq $1, %xmm1, %rax			; AVX1-NEXT: vpextrq $1, %xmm1, %rax
	; AVX-NEXT: vpextrq $1, %xmm0, %rcx			; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
	; AVX-NEXT: xorl %edx, %edx			; AVX1-NEXT: xorl %edx, %edx
	; AVX-NEXT: subq %rax, %rcx			; AVX1-NEXT: subq %rax, %rcx
	; AVX-NEXT: cmovbq %rdx, %rcx			; AVX1-NEXT: cmovbq %rdx, %rcx
	; AVX-NEXT: vmovq %rcx, %xmm2			; AVX1-NEXT: vmovq %rcx, %xmm2
	; AVX-NEXT: vmovq %xmm1, %rax			; AVX1-NEXT: vmovq %xmm1, %rax
	; AVX-NEXT: vmovq %xmm0, %rcx			; AVX1-NEXT: vmovq %xmm0, %rcx
	; AVX-NEXT: subq %rax, %rcx			; AVX1-NEXT: subq %rax, %rcx
	; AVX-NEXT: cmovbq %rdx, %rcx			; AVX1-NEXT: cmovbq %rdx, %rcx
	; AVX-NEXT: vmovq %rcx, %xmm0			; AVX1-NEXT: vmovq %rcx, %xmm0
	; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]			; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
	; AVX-NEXT: retq			; AVX1-NEXT: retq
				;
				; AVX2-LABEL: v2i64:
				; AVX2: # %bb.0:
				; AVX2-NEXT: vpextrq $1, %xmm1, %rax
				; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
				; AVX2-NEXT: xorl %edx, %edx
				; AVX2-NEXT: subq %rax, %rcx
				; AVX2-NEXT: cmovbq %rdx, %rcx
				; AVX2-NEXT: vmovq %rcx, %xmm2
				; AVX2-NEXT: vmovq %xmm1, %rax
				; AVX2-NEXT: vmovq %xmm0, %rcx
				; AVX2-NEXT: subq %rax, %rcx
				; AVX2-NEXT: cmovbq %rdx, %rcx
				; AVX2-NEXT: vmovq %rcx, %xmm0
				; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
				; AVX2-NEXT: retq
				;
				; AVX512-LABEL: v2i64:
				; AVX512: # %bb.0:
				; AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
				; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0
				; AVX512-NEXT: retq
	%z = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)			%z = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
	ret <2 x i64> %z			ret <2 x i64> %z
	}			}

	define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {			define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
	; SSE2-LABEL: v4i64:			; SSE2-LABEL: v4i64:
	; SSE2: # %bb.0:			; SSE2: # %bb.0:
	; SSE2-NEXT: movq %xmm2, %rax			; SSE2-NEXT: movq %xmm2, %rax
	▲ Show 20 Lines • Show All 144 Lines • ▼ Show 20 Lines
	; AVX2-NEXT: cmovbq %rdx, %rcx			; AVX2-NEXT: cmovbq %rdx, %rcx
	; AVX2-NEXT: vmovq %rcx, %xmm0			; AVX2-NEXT: vmovq %rcx, %xmm0
	; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]			; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
	; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0			; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512-LABEL: v4i64:			; AVX512-LABEL: v4i64:
	; AVX512: # %bb.0:			; AVX512: # %bb.0:
	; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2			; AVX512-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
	; AVX512-NEXT: vpextrq $1, %xmm2, %rax			; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm0
	; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm3
	; AVX512-NEXT: vpextrq $1, %xmm3, %rcx
	; AVX512-NEXT: xorl %edx, %edx
	; AVX512-NEXT: subq %rax, %rcx
	; AVX512-NEXT: cmovbq %rdx, %rcx
	; AVX512-NEXT: vmovq %rcx, %xmm4
	; AVX512-NEXT: vmovq %xmm2, %rax
	; AVX512-NEXT: vmovq %xmm3, %rcx
	; AVX512-NEXT: subq %rax, %rcx
	; AVX512-NEXT: cmovbq %rdx, %rcx
	; AVX512-NEXT: vmovq %rcx, %xmm2
	; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
	; AVX512-NEXT: vpextrq $1, %xmm1, %rax
	; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
	; AVX512-NEXT: subq %rax, %rcx
	; AVX512-NEXT: cmovbq %rdx, %rcx
	; AVX512-NEXT: vmovq %rcx, %xmm3
	; AVX512-NEXT: vmovq %xmm1, %rax
	; AVX512-NEXT: vmovq %xmm0, %rcx
	; AVX512-NEXT: subq %rax, %rcx
	; AVX512-NEXT: cmovbq %rdx, %rcx
	; AVX512-NEXT: vmovq %rcx, %xmm0
	; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
	; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
	; AVX512-NEXT: retq			; AVX512-NEXT: retq
	%z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)			%z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
	ret <4 x i64> %z			ret <4 x i64> %z
	}			}

	define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {			define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
	; SSE2-LABEL: v8i64:			; SSE2-LABEL: v8i64:
	; SSE2: # %bb.0:			; SSE2: # %bb.0:
	▲ Show 20 Lines • Show All 273 Lines • ▼ Show 20 Lines
	; AVX2-NEXT: cmovbq %rax, %rdx			; AVX2-NEXT: cmovbq %rax, %rdx
	; AVX2-NEXT: vmovq %rdx, %xmm1			; AVX2-NEXT: vmovq %rdx, %xmm1
	; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]			; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
	; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1			; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512-LABEL: v8i64:			; AVX512-LABEL: v8i64:
	; AVX512: # %bb.0:			; AVX512: # %bb.0:
	; AVX512-NEXT: vextracti32x4 $3, %zmm1, %xmm2			; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
	; AVX512-NEXT: vpextrq $1, %xmm2, %rcx			; AVX512-NEXT: vpsubq %zmm1, %zmm0, %zmm0
	; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm3
	; AVX512-NEXT: vpextrq $1, %xmm3, %rdx
	; AVX512-NEXT: xorl %eax, %eax
	; AVX512-NEXT: subq %rcx, %rdx
	; AVX512-NEXT: cmovbq %rax, %rdx
	; AVX512-NEXT: vmovq %rdx, %xmm4
	; AVX512-NEXT: vmovq %xmm2, %rcx
	; AVX512-NEXT: vmovq %xmm3, %rdx
	; AVX512-NEXT: subq %rcx, %rdx
	; AVX512-NEXT: cmovbq %rax, %rdx
	; AVX512-NEXT: vmovq %rdx, %xmm2
	; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
	; AVX512-NEXT: vextracti32x4 $2, %zmm1, %xmm3
	; AVX512-NEXT: vpextrq $1, %xmm3, %rcx
	; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm4
	; AVX512-NEXT: vpextrq $1, %xmm4, %rdx
	; AVX512-NEXT: subq %rcx, %rdx
	; AVX512-NEXT: cmovbq %rax, %rdx
	; AVX512-NEXT: vmovq %rdx, %xmm5
	; AVX512-NEXT: vmovq %xmm3, %rcx
	; AVX512-NEXT: vmovq %xmm4, %rdx
	; AVX512-NEXT: subq %rcx, %rdx
	; AVX512-NEXT: cmovbq %rax, %rdx
	; AVX512-NEXT: vmovq %rdx, %xmm3
	; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
	; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
	; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm3
	; AVX512-NEXT: vpextrq $1, %xmm3, %rcx
	; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm4
	; AVX512-NEXT: vpextrq $1, %xmm4, %rdx
	; AVX512-NEXT: subq %rcx, %rdx
	; AVX512-NEXT: cmovbq %rax, %rdx
	; AVX512-NEXT: vmovq %rdx, %xmm5
	; AVX512-NEXT: vmovq %xmm3, %rcx
	; AVX512-NEXT: vmovq %xmm4, %rdx
	; AVX512-NEXT: subq %rcx, %rdx
	; AVX512-NEXT: cmovbq %rax, %rdx
	; AVX512-NEXT: vmovq %rdx, %xmm3
	; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
	; AVX512-NEXT: vpextrq $1, %xmm1, %rcx
	; AVX512-NEXT: vpextrq $1, %xmm0, %rdx
	; AVX512-NEXT: subq %rcx, %rdx
	; AVX512-NEXT: cmovbq %rax, %rdx
	; AVX512-NEXT: vmovq %rdx, %xmm4
	; AVX512-NEXT: vmovq %xmm1, %rcx
	; AVX512-NEXT: vmovq %xmm0, %rdx
	; AVX512-NEXT: subq %rcx, %rdx
	; AVX512-NEXT: cmovbq %rax, %rdx
	; AVX512-NEXT: vmovq %rdx, %xmm0
	; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
	; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
	; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
	; AVX512-NEXT: retq			; AVX512-NEXT: retq
	%z = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %x, <8 x i64> %y)			%z = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
	ret <8 x i64> %z			ret <8 x i64> %z
	}			}

	define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {			define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
	; SSE-LABEL: v2i128:			; SSE-LABEL: v2i128:
	; SSE: # %bb.0:			; SSE: # %bb.0:
	Show All 36 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen][X86] Expand vector USUBSAT to UMAX+SUB
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 181471

include/llvm/CodeGen/TargetLowering.h

lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

lib/CodeGen/SelectionDAG/TargetLowering.cpp

lib/Target/X86/X86ISelLowering.cpp

lib/Target/X86/X86TargetTransformInfo.cpp

test/Analysis/CostModel/X86/arith-usat.ll

test/CodeGen/X86/usub_sat_vec.ll

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen][X86] Expand vector USUBSAT to UMAX+SUB
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 181471

include/llvm/CodeGen/TargetLowering.h

lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

lib/CodeGen/SelectionDAG/TargetLowering.cpp

lib/Target/X86/X86ISelLowering.cpp

lib/Target/X86/X86TargetTransformInfo.cpp

test/Analysis/CostModel/X86/arith-usat.ll

test/CodeGen/X86/usub_sat_vec.ll

[CodeGen][X86] Expand vector USUBSAT to UMAX+SUB
ClosedPublic