This is an archive of the discontinued LLVM Phabricator instance.

[X86] Constant folding of adds/subs intrinsics
ClosedPublic

Authored by tkrupa on Aug 9 2018, 2:38 AM.

Download Raw Diff

Details

Reviewers

craig.topper
RKSimon
spatel
chandlerc
efriedma

Commits

rGe766e5f63661: [X86] Constant folding of adds/subs intrinsics
rL339659: [X86] Constant folding of adds/subs intrinsics

Summary

This was originally a part of D46179 but got so different to unsigned versions that it's better to have it in a separate patch.

Diff Detail

Repository: rL LLVM

Event Timeline

tkrupa created this revision.Aug 9 2018, 2:38 AM

Herald added a subscriber: llvm-commits. · View Herald TranscriptAug 9 2018, 2:38 AM

tkrupa edited the summary of this revision. (Show Details)Aug 9 2018, 2:39 AM

The title of this patch includes "Constant folding" - shouldn't the code be in ConstantFolding.cpp or some descendant of that?

I remember that there was a suggestion (years ago?) to split target-specific logic like this into its own files because other targets shouldn't be burdened with code that will never affect them, but this hasn't happened yet. There's x86-specific intrinsic code in ConstantFolding.cpp added with rL123206.

I added it to InstCombineCalls.cpp after @craig.topper's suggestion to do so in order to enable adding more optimizations besides constant folding in the same place.

I'm not finding the mails where the extraction of target-specific code came up, but my vague memory says @rnk had some ideas about how to improve things.

craig.topper added inline comments.Aug 9 2018, 10:04 AM

lib/Transforms/InstCombine/InstCombineCalls.cpp
265 ↗	(On Diff #159884)	We should really remove masking from these intrinsics and use a select. But that can be a follow up.
286 ↗	(On Diff #159884)	Probably could use getIntegerBitWidth instead of getPrimitiveSizeInBits. Should be slightly cheaper since getPrimitiveSizeInBits has to detect that its an integer and then call getIntegerBitWidth.
296 ↗	(On Diff #159884)	const APInt & Val0
296 ↗	(On Diff #159884)	Isn't it possible for the element to be a ConstantExpr? In which case this case would fail

tkrupa marked 3 inline comments as done.Aug 13 2018, 12:00 AM

Implemented suggested changes.
Regarding masking with select - do you mean creating new avx512_padds/avx512_psubs intrinsics without masks and replacing the old calls with new intrinsic+select?

LGTM with those two formatting fixes.

lib/Transforms/InstCombine/InstCombineCalls.cpp
288 ↗	(On Diff #160295)	There's an extra space after 'unsigned'
305 ↗	(On Diff #160295)	This is overindented.

This revision is now accepted and ready to land.Aug 13 2018, 11:53 AM

Closed by commit rL339659: [X86] Constant folding of adds/subs intrinsics (authored by tkrupa). · Explain WhyAug 14 2018, 2:04 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Transforms/

InstCombine/

InstCombineCalls.cpp

98 lines

test/

Transforms/

InstCombine/

X86/

x86-adds-subs.ll

351 lines

Diff 160533

llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp

Show First 20 Lines • Show All 237 Lines • ▼ Show 20 Lines	if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
// Set the size of the copy to 0, it will be deleted on the next iteration.		// Set the size of the copy to 0, it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(LenC->getType()));		MI->setLength(Constant::getNullValue(LenC->getType()));
return MI;		return MI;
}		}

return nullptr;		return nullptr;
}		}

		static Value *simplifyX86AddsSubs(const IntrinsicInst &II,
		InstCombiner::BuilderTy &Builder) {
		bool IsAddition = false;
		bool IsMasked = false;

		switch (II.getIntrinsicID()) {
		default: llvm_unreachable("Unexpected intrinsic!");
		case Intrinsic::x86_sse2_padds_b:
		case Intrinsic::x86_sse2_padds_w:
		case Intrinsic::x86_avx2_padds_b:
		case Intrinsic::x86_avx2_padds_w:
		IsAddition = true; IsMasked = false;
		break;
		case Intrinsic::x86_sse2_psubs_b:
		case Intrinsic::x86_sse2_psubs_w:
		case Intrinsic::x86_avx2_psubs_b:
		case Intrinsic::x86_avx2_psubs_w:
		IsAddition = false; IsMasked = false;
		break;
		case Intrinsic::x86_avx512_mask_padds_b_512:
		case Intrinsic::x86_avx512_mask_padds_w_512:
		IsAddition = true; IsMasked = true;
		break;
		case Intrinsic::x86_avx512_mask_psubs_b_512:
		case Intrinsic::x86_avx512_mask_psubs_w_512:
		IsAddition = false; IsMasked = true;
		break;
		}

		auto *Arg0 = dyn_cast<Constant>(II.getOperand(0));
		auto *Arg1 = dyn_cast<Constant>(II.getOperand(1));
		auto VT = cast<VectorType>(II.getType());
		auto SVT = VT->getElementType();
		unsigned NumElems = VT->getNumElements();

		if (!Arg0 \|\| !Arg1 \|\| (IsMasked && !isa<Constant>(II.getOperand(2))))
		return nullptr;

		SmallVector<Constant *, 64> Result;

		APInt MaxValue = APInt::getSignedMaxValue(SVT->getIntegerBitWidth());
		APInt MinValue = APInt::getSignedMinValue(SVT->getIntegerBitWidth());
		for (unsigned i = 0; i < NumElems; ++i) {
		auto *Elt0 = Arg0->getAggregateElement(i);
		auto *Elt1 = Arg1->getAggregateElement(i);
		if (isa<UndefValue>(Elt0) \|\| isa<UndefValue>(Elt1)) {
		Result.push_back(UndefValue::get(SVT));
		continue;
		}

		if (!isa<ConstantInt>(Elt0) \|\| !isa<ConstantInt>(Elt1))
		return nullptr;

		const APInt &Val0 = cast<ConstantInt>(Elt0)->getValue();
		const APInt &Val1 = cast<ConstantInt>(Elt1)->getValue();
		bool Overflow = false;
		APInt ResultElem = IsAddition ? Val0.sadd_ov(Val1, Overflow)
		: Val0.ssub_ov(Val1, Overflow);
		if (Overflow)
		ResultElem = Val0.isNegative() ? MinValue : MaxValue;
		Result.push_back(Constant::getIntegerValue(SVT, ResultElem));
		}

		Value *ResultVec = ConstantVector::get(Result);

		if (II.getNumArgOperands() == 4) { // For masked intrinsics.
		Value *Src = II.getOperand(2);
		auto Mask = II.getOperand(3);
		if (auto *C = dyn_cast<Constant>(Mask))
		if (C->isAllOnesValue())
		return ResultVec;
		auto *MaskTy = VectorType::get(
		Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
		Mask = Builder.CreateBitCast(Mask, MaskTy);
		ResultVec = Builder.CreateSelect(Mask, ResultVec, Src);
		}

		return ResultVec;
		}

static Value *simplifyX86immShift(const IntrinsicInst &II,		static Value *simplifyX86immShift(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {		InstCombiner::BuilderTy &Builder) {
bool LogicalShift = false;		bool LogicalShift = false;
bool ShiftLeft = false;		bool ShiftLeft = false;

switch (II.getIntrinsicID()) {		switch (II.getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");		default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::x86_sse2_psra_d:		case Intrinsic::x86_sse2_psra_d:
▲ Show 20 Lines • Show All 2,266 Lines • ▼ Show 20 Lines	if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
if (V != II)		if (V != II)
return replaceInstUsesWith(*II, V);		return replaceInstUsesWith(*II, V);
return II;		return II;
} else if (Value V = simplifyX86round(II, Builder))		} else if (Value V = simplifyX86round(II, Builder))
return replaceInstUsesWith(*II, V);		return replaceInstUsesWith(*II, V);
break;		break;
}		}

		// Constant fold add/sub with saturation intrinsics.
		case Intrinsic::x86_sse2_padds_b:
		case Intrinsic::x86_sse2_padds_w:
		case Intrinsic::x86_sse2_psubs_b:
		case Intrinsic::x86_sse2_psubs_w:
		case Intrinsic::x86_avx2_padds_b:
		case Intrinsic::x86_avx2_padds_w:
		case Intrinsic::x86_avx2_psubs_b:
		case Intrinsic::x86_avx2_psubs_w:
		case Intrinsic::x86_avx512_mask_padds_b_512:
		case Intrinsic::x86_avx512_mask_padds_w_512:
		case Intrinsic::x86_avx512_mask_psubs_b_512:
		case Intrinsic::x86_avx512_mask_psubs_w_512:
		if (Value V = simplifyX86AddsSubs(II, Builder))
		return replaceInstUsesWith(*II, V);
		break;


// Constant fold ashr( <A x Bi>, Ci ).		// Constant fold ashr( <A x Bi>, Ci ).
// Constant fold lshr( <A x Bi>, Ci ).		// Constant fold lshr( <A x Bi>, Ci ).
// Constant fold shl( <A x Bi>, Ci ).		// Constant fold shl( <A x Bi>, Ci ).
case Intrinsic::x86_sse2_psrai_d:		case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_sse2_psrai_w:		case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:		case Intrinsic::x86_avx2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:		case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx512_psrai_q_128:		case Intrinsic::x86_avx512_psrai_q_128:
▲ Show 20 Lines • Show All 1,925 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/InstCombine/X86/x86-adds-subs.ll

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
				; RUN: opt < %s -instcombine -S \| FileCheck %s

				define <16 x i8> @sse2_adds_b_constant() {
				; CHECK-LABEL: @sse2_adds_b_constant(
				; CHECK-NEXT: ret <16 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
				%1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <16 x i8> %1
				}

				define <16 x i8> @sse2_adds_b_constant_underflow() {
				; CHECK-LABEL: @sse2_adds_b_constant_underflow(
				; CHECK-NEXT: ret <16 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
				%1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <16 x i8> %1
				}

				define <16 x i8> @sse2_adds_b_constant_overflow() {
				; CHECK-LABEL: @sse2_adds_b_constant_overflow(
				; CHECK-NEXT: ret <16 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127>
				%1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120>)
				ret <16 x i8> %1
				}

				define <16 x i8> @sse2_adds_b_constant_undefs() {
				; CHECK-LABEL: @sse2_adds_b_constant_undefs(
				; CHECK-NEXT: ret <16 x i8> <i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
				%1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <16 x i8> %1
				}

				define <32 x i8> @avx2_adds_b_constant() {
				; CHECK-LABEL: @avx2_adds_b_constant(
				; CHECK-NEXT: ret <32 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
				%1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <32 x i8> %1
				}

				define <32 x i8> @avx2_adds_b_constant_underflow() {
				; CHECK-LABEL: @avx2_adds_b_constant_underflow(
				; CHECK-NEXT: ret <32 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
				%1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <32 x i8> %1
				}

				define <32 x i8> @avx2_adds_b_constant_overflow() {
				; CHECK-LABEL: @avx2_adds_b_constant_overflow(
				; CHECK-NEXT: ret <32 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127>
				%1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120>)
				ret <32 x i8> %1
				}

				define <32 x i8> @avx2_adds_b_constant_undefs() {
				; CHECK-LABEL: @avx2_adds_b_constant_undefs(
				; CHECK-NEXT: ret <32 x i8> <i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
				%1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <32 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <32 x i8> %1
				}

				define <64 x i8> @avx512_mask_adds_b_constant() {
				; CHECK-LABEL: @avx512_mask_adds_b_constant(
				; CHECK-NEXT: ret <64 x i8> <i8 2, i8 0, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
				%1 = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -3)
				ret <64 x i8> %1
				}

				define <64 x i8> @avx512_mask_adds_b_constant_underflow() {
				; CHECK-LABEL: @avx512_mask_adds_b_constant_underflow(
				; CHECK-NEXT: ret <64 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 -128, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
				%1 = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -107, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -1)
				ret <64 x i8> %1
				}

				define <64 x i8> @avx512_mask_adds_b_constant_overflow() {
				; CHECK-LABEL: @avx512_mask_adds_b_constant_overflow(
				; CHECK-NEXT: ret <64 x i8> <i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 16, i8 18, i8 20, i8 127, i8 24, i8 26, i8 28, i8 30, i8 127>
				%1 = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 125, i8 12, i8 13, i8 14, i8 15, i8 16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 120>, <64 x i8> zeroinitializer, i64 -1)
				ret <64 x i8> %1
				}

				define <64 x i8> @avx512_mask_adds_b_constant_undefs() {
				; CHECK-LABEL: @avx512_mask_adds_b_constant_undefs(
				; CHECK-NEXT: ret <64 x i8> <i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33, i8 undef, i8 4, i8 6, i8 8, i8 10, i8 undef, i8 14, i8 16, i8 18, i8 20, i8 22, i8 24, i8 26, i8 28, i8 30, i8 33>
				%1 = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 undef, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <64 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -1)
				ret <64 x i8> %1
				}

				define <8 x i16> @sse2_adds_w_constant() {
				; CHECK-LABEL: @sse2_adds_w_constant(
				; CHECK-NEXT: ret <8 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16>
				%1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
				ret <8 x i16> %1
				}

				define <8 x i16> @sse2_adds_w_constant_underflow() {
				; CHECK-LABEL: @sse2_adds_w_constant_underflow(
				; CHECK-NEXT: ret <8 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 -32768>
				%1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -32107>, <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -12188>)
				ret <8 x i16> %1
				}

				define <8 x i16> @sse2_adds_w_constant_overflow() {
				; CHECK-LABEL: @sse2_adds_w_constant_overflow(
				; CHECK-NEXT: ret <8 x i16> <i16 2, i16 32767, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16>
				%1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> <i16 1, i16 8248, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> <i16 1, i16 25192, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
				ret <8 x i16> %1
				}

				define <8 x i16> @sse2_adds_w_constant_undefs() {
				; CHECK-LABEL: @sse2_adds_w_constant_undefs(
				; CHECK-NEXT: ret <8 x i16> <i16 undef, i16 4, i16 6, i16 8, i16 10, i16 undef, i16 14, i16 16>
				%1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 undef, i16 7, i16 8>, <8 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
				ret <8 x i16> %1
				}

				define <16 x i16> @avx2_adds_w_constant() {
				; CHECK-LABEL: @avx2_adds_w_constant(
				; CHECK-NEXT: ret <16 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
				%1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
				ret <16 x i16> %1
				}

				define <16 x i16> @avx2_adds_w_constant_underflow() {
				; CHECK-LABEL: @avx2_adds_w_constant_underflow(
				; CHECK-NEXT: ret <16 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 -32768, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
				%1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -21107, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -15188, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
				ret <16 x i16> %1
				}

				define <16 x i16> @avx2_adds_w_constant_overflow() {
				; CHECK-LABEL: @avx2_adds_w_constant_overflow(
				; CHECK-NEXT: ret <16 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 32767, i16 24, i16 26, i16 28, i16 30, i16 32767>
				%1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20125, i16 12, i16 13, i16 14, i16 15, i16 20160>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20230, i16 12, i16 13, i16 14, i16 15, i16 20120>)
				ret <16 x i16> %1
				}

				define <16 x i16> @avx2_adds_w_constant_undefs() {
				; CHECK-LABEL: @avx2_adds_w_constant_undefs(
				; CHECK-NEXT: ret <16 x i16> <i16 undef, i16 4, i16 6, i16 8, i16 10, i16 undef, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
				%1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 undef, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <16 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
				ret <16 x i16> %1
				}

				define <32 x i16> @avx512_mask_adds_w_constant() {
				; CHECK-LABEL: @avx512_mask_adds_w_constant(
				; CHECK-NEXT: ret <32 x i16> <i16 2, i16 0, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33, i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
				%1 = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -3)
				ret <32 x i16> %1
				}

				define <32 x i16> @avx512_mask_adds_w_constant_underflow() {
				; CHECK-LABEL: @avx512_mask_adds_w_constant_underflow(
				; CHECK-NEXT: ret <32 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 -32768, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33, i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 -32768, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
				%1 = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -20107, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -20107, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -20168, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 -20248, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -1)
				ret <32 x i16> %1
				}

				define <32 x i16> @avx512_mask_adds_w_constant_overflow() {
				; CHECK-LABEL: @avx512_mask_adds_w_constant_overflow(
				; CHECK-NEXT: ret <32 x i16> <i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 32767, i16 24, i16 26, i16 28, i16 30, i16 32767, i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 32767, i16 24, i16 26, i16 28, i16 30, i16 32767>
				%1 = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20125, i16 12, i16 13, i16 14, i16 15, i16 20200, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20125, i16 12, i16 13, i16 14, i16 15, i16 20200>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20200, i16 12, i16 13, i16 14, i16 15, i16 20120, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 20211, i16 12, i16 13, i16 14, i16 15, i16 20120>, <32 x i16> zeroinitializer, i32 -1)
				ret <32 x i16> %1
				}

				define <32 x i16> @avx512_mask_adds_w_constant_undefs() {
				; CHECK-LABEL: @avx512_mask_adds_w_constant_undefs(
				; CHECK-NEXT: ret <32 x i16> <i16 undef, i16 4, i16 6, i16 8, i16 10, i16 undef, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33, i16 undef, i16 4, i16 6, i16 8, i16 10, i16 undef, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30, i16 33>
				%1 = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 undef, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 undef, i16 2, i16 3, i16 4, i16 5, i16 undef, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16>, <32 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -1)
				ret <32 x i16> %1
				}

				define <16 x i8> @sse2_subs_b_constant() {
				; CHECK-LABEL: @sse2_subs_b_constant(
				; CHECK-NEXT: ret <16 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
				%1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <16 x i8> %1
				}

				define <16 x i8> @sse2_subs_b_constant_underflow() {
				; CHECK-LABEL: @sse2_subs_b_constant_underflow(
				; CHECK-NEXT: ret <16 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
				%1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <16 x i8> %1
				}

				define <16 x i8> @sse2_subs_b_constant_overflow() {
				; CHECK-LABEL: @sse2_subs_b_constant_overflow(
				; CHECK-NEXT: ret <16 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127>
				%1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120>)
				ret <16 x i8> %1
				}

				define <16 x i8> @sse2_subs_b_constant_undefs() {
				; CHECK-LABEL: @sse2_subs_b_constant_undefs(
				; CHECK-NEXT: ret <16 x i8> <i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
				%1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> <i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <16 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <16 x i8> %1
				}

				define <32 x i8> @avx2_subs_b_constant() {
				; CHECK-LABEL: @avx2_subs_b_constant(
				; CHECK-NEXT: ret <32 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
				%1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <32 x i8> %1
				}

				define <32 x i8> @avx2_subs_b_constant_underflow() {
				; CHECK-LABEL: @avx2_subs_b_constant_underflow(
				; CHECK-NEXT: ret <32 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
				%1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <32 x i8> %1
				}

				define <32 x i8> @avx2_subs_b_constant_overflow() {
				; CHECK-LABEL: @avx2_subs_b_constant_overflow(
				; CHECK-NEXT: ret <32 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127>
				%1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16>, <32 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120>)
				ret <32 x i8> %1
				}

				define <32 x i8> @avx2_subs_b_constant_undefs() {
				; CHECK-LABEL: @avx2_subs_b_constant_undefs(
				; CHECK-NEXT: ret <32 x i8> <i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
				%1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> <i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <32 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>)
				ret <32 x i8> %1
				}

				define <64 x i8> @avx512_mask_subs_b_constant() {
				; CHECK-LABEL: @avx512_mask_subs_b_constant(
				; CHECK-NEXT: ret <64 x i8> <i8 -2, i8 0, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
				%1 = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -3)
				ret <64 x i8> %1
				}

				define <64 x i8> @avx512_mask_subs_b_constant_underflow() {
				; CHECK-LABEL: @avx512_mask_subs_b_constant_underflow(
				; CHECK-NEXT: ret <64 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -128, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
				%1 = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -107, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 48, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -1)
				ret <64 x i8> %1
				}

				define <64 x i8> @avx512_mask_subs_b_constant_overflow() {
				; CHECK-LABEL: @avx512_mask_subs_b_constant_overflow(
				; CHECK-NEXT: ret <64 x i8> <i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127, i8 -2, i8 -4, i8 -6, i8 -8, i8 -10, i8 -12, i8 -14, i8 -16, i8 -18, i8 -20, i8 127, i8 -24, i8 -26, i8 -28, i8 -30, i8 127>
				%1 = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 125, i8 -12, i8 -13, i8 -14, i8 -15, i8 16>, <64 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 -11, i8 12, i8 13, i8 14, i8 15, i8 -120>, <64 x i8> zeroinitializer, i64 -1)
				ret <64 x i8> %1
				}

				define <64 x i8> @avx512_mask_subs_b_constant_undefs() {
				; CHECK-LABEL: @avx512_mask_subs_b_constant_undefs(
				; CHECK-NEXT: ret <64 x i8> <i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33, i8 undef, i8 -4, i8 -6, i8 -8, i8 -10, i8 undef, i8 -14, i8 -16, i8 -18, i8 -20, i8 -22, i8 -24, i8 -26, i8 -28, i8 -30, i8 -33>
				%1 = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> <i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 undef, i8 -2, i8 -3, i8 -4, i8 -5, i8 undef, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16>, <64 x i8> <i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17>, <64 x i8> zeroinitializer, i64 -1)
				ret <64 x i8> %1
				}

				define <8 x i16> @sse2_subs_w_constant() {
				; CHECK-LABEL: @sse2_subs_w_constant(
				; CHECK-NEXT: ret <8 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16>
				%1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8>, <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
				ret <8 x i16> %1
				}

				define <8 x i16> @sse2_subs_w_constant_underflow() {
				; CHECK-LABEL: @sse2_subs_w_constant_underflow(
				; CHECK-NEXT: ret <8 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -32768>
				%1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -32107>, <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 12188>)
				ret <8 x i16> %1
				}

				define <8 x i16> @sse2_subs_w_constant_overflow() {
				; CHECK-LABEL: @sse2_subs_w_constant_overflow(
				; CHECK-NEXT: ret <8 x i16> <i16 -2, i16 32767, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16>
				%1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> <i16 -1, i16 8248, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8>, <8 x i16> <i16 1, i16 -25192, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
				ret <8 x i16> %1
				}

				define <8 x i16> @sse2_subs_w_constant_undefs() {
				; CHECK-LABEL: @sse2_subs_w_constant_undefs(
				; CHECK-NEXT: ret <8 x i16> <i16 undef, i16 -4, i16 -6, i16 -8, i16 -10, i16 undef, i16 -14, i16 -16>
				%1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> <i16 undef, i16 -2, i16 -3, i16 -4, i16 -5, i16 undef, i16 -7, i16 -8>, <8 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>)
				ret <8 x i16> %1
				}

				define <16 x i16> @avx2_subs_w_constant() {
				; CHECK-LABEL: @avx2_subs_w_constant(
				; CHECK-NEXT: ret <16 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
				%1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
				ret <16 x i16> %1
				}

				define <16 x i16> @avx2_subs_w_constant_underflow() {
				; CHECK-LABEL: @avx2_subs_w_constant_underflow(
				; CHECK-NEXT: ret <16 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -32768, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
				%1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -21107, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 15188, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
				ret <16 x i16> %1
				}

				define <16 x i16> @avx2_subs_w_constant_overflow() {
				; CHECK-LABEL: @avx2_subs_w_constant_overflow(
				; CHECK-NEXT: ret <16 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 32767, i16 -24, i16 -26, i16 -28, i16 -30, i16 32767>
				%1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 20125, i16 -12, i16 -13, i16 -14, i16 -15, i16 20160>, <16 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 -20230, i16 12, i16 13, i16 14, i16 15, i16 -20120>)
				ret <16 x i16> %1
				}

				define <16 x i16> @avx2_subs_w_constant_undefs() {
				; CHECK-LABEL: @avx2_subs_w_constant_undefs(
				; CHECK-NEXT: ret <16 x i16> <i16 undef, i16 -4, i16 -6, i16 -8, i16 -10, i16 undef, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
				%1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> <i16 undef, i16 -2, i16 -3, i16 -4, i16 -5, i16 undef, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <16 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>)
				ret <16 x i16> %1
				}

				define <32 x i16> @avx512_mask_subs_w_constant() {
				; CHECK-LABEL: @avx512_mask_subs_w_constant(
				; CHECK-NEXT: ret <32 x i16> <i16 -2, i16 0, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33, i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
				%1 = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -3)
				ret <32 x i16> %1
				}

				define <32 x i16> @avx512_mask_subs_w_constant_underflow() {
				; CHECK-LABEL: @avx512_mask_subs_w_constant_underflow(
				; CHECK-NEXT: ret <32 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -32768, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33, i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -32768, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
				%1 = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -20107, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -20107, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 20168, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 20248, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -1)
				ret <32 x i16> %1
				}

				define <32 x i16> @avx512_mask_subs_w_constant_overflow() {
				; CHECK-LABEL: @avx512_mask_subs_w_constant_overflow(
				; CHECK-NEXT: ret <32 x i16> <i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 32767, i16 -24, i16 -26, i16 -28, i16 -30, i16 32767, i16 -2, i16 -4, i16 -6, i16 -8, i16 -10, i16 -12, i16 -14, i16 -16, i16 -18, i16 -20, i16 32767, i16 -24, i16 -26, i16 -28, i16 -30, i16 32767>
				%1 = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 20125, i16 -12, i16 -13, i16 -14, i16 -15, i16 20200, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 20125, i16 -12, i16 -13, i16 -14, i16 -15, i16 20200>, <32 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 -20200, i16 12, i16 13, i16 14, i16 15, i16 -20120, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 -20211, i16 12, i16 13, i16 14, i16 15, i16 -20120>, <32 x i16> zeroinitializer, i32 -1)
				ret <32 x i16> %1
				}

				define <32 x i16> @avx512_mask_subs_w_constant_undefs() {
				; CHECK-LABEL: @avx512_mask_subs_w_constant_undefs(
				; CHECK-NEXT: ret <32 x i16> <i16 undef, i16 -4, i16 -6, i16 -8, i16 -10, i16 undef, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33, i16 undef, i16 -4, i16 -6, i16 -8, i16 -10, i16 undef, i16 -14, i16 -16, i16 -18, i16 -20, i16 -22, i16 -24, i16 -26, i16 -28, i16 -30, i16 -33>
				%1 = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> <i16 undef, i16 -2, i16 -3, i16 -4, i16 -5, i16 undef, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 undef, i16 -2, i16 -3, i16 -4, i16 -5, i16 undef, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16>, <32 x i16> <i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17, i16 undef, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 17>, <32 x i16> zeroinitializer, i32 -1)
				ret <32 x i16> %1
				}

				declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
				declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
				declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
				declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
				declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
				declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
				declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
				declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
				declare <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) nounwind readnone
				declare <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) nounwind readnone
				declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) nounwind readnone
				declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) nounwind readnone