This is an archive of the discontinued LLVM Phabricator instance.

[X86][SSE] Use (V)PHMINPOSUW for vXi8 SMAX/SMIN/UMAX/UMIN horizontal reductions (PR32841)
ClosedPublic

Authored by RKSimon on Dec 15 2017, 8:34 AM.

Download Raw Diff

Details

Reviewers

mkuper
zvi
craig.topper
spatel
andreadb

Commits

rGf6d4ab6daf67: [X86][SSE] Use (V)PHMINPOSUW for vXi8 SMAX/SMIN/UMAX/UMIN horizontal reductions…
rL321070: [X86][SSE] Use (V)PHMINPOSUW for vXi8 SMAX/SMIN/UMAX/UMIN horizontal reductions…

Summary

Extension to D39729, which performed this for vXi16: vXi8 UMIN horizontal reductions can be performed too, with the same bit flipping used to handle the SMAX/SMIN/UMAX cases.

This makes use of the fact that by performing a pair-wise i8 SHUFFLE_UMIN before PHMINPOSUW, we both get the UMIN of each pair and zero-extend the upper bits, ready for v8i16.

Diff Detail

Repository: rL LLVM

Event Timeline

RKSimon created this revision. · Dec 15 2017, 8:34 AM

craig.topper added inline comments. · Dec 16 2017, 10:50 PM

lib/Target/X86/X86ISelLowering.cpp
30494	Shouldn't this just be getSizeInBits()? It's already scalar.

Use getSizeInBits instead of getScalarSizeInBits for a known scalar

LGTM

This revision is now accepted and ready to land. · Dec 18 2017, 1:27 PM

Closed by commit rL321070: [X86][SSE] Use (V)PHMINPOSUW for vXi8 SMAX/SMIN/UMAX/UMIN horizontal reductions… (authored by RKSimon). · Explain Why · Dec 19 2017, 4:03 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

lib/

Target/

X86/

	X86ISelLowering.cpp
	X86ISelLowering.cpp (revision 320823)

36 lines

test/

CodeGen/

X86/

	horizontal-reduce-smax.ll
	horizontal-reduce-smax.ll (revision 320823)

284 lines

	horizontal-reduce-smin.ll
	horizontal-reduce-smin.ll (revision 320823)

284 lines

	horizontal-reduce-umax.ll
	horizontal-reduce-umax.ll (revision 320823)

284 lines

	horizontal-reduce-umin.ll
	horizontal-reduce-umin.ll (revision 320823)

170 lines

Diff 127138

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 30,441 Lines • ▼ Show 20 Lines	static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
Ops[0] = Zext1.getOperand(0);		Ops[0] = Zext1.getOperand(0);
SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);		SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);

// Actually build the SAD		// Actually build the SAD
MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64);		MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64);
return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);		return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);
}		}

// Attempt to replace an min/max v8i16 horizontal reduction with PHMINPOSUW.		// Attempt to replace an min/max v8i16/v16i8 horizontal reduction with
		// PHMINPOSUW.
static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,		static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {		const X86Subtarget &Subtarget) {
// Bail without SSE41.		// Bail without SSE41.
if (!Subtarget.hasSSE41())		if (!Subtarget.hasSSE41())
return SDValue();		return SDValue();

EVT ExtractVT = Extract->getValueType(0);		EVT ExtractVT = Extract->getValueType(0);
if (ExtractVT != MVT::i16)		if (ExtractVT != MVT::i16 && ExtractVT != MVT::i8)
return SDValue();		return SDValue();

// Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.		// Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
unsigned BinOp;		unsigned BinOp;
SDValue Src = matchBinOpReduction(		SDValue Src = matchBinOpReduction(
Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN});		Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN});
if (!Src)		if (!Src)
return SDValue();		return SDValue();

EVT SrcVT = Src.getValueType();		EVT SrcVT = Src.getValueType();
EVT SrcSVT = SrcVT.getScalarType();		EVT SrcSVT = SrcVT.getScalarType();
if (SrcSVT != MVT::i16 || (SrcVT.getSizeInBits() % 128) != 0)		if (SrcSVT != ExtractVT || (SrcVT.getSizeInBits() % 128) != 0)
return SDValue();		return SDValue();

SDLoc DL(Extract);		SDLoc DL(Extract);
SDValue MinPos = Src;		SDValue MinPos = Src;

// First, reduce the source down to 128-bit, applying BinOp to lo/hi.		// First, reduce the source down to 128-bit, applying BinOp to lo/hi.
while (SrcVT.getSizeInBits() > 128) {		while (SrcVT.getSizeInBits() > 128) {
unsigned NumElts = SrcVT.getVectorNumElements();		unsigned NumElts = SrcVT.getVectorNumElements();
unsigned NumSubElts = NumElts / 2;		unsigned NumSubElts = NumElts / 2;
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcSVT, NumSubElts);		SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcSVT, NumSubElts);
unsigned SubSizeInBits = SrcVT.getSizeInBits();		unsigned SubSizeInBits = SrcVT.getSizeInBits();
SDValue Lo = extractSubVector(MinPos, 0, DAG, DL, SubSizeInBits);		SDValue Lo = extractSubVector(MinPos, 0, DAG, DL, SubSizeInBits);
SDValue Hi = extractSubVector(MinPos, NumSubElts, DAG, DL, SubSizeInBits);		SDValue Hi = extractSubVector(MinPos, NumSubElts, DAG, DL, SubSizeInBits);
MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi);		MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi);
}		}
assert(SrcVT == MVT::v8i16 && "Unexpected value type");		assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) ||
		(SrcVT == MVT::v16i8 && ExtractVT == MVT::i8)) &&
		"Unexpected value type");

// PHMINPOSUW applies to UMIN(v8i16), for SMIN/SMAX/UMAX we must apply a mask		// PHMINPOSUW applies to UMIN(v8i16), for SMIN/SMAX/UMAX we must apply a mask
// to flip the value accordingly.		// to flip the value accordingly.
SDValue Mask;		SDValue Mask;
		unsigned MaskEltsBits = ExtractVT.getScalarSizeInBits();
		[Inline comment — craig.topper (Unsubmitted, Not Done)]: Shouldn't this just be getSizeInBits()? It's already scalar.
if (BinOp == ISD::SMAX)		if (BinOp == ISD::SMAX)
Mask = DAG.getConstant(APInt::getSignedMaxValue(16), DL, SrcVT);		Mask = DAG.getConstant(APInt::getSignedMaxValue(MaskEltsBits), DL, SrcVT);
else if (BinOp == ISD::SMIN)		else if (BinOp == ISD::SMIN)
Mask = DAG.getConstant(APInt::getSignedMinValue(16), DL, SrcVT);		Mask = DAG.getConstant(APInt::getSignedMinValue(MaskEltsBits), DL, SrcVT);
else if (BinOp == ISD::UMAX)		else if (BinOp == ISD::UMAX)
Mask = DAG.getConstant(APInt::getAllOnesValue(16), DL, SrcVT);		Mask = DAG.getConstant(APInt::getAllOnesValue(MaskEltsBits), DL, SrcVT);

if (Mask)		if (Mask)
MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);		MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);

MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, SrcVT, MinPos);		// For v16i8 cases we need to perform UMIN on pairs of byte elements,
		// shuffling each upper element down and insert zeros. This means that the
		// v16i8 UMIN will leave the upper element as zero, performing zero-extension
		// ready for the PHMINPOS.
		if (ExtractVT == MVT::i8) {
		SDValue Upper = DAG.getVectorShuffle(
		SrcVT, DL, MinPos, getZeroVector(MVT::v16i8, Subtarget, DAG, DL),
		{1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
		MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper);
		}

		// Perform the PHMINPOS on a v8i16 vector,
		MinPos = DAG.getBitcast(MVT::v8i16, MinPos);
		MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, MVT::v8i16, MinPos);
		MinPos = DAG.getBitcast(SrcVT, MinPos);

if (Mask)		if (Mask)
MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);		MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);

return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, MinPos,		return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, MinPos,
DAG.getIntPtrConstant(0, DL));		DAG.getIntPtrConstant(0, DL));
}		}

▲ Show 20 Lines • Show All 299 Lines • ▼ Show 20 Lines	static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
// pre-legalization,		// pre-legalization,
if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))		if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
return SAD;		return SAD;

// Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK.		// Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK.
if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget))		if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget))
return Cmp;		return Cmp;

// Attempt to replace min/max v8i16 reductions with PHMINPOSUW.		// Attempt to replace min/max v8i16/v16i8 reductions with PHMINPOSUW.
if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))		if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))
return MinMax;		return MinMax;

// Only operate on vectors of 4 elements, where the alternative shuffling		// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.		// gets to be more expensive.
if (SrcVT != MVT::v4i32)		if (SrcVT != MVT::v4i32)
return SDValue();		return SDValue();

▲ Show 20 Lines • Show All 7,607 Lines • Show Last 20 Lines

test/CodeGen/X86/horizontal-reduce-smax.ll

	Show First 20 Lines • Show All 303 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: pandn %xmm0, %xmm1			; X86-SSE2-NEXT: pandn %xmm0, %xmm1
	; X86-SSE2-NEXT: por %xmm2, %xmm1			; X86-SSE2-NEXT: por %xmm2, %xmm1
	; X86-SSE2-NEXT: movd %xmm1, %eax			; X86-SSE2-NEXT: movd %xmm1, %eax
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v16i8:			; X86-SSE42-LABEL: test_reduce_v16i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0			; X86-SSE42-NEXT: psrlw $8, %xmm2
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm2
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX-LABEL: test_reduce_v16i8:			; X86-AVX-LABEL: test_reduce_v16i8:
	; X86-AVX: ## %bb.0:			; X86-AVX: ## %bb.0:
	; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX-NEXT: retl			; X86-AVX-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v16i8:			; X64-SSE2-LABEL: test_reduce_v16i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-SSE2-NEXT: movdqa %xmm0, %xmm2			; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
	Show All 22 Lines
	; X64-SSE2-NEXT: pandn %xmm0, %xmm1			; X64-SSE2-NEXT: pandn %xmm0, %xmm1
	; X64-SSE2-NEXT: por %xmm2, %xmm1			; X64-SSE2-NEXT: por %xmm2, %xmm1
	; X64-SSE2-NEXT: movd %xmm1, %eax			; X64-SSE2-NEXT: movd %xmm1, %eax
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v16i8:			; X64-SSE42-LABEL: test_reduce_v16i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0			; X64-SSE42-NEXT: psrlw $8, %xmm2
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm2
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX-LABEL: test_reduce_v16i8:			; X64-AVX-LABEL: test_reduce_v16i8:
	; X64-AVX: ## %bb.0:			; X64-AVX: ## %bb.0:
	; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX-NEXT: retq			; X64-AVX-NEXT: retq
	%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp sgt <16 x i8> %a0, %1			%2 = icmp sgt <16 x i8> %a0, %1
	%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1			%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
	%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%5 = icmp sgt <16 x i8> %3, %4			%5 = icmp sgt <16 x i8> %3, %4
	▲ Show 20 Lines • Show All 495 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: por %xmm1, %xmm2			; X86-SSE2-NEXT: por %xmm1, %xmm2
	; X86-SSE2-NEXT: movd %xmm2, %eax			; X86-SSE2-NEXT: movd %xmm2, %eax
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v32i8:			; X86-SSE42-LABEL: test_reduce_v32i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0			; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0			; X86-SSE42-NEXT: psrlw $8, %xmm2
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm2
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX1-LABEL: test_reduce_v32i8:			; X86-AVX1-LABEL: test_reduce_v32i8:
	; X86-AVX1: ## %bb.0:			; X86-AVX1: ## %bb.0:
	; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1			; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
	; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX1-NEXT: vzeroupper			; X86-AVX1-NEXT: vzeroupper
	; X86-AVX1-NEXT: retl			; X86-AVX1-NEXT: retl
	;			;
	; X86-AVX2-LABEL: test_reduce_v32i8:			; X86-AVX2-LABEL: test_reduce_v32i8:
	; X86-AVX2: ## %bb.0:			; X86-AVX2: ## %bb.0:
	; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX2-NEXT: vzeroupper			; X86-AVX2-NEXT: vzeroupper
	; X86-AVX2-NEXT: retl			; X86-AVX2-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v32i8:			; X64-SSE2-LABEL: test_reduce_v32i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: movdqa %xmm0, %xmm2			; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
	Show All 29 Lines
	; X64-SSE2-NEXT: por %xmm1, %xmm2			; X64-SSE2-NEXT: por %xmm1, %xmm2
	; X64-SSE2-NEXT: movd %xmm2, %eax			; X64-SSE2-NEXT: movd %xmm2, %eax
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v32i8:			; X64-SSE42-LABEL: test_reduce_v32i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0			; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0			; X64-SSE42-NEXT: psrlw $8, %xmm2
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm2
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX1-LABEL: test_reduce_v32i8:			; X64-AVX1-LABEL: test_reduce_v32i8:
	; X64-AVX1: ## %bb.0:			; X64-AVX1: ## %bb.0:
	; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1			; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
	; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX1-NEXT: vzeroupper			; X64-AVX1-NEXT: vzeroupper
	; X64-AVX1-NEXT: retq			; X64-AVX1-NEXT: retq
	;			;
	; X64-AVX2-LABEL: test_reduce_v32i8:			; X64-AVX2-LABEL: test_reduce_v32i8:
	; X64-AVX2: ## %bb.0:			; X64-AVX2: ## %bb.0:
	; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX2-NEXT: vzeroupper			; X64-AVX2-NEXT: vzeroupper
	; X64-AVX2-NEXT: retq			; X64-AVX2-NEXT: retq
	;			;
	; X64-AVX512-LABEL: test_reduce_v32i8:			; X64-AVX512-LABEL: test_reduce_v32i8:
	; X64-AVX512: ## %bb.0:			; X64-AVX512: ## %bb.0:
	; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
	; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX512-NEXT: vzeroupper			; X64-AVX512-NEXT: vzeroupper
	; X64-AVX512-NEXT: retq			; X64-AVX512-NEXT: retq
	%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp sgt <32 x i8> %a0, %1			%2 = icmp sgt <32 x i8> %a0, %1
	%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1			%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
	%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	▲ Show 20 Lines • Show All 673 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v64i8:			; X86-SSE42-LABEL: test_reduce_v64i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pmaxsb %xmm3, %xmm1			; X86-SSE42-NEXT: pmaxsb %xmm3, %xmm1
	; X86-SSE42-NEXT: pmaxsb %xmm2, %xmm0			; X86-SSE42-NEXT: pmaxsb %xmm2, %xmm0
	; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0			; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0			; X86-SSE42-NEXT: psrlw $8, %xmm2
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm2
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X86-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX1-LABEL: test_reduce_v64i8:			; X86-AVX1-LABEL: test_reduce_v64i8:
	; X86-AVX1: ## %bb.0:			; X86-AVX1: ## %bb.0:
	; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2			; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
	; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3			; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
	; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2			; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
	; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0			; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX1-NEXT: vzeroupper			; X86-AVX1-NEXT: vzeroupper
	; X86-AVX1-NEXT: retl			; X86-AVX1-NEXT: retl
	;			;
	; X86-AVX2-LABEL: test_reduce_v64i8:			; X86-AVX2-LABEL: test_reduce_v64i8:
	; X86-AVX2: ## %bb.0:			; X86-AVX2: ## %bb.0:
	; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX2-NEXT: vzeroupper			; X86-AVX2-NEXT: vzeroupper
	; X86-AVX2-NEXT: retl			; X86-AVX2-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v64i8:			; X64-SSE2-LABEL: test_reduce_v64i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: movdqa %xmm0, %xmm4			; X64-SSE2-NEXT: movdqa %xmm0, %xmm4
	▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v64i8:			; X64-SSE42-LABEL: test_reduce_v64i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pmaxsb %xmm3, %xmm1			; X64-SSE42-NEXT: pmaxsb %xmm3, %xmm1
	; X64-SSE42-NEXT: pmaxsb %xmm2, %xmm0			; X64-SSE42-NEXT: pmaxsb %xmm2, %xmm0
	; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0			; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0			; X64-SSE42-NEXT: psrlw $8, %xmm2
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm2
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X64-SSE42-NEXT: pmaxsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX1-LABEL: test_reduce_v64i8:			; X64-AVX1-LABEL: test_reduce_v64i8:
	; X64-AVX1: ## %bb.0:			; X64-AVX1: ## %bb.0:
	; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2			; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
	; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3			; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
	; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2			; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
	; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0			; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX1-NEXT: vzeroupper			; X64-AVX1-NEXT: vzeroupper
	; X64-AVX1-NEXT: retq			; X64-AVX1-NEXT: retq
	;			;
	; X64-AVX2-LABEL: test_reduce_v64i8:			; X64-AVX2-LABEL: test_reduce_v64i8:
	; X64-AVX2: ## %bb.0:			; X64-AVX2: ## %bb.0:
	; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX2-NEXT: vzeroupper			; X64-AVX2-NEXT: vzeroupper
	; X64-AVX2-NEXT: retq			; X64-AVX2-NEXT: retq
	;			;
	; X64-AVX512-LABEL: test_reduce_v64i8:			; X64-AVX512-LABEL: test_reduce_v64i8:
	; X64-AVX512: ## %bb.0:			; X64-AVX512: ## %bb.0:
	; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1			; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
	; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
	; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
	; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
	; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX512-NEXT: vzeroupper			; X64-AVX512-NEXT: vzeroupper
	; X64-AVX512-NEXT: retq			; X64-AVX512-NEXT: retq
	%1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp sgt <64 x i8> %a0, %1			%2 = icmp sgt <64 x i8> %a0, %1
	%3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1			%3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
	%4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	Show All 17 Lines

test/CodeGen/X86/horizontal-reduce-smin.ll

	Show First 20 Lines • Show All 305 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: pandn %xmm0, %xmm1			; X86-SSE2-NEXT: pandn %xmm0, %xmm1
	; X86-SSE2-NEXT: por %xmm2, %xmm1			; X86-SSE2-NEXT: por %xmm2, %xmm1
	; X86-SSE2-NEXT: movd %xmm1, %eax			; X86-SSE2-NEXT: movd %xmm1, %eax
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v16i8:			; X86-SSE42-LABEL: test_reduce_v16i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X86-SSE42-NEXT: pminsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X86-SSE42-NEXT: pminsb %xmm1, %xmm0			; X86-SSE42-NEXT: psrlw $8, %xmm2
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm2
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X86-SSE42-NEXT: pminsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX-LABEL: test_reduce_v16i8:			; X86-AVX-LABEL: test_reduce_v16i8:
	; X86-AVX: ## %bb.0:			; X86-AVX: ## %bb.0:
	; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX-NEXT: retl			; X86-AVX-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v16i8:			; X64-SSE2-LABEL: test_reduce_v16i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-SSE2-NEXT: movdqa %xmm1, %xmm2			; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
	Show All 22 Lines
	; X64-SSE2-NEXT: pandn %xmm0, %xmm1			; X64-SSE2-NEXT: pandn %xmm0, %xmm1
	; X64-SSE2-NEXT: por %xmm2, %xmm1			; X64-SSE2-NEXT: por %xmm2, %xmm1
	; X64-SSE2-NEXT: movd %xmm1, %eax			; X64-SSE2-NEXT: movd %xmm1, %eax
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v16i8:			; X64-SSE42-LABEL: test_reduce_v16i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X64-SSE42-NEXT: pminsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X64-SSE42-NEXT: pminsb %xmm1, %xmm0			; X64-SSE42-NEXT: psrlw $8, %xmm2
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm2
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X64-SSE42-NEXT: pminsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX-LABEL: test_reduce_v16i8:			; X64-AVX-LABEL: test_reduce_v16i8:
	; X64-AVX: ## %bb.0:			; X64-AVX: ## %bb.0:
	; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX-NEXT: retq			; X64-AVX-NEXT: retq
	%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp slt <16 x i8> %a0, %1			%2 = icmp slt <16 x i8> %a0, %1
	%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1			%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
	%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%5 = icmp slt <16 x i8> %3, %4			%5 = icmp slt <16 x i8> %3, %4
	▲ Show 20 Lines • Show All 497 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: por %xmm1, %xmm2			; X86-SSE2-NEXT: por %xmm1, %xmm2
	; X86-SSE2-NEXT: movd %xmm2, %eax			; X86-SSE2-NEXT: movd %xmm2, %eax
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v32i8:			; X86-SSE42-LABEL: test_reduce_v32i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pminsb %xmm1, %xmm0			; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X86-SSE42-NEXT: pminsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X86-SSE42-NEXT: pminsb %xmm1, %xmm0			; X86-SSE42-NEXT: psrlw $8, %xmm2
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm2
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X86-SSE42-NEXT: pminsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX1-LABEL: test_reduce_v32i8:			; X86-AVX1-LABEL: test_reduce_v32i8:
	; X86-AVX1: ## %bb.0:			; X86-AVX1: ## %bb.0:
	; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1			; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
	; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX1-NEXT: vzeroupper			; X86-AVX1-NEXT: vzeroupper
	; X86-AVX1-NEXT: retl			; X86-AVX1-NEXT: retl
	;			;
	; X86-AVX2-LABEL: test_reduce_v32i8:			; X86-AVX2-LABEL: test_reduce_v32i8:
	; X86-AVX2: ## %bb.0:			; X86-AVX2: ## %bb.0:
	; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX2-NEXT: vzeroupper			; X86-AVX2-NEXT: vzeroupper
	; X86-AVX2-NEXT: retl			; X86-AVX2-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v32i8:			; X64-SSE2-LABEL: test_reduce_v32i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: movdqa %xmm1, %xmm2			; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
	Show All 29 Lines
	; X64-SSE2-NEXT: por %xmm1, %xmm2			; X64-SSE2-NEXT: por %xmm1, %xmm2
	; X64-SSE2-NEXT: movd %xmm2, %eax			; X64-SSE2-NEXT: movd %xmm2, %eax
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v32i8:			; X64-SSE42-LABEL: test_reduce_v32i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pminsb %xmm1, %xmm0			; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X64-SSE42-NEXT: pminsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X64-SSE42-NEXT: pminsb %xmm1, %xmm0			; X64-SSE42-NEXT: psrlw $8, %xmm2
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm2
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X64-SSE42-NEXT: pminsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX1-LABEL: test_reduce_v32i8:			; X64-AVX1-LABEL: test_reduce_v32i8:
	; X64-AVX1: ## %bb.0:			; X64-AVX1: ## %bb.0:
	; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1			; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
	; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX1-NEXT: vzeroupper			; X64-AVX1-NEXT: vzeroupper
	; X64-AVX1-NEXT: retq			; X64-AVX1-NEXT: retq
	;			;
	; X64-AVX2-LABEL: test_reduce_v32i8:			; X64-AVX2-LABEL: test_reduce_v32i8:
	; X64-AVX2: ## %bb.0:			; X64-AVX2: ## %bb.0:
	; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX2-NEXT: vzeroupper			; X64-AVX2-NEXT: vzeroupper
	; X64-AVX2-NEXT: retq			; X64-AVX2-NEXT: retq
	;			;
	; X64-AVX512-LABEL: test_reduce_v32i8:			; X64-AVX512-LABEL: test_reduce_v32i8:
	; X64-AVX512: ## %bb.0:			; X64-AVX512: ## %bb.0:
	; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
	; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX512-NEXT: vzeroupper			; X64-AVX512-NEXT: vzeroupper
	; X64-AVX512-NEXT: retq			; X64-AVX512-NEXT: retq
	%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp slt <32 x i8> %a0, %1			%2 = icmp slt <32 x i8> %a0, %1
	%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1			%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
	%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	▲ Show 20 Lines • Show All 671 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v64i8:			; X86-SSE42-LABEL: test_reduce_v64i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pminsb %xmm3, %xmm1			; X86-SSE42-NEXT: pminsb %xmm3, %xmm1
	; X86-SSE42-NEXT: pminsb %xmm2, %xmm0			; X86-SSE42-NEXT: pminsb %xmm2, %xmm0
	; X86-SSE42-NEXT: pminsb %xmm1, %xmm0			; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X86-SSE42-NEXT: pminsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X86-SSE42-NEXT: pminsb %xmm1, %xmm0			; X86-SSE42-NEXT: psrlw $8, %xmm2
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm2
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X86-SSE42-NEXT: pminsb %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX1-LABEL: test_reduce_v64i8:			; X86-AVX1-LABEL: test_reduce_v64i8:
	; X86-AVX1: ## %bb.0:			; X86-AVX1: ## %bb.0:
	; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2			; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
	; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3			; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
	; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2			; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
	; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX1-NEXT: vzeroupper			; X86-AVX1-NEXT: vzeroupper
	; X86-AVX1-NEXT: retl			; X86-AVX1-NEXT: retl
	;			;
	; X86-AVX2-LABEL: test_reduce_v64i8:			; X86-AVX2-LABEL: test_reduce_v64i8:
	; X86-AVX2: ## %bb.0:			; X86-AVX2: ## %bb.0:
	; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX2-NEXT: vzeroupper			; X86-AVX2-NEXT: vzeroupper
	; X86-AVX2-NEXT: retl			; X86-AVX2-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v64i8:			; X64-SSE2-LABEL: test_reduce_v64i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: movdqa %xmm3, %xmm4			; X64-SSE2-NEXT: movdqa %xmm3, %xmm4
	▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v64i8:			; X64-SSE42-LABEL: test_reduce_v64i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pminsb %xmm3, %xmm1			; X64-SSE42-NEXT: pminsb %xmm3, %xmm1
	; X64-SSE42-NEXT: pminsb %xmm2, %xmm0			; X64-SSE42-NEXT: pminsb %xmm2, %xmm0
	; X64-SSE42-NEXT: pminsb %xmm1, %xmm0			; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X64-SSE42-NEXT: pminsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X64-SSE42-NEXT: pminsb %xmm1, %xmm0			; X64-SSE42-NEXT: psrlw $8, %xmm2
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm2
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X64-SSE42-NEXT: pminsb %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX1-LABEL: test_reduce_v64i8:			; X64-AVX1-LABEL: test_reduce_v64i8:
	; X64-AVX1: ## %bb.0:			; X64-AVX1: ## %bb.0:
	; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2			; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
	; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3			; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
	; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2			; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
	; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX1-NEXT: vzeroupper			; X64-AVX1-NEXT: vzeroupper
	; X64-AVX1-NEXT: retq			; X64-AVX1-NEXT: retq
	;			;
	; X64-AVX2-LABEL: test_reduce_v64i8:			; X64-AVX2-LABEL: test_reduce_v64i8:
	; X64-AVX2: ## %bb.0:			; X64-AVX2: ## %bb.0:
	; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX2-NEXT: vzeroupper			; X64-AVX2-NEXT: vzeroupper
	; X64-AVX2-NEXT: retq			; X64-AVX2-NEXT: retq
	;			;
	; X64-AVX512-LABEL: test_reduce_v64i8:			; X64-AVX512-LABEL: test_reduce_v64i8:
	; X64-AVX512: ## %bb.0:			; X64-AVX512: ## %bb.0:
	; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1			; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
	; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
	; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
	; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
	; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX512-NEXT: vzeroupper			; X64-AVX512-NEXT: vzeroupper
	; X64-AVX512-NEXT: retq			; X64-AVX512-NEXT: retq
	%1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp slt <64 x i8> %a0, %1			%2 = icmp slt <64 x i8> %a0, %1
	%3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1			%3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
	%4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	Show All 17 Lines

test/CodeGen/X86/horizontal-reduce-umax.ll

	Show First 20 Lines • Show All 356 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: psrlw $8, %xmm0			; X86-SSE2-NEXT: psrlw $8, %xmm0
	; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0			; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0
	; X86-SSE2-NEXT: movd %xmm0, %eax			; X86-SSE2-NEXT: movd %xmm0, %eax
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v16i8:			; X86-SSE42-LABEL: test_reduce_v16i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
	; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0			; X86-SSE42-NEXT: psrlw $8, %xmm2
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm2
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX-LABEL: test_reduce_v16i8:			; X86-AVX-LABEL: test_reduce_v16i8:
	; X86-AVX: ## %bb.0:			; X86-AVX: ## %bb.0:
	; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX-NEXT: retl			; X86-AVX-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v16i8:			; X64-SSE2-LABEL: test_reduce_v16i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1			; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1
	; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
	; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0			; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0
	; X64-SSE2-NEXT: movdqa %xmm0, %xmm1			; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
	; X64-SSE2-NEXT: psrld $16, %xmm1			; X64-SSE2-NEXT: psrld $16, %xmm1
	; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1			; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1
	; X64-SSE2-NEXT: movdqa %xmm1, %xmm0			; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
	; X64-SSE2-NEXT: psrlw $8, %xmm0			; X64-SSE2-NEXT: psrlw $8, %xmm0
	; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0			; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0
	; X64-SSE2-NEXT: movd %xmm0, %eax			; X64-SSE2-NEXT: movd %xmm0, %eax
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v16i8:			; X64-SSE42-LABEL: test_reduce_v16i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
	; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0			; X64-SSE42-NEXT: psrlw $8, %xmm2
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm2
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX-LABEL: test_reduce_v16i8:			; X64-AVX-LABEL: test_reduce_v16i8:
	; X64-AVX: ## %bb.0:			; X64-AVX: ## %bb.0:
	; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX-NEXT: retq			; X64-AVX-NEXT: retq
	%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp ugt <16 x i8> %a0, %1			%2 = icmp ugt <16 x i8> %a0, %1
	%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1			%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
	%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%5 = icmp ugt <16 x i8> %3, %4			%5 = icmp ugt <16 x i8> %3, %4
	▲ Show 20 Lines • Show All 583 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0			; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0
	; X86-SSE2-NEXT: movd %xmm0, %eax			; X86-SSE2-NEXT: movd %xmm0, %eax
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v32i8:			; X86-SSE42-LABEL: test_reduce_v32i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0			; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
	; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0			; X86-SSE42-NEXT: psrlw $8, %xmm2
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm2
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX1-LABEL: test_reduce_v32i8:			; X86-AVX1-LABEL: test_reduce_v32i8:
	; X86-AVX1: ## %bb.0:			; X86-AVX1: ## %bb.0:
	; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1			; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
	; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX1-NEXT: vzeroupper			; X86-AVX1-NEXT: vzeroupper
	; X86-AVX1-NEXT: retl			; X86-AVX1-NEXT: retl
	;			;
	; X86-AVX2-LABEL: test_reduce_v32i8:			; X86-AVX2-LABEL: test_reduce_v32i8:
	; X86-AVX2: ## %bb.0:			; X86-AVX2: ## %bb.0:
	; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX2-NEXT: vzeroupper			; X86-AVX2-NEXT: vzeroupper
	; X86-AVX2-NEXT: retl			; X86-AVX2-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v32i8:			; X64-SSE2-LABEL: test_reduce_v32i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0			; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0
	Show All 9 Lines
	; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0			; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0
	; X64-SSE2-NEXT: movd %xmm0, %eax			; X64-SSE2-NEXT: movd %xmm0, %eax
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v32i8:			; X64-SSE42-LABEL: test_reduce_v32i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0			; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
	; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0			; X64-SSE42-NEXT: psrlw $8, %xmm2
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm2
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX1-LABEL: test_reduce_v32i8:			; X64-AVX1-LABEL: test_reduce_v32i8:
	; X64-AVX1: ## %bb.0:			; X64-AVX1: ## %bb.0:
	; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1			; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
	; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX1-NEXT: vzeroupper			; X64-AVX1-NEXT: vzeroupper
	; X64-AVX1-NEXT: retq			; X64-AVX1-NEXT: retq
	;			;
	; X64-AVX2-LABEL: test_reduce_v32i8:			; X64-AVX2-LABEL: test_reduce_v32i8:
	; X64-AVX2: ## %bb.0:			; X64-AVX2: ## %bb.0:
	; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX2-NEXT: vzeroupper			; X64-AVX2-NEXT: vzeroupper
	; X64-AVX2-NEXT: retq			; X64-AVX2-NEXT: retq
	;			;
	; X64-AVX512-LABEL: test_reduce_v32i8:			; X64-AVX512-LABEL: test_reduce_v32i8:
	; X64-AVX512: ## %bb.0:			; X64-AVX512: ## %bb.0:
	; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
	; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX512-NEXT: vzeroupper			; X64-AVX512-NEXT: vzeroupper
	; X64-AVX512-NEXT: retq			; X64-AVX512-NEXT: retq
	%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp ugt <32 x i8> %a0, %1			%2 = icmp ugt <32 x i8> %a0, %1
	%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1			%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
	%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	▲ Show 20 Lines • Show All 817 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v64i8:			; X86-SSE42-LABEL: test_reduce_v64i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pmaxub %xmm3, %xmm1			; X86-SSE42-NEXT: pmaxub %xmm3, %xmm1
	; X86-SSE42-NEXT: pmaxub %xmm2, %xmm0			; X86-SSE42-NEXT: pmaxub %xmm2, %xmm0
	; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0			; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
	; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0			; X86-SSE42-NEXT: psrlw $8, %xmm2
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm2
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X86-SSE42-NEXT: pxor %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX1-LABEL: test_reduce_v64i8:			; X86-AVX1-LABEL: test_reduce_v64i8:
	; X86-AVX1: ## %bb.0:			; X86-AVX1: ## %bb.0:
	; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2			; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
	; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3			; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
	; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2			; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
	; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0			; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX1-NEXT: vzeroupper			; X86-AVX1-NEXT: vzeroupper
	; X86-AVX1-NEXT: retl			; X86-AVX1-NEXT: retl
	;			;
	; X86-AVX2-LABEL: test_reduce_v64i8:			; X86-AVX2-LABEL: test_reduce_v64i8:
	; X86-AVX2: ## %bb.0:			; X86-AVX2: ## %bb.0:
	; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX2-NEXT: vzeroupper			; X86-AVX2-NEXT: vzeroupper
	; X86-AVX2-NEXT: retl			; X86-AVX2-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v64i8:			; X64-SSE2-LABEL: test_reduce_v64i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: pmaxub %xmm3, %xmm1			; X64-SSE2-NEXT: pmaxub %xmm3, %xmm1
	Show All 13 Lines
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v64i8:			; X64-SSE42-LABEL: test_reduce_v64i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pmaxub %xmm3, %xmm1			; X64-SSE42-NEXT: pmaxub %xmm3, %xmm1
	; X64-SSE42-NEXT: pmaxub %xmm2, %xmm0			; X64-SSE42-NEXT: pmaxub %xmm2, %xmm0
	; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0			; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1
	; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
	; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0			; X64-SSE42-NEXT: psrlw $8, %xmm2
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm2
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
	; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1			; X64-SSE42-NEXT: pxor %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX1-LABEL: test_reduce_v64i8:			; X64-AVX1-LABEL: test_reduce_v64i8:
	; X64-AVX1: ## %bb.0:			; X64-AVX1: ## %bb.0:
	; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2			; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
	; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3			; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
	; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2			; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
	; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0			; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX1-NEXT: vzeroupper			; X64-AVX1-NEXT: vzeroupper
	; X64-AVX1-NEXT: retq			; X64-AVX1-NEXT: retq
	;			;
	; X64-AVX2-LABEL: test_reduce_v64i8:			; X64-AVX2-LABEL: test_reduce_v64i8:
	; X64-AVX2: ## %bb.0:			; X64-AVX2: ## %bb.0:
	; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX2-NEXT: vzeroupper			; X64-AVX2-NEXT: vzeroupper
	; X64-AVX2-NEXT: retq			; X64-AVX2-NEXT: retq
	;			;
	; X64-AVX512-LABEL: test_reduce_v64i8:			; X64-AVX512-LABEL: test_reduce_v64i8:
	; X64-AVX512: ## %bb.0:			; X64-AVX512: ## %bb.0:
	; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1			; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
	; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
	; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
	; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]			; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2
	; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1			; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
	; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX512-NEXT: vzeroupper			; X64-AVX512-NEXT: vzeroupper
	; X64-AVX512-NEXT: retq			; X64-AVX512-NEXT: retq
	%1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp ugt <64 x i8> %a0, %1			%2 = icmp ugt <64 x i8> %a0, %1
	%3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1			%3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
	%4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	Show All 17 Lines

test/CodeGen/X86/horizontal-reduce-umin.ll

	Show First 20 Lines • Show All 346 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: psrlw $8, %xmm0			; X86-SSE2-NEXT: psrlw $8, %xmm0
	; X86-SSE2-NEXT: pminub %xmm1, %xmm0			; X86-SSE2-NEXT: pminub %xmm1, %xmm0
	; X86-SSE2-NEXT: movd %xmm0, %eax			; X86-SSE2-NEXT: movd %xmm0, %eax
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v16i8:			; X86-SSE42-LABEL: test_reduce_v16i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X86-SSE42-NEXT: pminub %xmm0, %xmm1
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
	; X86-SSE42-NEXT: pminub %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: psrlw $8, %xmm1
	; X86-SSE42-NEXT: pminub %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm1
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0			; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pminub %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX-LABEL: test_reduce_v16i8:			; X86-AVX-LABEL: test_reduce_v16i8:
	; X86-AVX: ## %bb.0:			; X86-AVX: ## %bb.0:
	; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
	; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1			; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0			; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX-NEXT: retl			; X86-AVX-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v16i8:			; X64-SSE2-LABEL: test_reduce_v16i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-SSE2-NEXT: pminub %xmm0, %xmm1			; X64-SSE2-NEXT: pminub %xmm0, %xmm1
	; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]			; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
	; X64-SSE2-NEXT: pminub %xmm1, %xmm0			; X64-SSE2-NEXT: pminub %xmm1, %xmm0
	; X64-SSE2-NEXT: movdqa %xmm0, %xmm1			; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
	; X64-SSE2-NEXT: psrld $16, %xmm1			; X64-SSE2-NEXT: psrld $16, %xmm1
	; X64-SSE2-NEXT: pminub %xmm0, %xmm1			; X64-SSE2-NEXT: pminub %xmm0, %xmm1
	; X64-SSE2-NEXT: movdqa %xmm1, %xmm0			; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
	; X64-SSE2-NEXT: psrlw $8, %xmm0			; X64-SSE2-NEXT: psrlw $8, %xmm0
	; X64-SSE2-NEXT: pminub %xmm1, %xmm0			; X64-SSE2-NEXT: pminub %xmm1, %xmm0
	; X64-SSE2-NEXT: movd %xmm0, %eax			; X64-SSE2-NEXT: movd %xmm0, %eax
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v16i8:			; X64-SSE42-LABEL: test_reduce_v16i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-SSE42-NEXT: pminub %xmm0, %xmm1
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
	; X64-SSE42-NEXT: pminub %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: psrlw $8, %xmm1
	; X64-SSE42-NEXT: pminub %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm1
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0			; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pminub %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX-LABEL: test_reduce_v16i8:			; X64-AVX-LABEL: test_reduce_v16i8:
	; X64-AVX: ## %bb.0:			; X64-AVX: ## %bb.0:
	; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
	; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1			; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0			; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX-NEXT: retq			; X64-AVX-NEXT: retq
	%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp ult <16 x i8> %a0, %1			%2 = icmp ult <16 x i8> %a0, %1
	%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1			%3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
	%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%5 = icmp ult <16 x i8> %3, %4			%5 = icmp ult <16 x i8> %3, %4
	▲ Show 20 Lines • Show All 566 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: pminub %xmm1, %xmm0			; X86-SSE2-NEXT: pminub %xmm1, %xmm0
	; X86-SSE2-NEXT: movd %xmm0, %eax			; X86-SSE2-NEXT: movd %xmm0, %eax
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v32i8:			; X86-SSE42-LABEL: test_reduce_v32i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pminub %xmm1, %xmm0			; X86-SSE42-NEXT: pminub %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X86-SSE42-NEXT: pminub %xmm0, %xmm1
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
	; X86-SSE42-NEXT: pminub %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: psrlw $8, %xmm1
	; X86-SSE42-NEXT: pminub %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm1
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0			; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pminub %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX1-LABEL: test_reduce_v32i8:			; X86-AVX1-LABEL: test_reduce_v32i8:
	; X86-AVX1: ## %bb.0:			; X86-AVX1: ## %bb.0:
	; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1			; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
	; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
	; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1			; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX1-NEXT: vzeroupper			; X86-AVX1-NEXT: vzeroupper
	; X86-AVX1-NEXT: retl			; X86-AVX1-NEXT: retl
	;			;
	; X86-AVX2-LABEL: test_reduce_v32i8:			; X86-AVX2-LABEL: test_reduce_v32i8:
	; X86-AVX2: ## %bb.0:			; X86-AVX2: ## %bb.0:
	; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
	; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1			; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX2-NEXT: vzeroupper			; X86-AVX2-NEXT: vzeroupper
	; X86-AVX2-NEXT: retl			; X86-AVX2-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v32i8:			; X64-SSE2-LABEL: test_reduce_v32i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: pminub %xmm1, %xmm0			; X64-SSE2-NEXT: pminub %xmm1, %xmm0
	Show All 9 Lines
	; X64-SSE2-NEXT: pminub %xmm1, %xmm0			; X64-SSE2-NEXT: pminub %xmm1, %xmm0
	; X64-SSE2-NEXT: movd %xmm0, %eax			; X64-SSE2-NEXT: movd %xmm0, %eax
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v32i8:			; X64-SSE42-LABEL: test_reduce_v32i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pminub %xmm1, %xmm0			; X64-SSE42-NEXT: pminub %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-SSE42-NEXT: pminub %xmm0, %xmm1
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
	; X64-SSE42-NEXT: pminub %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: psrlw $8, %xmm1
	; X64-SSE42-NEXT: pminub %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm1
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0			; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pminub %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX1-LABEL: test_reduce_v32i8:			; X64-AVX1-LABEL: test_reduce_v32i8:
	; X64-AVX1: ## %bb.0:			; X64-AVX1: ## %bb.0:
	; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1			; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
	; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
	; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1			; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX1-NEXT: vzeroupper			; X64-AVX1-NEXT: vzeroupper
	; X64-AVX1-NEXT: retq			; X64-AVX1-NEXT: retq
	;			;
	; X64-AVX2-LABEL: test_reduce_v32i8:			; X64-AVX2-LABEL: test_reduce_v32i8:
	; X64-AVX2: ## %bb.0:			; X64-AVX2: ## %bb.0:
	; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
	; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1			; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX2-NEXT: vzeroupper			; X64-AVX2-NEXT: vzeroupper
	; X64-AVX2-NEXT: retq			; X64-AVX2-NEXT: retq
	;			;
	; X64-AVX512-LABEL: test_reduce_v32i8:			; X64-AVX512-LABEL: test_reduce_v32i8:
	; X64-AVX512: ## %bb.0:			; X64-AVX512: ## %bb.0:
	; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
	; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1			; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX512-NEXT: vzeroupper			; X64-AVX512-NEXT: vzeroupper
	; X64-AVX512-NEXT: retq			; X64-AVX512-NEXT: retq
	%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp ult <32 x i8> %a0, %1			%2 = icmp ult <32 x i8> %a0, %1
	%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1			%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
	%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	▲ Show 20 Lines • Show All 794 Lines • ▼ Show 20 Lines
	; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE2-NEXT: retl			; X86-SSE2-NEXT: retl
	;			;
	; X86-SSE42-LABEL: test_reduce_v64i8:			; X86-SSE42-LABEL: test_reduce_v64i8:
	; X86-SSE42: ## %bb.0:			; X86-SSE42: ## %bb.0:
	; X86-SSE42-NEXT: pminub %xmm3, %xmm1			; X86-SSE42-NEXT: pminub %xmm3, %xmm1
	; X86-SSE42-NEXT: pminub %xmm2, %xmm0			; X86-SSE42-NEXT: pminub %xmm2, %xmm0
	; X86-SSE42-NEXT: pminub %xmm1, %xmm0			; X86-SSE42-NEXT: pminub %xmm1, %xmm0
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X86-SSE42-NEXT: pminub %xmm0, %xmm1
	; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
	; X86-SSE42-NEXT: pminub %xmm1, %xmm0
	; X86-SSE42-NEXT: movdqa %xmm0, %xmm1			; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
	; X86-SSE42-NEXT: psrld $16, %xmm1			; X86-SSE42-NEXT: psrlw $8, %xmm1
	; X86-SSE42-NEXT: pminub %xmm0, %xmm1			; X86-SSE42-NEXT: pminub %xmm0, %xmm1
	; X86-SSE42-NEXT: movdqa %xmm1, %xmm0			; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
	; X86-SSE42-NEXT: psrlw $8, %xmm0
	; X86-SSE42-NEXT: pminub %xmm1, %xmm0
	; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X86-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X86-SSE42-NEXT: retl			; X86-SSE42-NEXT: retl
	;			;
	; X86-AVX1-LABEL: test_reduce_v64i8:			; X86-AVX1-LABEL: test_reduce_v64i8:
	; X86-AVX1: ## %bb.0:			; X86-AVX1: ## %bb.0:
	; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2			; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
	; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3			; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
	; X86-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2			; X86-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
	; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
	; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1			; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0			; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX1-NEXT: vzeroupper			; X86-AVX1-NEXT: vzeroupper
	; X86-AVX1-NEXT: retl			; X86-AVX1-NEXT: retl
	;			;
	; X86-AVX2-LABEL: test_reduce_v64i8:			; X86-AVX2-LABEL: test_reduce_v64i8:
	; X86-AVX2: ## %bb.0:			; X86-AVX2: ## %bb.0:
	; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
	; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1			; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X86-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X86-AVX2-NEXT: vzeroupper			; X86-AVX2-NEXT: vzeroupper
	; X86-AVX2-NEXT: retl			; X86-AVX2-NEXT: retl
	;			;
	; X64-SSE2-LABEL: test_reduce_v64i8:			; X64-SSE2-LABEL: test_reduce_v64i8:
	; X64-SSE2: ## %bb.0:			; X64-SSE2: ## %bb.0:
	; X64-SSE2-NEXT: pminub %xmm3, %xmm1			; X64-SSE2-NEXT: pminub %xmm3, %xmm1
	Show All 13 Lines
	; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE2-NEXT: retq			; X64-SSE2-NEXT: retq
	;			;
	; X64-SSE42-LABEL: test_reduce_v64i8:			; X64-SSE42-LABEL: test_reduce_v64i8:
	; X64-SSE42: ## %bb.0:			; X64-SSE42: ## %bb.0:
	; X64-SSE42-NEXT: pminub %xmm3, %xmm1			; X64-SSE42-NEXT: pminub %xmm3, %xmm1
	; X64-SSE42-NEXT: pminub %xmm2, %xmm0			; X64-SSE42-NEXT: pminub %xmm2, %xmm0
	; X64-SSE42-NEXT: pminub %xmm1, %xmm0			; X64-SSE42-NEXT: pminub %xmm1, %xmm0
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-SSE42-NEXT: pminub %xmm0, %xmm1
	; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
	; X64-SSE42-NEXT: pminub %xmm1, %xmm0
	; X64-SSE42-NEXT: movdqa %xmm0, %xmm1			; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
	; X64-SSE42-NEXT: psrld $16, %xmm1			; X64-SSE42-NEXT: psrlw $8, %xmm1
	; X64-SSE42-NEXT: pminub %xmm0, %xmm1			; X64-SSE42-NEXT: pminub %xmm0, %xmm1
	; X64-SSE42-NEXT: movdqa %xmm1, %xmm0			; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
	; X64-SSE42-NEXT: psrlw $8, %xmm0
	; X64-SSE42-NEXT: pminub %xmm1, %xmm0
	; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax			; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
	; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax			; X64-SSE42-NEXT: ## kill: def %al killed %al killed %eax
	; X64-SSE42-NEXT: retq			; X64-SSE42-NEXT: retq
	;			;
	; X64-AVX1-LABEL: test_reduce_v64i8:			; X64-AVX1-LABEL: test_reduce_v64i8:
	; X64-AVX1: ## %bb.0:			; X64-AVX1: ## %bb.0:
	; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2			; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
	; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3			; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
	; X64-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2			; X64-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
	; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
	; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1			; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0			; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX1-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX1-NEXT: vzeroupper			; X64-AVX1-NEXT: vzeroupper
	; X64-AVX1-NEXT: retq			; X64-AVX1-NEXT: retq
	;			;
	; X64-AVX2-LABEL: test_reduce_v64i8:			; X64-AVX2-LABEL: test_reduce_v64i8:
	; X64-AVX2: ## %bb.0:			; X64-AVX2: ## %bb.0:
	; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
	; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1			; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0			; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX2-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX2-NEXT: vzeroupper			; X64-AVX2-NEXT: vzeroupper
	; X64-AVX2-NEXT: retq			; X64-AVX2-NEXT: retq
	;			;
	; X64-AVX512-LABEL: test_reduce_v64i8:			; X64-AVX512-LABEL: test_reduce_v64i8:
	; X64-AVX512: ## %bb.0:			; X64-AVX512: ## %bb.0:
	; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1			; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
	; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
	; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1			; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
	; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0
	; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
	; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0
	; X64-AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
	; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0
	; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1			; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
	; X64-AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0			; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
				; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
	; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax			; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
	; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax			; X64-AVX512-NEXT: ## kill: def %al killed %al killed %eax
	; X64-AVX512-NEXT: vzeroupper			; X64-AVX512-NEXT: vzeroupper
	; X64-AVX512-NEXT: retq			; X64-AVX512-NEXT: retq
	%1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = icmp ult <64 x i8> %a0, %1			%2 = icmp ult <64 x i8> %a0, %1
	%3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1			%3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
	%4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	Show All 17 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86][SSE] Use (V)PHMINPOSUW for vXi8 SMAX/SMIN/UMAX/UMIN horizontal reductions (PR32841)
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 127138

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/horizontal-reduce-smax.ll

test/CodeGen/X86/horizontal-reduce-smin.ll

test/CodeGen/X86/horizontal-reduce-umax.ll

test/CodeGen/X86/horizontal-reduce-umin.ll

[X86][SSE] Use (V)PHMINPOSUW for vXi8 SMAX/SMIN/UMAX/UMIN horizontal reductions (PR32841)
ClosedPublic