Diff 61609

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,151 Lines • ▼ Show 20 Lines
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.		/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///		///
/// This helper function produces an 8-bit shuffle immediate corresponding to		/// This helper function produces an 8-bit shuffle immediate corresponding to
/// the ubiquitous shuffle encoding scheme used in x86 instructions for		/// the ubiquitous shuffle encoding scheme used in x86 instructions for
/// shuffling 4 lanes. It can be used with most of the PSHUF instructions for		/// shuffling 4 lanes. It can be used with most of the PSHUF instructions for
/// example.		/// example.
///		///
/// NB: We rely heavily on "undef" masks preserving the input lane.		/// NB: We rely heavily on "undef" masks preserving the input lane.
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,		static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
SelectionDAG &DAG) {
assert(Mask.size() == 4 && "Only 4-lane shuffle masks");		assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");		assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");		assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");
assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");		assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");		assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");

unsigned Imm = 0;		unsigned Imm = 0;
Imm |= (Mask[0] == -1 ? 0 : Mask[0]) << 0;		Imm |= (Mask[0] == -1 ? 0 : Mask[0]) << 0;
Imm |= (Mask[1] == -1 ? 1 : Mask[1]) << 2;		Imm |= (Mask[1] == -1 ? 1 : Mask[1]) << 2;
Imm |= (Mask[2] == -1 ? 2 : Mask[2]) << 4;		Imm |= (Mask[2] == -1 ? 2 : Mask[2]) << 4;
Imm |= (Mask[3] == -1 ? 3 : Mask[3]) << 6;		Imm |= (Mask[3] == -1 ? 3 : Mask[3]) << 6;
return DAG.getConstant(Imm, DL, MVT::i8);		return Imm;
		}

		static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
		SelectionDAG &DAG) {
		return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}		}

/// \brief Compute whether each element of a shuffle is zeroable.		/// \brief Compute whether each element of a shuffle is zeroable.
///		///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.		/// A "zeroable" vector shuffle element is one which can be lowered to zero.
/// Either it is an undef element in the shuffle mask, the element of the input		/// Either it is an undef element in the shuffle mask, the element of the input
/// referenced is undef, or the element of the input referenced is known to be		/// referenced is undef, or the element of the input referenced is known to be
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle		/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
▲ Show 20 Lines • Show All 17,352 Lines • ▼ Show 20 Lines
}		}

// Attempt to match a combined shuffle mask against supported unary shuffle		// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.		// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.		// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,		static bool matchUnaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
const X86Subtarget &Subtarget,		const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT) {		unsigned &Shuffle, MVT &ShuffleVT) {
bool FloatDomain = SrcVT.isFloatingPoint();		bool FloatDomain = SrcVT.isFloatingPoint() ||
		(!Subtarget.hasAVX2() && SrcVT.is256BitVector());
		ab (inline comment, unsubmitted, not done): This looks like independent goodness; maybe extract that out?

// Match a 128-bit integer vector against a VZEXT_MOVL (MOVQ) instruction.		// Match a 128-bit integer vector against a VZEXT_MOVL (MOVQ) instruction.
if (!FloatDomain && SrcVT.is128BitVector() &&		if (!FloatDomain && SrcVT.is128BitVector() &&
isTargetShuffleEquivalent(Mask, {0, SM_SentinelZero})) {		isTargetShuffleEquivalent(Mask, {0, SM_SentinelZero})) {
Shuffle = X86ISD::VZEXT_MOVL;		Shuffle = X86ISD::VZEXT_MOVL;
ShuffleVT = MVT::v2i64;		ShuffleVT = MVT::v2i64;
return true;		return true;
}		}
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines	if (isTargetShuffleEquivalent(
ShuffleVT = MVT::v16f32;		ShuffleVT = MVT::v16f32;
return true;		return true;
}		}
}		}

return false;		return false;
}		}

		// Attempt to match a combined shuffle mask against supported unary immediate
		// permute instructions.
		//
		// On success this returns true and sets:
		//   Shuffle    - the opcode to use (X86ISD::VPERMILPI or X86ISD::PSHUFD).
		//   ShuffleVT  - the vector type the permute is to be performed in.
		//   PermuteImm - the immediate operand encoding the permutation.
		// On failure it returns false; every failing return happens before any of
		// the three outputs is written, so they are left untouched.
		//
		// TODO: Investigate sharing more of this with shuffle lowering.
		static bool matchPermuteVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
		                                      const X86Subtarget &Subtarget,
		                                      unsigned &Shuffle, MVT &ShuffleVT,
		                                      unsigned &PermuteImm) {
		  // Ensure we don't contain any zero elements.
		  for (int M : Mask) {
		    if (M == SM_SentinelZero)
		      return false;
		    assert(SM_SentinelUndef <= M && M < (int)Mask.size() &&
		           "Expected unary shuffle");
		  }

		  // We only support permutation of 32/64 bit elements.
		  // TODO - support PSHUFLW/PSHUFHW.
		  unsigned MaskScalarSizeInBits = SrcVT.getSizeInBits() / Mask.size();
		  if (MaskScalarSizeInBits != 32 && MaskScalarSizeInBits != 64)
		    return false;
		  MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);

		  // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
		  // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
		  bool FloatDomain = SrcVT.isFloatingPoint();
		  if (FloatDomain && !Subtarget.hasAVX())
		    return false;

		  // Pre-AVX2 we must use float shuffles on 256-bit vectors.
		  if (SrcVT.is256BitVector() && !Subtarget.hasAVX2())
		    FloatDomain = true;

		  // TODO - support LaneCrossing for AVX2 PERMQ/PERMPD
		  if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask))
		    return false;

		  // VPERMILPD can permute with a non-repeating shuffle.
		  if (FloatDomain && MaskScalarSizeInBits == 64) {
		    Shuffle = X86ISD::VPERMILPI;
		    ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
		    PermuteImm = 0;
		    // One immediate bit per element: bit i is the low bit of the source
		    // index for element i, i.e. it picks between the two elements of the
		    // pair that element i belongs to. Undef elements leave their bit as 0.
		    for (int i = 0, e = Mask.size(); i != e; ++i) {
		      int M = Mask[i];
		      if (M == SM_SentinelUndef)
		        continue;
		      assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index");
		      PermuteImm |= (M & 1) << i;
		    }
		    return true;
		  }

		  // We need a repeating shuffle mask for VPERMILPS/PSHUFD.
		  SmallVector<int, 4> RepeatedMask;
		  if (!is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask))
		    return false;

		  // Narrow the repeated mask for 32-bit element permutes: each 64-bit index M
		  // becomes the pair of 32-bit indices (2*M, 2*M+1), and an undef 64-bit
		  // element becomes two undef 32-bit elements.
		  SmallVector<int, 4> WordMask = RepeatedMask;
		  if (MaskScalarSizeInBits == 64) {
		    WordMask.clear();
		    for (int M : RepeatedMask) {
		      if (M == SM_SentinelUndef) {
		        WordMask.append(2, SM_SentinelUndef);
		        continue;
		      }
		      WordMask.push_back((M * 2) + 0);
		      WordMask.push_back((M * 2) + 1);
		    }
		  }

		  // Emit the permute on 32-bit elements, in the float or integer domain as
		  // chosen above, over a vector as wide as the source.
		  Shuffle = (FloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
		  ShuffleVT = (FloatDomain ? MVT::f32 : MVT::i32);
		  ShuffleVT = MVT::getVectorVT(ShuffleVT, SrcVT.getSizeInBits() / 32);
		  PermuteImm = getV4X86ShuffleImm(WordMask);
		  return true;
		}

// Attempt to match a combined unary shuffle mask against supported binary		// Attempt to match a combined unary shuffle mask against supported binary
// shuffle instructions.		// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.		// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,		static bool matchBinaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
unsigned &Shuffle, MVT &ShuffleVT) {		unsigned &Shuffle, MVT &ShuffleVT) {
bool FloatDomain = SrcVT.isFloatingPoint();		bool FloatDomain = SrcVT.isFloatingPoint();

if (SrcVT.is128BitVector()) {		if (SrcVT.is128BitVector()) {
▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines	static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
if (RootVT.getScalarSizeInBits() != MaskEltSizeInBits &&		if (RootVT.getScalarSizeInBits() != MaskEltSizeInBits &&
(RootSizeInBits == 512 ||		(RootSizeInBits == 512 ||
(Subtarget.hasVLX() && RootSizeInBits >= 128))) {		(Subtarget.hasVLX() && RootSizeInBits >= 128))) {
return false;		return false;
}		}

// Attempt to match the mask against known shuffle patterns.		// Attempt to match the mask against known shuffle patterns.
MVT ShuffleVT;		MVT ShuffleVT;
unsigned Shuffle;		unsigned Shuffle, PermuteImm;

if (matchUnaryVectorShuffle(VT, Mask, Subtarget, Shuffle, ShuffleVT)) {		if (matchUnaryVectorShuffle(VT, Mask, Subtarget, Shuffle, ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)		if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!		return false; // Nothing to do!
Res = DAG.getBitcast(ShuffleVT, Input);		Res = DAG.getBitcast(ShuffleVT, Input);
DCI.AddToWorklist(Res.getNode());		DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);		Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
DCI.AddToWorklist(Res.getNode());		DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),		DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);		/*AddTo*/ true);
return true;		return true;
}		}

		if (matchPermuteVectorShuffle(VT, Mask, Subtarget, Shuffle, ShuffleVT,
		PermuteImm)) {
		if (Depth == 1 && Root.getOpcode() == Shuffle)
		return false; // Nothing to do!
		Res = DAG.getBitcast(ShuffleVT, Input);
		DCI.AddToWorklist(Res.getNode());
		Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
		DAG.getConstant(PermuteImm, DL, MVT::i8));
		DCI.AddToWorklist(Res.getNode());
		DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
		/*AddTo*/ true);
		return true;
		}

if (matchBinaryVectorShuffle(VT, Mask, Shuffle, ShuffleVT)) {		if (matchBinaryVectorShuffle(VT, Mask, Shuffle, ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)		if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!		return false; // Nothing to do!
Res = DAG.getBitcast(ShuffleVT, Input);		Res = DAG.getBitcast(ShuffleVT, Input);
DCI.AddToWorklist(Res.getNode());		DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, Res);		Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, Res);
DCI.AddToWorklist(Res.getNode());		DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),		DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
▲ Show 20 Lines • Show All 6,692 Lines • Show Last 20 Lines

test/CodeGen/X86/2012-01-12-extract-sv.ll

	; RUN: llc < %s -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s			; RUN: llc < %s -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s

	define void @endless_loop() {			define void @endless_loop() {
	; CHECK-LABEL: endless_loop:			; CHECK-LABEL: endless_loop:
	; CHECK-NEXT: # BB#0:			; CHECK-NEXT: # BB#0:
	; CHECK-NEXT: vmovaps (%eax), %ymm0			; CHECK-NEXT: vmovaps (%eax), %ymm0
	; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0			; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
	; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]			; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
	; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]			; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
	; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1			; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
	; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2			; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
	; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]			; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
	; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]			; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
	; CHECK-NEXT: vmovaps %ymm0, (%eax)			; CHECK-NEXT: vmovaps %ymm0, (%eax)
	; CHECK-NEXT: vmovaps %ymm1, (%eax)			; CHECK-NEXT: vmovaps %ymm1, (%eax)
	; CHECK-NEXT: vzeroupper			; CHECK-NEXT: vzeroupper
	; CHECK-NEXT: retl			; CHECK-NEXT: retl
	entry:			entry:
	%0 = load <8 x i32>, <8 x i32> addrspace(1)* undef, align 32			%0 = load <8 x i32>, <8 x i32> addrspace(1)* undef, align 32
	%1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>			%2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>
	store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64			store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
	ret void			ret void
	}			}

test/CodeGen/X86/2012-04-26-sdglue.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s			; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s

	; rdar://11314175: SD Scheduler, BuildSchedUnits assert:			; rdar://11314175: SD Scheduler, BuildSchedUnits assert:
	; N->getNodeId() == -1 && "Node already inserted!			; N->getNodeId() == -1 && "Node already inserted!

	define void @func() nounwind ssp {			define void @func() nounwind ssp {
	; CHECK-LABEL: func:			; CHECK-LABEL: func:
	; CHECK: ## BB#0:			; CHECK: ## BB#0:
	; CHECK-NEXT: vmovups 0, %xmm0			; CHECK-NEXT: vmovups 0, %xmm0
	; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1			; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
	; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]			; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
	; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]			; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
	; CHECK-NEXT: vpbroadcastd 32, %xmm3			; CHECK-NEXT: vbroadcastss 32, %xmm3
	; CHECK-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0			; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
	; CHECK-NEXT: vmulps %ymm0, %ymm2, %ymm2			; CHECK-NEXT: vmulps %ymm0, %ymm2, %ymm2
	; CHECK-NEXT: vmulps %ymm0, %ymm0, %ymm0			; CHECK-NEXT: vmulps %ymm0, %ymm0, %ymm0
	; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0			; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
	; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0			; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0
	; CHECK-NEXT: vmulps %xmm0, %xmm0, %xmm0			; CHECK-NEXT: vmulps %xmm0, %xmm0, %xmm0
	; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]			; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
	; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0			; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0
	; CHECK-NEXT: vhaddps %ymm0, %ymm0, %ymm0			; CHECK-NEXT: vhaddps %ymm0, %ymm0, %ymm0
	; CHECK-NEXT: vsubps %ymm0, %ymm0, %ymm0			; CHECK-NEXT: vsubps %ymm0, %ymm0, %ymm0
	; CHECK-NEXT: vhaddps %ymm0, %ymm1, %ymm0			; CHECK-NEXT: vhaddps %ymm0, %ymm1, %ymm0
	; CHECK-NEXT: vmovaps %ymm0, (%rax)			; CHECK-NEXT: vmovaps %ymm0, (%rax)
	Show All 33 Lines

test/CodeGen/X86/avx-intrinsics-fast-isel.ll

Show First 20 Lines • Show All 2,376 Lines • ▼ Show 20 Lines	; X64-NEXT: retq
%res13 = insertelement <16 x i16> %res12, i16 %a0, i32 13		%res13 = insertelement <16 x i16> %res12, i16 %a0, i32 13
%res14 = insertelement <16 x i16> %res13, i16 %a0, i32 14		%res14 = insertelement <16 x i16> %res13, i16 %a0, i32 14
%res15 = insertelement <16 x i16> %res14, i16 %a0, i32 15		%res15 = insertelement <16 x i16> %res14, i16 %a0, i32 15
%res = bitcast <16 x i16> %res15 to <4 x i64>		%res = bitcast <16 x i16> %res15 to <4 x i64>
ret <4 x i64> %res		ret <4 x i64> %res
}		}

define <4 x i64> @test_mm256_set1_epi32(i32 %a0) nounwind {		define <4 x i64> @test_mm256_set1_epi32(i32 %a0) nounwind {
; X32-LABEL: test_mm256_set1_epi32:		; X32-LABEL: test_mm256_set1_epi32:
; X32: # BB#0:		; X32: # BB#0:
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero		; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]		; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl		; X32-NEXT: retl
;		;
; X64-LABEL: test_mm256_set1_epi32:		; X64-LABEL: test_mm256_set1_epi32:
; X64: # BB#0:		; X64: # BB#0:
; X64-NEXT: vmovd %edi, %xmm0		; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]		; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq		; X64-NEXT: retq
%res0 = insertelement <8 x i32> undef, i32 %a0, i32 0		%res0 = insertelement <8 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <8 x i32> %res0, i32 %a0, i32 1		%res1 = insertelement <8 x i32> %res0, i32 %a0, i32 1
%res2 = insertelement <8 x i32> %res1, i32 %a0, i32 2		%res2 = insertelement <8 x i32> %res1, i32 %a0, i32 2
%res3 = insertelement <8 x i32> %res2, i32 %a0, i32 3		%res3 = insertelement <8 x i32> %res2, i32 %a0, i32 3
%res4 = insertelement <8 x i32> %res3, i32 %a0, i32 4		%res4 = insertelement <8 x i32> %res3, i32 %a0, i32 4
%res5 = insertelement <8 x i32> %res4, i32 %a0, i32 5		%res5 = insertelement <8 x i32> %res4, i32 %a0, i32 5
%res6 = insertelement <8 x i32> %res5, i32 %a0, i32 6		%res6 = insertelement <8 x i32> %res5, i32 %a0, i32 6
%res7 = insertelement <8 x i32> %res6, i32 %a0, i32 7		%res7 = insertelement <8 x i32> %res6, i32 %a0, i32 7
%res = bitcast <8 x i32> %res7 to <4 x i64>		%res = bitcast <8 x i32> %res7 to <4 x i64>
ret <4 x i64> %res		ret <4 x i64> %res
}		}

define <4 x i64> @test_mm256_set1_epi64x(i64 %a0) nounwind {		define <4 x i64> @test_mm256_set1_epi64x(i64 %a0) nounwind {
; X32-LABEL: test_mm256_set1_epi64x:		; X32-LABEL: test_mm256_set1_epi64x:
; X32: # BB#0:		; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax		; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vmovd %ecx, %xmm0		; X32-NEXT: vmovd %ecx, %xmm0
; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0		; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0		; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0		; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl		; X32-NEXT: retl
;		;
; X64-LABEL: test_mm256_set1_epi64x:		; X64-LABEL: test_mm256_set1_epi64x:
; X64: # BB#0:		; X64: # BB#0:
; X64-NEXT: vmovq %rdi, %xmm0		; X64-NEXT: vmovq %rdi, %xmm0
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]		; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq		; X64-NEXT: retq
%res0 = insertelement <4 x i64> undef, i64 %a0, i32 0		%res0 = insertelement <4 x i64> undef, i64 %a0, i32 0
%res1 = insertelement <4 x i64> %res0, i64 %a0, i32 1		%res1 = insertelement <4 x i64> %res0, i64 %a0, i32 1
%res2 = insertelement <4 x i64> %res1, i64 %a0, i32 2		%res2 = insertelement <4 x i64> %res1, i64 %a0, i32 2
%res3 = insertelement <4 x i64> %res2, i64 %a0, i32 3		%res3 = insertelement <4 x i64> %res2, i64 %a0, i32 3
ret <4 x i64> %res3		ret <4 x i64> %res3
}		}

define <4 x double> @test_mm256_set1_pd(double %a0) nounwind {		define <4 x double> @test_mm256_set1_pd(double %a0) nounwind {
; X32-LABEL: test_mm256_set1_pd:		; X32-LABEL: test_mm256_set1_pd:
▲ Show 20 Lines • Show All 1,320 Lines • Show Last 20 Lines

test/CodeGen/X86/avx-intrinsics-x86.ll

	Show First 20 Lines • Show All 4,027 Lines • ▼ Show 20 Lines
	}			}
	declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone			declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone

	define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {			define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
	; AVX-LABEL: test_x86_avx_vpermilvar_pd_256_2:			; AVX-LABEL: test_x86_avx_vpermilvar_pd_256_2:
	; AVX: ## BB#0:			; AVX: ## BB#0:
	; AVX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]			; AVX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
	; AVX-NEXT: retl			; AVX-NEXT: retl
	;			;
	; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:			; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
	; AVX512VL: ## BB#0:			; AVX512VL: ## BB#0:
	; AVX512VL-NEXT: vpermilpd LCPI227_0, %ymm0, %ymm0			; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
	; AVX512VL-NEXT: retl			; AVX512VL-NEXT: retl
	%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]			%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
	ret <4 x double> %res			ret <4 x double> %res
	}			}

	define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {			define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
	; AVX-LABEL: test_x86_avx_vpermilvar_ps:			; AVX-LABEL: test_x86_avx_vpermilvar_ps:
	; AVX: ## BB#0:			; AVX: ## BB#0:
	; AVX-NEXT: vpermilps %xmm1, %xmm0, %xmm0			; AVX-NEXT: vpermilps %xmm1, %xmm0, %xmm0
	; AVX-NEXT: retl			; AVX-NEXT: retl
	;			;
	▲ Show 20 Lines • Show All 556 Lines • Show Last 20 Lines

test/CodeGen/X86/avx-splat.ll

	Show All 19 Lines
	; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>			%shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
	ret <16 x i16> %shuffle			ret <16 x i16> %shuffle
	}			}

	define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {			define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
	; CHECK-LABEL: funcC:			; CHECK-LABEL: funcC:
	; CHECK: ## BB#0: ## %entry			; CHECK: ## BB#0: ## %entry
	; CHECK-NEXT: vmovq %rdi, %xmm0			; CHECK-NEXT: vmovq %rdi, %xmm0
	; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]			; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
	; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0			%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
	%vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1			%vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
	%vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2			%vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
	%vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3			%vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
	ret <4 x i64> %vecinit6.i			ret <4 x i64> %vecinit6.i
	}			}

	define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {			define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
	▲ Show 20 Lines • Show All 131 Lines • Show Last 20 Lines

test/CodeGen/X86/avx-vbroadcast.ll

	Show First 20 Lines • Show All 164 Lines • ▼ Show 20 Lines
	; X64-NEXT: retq			; X64-NEXT: retq
	entry:			entry:
	%ld = load <4 x i32>, <4 x i32>* %ptr			%ld = load <4 x i32>, <4 x i32>* %ptr
	%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>			%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
	ret <4 x i32> %ret			ret <4 x i32> %ret
	}			}

	define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {			define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
	; X32-LABEL: load_splat_8i32_4i32_33333333:			; X32-LABEL: load_splat_8i32_4i32_33333333:
	; X32: ## BB#0: ## %entry			; X32: ## BB#0: ## %entry
	; X32-NEXT: movl {{[0-9]+}}(%esp), %eax			; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X32-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]			; X32-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,3,3,3]
	; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; X32-NEXT: retl			; X32-NEXT: retl
	;			;
	; X64-LABEL: load_splat_8i32_4i32_33333333:			; X64-LABEL: load_splat_8i32_4i32_33333333:
	; X64: ## BB#0: ## %entry			; X64: ## BB#0: ## %entry
	; X64-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]			; X64-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,3,3,3]
	; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; X64-NEXT: retq			; X64-NEXT: retq
	entry:			entry:
	%ld = load <4 x i32>, <4 x i32>* %ptr			%ld = load <4 x i32>, <4 x i32>* %ptr
	%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>			%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
	ret <8 x i32> %ret			ret <8 x i32> %ret
	}			}

	define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {			define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
	; X32-LABEL: load_splat_8i32_8i32_55555555:			; X32-LABEL: load_splat_8i32_8i32_55555555:
	; X32: ## BB#0: ## %entry			; X32: ## BB#0: ## %entry
	▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines
	; X64-NEXT: retq			; X64-NEXT: retq
	entry:			entry:
	%ld = load <2 x i64>, <2 x i64>* %ptr			%ld = load <2 x i64>, <2 x i64>* %ptr
	%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>			%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
	ret <2 x i64> %ret			ret <2 x i64> %ret
	}			}

	define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {			define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
	; X32-LABEL: load_splat_4i64_2i64_1111:			; X32-LABEL: load_splat_4i64_2i64_1111:
	; X32: ## BB#0: ## %entry			; X32: ## BB#0: ## %entry
	; X32-NEXT: movl {{[0-9]+}}(%esp), %eax			; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X32-NEXT: vmovaps (%eax), %xmm0			; X32-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
	; X32-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]			; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; X32-NEXT: retl
	; X32-NEXT: retl			;
	;			; X64-LABEL: load_splat_4i64_2i64_1111:
	; X64-LABEL: load_splat_4i64_2i64_1111:			; X64: ## BB#0: ## %entry
	; X64: ## BB#0: ## %entry			; X64-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
	; X64-NEXT: vmovaps (%rdi), %xmm0			; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; X64-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]			; X64-NEXT: retq
	; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			entry:
	; X64-NEXT: retq
	entry:
	%ld = load <2 x i64>, <2 x i64>* %ptr			%ld = load <2 x i64>, <2 x i64>* %ptr
	%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>			%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
	ret <4 x i64> %ret			ret <4 x i64> %ret
	}			}

	define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {			define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
	; X32-LABEL: load_splat_4i64_4i64_2222:			; X32-LABEL: load_splat_4i64_4i64_2222:
	; X32: ## BB#0: ## %entry			; X32: ## BB#0: ## %entry
	▲ Show 20 Lines • Show All 255 Lines • Show Last 20 Lines

test/CodeGen/X86/merge-consecutive-loads-128.ll

	Show First 20 Lines • Show All 751 Lines • ▼ Show 20 Lines
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero			; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
	; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero			; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
	; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]			; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]
	; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]			; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: merge_4f32_f32_X0YY:			; AVX-LABEL: merge_4f32_f32_X0YY:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero			; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
	; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero			; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
	; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]			; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
	; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]			; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
	; AVX-NEXT: retq			; AVX-NEXT: retq
	;			;
	; X32-SSE-LABEL: merge_4f32_f32_X0YY:			; X32-SSE-LABEL: merge_4f32_f32_X0YY:
	; X32-SSE: # BB#0:			; X32-SSE: # BB#0:
	; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax			; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx			; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
	; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero			; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
	; X32-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero			; X32-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
	; X32-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]			; X32-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]
	; X32-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]			; X32-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
	Show All 9 Lines

test/CodeGen/X86/pshufb-mask-comments.ll

	Show All 30 Lines
	; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,0,0,15,0,2,0,0],zero,xmm0[0,3,0,0],zero,xmm0[0,4]			; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,0,0,15,0,2,0,0],zero,xmm0[0,3,0,0],zero,xmm0[0,4]
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %V, <16 x i8> <i8 1, i8 0, i8 0, i8 127, i8 0, i8 2, i8 0, i8 0, i8 128, i8 0, i8 3, i8 0, i8 0, i8 255, i8 0, i8 4>)			%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %V, <16 x i8> <i8 1, i8 0, i8 0, i8 127, i8 0, i8 2, i8 0, i8 0, i8 128, i8 0, i8 3, i8 0, i8 0, i8 255, i8 0, i8 4>)
	ret <16 x i8> %1			ret <16 x i8> %1
	}			}

	; Test that we won't crash when the constant was reused for another instruction.			; Test that we won't crash when the constant was reused for another instruction.

	define <16 x i8> @test4(<16 x i8> %V, <2 x i64>* %P) {			define <16 x i8> @test4(<16 x i8> %V, <2 x i64>* %P) {
	; CHECK-LABEL: test4:			; CHECK-LABEL: test4:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1084818905618843912,506097522914230528]			; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1084818905618843912,506097522914230528]
	; CHECK-NEXT: movdqa %xmm1, (%rdi)			; CHECK-NEXT: movaps %xmm1, (%rdi)
	; CHECK-NEXT: pshufb %xmm1, %xmm0			; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%1 = insertelement <2 x i64> undef, i64 1084818905618843912, i32 0			%1 = insertelement <2 x i64> undef, i64 1084818905618843912, i32 0
	%2 = insertelement <2 x i64> %1, i64 506097522914230528, i32 1			%2 = insertelement <2 x i64> %1, i64 506097522914230528, i32 1
	store <2 x i64> %2, <2 x i64>* %P, align 16			store <2 x i64> %2, <2 x i64>* %P, align 16
	%3 = bitcast <2 x i64> %2 to <16 x i8>			%3 = bitcast <2 x i64> %2 to <16 x i8>
	%4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %V, <16 x i8> %3)			%4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %V, <16 x i8> %3)
	ret <16 x i8> %4			ret <16 x i8> %4
	}			}

	define <16 x i8> @test5(<16 x i8> %V) {			define <16 x i8> @test5(<16 x i8> %V) {
	; CHECK-LABEL: test5:			; CHECK-LABEL: test5:
	Show All 35 Lines

test/CodeGen/X86/sse3.ll

	Show First 20 Lines • Show All 198 Lines • ▼ Show 20 Lines
	entry:			entry:
	%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >			%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
	ret <8 x i16> %tmp7			ret <8 x i16> %tmp7

	}			}

	define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {			define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
	; X64-LABEL: t12:			; X64-LABEL: t12:
	; X64: ## BB#0: ## %entry			; X64: ## BB#0: ## %entry
	; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]			; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
	; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]			; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
	; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,6,7]			; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
	; X64-NEXT: retq			; X64-NEXT: retq
	entry:			entry:
	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >			%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
	ret <8 x i16> %tmp9			ret <8 x i16> %tmp9

	}			}

	define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {			define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
	; X64-LABEL: t13:			; X64-LABEL: t13:
	; X64: ## BB#0: ## %entry			; X64: ## BB#0: ## %entry
	; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]			; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
	; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]			; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
	; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,6,7]			; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
	; X64-NEXT: retq			; X64-NEXT: retq
	entry:			entry:
	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >			%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
	ret <8 x i16> %tmp9			ret <8 x i16> %tmp9
	}			}

	define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {			define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
	; X64-LABEL: t14:			; X64-LABEL: t14:
	; X64: ## BB#0: ## %entry			; X64: ## BB#0: ## %entry
	; X64-NEXT: psrlq $16, %xmm0			; X64-NEXT: psrlq $16, %xmm0
	; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]			; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
	▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-compare-results.ll

	; NOTE: Assertions have been autogenerated by update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 \| FileCheck %s --check-prefix=SSE --check-prefix=SSE2
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 \| FileCheck %s --check-prefix=SSE --check-prefix=SSE2			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 \| FileCheck %s --check-prefix=SSE --check-prefix=SSE42
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 \| FileCheck %s --check-prefix=SSE --check-prefix=SSE42
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx \| FileCheck %s --check-prefix=AVX --check-prefix=AVX1			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx \| FileCheck %s --check-prefix=AVX --check-prefix=AVX1
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 \| FileCheck %s --check-prefix=AVX --check-prefix=AVX2			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 \| FileCheck %s --check-prefix=AVX --check-prefix=AVX2
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f \| FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f \| FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F

	;			;
	; 128-bit vector comparisons			; 128-bit vector comparisons
	;			;

	▲ Show 20 Lines • Show All 126 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0			; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
	; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]			; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
	; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
	; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]			; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
	; AVX1-NEXT: vzeroupper			; AVX1-NEXT: vzeroupper
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: test_cmp_v4f64:			; AVX2-LABEL: test_cmp_v4f64:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
	; AVX2-NEXT: vzeroupper			; AVX2-NEXT: vzeroupper
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512-LABEL: test_cmp_v4f64:			; AVX512-LABEL: test_cmp_v4f64:
	; AVX512: # BB#0:			; AVX512: # BB#0:
	; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0			; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
	; AVX512-NEXT: vpmovqd %zmm0, %ymm0			; AVX512-NEXT: vpmovqd %zmm0, %ymm0
	; AVX512-NEXT: retq			; AVX512-NEXT: retq
	%1 = fcmp ogt <4 x double> %a0, %a1			%1 = fcmp ogt <4 x double> %a0, %a1
	ret <4 x i1> %1			ret <4 x i1> %1
	▲ Show 20 Lines • Show All 514 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
	; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]			; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
	; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]			; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
	; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0			; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
	; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0			; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
	; AVX1-NEXT: vzeroupper			; AVX1-NEXT: vzeroupper
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: test_cmp_v8f64:			; AVX2-LABEL: test_cmp_v8f64:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0			; AVX2-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
	; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
	; AVX2-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1			; AVX2-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
	; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
	; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0			; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
	; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero			; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
	; AVX2-NEXT: vzeroupper			; AVX2-NEXT: vzeroupper
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512-LABEL: test_cmp_v8f64:			; AVX512-LABEL: test_cmp_v8f64:
	; AVX512: # BB#0:			; AVX512: # BB#0:
	; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k1			; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k1
	; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}			; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
	▲ Show 20 Lines • Show All 1,433 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vandpd %xmm7, %xmm3, %xmm3			; AVX1-NEXT: vandpd %xmm7, %xmm3, %xmm3
	; AVX1-NEXT: vandpd %xmm7, %xmm0, %xmm0			; AVX1-NEXT: vandpd %xmm7, %xmm0, %xmm0
	; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0			; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
	; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0			; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
	; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0			; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
	; AVX1-NEXT: vzeroupper			; AVX1-NEXT: vzeroupper
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: test_cmp_v16f64:			; AVX2-LABEL: test_cmp_v16f64:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vcmpltpd %ymm2, %ymm6, %ymm2			; AVX2-NEXT: vcmpltpd %ymm2, %ymm6, %ymm2
	; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm2 = ymm2[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,3,2,3]
	; AVX2-NEXT: vcmpltpd %ymm3, %ymm7, %ymm3			; AVX2-NEXT: vcmpltpd %ymm3, %ymm7, %ymm3
	; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm3 = ymm3[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,3,2,3]
	; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2			; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]			; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
	; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2			; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
	; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
	; AVX2-NEXT: vmovdqa {{.*#+}} xmm6 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>			; AVX2-NEXT: vmovdqa {{.*#+}} xmm6 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
	; AVX2-NEXT: vpshufb %xmm6, %xmm2, %xmm2			; AVX2-NEXT: vpshufb %xmm6, %xmm2, %xmm2
	; AVX2-NEXT: vcmpltpd %ymm0, %ymm4, %ymm0			; AVX2-NEXT: vcmpltpd %ymm0, %ymm4, %ymm0
	; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
	; AVX2-NEXT: vcmpltpd %ymm1, %ymm5, %ymm1			; AVX2-NEXT: vcmpltpd %ymm1, %ymm5, %ymm1
	; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
	; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0			; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
	; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0			; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
	; AVX2-NEXT: vpshufb %xmm6, %xmm0, %xmm0			; AVX2-NEXT: vpshufb %xmm6, %xmm0, %xmm0
	; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]			; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
	; AVX2-NEXT: vzeroupper			; AVX2-NEXT: vzeroupper
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512-LABEL: test_cmp_v16f64:			; AVX512-LABEL: test_cmp_v16f64:
	; AVX512: # BB#0:			; AVX512: # BB#0:
	▲ Show 20 Lines • Show All 4,440 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-128-v16.ll

	Show First 20 Lines • Show All 916 Lines • ▼ Show 20 Lines
	; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]			; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>			%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
	ret <16 x i8> %shuffle			ret <16 x i8> %shuffle
	}			}

	define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) {			define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) {
	; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:			; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
	; SSE2: # BB#0:			; SSE2: # BB#0:
	; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]			; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
	; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]			; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
	; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1]			; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
	; SSE2-NEXT: retq			; SSE2-NEXT: retq
	;			;
	; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:			; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
	; SSSE3: # BB#0:			; SSSE3: # BB#0:
	; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero			; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
	; SSSE3-NEXT: retq			; SSSE3-NEXT: retq
	;			;
	; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:			; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
	; SSE41: # BB#0:			; SSE41: # BB#0:
	; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero			; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
	; SSE41-NEXT: retq			; SSE41-NEXT: retq
	▲ Show 20 Lines • Show All 660 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-128-v2.ll

; NOTE: Assertions have been autogenerated by update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
		ab [Unsubmitted · Not Done] inline comment: Extra NOTE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2		; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3		; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41		; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1		; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2		; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL		; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"		target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"		target triple = "x86_64-unknown-unknown"
▲ Show 20 Lines • Show All 139 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>		%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
ret <2 x double> %shuffle		ret <2 x double> %shuffle
}		}
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {		define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_11:		; SSE-LABEL: shuffle_v2f64_11:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]		; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: shuffle_v2f64_11:		; AVX-LABEL: shuffle_v2f64_11:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]		; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
		ab [Unsubmitted · Not Done] inline comment: I'm surprised by this and other changes; isn't the combine for shuffle chains? (It does look better for folding though; just trying to understand.)
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>		%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
ret <2 x double> %shuffle		ret <2 x double> %shuffle
}		}
define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {		define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_22:		; SSE2-LABEL: shuffle_v2f64_22:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]		; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT: movaps %xmm1, %xmm0		; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
Show All 35 Lines	; AVX-NEXT: retq
ret <2 x double> %shuffle		ret <2 x double> %shuffle
}		}
define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {		define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_33:		; SSE-LABEL: shuffle_v2f64_33:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]		; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: movaps %xmm1, %xmm0		; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: shuffle_v2f64_33:		; AVX-LABEL: shuffle_v2f64_33:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]		; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>		%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
ret <2 x double> %shuffle		ret <2 x double> %shuffle
}		}
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {		define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:		; SSE2-LABEL: shuffle_v2f64_03:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]		; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; SSE2-NEXT: movapd %xmm1, %xmm0		; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
▲ Show 20 Lines • Show All 1,131 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-128-v4.ll

Show First 20 Lines • Show All 218 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>		%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shuffle		ret <4 x float> %shuffle
}		}
define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {		define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_0011:		; SSE-LABEL: shuffle_v4f32_0011:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]		; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: shuffle_v4f32_0011:		; AVX-LABEL: shuffle_v4f32_0011:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]		; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
		ab [Unsubmitted · Not Done] inline comment: In particular, this looks slightly more expensive according to Agner's Intel tables (for the folded variants).
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>		%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
ret <4 x float> %shuffle		ret <4 x float> %shuffle
}		}
define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {		define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_2233:		; SSE-LABEL: shuffle_v4f32_2233:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]		; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: shuffle_v4f32_2233:		; AVX-LABEL: shuffle_v4f32_2233:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]		; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>		%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
ret <4 x float> %shuffle		ret <4 x float> %shuffle
}		}
define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {		define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: shuffle_v4f32_0022:		; SSE2-LABEL: shuffle_v4f32_0022:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]		; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSE3-LABEL: shuffle_v4f32_0022:		; SSE3-LABEL: shuffle_v4f32_0022:
▲ Show 20 Lines • Show All 2,031 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-256-v16.ll

Show First 20 Lines • Show All 1,437 Lines • ▼ Show 20 Lines	; AVX2-NEXT: retq
ret <16 x i16> %shuffle		ret <16 x i16> %shuffle
}		}

define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {		define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:		; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1		; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]		; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]		; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:		; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0		; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>		%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
ret <16 x i16> %shuffle		ret <16 x i16> %shuffle
▲ Show 20 Lines • Show All 2,085 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-256-v4.ll

	Show First 20 Lines • Show All 479 Lines • ▼ Show 20 Lines
	; ALL-LABEL: shuffle_v4f64_15uu:			; ALL-LABEL: shuffle_v4f64_15uu:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]			; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>			%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
	ret <4 x double> %shuffle			ret <4 x double> %shuffle
	}			}

	define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {			define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
	; ALL-LABEL: shuffle_v4f64_11uu:			; ALL-LABEL: shuffle_v4f64_11uu:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]			; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>			%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
	ret <4 x double> %shuffle			ret <4 x double> %shuffle
	}			}

	define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {			define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
	; AVX1-LABEL: shuffle_v4f64_22uu:			; AVX1-LABEL: shuffle_v4f64_22uu:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
	; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]			; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines
	; AVX512VL-LABEL: shuffle_v4i64_0000:			; AVX512VL-LABEL: shuffle_v4i64_0000:
	; AVX512VL: # BB#0:			; AVX512VL: # BB#0:
	; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0			; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0
	; AVX512VL-NEXT: retq			; AVX512VL-NEXT: retq
	%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>			%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
	ret <4 x i64> %shuffle			ret <4 x i64> %shuffle
	}			}

	define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {			define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
	; AVX1-LABEL: shuffle_v4i64_0001:			; AVX1-LABEL: shuffle_v4i64_0001:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v4i64_0001:			; AVX2-LABEL: shuffle_v4i64_0001:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512VL-LABEL: shuffle_v4i64_0001:			; AVX512VL-LABEL: shuffle_v4i64_0001:
	; AVX512VL: # BB#0:			; AVX512VL: # BB#0:
	; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]			; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
	; AVX512VL-NEXT: retq			; AVX512VL-NEXT: retq
	%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>			%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
	ret <4 x i64> %shuffle			ret <4 x i64> %shuffle
	}			}

	define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {			define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
	; AVX1-LABEL: shuffle_v4i64_0020:			; AVX1-LABEL: shuffle_v4i64_0020:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
	; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]			; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
	; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
	; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v4i64_0020:			; AVX2-LABEL: shuffle_v4i64_0020:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512VL-LABEL: shuffle_v4i64_0020:			; AVX512VL-LABEL: shuffle_v4i64_0020:
	; AVX512VL: # BB#0:			; AVX512VL: # BB#0:
	; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]			; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
	Show All 39 Lines
	; AVX512VL-LABEL: shuffle_v4i64_0300:			; AVX512VL-LABEL: shuffle_v4i64_0300:
	; AVX512VL: # BB#0:			; AVX512VL: # BB#0:
	; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]			; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
	; AVX512VL-NEXT: retq			; AVX512VL-NEXT: retq
	%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>			%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
	ret <4 x i64> %shuffle			ret <4 x i64> %shuffle
	}			}

	define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {			define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
	; AVX1-LABEL: shuffle_v4i64_1000:			; AVX1-LABEL: shuffle_v4i64_1000:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v4i64_1000:			; AVX2-LABEL: shuffle_v4i64_1000:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512VL-LABEL: shuffle_v4i64_1000:			; AVX512VL-LABEL: shuffle_v4i64_1000:
	; AVX512VL: # BB#0:			; AVX512VL: # BB#0:
	; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]			; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
	▲ Show 20 Lines • Show All 135 Lines • ▼ Show 20 Lines
	; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]			; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
	; AVX512VL-NEXT: retq			; AVX512VL-NEXT: retq
	%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>			%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
	ret <4 x i64> %shuffle			ret <4 x i64> %shuffle
	}			}

	define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {			define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
	; AVX1-LABEL: shuffle_v4i64_4012:			; AVX1-LABEL: shuffle_v4i64_4012:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
	; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]			; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
	; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
	; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
	; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]			; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v4i64_4012:			; AVX2-LABEL: shuffle_v4i64_4012:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
	; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]			; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512VL-LABEL: shuffle_v4i64_4012:			; AVX512VL-LABEL: shuffle_v4i64_4012:
	▲ Show 20 Lines • Show All 497 Lines • ▼ Show 20 Lines
	; AVX512VL-LABEL: splat_v4f64:			; AVX512VL-LABEL: splat_v4f64:
	; AVX512VL: # BB#0:			; AVX512VL: # BB#0:
	; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0			; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
	; AVX512VL-NEXT: retq			; AVX512VL-NEXT: retq
	%1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer			%1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
	ret <4 x double> %1			ret <4 x double> %1
	}			}

	define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {			define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
	; AVX1-LABEL: splat_mem_v4i64_from_v2i64:			; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: splat_mem_v4i64_from_v2i64:			; AVX2-LABEL: splat_mem_v4i64_from_v2i64:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0			; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512VL-LABEL: splat_mem_v4i64_from_v2i64:			; AVX512VL-LABEL: splat_mem_v4i64_from_v2i64:
	; AVX512VL: # BB#0:			; AVX512VL: # BB#0:
	; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0			; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
	▲ Show 20 Lines • Show All 151 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-256-v8.ll

	Show First 20 Lines • Show All 143 Lines • ▼ Show 20 Lines
	; ALL-LABEL: shuffle_v8f32_01014545:			; ALL-LABEL: shuffle_v8f32_01014545:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]			; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>			%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
	ret <8 x float> %shuffle			ret <8 x float> %shuffle
	}			}

	define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {			define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
	; AVX1-LABEL: shuffle_v8f32_00112233:			; AVX1-LABEL: shuffle_v8f32_00112233:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]			; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
	; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]			; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8f32_00112233:			; AVX2-LABEL: shuffle_v8f32_00112233:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]			; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
	; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>			%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
	ret <8 x float> %shuffle			ret <8 x float> %shuffle
	}			}
	Show All 20 Lines
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]			; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>			%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
	ret <8 x float> %shuffle			ret <8 x float> %shuffle
	}			}

	define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {			define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
	; AVX1-LABEL: shuffle_v8f32_08080808:			; AVX1-LABEL: shuffle_v8f32_08080808:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]			; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
	; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]			; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8f32_08080808:			; AVX2-LABEL: shuffle_v8f32_08080808:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero			; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
	; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0			; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>			%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
	ret <8 x float> %shuffle			ret <8 x float> %shuffle
	}			}

	define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {			define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
	; ALL-LABEL: shuffle_v8f32_08084c4c:			; ALL-LABEL: shuffle_v8f32_08084c4c:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]			; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
	; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]			; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>			%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
	ret <8 x float> %shuffle			ret <8 x float> %shuffle
	}			}

	define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {			define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
	; ALL-LABEL: shuffle_v8f32_8823cc67:			; ALL-LABEL: shuffle_v8f32_8823cc67:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]			; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>			%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
	▲ Show 20 Lines • Show All 671 Lines • ▼ Show 20 Lines
	; AVX2-LABEL: shuffle_v8i32_00000000:			; AVX2-LABEL: shuffle_v8i32_00000000:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vbroadcastss %xmm0, %ymm0			; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_00000010:			; AVX1-LABEL: shuffle_v8i32_00000010:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
	; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_00000010:			; AVX2-LABEL: shuffle_v8i32_00000010:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]			; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
	; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_00000200:			; AVX1-LABEL: shuffle_v8i32_00000200:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
	; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,0]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_00000200:			; AVX2-LABEL: shuffle_v8i32_00000200:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]			; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
	; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_00003000:			; AVX1-LABEL: shuffle_v8i32_00003000:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
	; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_00003000:			; AVX2-LABEL: shuffle_v8i32_00003000:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]			; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
	; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}
	▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines
	; AVX2-LABEL: shuffle_v8i32_01014545:			; AVX2-LABEL: shuffle_v8i32_01014545:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]			; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_00112233:			; AVX1-LABEL: shuffle_v8i32_00112233:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]			; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
	; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_00112233:			; AVX2-LABEL: shuffle_v8i32_00112233:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]			; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
	; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_00001111:			; AVX1-LABEL: shuffle_v8i32_00001111:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]			; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
	; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_00001111:			; AVX2-LABEL: shuffle_v8i32_00001111:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]			; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
	; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_81a3c5e7:			; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]			; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_81a3c5e7:			; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]			; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_08080808:			; AVX1-LABEL: shuffle_v8i32_08080808:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]			; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
	; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]			; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_08080808:			; AVX2-LABEL: shuffle_v8i32_08080808:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]			; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
	; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0			; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_08084c4c:			; AVX1-LABEL: shuffle_v8i32_08084c4c:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]			; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
	; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]			; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_08084c4c:			; AVX2-LABEL: shuffle_v8i32_08084c4c:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]			; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
	; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]			; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
	; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]			; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}
	▲ Show 20 Lines • Show All 108 Lines • ▼ Show 20 Lines
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>			; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
	; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1			; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
	; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]			; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_091b2d3f:			; AVX1-LABEL: shuffle_v8i32_091b2d3f:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]			; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
	; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
	; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]			; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_091b2d3f:			; AVX2-LABEL: shuffle_v8i32_091b2d3f:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero			; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
	; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]			; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_09ab1def:			; AVX1-LABEL: shuffle_v8i32_09ab1def:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]			; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
	; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
	; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]			; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_09ab1def:			; AVX2-LABEL: shuffle_v8i32_09ab1def:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>			; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
	; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0			; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
	; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]			; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
	▲ Show 20 Lines • Show All 770 Lines • ▼ Show 20 Lines
	}			}

	;			;
	; Shuffle to logical bit shifts			; Shuffle to logical bit shifts
	;			;

	define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {			define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
	; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:			; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1			; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
	; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]			; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
	; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]			; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:			; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0			; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {			define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
	; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:			; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1			; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
	; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]			; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
	; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]			; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:			; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0			; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
	▲ Show 20 Lines • Show All 196 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-combining-avx.ll

	Show First 20 Lines • Show All 58 Lines • ▼ Show 20 Lines
	; ALL-LABEL: combine_vpermilvar_4f32_movsldup:			; ALL-LABEL: combine_vpermilvar_4f32_movsldup:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]			; ALL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 undef>)			%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 undef>)
	ret <4 x float> %1			ret <4 x float> %1
	}			}

	define <4 x float> @combine_vpermilvar_4f32_unpckh(<4 x float> %a0) {			define <4 x float> @combine_vpermilvar_4f32_unpckh(<4 x float> %a0) {
	; ALL-LABEL: combine_vpermilvar_4f32_unpckh:			; ALL-LABEL: combine_vpermilvar_4f32_unpckh:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]			; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 2, i32 2, i32 3, i32 3>)			%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 2, i32 2, i32 3, i32 3>)
	ret <4 x float> %1			ret <4 x float> %1
	}			}

	define <4 x float> @combine_vpermilvar_4f32_unpckl(<4 x float> %a0) {			define <4 x float> @combine_vpermilvar_4f32_unpckl(<4 x float> %a0) {
	; ALL-LABEL: combine_vpermilvar_4f32_unpckl:			; ALL-LABEL: combine_vpermilvar_4f32_unpckl:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]			; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 1, i32 1>)			%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 1, i32 1>)
	ret <4 x float> %1			ret <4 x float> %1
	}			}

	define <8 x float> @combine_vpermilvar_8f32_identity(<8 x float> %a0) {			define <8 x float> @combine_vpermilvar_8f32_identity(<8 x float> %a0) {
	; ALL-LABEL: combine_vpermilvar_8f32_identity:			; ALL-LABEL: combine_vpermilvar_8f32_identity:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 undef>)			%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 undef>)
	%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 1>)			%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 1>)
	▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines
	; ALL-LABEL: combine_vpermilvar_4f64_movddup:			; ALL-LABEL: combine_vpermilvar_4f64_movddup:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]			; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 0, i64 0, i64 4, i64 4>)			%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 0, i64 0, i64 4, i64 4>)
	ret <4 x double> %1			ret <4 x double> %1
	}			}

	define <4 x float> @combine_vpermilvar_4f32_4stage(<4 x float> %a0) {			define <4 x float> @combine_vpermilvar_4f32_4stage(<4 x float> %a0) {
	; ALL-LABEL: combine_vpermilvar_4f32_4stage:			; ALL-LABEL: combine_vpermilvar_4f32_4stage:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,0,1,2,3,12,13,14,15,4,5,6,7]			; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)			%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
	%2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> <i32 2, i32 3, i32 0, i32 1>)			%2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> <i32 2, i32 3, i32 0, i32 1>)
	%3 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>)			%3 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>)
	%4 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %3, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)			%4 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %3, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
	ret <4 x float> %4			ret <4 x float> %4
	}			}

	define <8 x float> @combine_vpermilvar_8f32_4stage(<8 x float> %a0) {			define <8 x float> @combine_vpermilvar_8f32_4stage(<8 x float> %a0) {
	; AVX1-LABEL: combine_vpermilvar_8f32_4stage:			; ALL-LABEL: combine_vpermilvar_8f32_4stage:
	; AVX1: # BB#0:			; ALL: # BB#0:
	; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]			; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
	; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0			; ALL-NEXT: retq
	; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]			%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
	; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]			%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>)
	; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0			%3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 0, i32 2, i32 1, i32 3>)
	; AVX1-NEXT: retq
	;
	; AVX2-LABEL: combine_vpermilvar_8f32_4stage:
	; AVX2: # BB#0:
	; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,0,1,2,3,12,13,14,15,4,5,6,7,24,25,26,27,16,17,18,19,28,29,30,31,20,21,22,23]
	; AVX2-NEXT: retq
	;
	; AVX512F-LABEL: combine_vpermilvar_8f32_4stage:
	; AVX512F: # BB#0:
	; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,0,1,2,3,12,13,14,15,4,5,6,7,24,25,26,27,16,17,18,19,28,29,30,31,20,21,22,23]
	; AVX512F-NEXT: retq
	%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
	%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>)
	%3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 0, i32 2, i32 1, i32 3>)
	%4 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %3, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)			%4 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %3, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
	ret <8 x float> %4			ret <8 x float> %4
	}			}

test/CodeGen/X86/vector-shuffle-combining-avx2.ll

	Show First 20 Lines • Show All 42 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)			%tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
	%tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>			%tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>
	%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>			%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
	ret <32 x i8> %tmp2			ret <32 x i8> %tmp2
	}			}

	define <4 x i64> @combine_permq_pshufb(<4 x i64> %a0) {			define <4 x i64> @combine_permq_pshufb(<4 x i64> %a0) {
	; CHECK-LABEL: combine_permq_pshufb:			; CHECK-LABEL: combine_permq_pshufb:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]			; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
	; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31,16,17,18,19,20,21,22,23]			; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>			%1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
	%2 = bitcast <4 x i64> %1 to <32 x i8>			%2 = bitcast <4 x i64> %1 to <32 x i8>
	%3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)			%3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
	%4 = bitcast <32 x i8> %3 to <4 x i64>			%4 = bitcast <32 x i8> %3 to <4 x i64>
	ret <4 x i64> %4			ret <4 x i64> %4
	}			}

test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll

	Show First 20 Lines • Show All 203 Lines • ▼ Show 20 Lines
	define <16 x float> @combine_vpermt2var_16f32_vmovsldup_mask_load(<16 x float> *%p0, <16 x float> %x1, i16 %m) {			define <16 x float> @combine_vpermt2var_16f32_vmovsldup_mask_load(<16 x float> *%p0, <16 x float> %x1, i16 %m) {
	; CHECK-LABEL: combine_vpermt2var_16f32_vmovsldup_mask_load:			; CHECK-LABEL: combine_vpermt2var_16f32_vmovsldup_mask_load:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: kmovw %esi, %k1			; CHECK-NEXT: kmovw %esi, %k1
	; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]			; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%x0 = load <16 x float>, <16 x float> *%p0			%x0 = load <16 x float>, <16 x float> *%p0
	%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>, <16 x float> %x0, <16 x float> %x1, i16 %m)			%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>, <16 x float> %x0, <16 x float> %x1, i16 %m)
	ret <16 x float> %res0			ret <16 x float> %res0
	}			}

	define <16 x i32> @combine_vpermt2var_16i32_identity(<16 x i32> %x0, <16 x i32> %x1) {			define <16 x float> @combine_vpermt2var_16f32_vpermilps(<16 x float> %x0, <16 x float> %x1) {
	; CHECK-LABEL: combine_vpermt2var_16i32_identity:			; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps:
	; CHECK: # BB#0:			; CHECK: # BB#0:
				; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
				; CHECK-NEXT: retq
				%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 -1)
				ret <16 x float> %res0
				}
				define <16 x float> @combine_vpermt2var_16f32_vpermilps_load(<16 x float> *%p0, <16 x float> %x1) {
				; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_load:
				; CHECK: # BB#0:
				; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
				; CHECK-NEXT: retq
				%x0 = load <16 x float>, <16 x float> *%p0
				%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 -1)
				ret <16 x float> %res0
				}
				define <16 x float> @combine_vpermt2var_16f32_vpermilps_mask(<16 x float> %x0, <16 x float> %x1, i16 %m) {
				; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_mask:
				; CHECK: # BB#0:
				; CHECK-NEXT: kmovw %edi, %k1
				; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
				; CHECK-NEXT: retq
				%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 %m)
				ret <16 x float> %res0
				}
				define <16 x float> @combine_vpermt2var_16f32_vpermilps_mask_load(<16 x float> *%p0, <16 x float> %x1, i16 %m) {
				; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_mask_load:
				; CHECK: # BB#0:
				; CHECK-NEXT: kmovw %esi, %k1
				; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
				; CHECK-NEXT: retq
				%x0 = load <16 x float>, <16 x float> *%p0
				%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 %m)
				ret <16 x float> %res0
				}

				define <16 x i32> @combine_vpermt2var_16i32_identity(<16 x i32> %x0, <16 x i32> %x1) {
				; CHECK-LABEL: combine_vpermt2var_16i32_identity:
				; CHECK: # BB#0:
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>, <16 x i32> %x0, <16 x i32> %x1, i16 -1)			%res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>, <16 x i32> %x0, <16 x i32> %x1, i16 -1)
	%res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 30, i32 13, i32 28, i32 undef, i32 26, i32 9, i32 24, i32 7, i32 22, i32 5, i32 20, i32 3, i32 18, i32 1, i32 16>, <16 x i32> %res0, <16 x i32> %res0, i16 -1)			%res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 30, i32 13, i32 28, i32 undef, i32 26, i32 9, i32 24, i32 7, i32 22, i32 5, i32 20, i32 3, i32 18, i32 1, i32 16>, <16 x i32> %res0, <16 x i32> %res0, i16 -1)
	ret <16 x i32> %res1			ret <16 x i32> %res1
	}			}
	define <16 x i32> @combine_vpermt2var_16i32_identity_mask(<16 x i32> %x0, <16 x i32> %x1, i16 %m) {			define <16 x i32> @combine_vpermt2var_16i32_identity_mask(<16 x i32> %x0, <16 x i32> %x1, i16 %m) {
	; CHECK-LABEL: combine_vpermt2var_16i32_identity_mask:			; CHECK-LABEL: combine_vpermt2var_16i32_identity_mask:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	▲ Show 20 Lines • Show All 60 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

	Show First 20 Lines • Show All 85 Lines • ▼ Show 20 Lines
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%1 = bitcast <4 x float> %a0 to <16 x i8>			%1 = bitcast <4 x float> %a0 to <16 x i8>
	%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)			%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 7, i8 7, i8 7, i8 7, i8 1, i8 1, i8 1, i8 1, i8 3, i8 3, i8 3, i8 3>)
	%3 = bitcast <16 x i8> %2 to <4 x float>			%3 = bitcast <16 x i8> %2 to <4 x float>
	%4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>			%4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
	ret <4 x float> %4			ret <4 x float> %4
	}			}

	define <16 x i8> @combine_pshufb_palignr(<16 x i8> %a0, <16 x i8> %a1) {			define <16 x i8> @combine_pshufb_palignr(<16 x i8> %a0, <16 x i8> %a1) {
	; SSE-LABEL: combine_pshufb_palignr:			; SSE-LABEL: combine_pshufb_palignr:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,14,15,8,9,10,11,12,13,14,15]			; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
	; SSE-NEXT: movdqa %xmm1, %xmm0			; SSE-NEXT: retq
	; SSE-NEXT: retq			;
	;			; AVX-LABEL: combine_pshufb_palignr:
	; AVX-LABEL: combine_pshufb_palignr:			; AVX: # BB#0:
	; AVX: # BB#0:			; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
	; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15,8,9,10,11,12,13,14,15]			; AVX-NEXT: retq
	; AVX-NEXT: retq			%1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
	%1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>			%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
	%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
	ret <16 x i8> %2			ret <16 x i8> %2
	}			}

	define <16 x i8> @combine_pshufb_pslldq(<16 x i8> %a0) {			define <16 x i8> @combine_pshufb_pslldq(<16 x i8> %a0) {
	; SSE-LABEL: combine_pshufb_pslldq:			; SSE-LABEL: combine_pshufb_pslldq:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: xorps %xmm0, %xmm0			; SSE-NEXT: xorps %xmm0, %xmm0
	; SSE-NEXT: retq			; SSE-NEXT: retq
	▲ Show 20 Lines • Show All 83 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-combining.ll

Show First 20 Lines • Show All 2,431 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
ret <4 x float> %2		ret <4 x float> %2
}		}

define <4 x float> @combine_undef_input_test9(<4 x float> %a) {		define <4 x float> @combine_undef_input_test9(<4 x float> %a) {
; SSE-LABEL: combine_undef_input_test9:		; SSE-LABEL: combine_undef_input_test9:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]		; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: combine_undef_input_test9:		; AVX-LABEL: combine_undef_input_test9:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]		; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>		%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>		%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x float> %2		ret <4 x float> %2
}		}

define <4 x float> @combine_undef_input_test10(<4 x float> %a) {		define <4 x float> @combine_undef_input_test10(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test10:		; ALL-LABEL: combine_undef_input_test10:
; ALL: # BB#0:		; ALL: # BB#0:
; ALL-NEXT: retq		; ALL-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>		%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
▲ Show 20 Lines • Show All 168 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
ret <4 x float> %2		ret <4 x float> %2
}		}

define <4 x float> @combine_undef_input_test19(<4 x float> %a) {		define <4 x float> @combine_undef_input_test19(<4 x float> %a) {
; SSE-LABEL: combine_undef_input_test19:		; SSE-LABEL: combine_undef_input_test19:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]		; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: combine_undef_input_test19:		; AVX-LABEL: combine_undef_input_test19:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]		; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>		%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>		%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
ret <4 x float> %2		ret <4 x float> %2
}		}

define <4 x float> @combine_undef_input_test20(<4 x float> %a) {		define <4 x float> @combine_undef_input_test20(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test20:		; ALL-LABEL: combine_undef_input_test20:
; ALL: # BB#0:		; ALL: # BB#0:
; ALL-NEXT: retq		; ALL-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>		%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
▲ Show 20 Lines • Show All 336 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86][SSE] Added support for combining target shuffles to (V)PSHUFD/VPERMILPD/VPERMILPS immediate permute
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 61609

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/2012-01-12-extract-sv.ll

test/CodeGen/X86/2012-04-26-sdglue.ll

test/CodeGen/X86/avx-intrinsics-fast-isel.ll

test/CodeGen/X86/avx-intrinsics-x86.ll

test/CodeGen/X86/avx-splat.ll

test/CodeGen/X86/avx-vbroadcast.ll

test/CodeGen/X86/merge-consecutive-loads-128.ll

test/CodeGen/X86/pshufb-mask-comments.ll

test/CodeGen/X86/sse3.ll

test/CodeGen/X86/vector-compare-results.ll

test/CodeGen/X86/vector-shuffle-128-v16.ll

test/CodeGen/X86/vector-shuffle-128-v2.ll

test/CodeGen/X86/vector-shuffle-128-v4.ll

test/CodeGen/X86/vector-shuffle-256-v16.ll

test/CodeGen/X86/vector-shuffle-256-v4.ll

test/CodeGen/X86/vector-shuffle-256-v8.ll

test/CodeGen/X86/vector-shuffle-combining-avx.ll

test/CodeGen/X86/vector-shuffle-combining-avx2.ll

test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll

test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

test/CodeGen/X86/vector-shuffle-combining.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86][SSE] Added support for combining target shuffles to (V)PSHUFD/VPERMILPD/VPERMILPS immediate permute
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 61609

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/2012-01-12-extract-sv.ll

test/CodeGen/X86/2012-04-26-sdglue.ll

test/CodeGen/X86/avx-intrinsics-fast-isel.ll

test/CodeGen/X86/avx-intrinsics-x86.ll

test/CodeGen/X86/avx-splat.ll

test/CodeGen/X86/avx-vbroadcast.ll

test/CodeGen/X86/merge-consecutive-loads-128.ll

test/CodeGen/X86/pshufb-mask-comments.ll

test/CodeGen/X86/sse3.ll

test/CodeGen/X86/vector-compare-results.ll

test/CodeGen/X86/vector-shuffle-128-v16.ll

test/CodeGen/X86/vector-shuffle-128-v2.ll

test/CodeGen/X86/vector-shuffle-128-v4.ll

test/CodeGen/X86/vector-shuffle-256-v16.ll

test/CodeGen/X86/vector-shuffle-256-v4.ll

test/CodeGen/X86/vector-shuffle-256-v8.ll

test/CodeGen/X86/vector-shuffle-combining-avx.ll

test/CodeGen/X86/vector-shuffle-combining-avx2.ll

test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll

test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

test/CodeGen/X86/vector-shuffle-combining.ll

[X86][SSE] Added support for combining target shuffles to (V)PSHUFD/VPERMILPD/VPERMILPS immediate permute
ClosedPublic