Diff 61211

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,150 Lines • ▼ Show 20 Lines
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.		/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///		///
/// This helper function produces an 8-bit shuffle immediate corresponding to		/// This helper function produces an 8-bit shuffle immediate corresponding to
/// the ubiquitous shuffle encoding scheme used in x86 instructions for		/// the ubiquitous shuffle encoding scheme used in x86 instructions for
/// shuffling 4 lanes. It can be used with most of the PSHUF instructions for		/// shuffling 4 lanes. It can be used with most of the PSHUF instructions for
/// example.		/// example.
///		///
/// NB: We rely heavily on "undef" masks preserving the input lane.		/// NB: We rely heavily on "undef" masks preserving the input lane.
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,		static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
SelectionDAG &DAG) {
assert(Mask.size() == 4 && "Only 4-lane shuffle masks");		assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");		assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");		assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");
assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");		assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");		assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");

unsigned Imm = 0;		unsigned Imm = 0;
Imm \|= (Mask[0] == -1 ? 0 : Mask[0]) << 0;		Imm \|= (Mask[0] == -1 ? 0 : Mask[0]) << 0;
Imm \|= (Mask[1] == -1 ? 1 : Mask[1]) << 2;		Imm \|= (Mask[1] == -1 ? 1 : Mask[1]) << 2;
Imm \|= (Mask[2] == -1 ? 2 : Mask[2]) << 4;		Imm \|= (Mask[2] == -1 ? 2 : Mask[2]) << 4;
Imm \|= (Mask[3] == -1 ? 3 : Mask[3]) << 6;		Imm \|= (Mask[3] == -1 ? 3 : Mask[3]) << 6;
return DAG.getConstant(Imm, DL, MVT::i8);		return Imm;
		}

		static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
		SelectionDAG &DAG) {
		return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}		}

/// \brief Compute whether each element of a shuffle is zeroable.		/// \brief Compute whether each element of a shuffle is zeroable.
///		///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.		/// A "zeroable" vector shuffle element is one which can be lowered to zero.
/// Either it is an undef element in the shuffle mask, the element of the input		/// Either it is an undef element in the shuffle mask, the element of the input
/// referenced is undef, or the element of the input referenced is known to be		/// referenced is undef, or the element of the input referenced is known to be
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle		/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
▲ Show 20 Lines • Show All 17,383 Lines • ▼ Show 20 Lines
}		}

// Attempt to match a combined shuffle mask against supported unary shuffle		// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.		// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.		// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,		static bool matchUnaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
const X86Subtarget &Subtarget,		const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT) {		unsigned &Shuffle, MVT &ShuffleVT) {
bool FloatDomain = SrcVT.isFloatingPoint();		bool FloatDomain = SrcVT.isFloatingPoint() \|\|
		(!Subtarget.hasAVX2() && SrcVT.is256BitVector());

		[Inline review comment - RKSimon (author; unsubmitted, not done)]: I can't separate this easily - annoyingly, this is only of use once we add support for permutes in this patch, and without it we see regressions with this patch.
// Match a 128-bit integer vector against a VZEXT_MOVL (MOVQ) instruction.		// Match a 128-bit integer vector against a VZEXT_MOVL (MOVQ) instruction.
if (!FloatDomain && SrcVT.is128BitVector() &&		if (!FloatDomain && SrcVT.is128BitVector() &&
isTargetShuffleEquivalent(Mask, {0, SM_SentinelZero})) {		isTargetShuffleEquivalent(Mask, {0, SM_SentinelZero})) {
Shuffle = X86ISD::VZEXT_MOVL;		Shuffle = X86ISD::VZEXT_MOVL;
ShuffleVT = MVT::v2i64;		ShuffleVT = MVT::v2i64;
return true;		return true;
}		}

▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	if (isTargetShuffleEquivalent(
ShuffleVT = MVT::v16f32;		ShuffleVT = MVT::v16f32;
return true;		return true;
}		}
}		}

return false;		return false;
}		}

		// Attempt to match a combined shuffle mask against supported unary immediate
		// permute instructions.
		//
		// SrcVT      - vector type being shuffled; only its total bit width, its
		//              floating-point-ness and its 256-bit-ness are inspected here.
		// Mask       - combined unary shuffle mask; entries are lane indices,
		//              SM_SentinelUndef or SM_SentinelZero.
		// Subtarget  - queried for AVX / AVX2 availability.
		// Shuffle    - [out] X86ISD::VPERMILPI or X86ISD::PSHUFD.
		// ShuffleVT  - [out] vector type the permute node must be created with.
		// PermuteImm - [out] immediate operand for the permute.
		//
		// Returns true on a match. On failure the three outputs are left untouched.
		// TODO: Investigate sharing more of this with shuffle lowering.
		static bool matchPermuteVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
		                                      const X86Subtarget &Subtarget,
		                                      unsigned &Shuffle, MVT &ShuffleVT,
		                                      unsigned &PermuteImm) {
		  // Ensure we don't contain any zero elements.
		  for (int M : Mask) {
		    if (M == SM_SentinelZero)
		      return false;
		    // Compare as signed values: letting M convert to size_t would turn
		    // SM_SentinelUndef into a huge unsigned number and trip this assert on
		    // every undef lane.
		    assert(SM_SentinelUndef <= M && M < static_cast<int>(Mask.size()) &&
		           "Expected unary shuffle");
		  }

		  // We only support permutation of 32/64 bit elements.
		  // TODO - support PSHUFLW/PSHUFHW.
		  unsigned MaskScalarSizeInBits = SrcVT.getSizeInBits() / Mask.size();
		  if (MaskScalarSizeInBits != 32 && MaskScalarSizeInBits != 64)
		    return false;
		  MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);

		  // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
		  // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
		  bool FloatDomain = SrcVT.isFloatingPoint();
		  if (FloatDomain && !Subtarget.hasAVX())
		    return false;

		  // Pre-AVX2 we must use float shuffles on 256-bit vectors.
		  if (SrcVT.is256BitVector() && !Subtarget.hasAVX2())
		    FloatDomain = true;

		  // TODO - support LaneCrossing for AVX2 PERMQ/PERMPD
		  if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask))
		    return false;

		  // VPERMILPD can permute with a non-repeating shuffle.
		  if (FloatDomain && MaskScalarSizeInBits == 64) {
		    Shuffle = X86ISD::VPERMILPI;
		    ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
		    PermuteImm = 0;
		    // One immediate bit per element: bit i selects the low (0) or high (1)
		    // element of the 128-bit lane that element i lives in. Undef elements
		    // leave their bit as zero.
		    for (int i = 0, e = Mask.size(); i != e; ++i) {
		      int M = Mask[i];
		      if (M == SM_SentinelUndef)
		        continue;
		      assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index");
		      PermuteImm |= (M & 1) << i;
		    }
		    return true;
		  }

		  // We need a repeating shuffle mask for VPERMILPS/PSHUFD.
		  SmallVector<int, 4> RepeatedMask;
		  if (!is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask))
		    return false;

		  // Narrow the repeated mask for 32-bit element permutes.
		  // The widening must start from the per-lane RepeatedMask, not the full
		  // Mask: for vectors wider than 128 bits the full mask would produce more
		  // than four entries (with indices >= 4), which getV4X86ShuffleImm rejects,
		  // and undef lane-0 entries would hide indices defined by later lanes.
		  SmallVector<int, 4> WordMask;
		  if (MaskScalarSizeInBits == 64) {
		    for (int M : RepeatedMask) {
		      if (M == SM_SentinelUndef) {
		        WordMask.append(2, SM_SentinelUndef);
		        continue;
		      }
		      WordMask.push_back((M * 2) + 0);
		      WordMask.push_back((M * 2) + 1);
		    }
		  } else {
		    WordMask = RepeatedMask;
		  }

		  Shuffle = (FloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
		  ShuffleVT = (FloatDomain ? MVT::f32 : MVT::i32);
		  ShuffleVT = MVT::getVectorVT(ShuffleVT, SrcVT.getSizeInBits() / 32);
		  PermuteImm = getV4X86ShuffleImm(WordMask);
		  return true;
		}

// Attempt to match a combined unary shuffle mask against supported binary		// Attempt to match a combined unary shuffle mask against supported binary
// shuffle instructions.		// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.		// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,		static bool matchBinaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
unsigned &Shuffle, MVT &ShuffleVT) {		unsigned &Shuffle, MVT &ShuffleVT) {
bool FloatDomain = SrcVT.isFloatingPoint();		bool FloatDomain = SrcVT.isFloatingPoint();

if (SrcVT.is128BitVector()) {		if (SrcVT.is128BitVector()) {
▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines	static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
if (RootVT.getScalarSizeInBits() != MaskEltSizeInBits &&		if (RootVT.getScalarSizeInBits() != MaskEltSizeInBits &&
(RootSizeInBits == 512 \|\|		(RootSizeInBits == 512 \|\|
(Subtarget.hasVLX() && RootSizeInBits >= 128))) {		(Subtarget.hasVLX() && RootSizeInBits >= 128))) {
return false;		return false;
}		}

// Attempt to match the mask against known shuffle patterns.		// Attempt to match the mask against known shuffle patterns.
MVT ShuffleVT;		MVT ShuffleVT;
unsigned Shuffle;		unsigned Shuffle, PermuteImm;

if (matchUnaryVectorShuffle(VT, Mask, Subtarget, Shuffle, ShuffleVT)) {		if (matchUnaryVectorShuffle(VT, Mask, Subtarget, Shuffle, ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)		if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!		return false; // Nothing to do!
Res = DAG.getBitcast(ShuffleVT, Input);		Res = DAG.getBitcast(ShuffleVT, Input);
DCI.AddToWorklist(Res.getNode());		DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);		Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
DCI.AddToWorklist(Res.getNode());		DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),		DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/AddTo/ true);		/AddTo/ true);
return true;		return true;
}		}

		if (matchPermuteVectorShuffle(VT, Mask, Subtarget, Shuffle, ShuffleVT,
		PermuteImm)) {
		if (Depth == 1 && Root.getOpcode() == Shuffle)
		return false; // Nothing to do!
		Res = DAG.getBitcast(ShuffleVT, Input);
		DCI.AddToWorklist(Res.getNode());
		Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
		DAG.getConstant(PermuteImm, DL, MVT::i8));
		DCI.AddToWorklist(Res.getNode());
		DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
		/AddTo/ true);
		return true;
		}

if (matchBinaryVectorShuffle(VT, Mask, Shuffle, ShuffleVT)) {		if (matchBinaryVectorShuffle(VT, Mask, Shuffle, ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)		if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!		return false; // Nothing to do!
Res = DAG.getBitcast(ShuffleVT, Input);		Res = DAG.getBitcast(ShuffleVT, Input);
DCI.AddToWorklist(Res.getNode());		DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, Res);		Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, Res);
DCI.AddToWorklist(Res.getNode());		DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),		DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
▲ Show 20 Lines • Show All 6,693 Lines • Show Last 20 Lines

test/CodeGen/X86/2012-01-12-extract-sv.ll

	; RUN: llc < %s -mattr=+avx -mtriple=i686-pc-win32 \| FileCheck %s			; RUN: llc < %s -mattr=+avx -mtriple=i686-pc-win32 \| FileCheck %s

	define void @endless_loop() {			define void @endless_loop() {
	; CHECK-LABEL: endless_loop:			; CHECK-LABEL: endless_loop:
	; CHECK-NEXT: # BB#0:			; CHECK-NEXT: # BB#0:
	; CHECK-NEXT: vmovaps (%eax), %ymm0			; CHECK-NEXT: vmovaps (%eax), %ymm0
	; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0			; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
	; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]			; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
	; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]			; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
	; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1			; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
	; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2			; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
	; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]			; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
	; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]			; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
	; CHECK-NEXT: vmovaps %ymm0, (%eax)			; CHECK-NEXT: vmovaps %ymm0, (%eax)
	; CHECK-NEXT: vmovaps %ymm1, (%eax)			; CHECK-NEXT: vmovaps %ymm1, (%eax)
	; CHECK-NEXT: vzeroupper			; CHECK-NEXT: vzeroupper
	; CHECK-NEXT: retl			; CHECK-NEXT: retl
	entry:			entry:
	%0 = load <8 x i32>, <8 x i32> addrspace(1)* undef, align 32			%0 = load <8 x i32>, <8 x i32> addrspace(1)* undef, align 32
	%1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			%1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	%2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>			%2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>
	store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64			store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
	ret void			ret void
	}			}

test/CodeGen/X86/2012-04-26-sdglue.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 \| FileCheck %s			; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 \| FileCheck %s

	; rdar://11314175: SD Scheduler, BuildSchedUnits assert:			; rdar://11314175: SD Scheduler, BuildSchedUnits assert:
	; N->getNodeId() == -1 && "Node already inserted!			; N->getNodeId() == -1 && "Node already inserted!

	define void @func() nounwind ssp {			define void @func() nounwind ssp {
	; CHECK-LABEL: func:			; CHECK-LABEL: func:
	; CHECK: ## BB#0:			; CHECK: ## BB#0:
	; CHECK-NEXT: vmovups 0, %xmm0			; CHECK-NEXT: vmovups 0, %xmm0
	; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1			; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
	; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]			; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
	; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]			; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
	; CHECK-NEXT: vpbroadcastd 32, %xmm3			; CHECK-NEXT: vbroadcastss 32, %xmm3
	; CHECK-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0			; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
	; CHECK-NEXT: vmulps %ymm0, %ymm2, %ymm2			; CHECK-NEXT: vmulps %ymm0, %ymm2, %ymm2
	; CHECK-NEXT: vmulps %ymm0, %ymm0, %ymm0			; CHECK-NEXT: vmulps %ymm0, %ymm0, %ymm0
	; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0			; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
	; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0			; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0
	; CHECK-NEXT: vmulps %xmm0, %xmm0, %xmm0			; CHECK-NEXT: vmulps %xmm0, %xmm0, %xmm0
	; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]			; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
	; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0			; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0
	; CHECK-NEXT: vhaddps %ymm0, %ymm0, %ymm0			; CHECK-NEXT: vhaddps %ymm0, %ymm0, %ymm0
	Show All 36 Lines

test/CodeGen/X86/avx-intrinsics-fast-isel.ll

Show First 20 Lines • Show All 2,379 Lines • ▼ Show 20 Lines	; X64-NEXT: retq
%res = bitcast <16 x i16> %res15 to <4 x i64>		%res = bitcast <16 x i16> %res15 to <4 x i64>
ret <4 x i64> %res		ret <4 x i64> %res
}		}

define <4 x i64> @test_mm256_set1_epi32(i32 %a0) nounwind {		define <4 x i64> @test_mm256_set1_epi32(i32 %a0) nounwind {
; X32-LABEL: test_mm256_set1_epi32:		; X32-LABEL: test_mm256_set1_epi32:
; X32: # BB#0:		; X32: # BB#0:
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero		; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]		; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl		; X32-NEXT: retl
;		;
; X64-LABEL: test_mm256_set1_epi32:		; X64-LABEL: test_mm256_set1_epi32:
; X64: # BB#0:		; X64: # BB#0:
; X64-NEXT: vmovd %edi, %xmm0		; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]		; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq		; X64-NEXT: retq
%res0 = insertelement <8 x i32> undef, i32 %a0, i32 0		%res0 = insertelement <8 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <8 x i32> %res0, i32 %a0, i32 1		%res1 = insertelement <8 x i32> %res0, i32 %a0, i32 1
%res2 = insertelement <8 x i32> %res1, i32 %a0, i32 2		%res2 = insertelement <8 x i32> %res1, i32 %a0, i32 2
%res3 = insertelement <8 x i32> %res2, i32 %a0, i32 3		%res3 = insertelement <8 x i32> %res2, i32 %a0, i32 3
%res4 = insertelement <8 x i32> %res3, i32 %a0, i32 4		%res4 = insertelement <8 x i32> %res3, i32 %a0, i32 4
%res5 = insertelement <8 x i32> %res4, i32 %a0, i32 5		%res5 = insertelement <8 x i32> %res4, i32 %a0, i32 5
Show All 13 Lines
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0		; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0		; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl		; X32-NEXT: retl
;		;
; X64-LABEL: test_mm256_set1_epi64x:		; X64-LABEL: test_mm256_set1_epi64x:
; X64: # BB#0:		; X64: # BB#0:
; X64-NEXT: vmovq %rdi, %xmm0		; X64-NEXT: vmovq %rdi, %xmm0
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]		; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq		; X64-NEXT: retq
%res0 = insertelement <4 x i64> undef, i64 %a0, i32 0		%res0 = insertelement <4 x i64> undef, i64 %a0, i32 0
%res1 = insertelement <4 x i64> %res0, i64 %a0, i32 1		%res1 = insertelement <4 x i64> %res0, i64 %a0, i32 1
%res2 = insertelement <4 x i64> %res1, i64 %a0, i32 2		%res2 = insertelement <4 x i64> %res1, i64 %a0, i32 2
%res3 = insertelement <4 x i64> %res2, i64 %a0, i32 3		%res3 = insertelement <4 x i64> %res2, i64 %a0, i32 3
ret <4 x i64> %res3		ret <4 x i64> %res3
}		}
▲ Show 20 Lines • Show All 1,323 Lines • Show Last 20 Lines

test/CodeGen/X86/avx-intrinsics-x86.ll

	Show First 20 Lines • Show All 4,030 Lines • ▼ Show 20 Lines
	define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {			define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
	; AVX-LABEL: test_x86_avx_vpermilvar_pd_256_2:			; AVX-LABEL: test_x86_avx_vpermilvar_pd_256_2:
	; AVX: ## BB#0:			; AVX: ## BB#0:
	; AVX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]			; AVX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
	; AVX-NEXT: retl			; AVX-NEXT: retl
	;			;
	; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:			; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
	; AVX512VL: ## BB#0:			; AVX512VL: ## BB#0:
	; AVX512VL-NEXT: vpermilpd LCPI227_0, %ymm0, %ymm0			; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
	; AVX512VL-NEXT: retl			; AVX512VL-NEXT: retl
	%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]			%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
	ret <4 x double> %res			ret <4 x double> %res
	}			}

	define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {			define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
	; AVX-LABEL: test_x86_avx_vpermilvar_ps:			; AVX-LABEL: test_x86_avx_vpermilvar_ps:
	; AVX: ## BB#0:			; AVX: ## BB#0:
	▲ Show 20 Lines • Show All 559 Lines • Show Last 20 Lines

test/CodeGen/X86/avx-splat.ll

Show All 22 Lines	entry:
%shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>		%shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <16 x i16> %shuffle		ret <16 x i16> %shuffle
}		}

define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {		define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:		; CHECK-LABEL: funcC:
; CHECK: ## BB#0: ## %entry		; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmovq %rdi, %xmm0		; CHECK-NEXT: vmovq %rdi, %xmm0
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]		; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq		; CHECK-NEXT: retq
entry:		entry:
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0		%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
%vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1		%vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
%vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2		%vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
%vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3		%vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
ret <4 x i64> %vecinit6.i		ret <4 x i64> %vecinit6.i
▲ Show 20 Lines • Show All 134 Lines • Show Last 20 Lines

test/CodeGen/X86/avx-vbroadcast.ll

Show First 20 Lines • Show All 167 Lines • ▼ Show 20 Lines	entry:
%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>		%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %ret		ret <4 x i32> %ret
}		}

define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {		define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_8i32_4i32_33333333:		; X32-LABEL: load_splat_8i32_4i32_33333333:
; X32: ## BB#0: ## %entry		; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax		; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]		; X32-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,3,3,3]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl		; X32-NEXT: retl
;		;
; X64-LABEL: load_splat_8i32_4i32_33333333:		; X64-LABEL: load_splat_8i32_4i32_33333333:
; X64: ## BB#0: ## %entry		; X64: ## BB#0: ## %entry
; X64-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]		; X64-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,3,3,3]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%ld = load <4 x i32>, <4 x i32>* %ptr		%ld = load <4 x i32>, <4 x i32>* %ptr
%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>		%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
ret <8 x i32> %ret		ret <8 x i32> %ret
}		}

▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines	entry:
%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>		%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
ret <2 x i64> %ret		ret <2 x i64> %ret
}		}

define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {		define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
; X32-LABEL: load_splat_4i64_2i64_1111:		; X32-LABEL: load_splat_4i64_2i64_1111:
; X32: ## BB#0: ## %entry		; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax		; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovaps (%eax), %xmm0		; X32-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
; X32-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl		; X32-NEXT: retl
;		;
; X64-LABEL: load_splat_4i64_2i64_1111:		; X64-LABEL: load_splat_4i64_2i64_1111:
; X64: ## BB#0: ## %entry		; X64: ## BB#0: ## %entry
; X64-NEXT: vmovaps (%rdi), %xmm0		; X64-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
; X64-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%ld = load <2 x i64>, <2 x i64>* %ptr		%ld = load <2 x i64>, <2 x i64>* %ptr
%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>		%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
ret <4 x i64> %ret		ret <4 x i64> %ret
}		}

▲ Show 20 Lines • Show All 258 Lines • Show Last 20 Lines

test/CodeGen/X86/merge-consecutive-loads-128.ll

	Show First 20 Lines • Show All 754 Lines • ▼ Show 20 Lines
	; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]			; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]
	; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]			; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: merge_4f32_f32_X0YY:			; AVX-LABEL: merge_4f32_f32_X0YY:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero			; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
	; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero			; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
	; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]			; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
	; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]			; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
	; AVX-NEXT: retq			; AVX-NEXT: retq
	;			;
	; X32-SSE-LABEL: merge_4f32_f32_X0YY:			; X32-SSE-LABEL: merge_4f32_f32_X0YY:
	; X32-SSE: # BB#0:			; X32-SSE: # BB#0:
	; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax			; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx			; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
	; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero			; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
	Show All 12 Lines

test/CodeGen/X86/pshufb-mask-comments.ll

Show All 33 Lines	; CHECK-NEXT: retq
ret <16 x i8> %1		ret <16 x i8> %1
}		}

; Test that we won't crash when the constant was reused for another instruction.		; Test that we won't crash when the constant was reused for another instruction.

define <16 x i8> @test4(<16 x i8> %V, <2 x i64>* %P) {		define <16 x i8> @test4(<16 x i8> %V, <2 x i64>* %P) {
; CHECK-LABEL: test4:		; CHECK-LABEL: test4:
; CHECK: # BB#0:		; CHECK: # BB#0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1084818905618843912,506097522914230528]		; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1084818905618843912,506097522914230528]
; CHECK-NEXT: movdqa %xmm1, (%rdi)		; CHECK-NEXT: movaps %xmm1, (%rdi)
; CHECK-NEXT: pshufb %xmm1, %xmm0		; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%1 = insertelement <2 x i64> undef, i64 1084818905618843912, i32 0		%1 = insertelement <2 x i64> undef, i64 1084818905618843912, i32 0
%2 = insertelement <2 x i64> %1, i64 506097522914230528, i32 1		%2 = insertelement <2 x i64> %1, i64 506097522914230528, i32 1
store <2 x i64> %2, <2 x i64>* %P, align 16		store <2 x i64> %2, <2 x i64>* %P, align 16
%3 = bitcast <2 x i64> %2 to <16 x i8>		%3 = bitcast <2 x i64> %2 to <16 x i8>
%4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %V, <16 x i8> %3)		%4 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %V, <16 x i8> %3)
ret <16 x i8> %4		ret <16 x i8> %4
}		}
Show All 38 Lines

test/CodeGen/X86/sse3.ll

Show First 20 Lines • Show All 134 Lines • ▼ Show 20 Lines	; X64-NEXT: retq
store <2 x i64> %tmp15.upgrd.2, <2 x i64>* %res		store <2 x i64> %tmp15.upgrd.2, <2 x i64>* %res
ret void		ret void
}		}

define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {		define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
; X64-LABEL: t9:		; X64-LABEL: t9:
; X64: ## BB#0:		; X64: ## BB#0:
; X64-NEXT: movapd (%rdi), %xmm0		; X64-NEXT: movapd (%rdi), %xmm0
; X64-NEXT: movhpd (%rsi), %xmm0		; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X64-NEXT: movapd %xmm0, (%rdi)		; X64-NEXT: movapd %xmm0, (%rdi)
; X64-NEXT: retq		; X64-NEXT: retq
%tmp = load <4 x float>, <4 x float>* %r		%tmp = load <4 x float>, <4 x float>* %r
%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*		%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
%tmp.upgrd.4 = load double, double* %tmp.upgrd.3		%tmp.upgrd.4 = load double, double* %tmp.upgrd.3
%tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0		%tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0
%tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1		%tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1
%tmp6 = bitcast <2 x double> %tmp5 to <4 x float>		%tmp6 = bitcast <2 x double> %tmp5 to <4 x float>
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines

}		}

define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {		define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-LABEL: t12:		; X64-LABEL: t12:
; X64: ## BB#0: ## %entry		; X64: ## BB#0: ## %entry
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]		; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]		; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,6,7]		; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >		%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
ret <8 x i16> %tmp9		ret <8 x i16> %tmp9

}		}

define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {		define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-LABEL: t13:		; X64-LABEL: t13:
; X64: ## BB#0: ## %entry		; X64: ## BB#0: ## %entry
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]		; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]		; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,6,7]		; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >		%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
ret <8 x i16> %tmp9		ret <8 x i16> %tmp9
}		}

define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {		define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-LABEL: t14:		; X64-LABEL: t14:
▲ Show 20 Lines • Show All 56 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-compare-results.ll

	; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 \| FileCheck %s --check-prefix=SSE --check-prefix=SSE2			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 \| FileCheck %s --check-prefix=SSE --check-prefix=SSE2
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 \| FileCheck %s --check-prefix=SSE --check-prefix=SSE42			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 \| FileCheck %s --check-prefix=SSE --check-prefix=SSE42
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx \| FileCheck %s --check-prefix=AVX --check-prefix=AVX1			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx \| FileCheck %s --check-prefix=AVX --check-prefix=AVX1
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 \| FileCheck %s --check-prefix=AVX --check-prefix=AVX2			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 \| FileCheck %s --check-prefix=AVX --check-prefix=AVX2
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f \| FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f \| FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F

	;			;
	▲ Show 20 Lines • Show All 132 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
	; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]			; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
	; AVX1-NEXT: vzeroupper			; AVX1-NEXT: vzeroupper
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: test_cmp_v4f64:			; AVX2-LABEL: test_cmp_v4f64:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
	; AVX2-NEXT: vzeroupper			; AVX2-NEXT: vzeroupper
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512-LABEL: test_cmp_v4f64:			; AVX512-LABEL: test_cmp_v4f64:
	; AVX512: # BB#0:			; AVX512: # BB#0:
	; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0			; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
	; AVX512-NEXT: vpmovqd %zmm0, %ymm0			; AVX512-NEXT: vpmovqd %zmm0, %ymm0
	▲ Show 20 Lines • Show All 520 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0			; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
	; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0			; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
	; AVX1-NEXT: vzeroupper			; AVX1-NEXT: vzeroupper
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: test_cmp_v8f64:			; AVX2-LABEL: test_cmp_v8f64:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0			; AVX2-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
	; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
	; AVX2-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1			; AVX2-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
	; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
	; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0			; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
	; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero			; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
	; AVX2-NEXT: vzeroupper			; AVX2-NEXT: vzeroupper
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512-LABEL: test_cmp_v8f64:			; AVX512-LABEL: test_cmp_v8f64:
	▲ Show 20 Lines • Show All 1,439 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0			; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
	; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0			; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
	; AVX1-NEXT: vzeroupper			; AVX1-NEXT: vzeroupper
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: test_cmp_v16f64:			; AVX2-LABEL: test_cmp_v16f64:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vcmpltpd %ymm2, %ymm6, %ymm2			; AVX2-NEXT: vcmpltpd %ymm2, %ymm6, %ymm2
	; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm2 = ymm2[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,3,2,3]
	; AVX2-NEXT: vcmpltpd %ymm3, %ymm7, %ymm3			; AVX2-NEXT: vcmpltpd %ymm3, %ymm7, %ymm3
	; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm3 = ymm3[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,3,2,3]
	; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2			; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]			; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
	; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2			; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
	; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
	; AVX2-NEXT: vmovdqa {{.*#+}} xmm6 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>			; AVX2-NEXT: vmovdqa {{.*#+}} xmm6 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
	; AVX2-NEXT: vpshufb %xmm6, %xmm2, %xmm2			; AVX2-NEXT: vpshufb %xmm6, %xmm2, %xmm2
	; AVX2-NEXT: vcmpltpd %ymm0, %ymm4, %ymm0			; AVX2-NEXT: vcmpltpd %ymm0, %ymm4, %ymm0
	; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
	; AVX2-NEXT: vcmpltpd %ymm1, %ymm5, %ymm1			; AVX2-NEXT: vcmpltpd %ymm1, %ymm5, %ymm1
	; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]			; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,2,0,2,4,6,4,6]
	; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,2,3]
	; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0			; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
	; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0			; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
	; AVX2-NEXT: vpshufb %xmm6, %xmm0, %xmm0			; AVX2-NEXT: vpshufb %xmm6, %xmm0, %xmm0
	; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]			; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
	; AVX2-NEXT: vzeroupper			; AVX2-NEXT: vzeroupper
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	▲ Show 20 Lines • Show All 4,443 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-128-v16.ll

Show First 20 Lines • Show All 919 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
ret <16 x i8> %shuffle		ret <16 x i8> %shuffle
}		}

define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) {		define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) {
; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:		; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]		; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]		; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1]		; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:		; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero		; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:		; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
▲ Show 20 Lines • Show All 663 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-128-v2.ll

	; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				ab (unsubmitted, not done): Extra NOTE
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
				ab (unsubmitted, not done): I'm surprised by this and other changes; isn't the combine for shuffle chains? (It does look better for folding though; just trying to understand.)
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL

	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	▲ Show 20 Lines • Show All 143 Lines • ▼ Show 20 Lines
	define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {			define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
	; SSE-LABEL: shuffle_v2f64_11:			; SSE-LABEL: shuffle_v2f64_11:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]			; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: shuffle_v2f64_11:			; AVX-LABEL: shuffle_v2f64_11:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]			; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>			%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
	ret <2 x double> %shuffle			ret <2 x double> %shuffle
	}			}
	define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {			define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
	; SSE2-LABEL: shuffle_v2f64_22:			; SSE2-LABEL: shuffle_v2f64_22:
	; SSE2: # BB#0:			; SSE2: # BB#0:
	; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]			; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
	▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
	; SSE-LABEL: shuffle_v2f64_33:			; SSE-LABEL: shuffle_v2f64_33:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]			; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
	; SSE-NEXT: movaps %xmm1, %xmm0			; SSE-NEXT: movaps %xmm1, %xmm0
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: shuffle_v2f64_33:			; AVX-LABEL: shuffle_v2f64_33:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]			; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,1]
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>			%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
	ret <2 x double> %shuffle			ret <2 x double> %shuffle
	}			}
	define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {			define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
	; SSE2-LABEL: shuffle_v2f64_03:			; SSE2-LABEL: shuffle_v2f64_03:
	; SSE2: # BB#0:			; SSE2: # BB#0:
	; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]			; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
	▲ Show 20 Lines • Show All 1,134 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-128-v4.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
				ab (unsubmitted, not done): In particular, this looks slightly more expensive according to Agner's Intel tables (for the folded variants).
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 \| FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
	; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2			; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 \| FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-unknown"			target triple = "x86_64-unknown-unknown"

	▲ Show 20 Lines • Show All 210 Lines • ▼ Show 20 Lines
	define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {			define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
	; SSE-LABEL: shuffle_v4f32_0011:			; SSE-LABEL: shuffle_v4f32_0011:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]			; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: shuffle_v4f32_0011:			; AVX-LABEL: shuffle_v4f32_0011:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]			; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>			%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
	ret <4 x float> %shuffle			ret <4 x float> %shuffle
	}			}
	define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {			define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
	; SSE-LABEL: shuffle_v4f32_2233:			; SSE-LABEL: shuffle_v4f32_2233:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]			; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: shuffle_v4f32_2233:			; AVX-LABEL: shuffle_v4f32_2233:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]			; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>			%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
	ret <4 x float> %shuffle			ret <4 x float> %shuffle
	}			}
	define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {			define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
	; SSE2-LABEL: shuffle_v4f32_0022:			; SSE2-LABEL: shuffle_v4f32_0022:
	; SSE2: # BB#0:			; SSE2: # BB#0:
	; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]			; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
	▲ Show 20 Lines • Show All 2,034 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-256-v16.ll

Show First 20 Lines • Show All 1,437 Lines • ▼ Show 20 Lines	; AVX2-NEXT: retq
ret <16 x i16> %shuffle		ret <16 x i16> %shuffle
}		}

define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {		define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:		; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1		; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]		; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]		; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:		; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0		; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>		%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
ret <16 x i16> %shuffle		ret <16 x i16> %shuffle
▲ Show 20 Lines • Show All 2,085 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-256-v4.ll

Show First 20 Lines • Show All 482 Lines • ▼ Show 20 Lines
; ALL-NEXT: retq		; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>		%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
ret <4 x double> %shuffle		ret <4 x double> %shuffle
}		}

define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {		define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_11uu:		; ALL-LABEL: shuffle_v4f64_11uu:
; ALL: # BB#0:		; ALL: # BB#0:
; ALL-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]		; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; ALL-NEXT: retq		; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>		%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
ret <4 x double> %shuffle		ret <4 x double> %shuffle
}		}

define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {		define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_22uu:		; AVX1-LABEL: shuffle_v4f64_22uu:
; AVX1: # BB#0:		; AVX1: # BB#0:
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines
; AVX512VL-NEXT: retq		; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>		%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i64> %shuffle		ret <4 x i64> %shuffle
}		}

define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {		define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:		; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v4i64_0001:		; AVX2-LABEL: shuffle_v4i64_0001:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]		; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT: retq		; AVX2-NEXT: retq
;		;
; AVX512VL-LABEL: shuffle_v4i64_0001:		; AVX512VL-LABEL: shuffle_v4i64_0001:
; AVX512VL: # BB#0:		; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]		; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT: retq		; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>		%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
ret <4 x i64> %shuffle		ret <4 x i64> %shuffle
}		}

define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {		define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0020:		; AVX1-LABEL: shuffle_v4i64_0020:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1		; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]		; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v4i64_0020:		; AVX2-LABEL: shuffle_v4i64_0020:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]		; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT: retq		; AVX2-NEXT: retq
;		;
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines
; AVX512VL-NEXT: retq		; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>		%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
ret <4 x i64> %shuffle		ret <4 x i64> %shuffle
}		}

define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {		define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:		; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v4i64_1000:		; AVX2-LABEL: shuffle_v4i64_1000:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]		; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT: retq		; AVX2-NEXT: retq
;		;
▲ Show 20 Lines • Show All 141 Lines • ▼ Show 20 Lines	; AVX512VL-NEXT: retq
ret <4 x i64> %shuffle		ret <4 x i64> %shuffle
}		}

define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {		define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4012:		; AVX1-LABEL: shuffle_v4i64_4012:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2		; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]		; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]		; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v4i64_4012:		; AVX2-LABEL: shuffle_v4i64_4012:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]		; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]		; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
▲ Show 20 Lines • Show All 503 Lines • ▼ Show 20 Lines
; AVX512VL-NEXT: retq		; AVX512VL-NEXT: retq
%1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer		%1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %1		ret <4 x double> %1
}		}

define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {		define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX1-LABEL: splat_mem_v4i64_from_v2i64:		; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: splat_mem_v4i64_from_v2i64:		; AVX2-LABEL: splat_mem_v4i64_from_v2i64:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0		; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
;		;
▲ Show 20 Lines • Show All 154 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-256-v8.ll

Show First 20 Lines • Show All 146 Lines • ▼ Show 20 Lines
; ALL-NEXT: retq		; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>		%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
ret <8 x float> %shuffle		ret <8 x float> %shuffle
}		}

define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {		define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00112233:		; AVX1-LABEL: shuffle_v8f32_00112233:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]		; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]		; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8f32_00112233:		; AVX2-LABEL: shuffle_v8f32_00112233:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]		; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0		; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
Show All 26 Lines	; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>		%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x float> %shuffle		ret <8 x float> %shuffle
}		}

define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {		define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_08080808:		; AVX1-LABEL: shuffle_v8f32_08080808:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]		; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]		; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8f32_08080808:		; AVX2-LABEL: shuffle_v8f32_08080808:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero		; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0		; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>		%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
ret <8 x float> %shuffle		ret <8 x float> %shuffle
}		}

define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {		define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_08084c4c:		; ALL-LABEL: shuffle_v8f32_08084c4c:
; ALL: # BB#0:		; ALL: # BB#0:
; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]		; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]		; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; ALL-NEXT: retq		; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>		%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
ret <8 x float> %shuffle		ret <8 x float> %shuffle
}		}

define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {		define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_8823cc67:		; ALL-LABEL: shuffle_v8f32_8823cc67:
; ALL: # BB#0:		; ALL: # BB#0:
▲ Show 20 Lines • Show All 677 Lines • ▼ Show 20 Lines
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>		%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i32> %shuffle		ret <8 x i32> %shuffle
}		}

define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00000010:		; AVX1-LABEL: shuffle_v8i32_00000010:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8i32_00000010:		; AVX2-LABEL: shuffle_v8i32_00000010:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]		; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0		; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>		%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
ret <8 x i32> %shuffle		ret <8 x i32> %shuffle
}		}

define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00000200:		; AVX1-LABEL: shuffle_v8i32_00000200:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8i32_00000200:		; AVX2-LABEL: shuffle_v8i32_00000200:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]		; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0		; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>		%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
ret <8 x i32> %shuffle		ret <8 x i32> %shuffle
}		}

define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00003000:		; AVX1-LABEL: shuffle_v8i32_00003000:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8i32_00003000:		; AVX2-LABEL: shuffle_v8i32_00003000:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]		; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0		; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>		%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
ret <8 x i32> %shuffle		ret <8 x i32> %shuffle
}		}

define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00112233:		; AVX1-LABEL: shuffle_v8i32_00112233:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]		; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]		; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8i32_00112233:		; AVX2-LABEL: shuffle_v8i32_00112233:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]		; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0		; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>		%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
ret <8 x i32> %shuffle		ret <8 x i32> %shuffle
}		}

define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00001111:		; AVX1-LABEL: shuffle_v8i32_00001111:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]		; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]		; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8i32_00001111:		; AVX2-LABEL: shuffle_v8i32_00001111:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]		; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0		; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
Show All 14 Lines	; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>		%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x i32> %shuffle		ret <8 x i32> %shuffle
}		}

define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08080808:		; AVX1-LABEL: shuffle_v8i32_08080808:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]		; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]		; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8i32_08080808:		; AVX2-LABEL: shuffle_v8i32_08080808:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]		; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0		; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>		%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
ret <8 x i32> %shuffle		ret <8 x i32> %shuffle
}		}

define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08084c4c:		; AVX1-LABEL: shuffle_v8i32_08084c4c:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]		; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]		; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8i32_08084c4c:		; AVX2-LABEL: shuffle_v8i32_08084c4c:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]		; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]		; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]		; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq		; AVX2-NEXT: retq
▲ Show 20 Lines • Show All 114 Lines • ▼ Show 20 Lines
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>		%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x i32> %shuffle		ret <8 x i32> %shuffle
}		}

define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_091b2d3f:		; AVX1-LABEL: shuffle_v8i32_091b2d3f:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]		; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]		; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]		; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8i32_091b2d3f:		; AVX2-LABEL: shuffle_v8i32_091b2d3f:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero		; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]		; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>		%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
ret <8 x i32> %shuffle		ret <8 x i32> %shuffle
}		}

define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_09ab1def:		; AVX1-LABEL: shuffle_v8i32_09ab1def:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]		; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]		; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8i32_09ab1def:		; AVX2-LABEL: shuffle_v8i32_09ab1def:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>		; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0		; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
▲ Show 20 Lines • Show All 776 Lines • ▼ Show 20 Lines
; Shuffle to logical bit shifts		; Shuffle to logical bit shifts
;		;

define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {		define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:		; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1		; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]		; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]		; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:		; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0		; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>		%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
ret <8 x i32> %shuffle		ret <8 x i32> %shuffle
}		}

define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {		define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:		; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX1: # BB#0:		; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1		; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]		; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]		; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:		; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX2: # BB#0:		; AVX2: # BB#0:
; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0		; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>		%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
ret <8 x i32> %shuffle		ret <8 x i32> %shuffle
▲ Show 20 Lines • Show All 199 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-combining-avx.ll

	Show First 20 Lines • Show All 61 Lines • ▼ Show 20 Lines
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 undef>)			%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 undef>)
	ret <4 x float> %1			ret <4 x float> %1
	}			}

	define <4 x float> @combine_vpermilvar_4f32_unpckh(<4 x float> %a0) {			define <4 x float> @combine_vpermilvar_4f32_unpckh(<4 x float> %a0) {
	; ALL-LABEL: combine_vpermilvar_4f32_unpckh:			; ALL-LABEL: combine_vpermilvar_4f32_unpckh:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]			; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 2, i32 2, i32 3, i32 3>)			%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 2, i32 2, i32 3, i32 3>)
	ret <4 x float> %1			ret <4 x float> %1
	}			}

	define <4 x float> @combine_vpermilvar_4f32_unpckl(<4 x float> %a0) {			define <4 x float> @combine_vpermilvar_4f32_unpckl(<4 x float> %a0) {
	; ALL-LABEL: combine_vpermilvar_4f32_unpckl:			; ALL-LABEL: combine_vpermilvar_4f32_unpckl:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]			; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 1, i32 1>)			%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 1, i32 1>)
	ret <4 x float> %1			ret <4 x float> %1
	}			}

	define <8 x float> @combine_vpermilvar_8f32_identity(<8 x float> %a0) {			define <8 x float> @combine_vpermilvar_8f32_identity(<8 x float> %a0) {
	; ALL-LABEL: combine_vpermilvar_8f32_identity:			; ALL-LABEL: combine_vpermilvar_8f32_identity:
	; ALL: # BB#0:			; ALL: # BB#0:
	▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 0, i64 0, i64 4, i64 4>)			%1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 0, i64 0, i64 4, i64 4>)
	ret <4 x double> %1			ret <4 x double> %1
	}			}

	define <4 x float> @combine_vpermilvar_4f32_4stage(<4 x float> %a0) {			define <4 x float> @combine_vpermilvar_4f32_4stage(<4 x float> %a0) {
	; ALL-LABEL: combine_vpermilvar_4f32_4stage:			; ALL-LABEL: combine_vpermilvar_4f32_4stage:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,0,1,2,3,12,13,14,15,4,5,6,7]			; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)			%1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
	%2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> <i32 2, i32 3, i32 0, i32 1>)			%2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> <i32 2, i32 3, i32 0, i32 1>)
	%3 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>)			%3 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>)
	%4 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %3, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)			%4 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %3, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
	ret <4 x float> %4			ret <4 x float> %4
	}			}

	define <8 x float> @combine_vpermilvar_8f32_4stage(<8 x float> %a0) {			define <8 x float> @combine_vpermilvar_8f32_4stage(<8 x float> %a0) {
	; AVX1-LABEL: combine_vpermilvar_8f32_4stage:			; ALL-LABEL: combine_vpermilvar_8f32_4stage:
	; AVX1: # BB#0:			; ALL: # BB#0:
	; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]			; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
	; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0			; ALL-NEXT: retq
	; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
	; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
	; AVX1-NEXT: vpermilps %ymm1, %ymm0, %ymm0
	; AVX1-NEXT: retq
	;
	; AVX2-LABEL: combine_vpermilvar_8f32_4stage:
	; AVX2: # BB#0:
	; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,0,1,2,3,12,13,14,15,4,5,6,7,24,25,26,27,16,17,18,19,28,29,30,31,20,21,22,23]
	; AVX2-NEXT: retq
	;
	; AVX512F-LABEL: combine_vpermilvar_8f32_4stage:
	; AVX512F: # BB#0:
	; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,0,1,2,3,12,13,14,15,4,5,6,7,24,25,26,27,16,17,18,19,28,29,30,31,20,21,22,23]
	; AVX512F-NEXT: retq
	%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)			%1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
	%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>)			%2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>)
	%3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 0, i32 2, i32 1, i32 3>)			%3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %2, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 0, i32 2, i32 1, i32 3>)
	%4 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %3, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)			%4 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %3, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
	ret <8 x float> %4			ret <8 x float> %4
	}			}

test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines	; CHECK-NEXT: retq
%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>		%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
ret <32 x i8> %tmp2		ret <32 x i8> %tmp2
}		}

define <4 x i64> @combine_permq_pshufb(<4 x i64> %a0) {		define <4 x i64> @combine_permq_pshufb(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb:		; CHECK-LABEL: combine_permq_pshufb:
; CHECK: # BB#0:		; CHECK: # BB#0:
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]		; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31,16,17,18,19,20,21,22,23]		; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>		%1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%2 = bitcast <4 x i64> %1 to <32 x i8>		%2 = bitcast <4 x i64> %1 to <32 x i8>
%3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)		%3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
%4 = bitcast <32 x i8> %3 to <4 x i64>		%4 = bitcast <32 x i8> %3 to <4 x i64>
ret <4 x i64> %4		ret <4 x i64> %4
}		}

test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll

	Show First 20 Lines • Show All 206 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: kmovw %esi, %k1			; CHECK-NEXT: kmovw %esi, %k1
	; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]			; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%x0 = load <16 x float>, <16 x float> *%p0			%x0 = load <16 x float>, <16 x float> *%p0
	%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>, <16 x float> %x0, <16 x float> %x1, i16 %m)			%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>, <16 x float> %x0, <16 x float> %x1, i16 %m)
	ret <16 x float> %res0			ret <16 x float> %res0
	}			}

				define <16 x float> @combine_vpermt2var_16f32_vpermilps(<16 x float> %x0, <16 x float> %x1) {
				; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps:
				; CHECK: # BB#0:
				; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
				; CHECK-NEXT: retq
				%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 -1)
				ret <16 x float> %res0
				}
				define <16 x float> @combine_vpermt2var_16f32_vpermilps_load(<16 x float> *%p0, <16 x float> %x1) {
				; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_load:
				; CHECK: # BB#0:
				; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
				; CHECK-NEXT: retq
				%x0 = load <16 x float>, <16 x float> *%p0
				%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 -1)
				ret <16 x float> %res0
				}
				define <16 x float> @combine_vpermt2var_16f32_vpermilps_mask(<16 x float> %x0, <16 x float> %x1, i16 %m) {
				; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_mask:
				; CHECK: # BB#0:
				; CHECK-NEXT: kmovw %edi, %k1
				; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
				; CHECK-NEXT: retq
				%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 %m)
				ret <16 x float> %res0
				}
				define <16 x float> @combine_vpermt2var_16f32_vpermilps_mask_load(<16 x float> *%p0, <16 x float> %x1, i16 %m) {
				; CHECK-LABEL: combine_vpermt2var_16f32_vpermilps_mask_load:
				; CHECK: # BB#0:
				; CHECK-NEXT: kmovw %esi, %k1
				; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
				; CHECK-NEXT: retq
				%x0 = load <16 x float>, <16 x float> *%p0
				%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>, <16 x float> %x0, <16 x float> %x1, i16 %m)
				ret <16 x float> %res0
				}

	define <16 x i32> @combine_vpermt2var_16i32_identity(<16 x i32> %x0, <16 x i32> %x1) {			define <16 x i32> @combine_vpermt2var_16i32_identity(<16 x i32> %x0, <16 x i32> %x1) {
	; CHECK-LABEL: combine_vpermt2var_16i32_identity:			; CHECK-LABEL: combine_vpermt2var_16i32_identity:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>, <16 x i32> %x0, <16 x i32> %x1, i16 -1)			%res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>, <16 x i32> %x0, <16 x i32> %x1, i16 -1)
	%res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 30, i32 13, i32 28, i32 undef, i32 26, i32 9, i32 24, i32 7, i32 22, i32 5, i32 20, i32 3, i32 18, i32 1, i32 16>, <16 x i32> %res0, <16 x i32> %res0, i16 -1)			%res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 30, i32 13, i32 28, i32 undef, i32 26, i32 9, i32 24, i32 7, i32 22, i32 5, i32 20, i32 3, i32 18, i32 1, i32 16>, <16 x i32> %res0, <16 x i32> %res0, i16 -1)
	ret <16 x i32> %res1			ret <16 x i32> %res1
	}			}
	▲ Show 20 Lines • Show All 63 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

Show First 20 Lines • Show All 88 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
%3 = bitcast <16 x i8> %2 to <4 x float>		%3 = bitcast <16 x i8> %2 to <4 x float>
%4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>		%4 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
ret <4 x float> %4		ret <4 x float> %4
}		}

define <16 x i8> @combine_pshufb_palignr(<16 x i8> %a0, <16 x i8> %a1) {		define <16 x i8> @combine_pshufb_palignr(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: combine_pshufb_palignr:		; SSE-LABEL: combine_pshufb_palignr:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,14,15,8,9,10,11,12,13,14,15]		; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: combine_pshufb_palignr:		; AVX-LABEL: combine_pshufb_palignr:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15,8,9,10,11,12,13,14,15]		; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; AVX-NEXT: retq		; AVX-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>		%1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)		%2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
ret <16 x i8> %2		ret <16 x i8> %2
}		}

define <16 x i8> @combine_pshufb_pslldq(<16 x i8> %a0) {		define <16 x i8> @combine_pshufb_pslldq(<16 x i8> %a0) {
; SSE-LABEL: combine_pshufb_pslldq:		; SSE-LABEL: combine_pshufb_pslldq:
▲ Show 20 Lines • Show All 86 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-combining.ll

	Show First 20 Lines • Show All 2,434 Lines • ▼ Show 20 Lines
	define <4 x float> @combine_undef_input_test9(<4 x float> %a) {			define <4 x float> @combine_undef_input_test9(<4 x float> %a) {
	; SSE-LABEL: combine_undef_input_test9:			; SSE-LABEL: combine_undef_input_test9:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]			; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: combine_undef_input_test9:			; AVX-LABEL: combine_undef_input_test9:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]			; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>			%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
	%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>			%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
	ret <4 x float> %2			ret <4 x float> %2
	}			}

	define <4 x float> @combine_undef_input_test10(<4 x float> %a) {			define <4 x float> @combine_undef_input_test10(<4 x float> %a) {
	; ALL-LABEL: combine_undef_input_test10:			; ALL-LABEL: combine_undef_input_test10:
	▲ Show 20 Lines • Show All 174 Lines • ▼ Show 20 Lines
	define <4 x float> @combine_undef_input_test19(<4 x float> %a) {			define <4 x float> @combine_undef_input_test19(<4 x float> %a) {
	; SSE-LABEL: combine_undef_input_test19:			; SSE-LABEL: combine_undef_input_test19:
	; SSE: # BB#0:			; SSE: # BB#0:
	; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]			; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: combine_undef_input_test19:			; AVX-LABEL: combine_undef_input_test19:
	; AVX: # BB#0:			; AVX: # BB#0:
	; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]			; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>			%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
	%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	ret <4 x float> %2			ret <4 x float> %2
	}			}

	define <4 x float> @combine_undef_input_test20(<4 x float> %a) {			define <4 x float> @combine_undef_input_test20(<4 x float> %a) {
	; ALL-LABEL: combine_undef_input_test20:			; ALL-LABEL: combine_undef_input_test20:
	▲ Show 20 Lines • Show All 339 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86][SSE] Added support for combining target shuffles to (V)PSHUFD/VPERMILPD/VPERMILPS immediate permute
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 61211

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/2012-01-12-extract-sv.ll

test/CodeGen/X86/2012-04-26-sdglue.ll

test/CodeGen/X86/avx-intrinsics-fast-isel.ll

test/CodeGen/X86/avx-intrinsics-x86.ll

test/CodeGen/X86/avx-splat.ll

test/CodeGen/X86/avx-vbroadcast.ll

test/CodeGen/X86/merge-consecutive-loads-128.ll

test/CodeGen/X86/pshufb-mask-comments.ll

test/CodeGen/X86/sse3.ll

test/CodeGen/X86/vector-compare-results.ll

test/CodeGen/X86/vector-shuffle-128-v16.ll

test/CodeGen/X86/vector-shuffle-128-v2.ll

test/CodeGen/X86/vector-shuffle-128-v4.ll

test/CodeGen/X86/vector-shuffle-256-v16.ll

test/CodeGen/X86/vector-shuffle-256-v4.ll

test/CodeGen/X86/vector-shuffle-256-v8.ll

test/CodeGen/X86/vector-shuffle-combining-avx.ll

test/CodeGen/X86/vector-shuffle-combining-avx2.ll

test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll

test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

test/CodeGen/X86/vector-shuffle-combining.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86][SSE] Added support for combining target shuffles to (V)PSHUFD/VPERMILPD/VPERMILPS immediate permute
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 61211

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/2012-01-12-extract-sv.ll

test/CodeGen/X86/2012-04-26-sdglue.ll

test/CodeGen/X86/avx-intrinsics-fast-isel.ll

test/CodeGen/X86/avx-intrinsics-x86.ll

test/CodeGen/X86/avx-splat.ll

test/CodeGen/X86/avx-vbroadcast.ll

test/CodeGen/X86/merge-consecutive-loads-128.ll

test/CodeGen/X86/pshufb-mask-comments.ll

test/CodeGen/X86/sse3.ll

test/CodeGen/X86/vector-compare-results.ll

test/CodeGen/X86/vector-shuffle-128-v16.ll

test/CodeGen/X86/vector-shuffle-128-v2.ll

test/CodeGen/X86/vector-shuffle-128-v4.ll

test/CodeGen/X86/vector-shuffle-256-v16.ll

test/CodeGen/X86/vector-shuffle-256-v4.ll

test/CodeGen/X86/vector-shuffle-256-v8.ll

test/CodeGen/X86/vector-shuffle-combining-avx.ll

test/CodeGen/X86/vector-shuffle-combining-avx2.ll

test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll

test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

test/CodeGen/X86/vector-shuffle-combining.ll

[X86][SSE] Added support for combining target shuffles to (V)PSHUFD/VPERMILPD/VPERMILPS immediate permute
ClosedPublic