Diff 16369

llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Show First 20 Lines • Show All 90 Lines • ▼ Show 20 Lines
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// ISD Namespace		// ISD Namespace
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

/// isBuildVectorAllOnes - Return true if the specified node is a		/// isBuildVectorAllOnes - Return true if the specified node is a
/// BUILD_VECTOR where all of the elements are ~0 or undef.		/// BUILD_VECTOR where all of the elements are ~0 or undef.
bool ISD::isBuildVectorAllOnes(const SDNode *N) {		bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// Look through a bit convert.		// Look through a bit convert.
if (N->getOpcode() == ISD::BITCAST)		while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();		N = N->getOperand(0).getNode();

if (N->getOpcode() != ISD::BUILD_VECTOR) return false;		if (N->getOpcode() != ISD::BUILD_VECTOR) return false;

unsigned i = 0, e = N->getNumOperands();		unsigned i = 0, e = N->getNumOperands();

// Skip over all of the undef values.		// Skip over all of the undef values.
while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)		while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
Show All 31 Lines	bool ISD::isBuildVectorAllOnes(const SDNode *N) {
return true;		return true;
}		}


/// isBuildVectorAllZeros - Return true if the specified node is a		/// isBuildVectorAllZeros - Return true if the specified node is a
/// BUILD_VECTOR where all of the elements are 0 or undef.		/// BUILD_VECTOR where all of the elements are 0 or undef.
bool ISD::isBuildVectorAllZeros(const SDNode *N) {		bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Look through a bit convert.		// Look through a bit convert.
if (N->getOpcode() == ISD::BITCAST)		while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();		N = N->getOperand(0).getNode();

if (N->getOpcode() != ISD::BUILD_VECTOR) return false;		if (N->getOpcode() != ISD::BUILD_VECTOR) return false;

bool IsAllUndef = true;		bool IsAllUndef = true;
for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {		for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF)		if (N->getOperand(i).getOpcode() == ISD::UNDEF)
continue;		continue;
▲ Show 20 Lines • Show All 6,635 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,447 Lines • ▼ Show 20 Lines	static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(SDLoc DL, MVT VT,

V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);		V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);		V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);		return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
}		}

/// \brief Try to lower a vector shuffle as a byte rotation.		/// \brief Try to lower a vector shuffle as a byte rotation.
///		///
/// We have a generic PALIGNR instruction in x86 that will do an arbitrary		/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
/// byte-rotation of the concatenation of two vectors. This routine will		/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
/// try to generically lower a vector shuffle through such an instruction. It		/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
/// does not check for the availability of PALIGNR-based lowerings, only the		/// try to generically lower a vector shuffle through such a pattern. It
/// applicability of this strategy to the given mask. This matches shuffle		/// does not check for the profitability of lowering either as PALIGNR or
/// vectors that look like:		/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
		/// This matches shuffle vectors that look like:
///		///
/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]		/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
///		///
/// Essentially it concatenates V1 and V2, shifts right by some number of		/// Essentially it concatenates V1 and V2, shifts right by some number of
/// elements, and takes the low elements as the result. Note that while this is		/// elements, and takes the low elements as the result. Note that while this is
/// specified as a right shift because x86 is little-endian, it is a *left		/// specified as a right shift because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.		/// rotate* of the vector lanes.
///		///
/// Note that this only handles 128-bit vector widths currently.		/// Note that this only handles 128-bit vector widths currently.
static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,		static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
SDValue V2,		SDValue V2,
ArrayRef<int> Mask,		ArrayRef<int> Mask,
		const X86Subtarget *Subtarget,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");		assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");

// We need to detect various ways of spelling a rotation:		// We need to detect various ways of spelling a rotation:
// [11, 12, 13, 14, 15, 0, 1, 2]		// [11, 12, 13, 14, 15, 0, 1, 2]
// [-1, 12, 13, 14, -1, -1, 1, -1]		// [-1, 12, 13, 14, -1, -1, 1, -1]
// [-1, -1, -1, -1, -1, -1, 1, 2]		// [-1, -1, -1, -1, -1, -1, 1, 2]
// [ 3, 4, 5, 6, 7, 8, 9, 10]		// [ 3, 4, 5, 6, 7, 8, 9, 10]
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
// Check that we successfully analyzed the mask, and normalize the results.		// Check that we successfully analyzed the mask, and normalize the results.
assert(Rotation != 0 && "Failed to locate a viable rotation!");		assert(Rotation != 0 && "Failed to locate a viable rotation!");
assert((Lo \|\| Hi) && "Failed to find a rotated input vector!");		assert((Lo \|\| Hi) && "Failed to find a rotated input vector!");
if (!Lo)		if (!Lo)
Lo = Hi;		Lo = Hi;
else if (!Hi)		else if (!Hi)
Hi = Lo;		Hi = Lo;

// Cast the inputs to v16i8 to match PALIGNR.
Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Lo);
Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Hi);

assert(VT.getSizeInBits() == 128 &&		assert(VT.getSizeInBits() == 128 &&
"Rotate-based lowering only supports 128-bit lowering!");		"Rotate-based lowering only supports 128-bit lowering!");
assert(Mask.size() <= 16 &&		assert(Mask.size() <= 16 &&
"Can shuffle at most 16 bytes in a 128-bit vector!");		"Can shuffle at most 16 bytes in a 128-bit vector!");

// The actual rotate instruction rotates bytes, so we need to scale the		// The actual rotate instruction rotates bytes, so we need to scale the
// rotation based on how many bytes are in the vector.		// rotation based on how many bytes are in the vector.
int Scale = 16 / Mask.size();		int Scale = 16 / Mask.size();

		// SSSE3 targets can use the palignr instruction
		if (Subtarget->hasSSSE3()) {
		// Cast the inputs to v16i8 to match PALIGNR.
		Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Lo);
		Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Hi);

return DAG.getNode(ISD::BITCAST, DL, VT,		return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::PALIGNR, DL, MVT::v16i8, Hi, Lo,		DAG.getNode(X86ISD::PALIGNR, DL, MVT::v16i8, Hi, Lo,
DAG.getConstant(Rotation * Scale, MVT::i8)));		DAG.getConstant(Rotation * Scale, MVT::i8)));
}		}

		// Default SSE2 implementation
		int LoByteShift = 16 - Rotation * Scale;
		int HiByteShift = Rotation * Scale;

		// Cast the inputs to v2i64 to match PSLLDQ/PSRLDQ.
		Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Lo);
		Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Hi);

		SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo,
		DAG.getConstant(8 * LoByteShift, MVT::i8));
		SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi,
		DAG.getConstant(8 * HiByteShift, MVT::i8));
		return DAG.getNode(ISD::BITCAST, DL, VT,
		DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
		}

/// \brief Compute whether each element of a shuffle is zeroable.		/// \brief Compute whether each element of a shuffle is zeroable.
///		///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.		/// A "zeroable" vector shuffle element is one which can be lowered to zero.
/// Either it is an undef element in the shuffle mask, the element of the input		/// Either it is an undef element in the shuffle mask, the element of the input
/// referenced is undef, or the element of the input referenced is known to be		/// referenced is undef, or the element of the input referenced is known to be
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle		/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining		/// as many lanes with this technique as possible to simplify the remaining
/// shuffle.		/// shuffle.
Show All 23 Lines	for (int i = 0, Size = Mask.size(); i < Size; ++i) {
// worth asserting on (it isn't invalid, just unexpected).		// worth asserting on (it isn't invalid, just unexpected).
if (Input.getOpcode() == ISD::UNDEF \|\| X86::isZeroNode(Input))		if (Input.getOpcode() == ISD::UNDEF \|\| X86::isZeroNode(Input))
Zeroable[i] = true;		Zeroable[i] = true;
}		}

return Zeroable;		return Zeroable;
}		}

		/// \brief Try to lower a vector shuffle as a byte shift (shifts in zeros).
		///
		/// Attempts to match a shuffle mask against the PSRLDQ and PSLLDQ SSE2
		/// byte-shift instructions. The mask must consist of a shifted sequential
		/// shuffle from one of the input vectors and zeroable elements for the
		/// remaining 'shifted in' elements.
		///
		/// Every whole-element shift amount from 1 to Mask.size() - 1 is tried in
		/// increasing order; for each amount the right-shift (PSRLDQ) form is
		/// tested before the left-shift (PSLLDQ) form, and the first match wins.
		/// Undef (negative) mask entries match any position. If both inputs match
		/// (which can only happen when every kept element is undef), V1 is used.
		///
		/// \returns the shifted value bitcast back to \p VT, or an empty SDValue if
		/// no byte shift implements \p Mask.
		///
		/// Note that this only handles 128-bit vector widths currently.
		static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
		                                             SDValue V2, ArrayRef<int> Mask,
		                                             SelectionDAG &DAG) {
		  assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
		  // The byte scaling and the v2i64 bitcasts below are only meaningful for
		  // 128-bit vectors, so check that up front, mirroring the byte-rotate
		  // lowering.
		  assert(VT.getSizeInBits() == 128 &&
		         "Byte-shift lowering only supports 128-bit lowering!");
		  assert(Mask.size() <= 16 &&
		         "Can shuffle at most 16 bytes in a 128-bit vector!");

		  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);

		  int Size = Mask.size();
		  // Number of bytes covered by one shuffle element.
		  int Scale = 16 / Size;

		  // Returns true if every defined (non-negative) mask element in
		  // [StartIndex, EndIndex) satisfies Mask[i] == i + Base + MaskOffset.
		  // MaskOffset selects the input: 0 for V1, Size for V2.
		  auto isSequential = [](int Base, int StartIndex, int EndIndex,
		                         int MaskOffset, ArrayRef<int> Mask) {
		    for (int i = StartIndex; i < EndIndex; i++) {
		      if (Mask[i] < 0)
		        continue;
		      if (i + Base != Mask[i] - MaskOffset)
		        return false;
		    }
		    return true;
		  };

		  for (int Shift = 1; Shift < Size; Shift++) {
		    int ByteShift = Shift * Scale;

		    // PSRLDQ : (little-endian) right byte shift
		    // [ 5, 6, 7, zz, zz, zz, zz, zz]
		    // [ -1, 5, 6, 7, zz, zz, zz, zz]
		    // [ 1, 2, -1, -1, -1, -1, zz, zz]
		    // The top 'Shift' elements are the ones shifted in, so all of them
		    // must be zeroable.
		    bool ZeroableRight = true;
		    for (int i = Size - Shift; i < Size; i++) {
		      ZeroableRight &= Zeroable[i];
		    }

		    if (ZeroableRight) {
		      bool ValidShiftRight1 = isSequential(Shift, 0, Size - Shift, 0, Mask);
		      bool ValidShiftRight2 =
		          isSequential(Shift, 0, Size - Shift, Size, Mask);

		      if (ValidShiftRight1 || ValidShiftRight2) {
		        // Cast the inputs to v2i64 to match PSRLDQ.
		        SDValue &TargetV = ValidShiftRight1 ? V1 : V2;
		        SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, TargetV);
		        // The shift operand is ByteShift * 8, the same scaling the
		        // byte-rotate lowering uses (presumably a bit count).
		        SDValue Shifted = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, V,
		                                      DAG.getConstant(ByteShift * 8, MVT::i8));
		        return DAG.getNode(ISD::BITCAST, DL, VT, Shifted);
		      }
		    }

		    // PSLLDQ : (little-endian) left byte shift
		    // [ zz, 0, 1, 2, 3, 4, 5, 6]
		    // [ zz, zz, -1, -1, 2, 3, 4, -1]
		    // [ zz, zz, zz, zz, zz, zz, -1, 1]
		    // The bottom 'Shift' elements are the ones shifted in, so all of them
		    // must be zeroable.
		    bool ZeroableLeft = true;
		    for (int i = 0; i < Shift; i++) {
		      ZeroableLeft &= Zeroable[i];
		    }

		    if (ZeroableLeft) {
		      bool ValidShiftLeft1 = isSequential(-Shift, Shift, Size, 0, Mask);
		      bool ValidShiftLeft2 = isSequential(-Shift, Shift, Size, Size, Mask);

		      if (ValidShiftLeft1 || ValidShiftLeft2) {
		        // Cast the inputs to v2i64 to match PSLLDQ.
		        SDValue &TargetV = ValidShiftLeft1 ? V1 : V2;
		        SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, TargetV);
		        SDValue Shifted = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, V,
		                                      DAG.getConstant(ByteShift * 8, MVT::i8));
		        return DAG.getNode(ISD::BITCAST, DL, VT, Shifted);
		      }
		    }
		  }

		  // No shift amount in either direction implements this mask.
		  return SDValue();
		}

/// \brief Lower a vector shuffle as a zero or any extension.		/// \brief Lower a vector shuffle as a zero or any extension.
///		///
/// Given a specific number of elements, element bit width, and extension		/// Given a specific number of elements, element bit width, and extension
/// stride, produce either a zero or any extension based on the available		/// stride, produce either a zero or any extension based on the available
/// features of the subtarget.		/// features of the subtarget.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(		static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
SDLoc DL, MVT VT, int NumElements, int Scale, bool AnyExt, SDValue InputV,		SDLoc DL, MVT VT, int NumElements, int Scale, bool AnyExt, SDValue InputV,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {		const X86Subtarget *Subtarget, SelectionDAG &DAG) {
▲ Show 20 Lines • Show All 487 Lines • ▼ Show 20 Lines	static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (isShuffleEquivalent(Mask, 1, 3))		if (isShuffleEquivalent(Mask, 1, 3))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);		return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);

if (Subtarget->hasSSE41())		if (Subtarget->hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,		if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
Subtarget, DAG))		Subtarget, DAG))
return Blend;		return Blend;

// Try to use rotation instructions if available.		// Try to use byte shift instructions.
		if (SDValue Shift = lowerVectorShuffleAsByteShift(
		DL, MVT::v2i64, V1, V2, Mask, DAG))
		return Shift;

		// Try to use byte rotation instructions.
		// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget->hasSSSE3())		if (Subtarget->hasSSSE3())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(		if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v2i64, V1, V2, Mask, DAG))		DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
return Rotate;		return Rotate;

// We implement this with SHUFPD which is pretty lame because it will likely		// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.		// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't		// However, all the alternatives are still more cycles and newer chips don't
// have this problem. It would be really nice if x86 had better shuffles here.		// have this problem. It would be really nice if x86 had better shuffles here.
V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V1);		V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V1);
V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V2);		V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V2);
▲ Show 20 Lines • Show All 256 Lines • ▼ Show 20 Lines	static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (isShuffleEquivalent(Mask, 2, 6, 3, 7))		if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);		return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);

if (Subtarget->hasSSE41())		if (Subtarget->hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,		if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
Subtarget, DAG))		Subtarget, DAG))
return Blend;		return Blend;

// Try to use rotation instructions if available.		// Try to use byte shift instructions.
		if (SDValue Shift = lowerVectorShuffleAsByteShift(
		DL, MVT::v4i32, V1, V2, Mask, DAG))
		return Shift;

		// Try to use byte rotation instructions.
		// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget->hasSSSE3())		if (Subtarget->hasSSSE3())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(		if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v4i32, V1, V2, Mask, DAG))		DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;		return Rotate;

// We implement this with SHUFPS because it can blend from two vectors.		// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build		// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
// up the inputs, bypassing domain shift penalties that we would encur if we		// up the inputs, bypassing domain shift penalties that we would encur if we
// directly used PSHUFD on Nehalem and older. For newer chips, this isn't		// directly used PSHUFD on Nehalem and older. For newer chips, this isn't
// relevant.		// relevant.
return DAG.getNode(ISD::BITCAST, DL, MVT::v4i32,		return DAG.getNode(ISD::BITCAST, DL, MVT::v4i32,
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v8i16, DL, V,
return Broadcast;		return Broadcast;

// Use dedicated unpack instructions for masks that match their pattern.		// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 0, 1, 1, 2, 2, 3, 3))		if (isShuffleEquivalent(Mask, 0, 0, 1, 1, 2, 2, 3, 3))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V, V);		return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V, V);
if (isShuffleEquivalent(Mask, 4, 4, 5, 5, 6, 6, 7, 7))		if (isShuffleEquivalent(Mask, 4, 4, 5, 5, 6, 6, 7, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V, V);		return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V, V);

// Try to use rotation instructions if available.		// Try to use byte shift instructions.
if (Subtarget->hasSSSE3())		if (SDValue Shift = lowerVectorShuffleAsByteShift(
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i16, V, V, Mask, DAG))		DL, MVT::v8i16, V, V, Mask, DAG))
		return Shift;

		// Try to use byte rotation instructions.
		if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
		DL, MVT::v8i16, V, V, Mask, Subtarget, DAG))
return Rotate;		return Rotate;

// Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all		// Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
// such inputs we can swap two of the dwords across the half mark and end up		// such inputs we can swap two of the dwords across the half mark and end up
// with <=2 inputs to each half in each half. Once there, we can fall through		// with <=2 inputs to each half in each half. Once there, we can fall through
// to the generic code below. For example:		// to the generic code below. For example:
//		//
// Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]		// Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
// Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]		// Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]
▲ Show 20 Lines • Show All 603 Lines • ▼ Show 20 Lines	static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (isShuffleEquivalent(Mask, 4, 12, 5, 13, 6, 14, 7, 15))		if (isShuffleEquivalent(Mask, 4, 12, 5, 13, 6, 14, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);		return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);

if (Subtarget->hasSSE41())		if (Subtarget->hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,		if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
Subtarget, DAG))		Subtarget, DAG))
return Blend;		return Blend;

// Try to use rotation instructions if available.		// Try to use byte shift instructions.
if (Subtarget->hasSSSE3())		if (SDValue Shift = lowerVectorShuffleAsByteShift(
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i16, V1, V2, Mask, DAG))		DL, MVT::v8i16, V1, V2, Mask, DAG))
		return Shift;

		// Try to use byte rotation instructions.
		if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
		DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
return Rotate;		return Rotate;

if (NumV1Inputs + NumV2Inputs <= 4)		if (NumV1Inputs + NumV2Inputs <= 4)
return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG);		return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG);

// Check whether an interleaving lowering is likely to be more efficient.		// Check whether an interleaving lowering is likely to be more efficient.
// This isn't perfect but it is a strong heuristic that tends to work well on		// This isn't perfect but it is a strong heuristic that tends to work well on
// the kinds of shuffles that show up in practice.		// the kinds of shuffles that show up in practice.
//		//
▲ Show 20 Lines • Show All 114 Lines • ▼ Show 20 Lines	static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
SDLoc DL(Op);		SDLoc DL(Op);
assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!");		assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!");
assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");		assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");		assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);		ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> OrigMask = SVOp->getMask();		ArrayRef<int> OrigMask = SVOp->getMask();
assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");		assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");

// Try to use rotation instructions if available.		// Try to use byte shift instructions.
if (Subtarget->hasSSSE3())		if (SDValue Shift = lowerVectorShuffleAsByteShift(
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v16i8, V1, V2, OrigMask, DAG))		DL, MVT::v16i8, V1, V2, OrigMask, DAG))
		return Shift;

		// Try to use byte rotation instructions.
		if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
		DL, MVT::v16i8, V1, V2, OrigMask, Subtarget, DAG))
return Rotate;		return Rotate;

// Try to use a zext lowering.		// Try to use a zext lowering.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(		if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v16i8, V1, V2, OrigMask, Subtarget, DAG))		DL, MVT::v16i8, V1, V2, OrigMask, Subtarget, DAG))
return ZExt;		return ZExt;

int MaskStorage[16] = {		int MaskStorage[16] = {
OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],		OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],
▲ Show 20 Lines • Show All 16,832 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/palignr.ll

	Show First 20 Lines • Show All 80 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: test6:			; CHECK-LABEL: test6:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]			; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
	; CHECK-NEXT: movdqa %xmm1, %xmm0			; CHECK-NEXT: movdqa %xmm1, %xmm0
	; CHECK-NEXT: retl			; CHECK-NEXT: retl
	;			;
	; CHECK-YONAH-LABEL: test6:			; CHECK-YONAH-LABEL: test6:
	; CHECK-YONAH: # BB#0:			; CHECK-YONAH: # BB#0:
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]			; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
	; CHECK-YONAH-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]			; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]			; CHECK-YONAH-NEXT: por %xmm1, %xmm0
	; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,0,1,2,4,5,6,7]
	; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,0,2,1,4,5,6,7]
	; CHECK-YONAH-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
	; CHECK-YONAH-NEXT: retl			; CHECK-YONAH-NEXT: retl
	%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >			%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
	ret <8 x i16> %C			ret <8 x i16> %C
	}			}

	define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {			define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
	; CHECK-LABEL: test7:			; CHECK-LABEL: test7:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]			; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
	; CHECK-NEXT: movdqa %xmm1, %xmm0			; CHECK-NEXT: movdqa %xmm1, %xmm0
	; CHECK-NEXT: retl			; CHECK-NEXT: retl
	;			;
	; CHECK-YONAH-LABEL: test7:			; CHECK-YONAH-LABEL: test7:
	; CHECK-YONAH: # BB#0:			; CHECK-YONAH: # BB#0:
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]			; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
	; CHECK-YONAH-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]			; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]			; CHECK-YONAH-NEXT: por %xmm1, %xmm0
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,1,4,5,6,7]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
	; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,2,3,0,4,5,6,7]
	; CHECK-YONAH-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
	; CHECK-YONAH-NEXT: retl			; CHECK-YONAH-NEXT: retl
	%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >			%C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
	ret <8 x i16> %C			ret <8 x i16> %C
	}			}

	define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {			define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
	; CHECK-LABEL: test8:			; CHECK-LABEL: test8:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]			; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
	; CHECK-NEXT: movdqa %xmm1, %xmm0			; CHECK-NEXT: movdqa %xmm1, %xmm0
	; CHECK-NEXT: retl			; CHECK-NEXT: retl
	;			;
	; CHECK-YONAH-LABEL: test8:			; CHECK-YONAH-LABEL: test8:
	; CHECK-YONAH: # BB#0:			; CHECK-YONAH: # BB#0:
	; CHECK-YONAH-NEXT: pxor %xmm3, %xmm3			; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
	; CHECK-YONAH-NEXT: movdqa %xmm0, %xmm2			; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
	; CHECK-YONAH-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]			; CHECK-YONAH-NEXT: por %xmm1, %xmm0
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,3,3,4,5,6,7]
	; CHECK-YONAH-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm4 = xmm0[3,1,2,0]
	; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,7,6,7]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,1,2,3]
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[1,2,3,0,4,5,6,7]
	; CHECK-YONAH-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,3,4,5,6,7]
	; CHECK-YONAH-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
	; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,1]
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,2,2,4,5,6,7]
	; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,6,7,4]
	; CHECK-YONAH-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
	; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
	; CHECK-YONAH-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
	; CHECK-YONAH-NEXT: packuswb %xmm0, %xmm2
	; CHECK-YONAH-NEXT: movdqa %xmm2, %xmm0
	; CHECK-YONAH-NEXT: retl			; CHECK-YONAH-NEXT: retl
	%C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >			%C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
	ret <16 x i8> %C			ret <16 x i8> %C
	}			}

	; Check that we don't do unary (circular on single operand) palignr incorrectly.			; Check that we don't do unary (circular on single operand) palignr incorrectly.
	; (It is possible, but before this testcase was committed, it was being done			; (It is possible, but before this testcase was committed, it was being done
	; incorrectly. In particular, one of the operands of the palignr node			; incorrectly. In particular, one of the operands of the palignr node
	; was an UNDEF.)			; was an UNDEF.)
	define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {			define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {
	; CHECK-LABEL: test9:			; CHECK-LABEL: test9:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]			; CHECK-NEXT: palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
	; CHECK-NEXT: movdqa %xmm1, %xmm0			; CHECK-NEXT: movdqa %xmm1, %xmm0
	; CHECK-NEXT: retl			; CHECK-NEXT: retl
	;			;
	; CHECK-YONAH-LABEL: test9:			; CHECK-YONAH-LABEL: test9:
	; CHECK-YONAH: # BB#0:			; CHECK-YONAH: # BB#0:
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,1,3]			; CHECK-YONAH-NEXT: movdqa %xmm1, %xmm0
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]			; CHECK-YONAH-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
	; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]			; CHECK-YONAH-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
	; CHECK-YONAH-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]			; CHECK-YONAH-NEXT: por %xmm0, %xmm1
	; CHECK-YONAH-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]			; CHECK-YONAH-NEXT: movdqa %xmm1, %xmm0
	; CHECK-YONAH-NEXT: retl			; CHECK-YONAH-NEXT: retl
	%C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >			%C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >
	ret <8 x i16> %C			ret <8 x i16> %C
	}			}

llvm/trunk/test/CodeGen/X86/sse3.ll

	; These are tests for SSE3 codegen.			; These are tests for SSE3 codegen.

	; RUN: llc < %s -march=x86-64 -mcpu=nocona -mtriple=i686-apple-darwin9 -O3 \| FileCheck %s --check-prefix=X64			; RUN: llc < %s -march=x86-64 -mcpu=nocona -mtriple=i686-apple-darwin9 -O3 \| FileCheck %s --check-prefix=X64

	; Test for v8xi16 lowering where we extract the first element of the vector and			; Test for v8xi16 lowering where we extract the first element of the vector and
	; place it in the second element of the result.			; place it in the second element of the result.

	define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {			define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
	; X64-LABEL: t0:			; X64-LABEL: t0:
	; X64: ## BB#0: ## %entry			; X64: ## BB#0: ## %entry
	; X64-NEXT: pxor %xmm0, %xmm0			; X64-NEXT: movl $1, %eax
				; X64-NEXT: movd %eax, %xmm0
	; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]			; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
	; X64-NEXT: movdqa %xmm0, (%rdi)			; X64-NEXT: movdqa %xmm0, (%rdi)
	; X64-NEXT: retq			; X64-NEXT: retq
	entry:			entry:
	%tmp3 = load <8 x i16>* %old			%tmp3 = load <8 x i16>* %old
	%tmp6 = shufflevector <8 x i16> %tmp3,			%tmp6 = shufflevector <8 x i16> %tmp3,
	<8 x i16> < i16 0, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,			<8 x i16> < i16 1, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
	<8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >			<8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
	store <8 x i16> %tmp6, <8 x i16>* %dest			store <8 x i16> %tmp6, <8 x i16>* %dest
	ret void			ret void

	}			}

	define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {			define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
	; X64-LABEL: t1:			; X64-LABEL: t1:
	; X64: ## BB#0:			; X64: ## BB#0:
	; X64-NEXT: movdqa (%rdi), %xmm0			; X64-NEXT: movdqa (%rdi), %xmm0
	; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]			; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
	; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]			; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
	▲ Show 20 Lines • Show All 267 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/vec_insert-5.ll

Show First 20 Lines • Show All 57 Lines • ▼ Show 20 Lines	; CHECK-NEXT: retl
%tmp1 = load <4 x float>* %P		%tmp1 = load <4 x float>* %P
%tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >		%tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
ret <4 x float> %tmp2		ret <4 x float> %tmp2
}		}

define <16 x i8> @t5(<16 x i8> %x) nounwind {		define <16 x i8> @t5(<16 x i8> %x) nounwind {
; CHECK-LABEL: t5:		; CHECK-LABEL: t5:
; CHECK: # BB#0:		; CHECK: # BB#0:
; CHECK-NEXT: pxor %xmm1, %xmm1		; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; CHECK-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[u,u,u,u,u,u,u,u,u,u,u,u,u,u,1]
; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,u,u,u,u,u,u,u,u,u,u,u,u,u,u],zero
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: retl		; CHECK-NEXT: retl
%s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>		%s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
ret <16 x i8> %s		ret <16 x i8> %s
}		}

define <16 x i8> @t6(<16 x i8> %x) nounwind {		define <16 x i8> @t6(<16 x i8> %x) nounwind {
; CHECK-LABEL: t6:		; CHECK-LABEL: t6:
; CHECK: # BB#0:		; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]		; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; CHECK-NEXT: retl		; CHECK-NEXT: retl
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>		%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %s		ret <16 x i8> %s
}		}

define <16 x i8> @t7(<16 x i8> %x) nounwind {		define <16 x i8> @t7(<16 x i8> %x) nounwind {
; CHECK-LABEL: t7:		; CHECK-LABEL: t7:
; CHECK: # BB#0:		; CHECK: # BB#0:
; CHECK-NEXT: palignr {{.*#+}} xmm0 = xmm0[3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2]		; CHECK-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; CHECK-NEXT: retl		; CHECK-NEXT: retl
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>		%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
ret <16 x i8> %s		ret <16 x i8> %s
}		}

llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll

Show First 20 Lines • Show All 569 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0		%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>		%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i8> %shuffle		ret <16 x i8> %shuffle
}		}

define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {		define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:		; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: movzbl %dil, %eax		; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:		; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0		; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm1		; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,u,u,3,u,u,6,7,8,9,10,11,12,13,14],zero
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[u,u],zero,xmm0[u,u],zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:		; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0		; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1		; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,u,u,3,u,u,6,7,8,9,10,11,12,13,14],zero
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[u,u],zero,xmm0[u,u],zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:		; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0		; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1		; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,u,u,3,u,u,6,7,8,9,10,11,12,13,14],zero
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,xmm0[u,u],zero,xmm0[u,u],zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0		%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>		%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
ret <16 x i8> %shuffle		ret <16 x i8> %shuffle
}		}

define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {		define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:		; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: movzbl %dil, %eax		; SSE2-NEXT: movzbl %dil, %eax
; SSE2-NEXT: movd %eax, %xmm0		; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:		; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0		; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12]		; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; SSSE3-NEXT: pxor %xmm1, %xmm1		; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]		; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero		; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: por %xmm1, %xmm0		; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:		; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: movd %edi, %xmm0		; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12]		; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; SSE41-NEXT: pxor %xmm1, %xmm1		; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]		; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero		; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: por %xmm1, %xmm0		; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:		; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vmovd %edi, %xmm0		; AVX-NEXT: vmovd %edi, %xmm0
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12]		; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1		; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]		; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero		; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0		; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 3		%a = insertelement <16 x i8> undef, i8 %i, i32 3
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>		%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %shuffle		ret <16 x i8> %shuffle
}		}

		define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu(<16 x i8> %a) {
		; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
		; SSE2: # BB#0:
		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
		; SSE2-NEXT: retq
		;
		; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
		; SSSE3: # BB#0:
		; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
		; SSSE3-NEXT: retq
		;
		; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
		; SSE41: # BB#0:
		; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
		; SSE41-NEXT: retq
		;
		; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
		; AVX: # BB#0:
		; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
		; AVX-NEXT: retq
		%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 09, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 undef, i32 18, i32 undef>
		ret <16 x i8> %shuffle
		}

		define <16 x i8> @shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
		; SSE2-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
		; SSE2: # BB#0:
		; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
		; SSE2-NEXT: retq
		;
		; SSSE3-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
		; SSSE3: # BB#0:
		; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
		; SSSE3-NEXT: retq
		;
		; SSE41-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
		; SSE41: # BB#0:
		; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
		; SSE41-NEXT: retq
		;
		; AVX-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
		; AVX: # BB#0:
		; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
		; AVX-NEXT: retq
		%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 28, i32 undef, i32 30, i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 09, i32 0, i32 0, i32 0, i32 0, i32 0>
		ret <16 x i8> %shuffle
		}

define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {		define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:		; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm2, %xmm2		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: movdqa %xmm0, %xmm3		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[3,1,2,0]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,0,1,2,4,5,6,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: packuswb %xmm3, %xmm0
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:		; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:		; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:		; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>		%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <16 x i8> %shuffle		ret <16 x i8> %shuffle
}		}

define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {		define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:		; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm1, %xmm1		; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,0,1,2,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,1,2,0]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,0,1,2,4,5,6,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:		; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:		; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:		; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>		%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <16 x i8> %shuffle		ret <16 x i8> %shuffle
}		}

define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) {		define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:		; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm2, %xmm2		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; SSE2-NEXT: movdqa %xmm1, %xmm3		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[3,1,2,0]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[2,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[0,2,3,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm4[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,4,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,2,3,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,6,7,7]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,4]
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT: packuswb %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:		; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:		; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:		; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>		%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
ret <16 x i8> %shuffle		ret <16 x i8> %shuffle
}		}

define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) {		define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:		; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm2, %xmm2		; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; SSE2-NEXT: movdqa %xmm0, %xmm3		; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[3,1,2,0]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[2,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[0,2,3,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm4[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,4]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: packuswb %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:		; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]		; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; SSSE3-NEXT: movdqa %xmm1, %xmm0		; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
Show All 9 Lines
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>		%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
ret <16 x i8> %shuffle		ret <16 x i8> %shuffle
}		}

define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) {		define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:		; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm1, %xmm1		; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,3,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[3,1,2,0]
; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,0]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,3,1,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: packuswb %xmm3, %xmm0
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:		; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:		; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:		; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0>		%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0>
ret <16 x i8> %shuffle		ret <16 x i8> %shuffle
}		}

define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) {		define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:		; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm2, %xmm2		; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: movdqa %xmm1, %xmm3		; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[3,1,2,0]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,0,1,2,4,5,6,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,1,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,4,5,6]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[2,1,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: packuswb %xmm3, %xmm0
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:		; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]		; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSSE3-NEXT: movdqa %xmm1, %xmm0		; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
▲ Show 20 Lines • Show All 269 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll

Show First 20 Lines • Show All 1,461 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 3		%a = insertelement <8 x i16> undef, i16 %i, i32 3
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>		%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_def01234:		; SSE2-LABEL: shuffle_v8i16_def01234:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,1,2,3,4,5,6,7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,3,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_def01234:		; SSSE3-LABEL: shuffle_v8i16_def01234:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v8i16_def01234:		; SSE41-LABEL: shuffle_v8i16_def01234:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_def01234:		; AVX-LABEL: shuffle_v8i16_def01234:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_ueuu123u:		; SSE2-LABEL: shuffle_v8i16_ueuu123u:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,5,7]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_ueuu123u:		; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v8i16_ueuu123u:		; SSE41-LABEL: shuffle_v8i16_ueuu123u:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_ueuu123u:		; AVX-LABEL: shuffle_v8i16_ueuu123u:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_56701234:		; SSE2-LABEL: shuffle_v8i16_56701234:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]		; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_56701234:		; SSSE3-LABEL: shuffle_v8i16_56701234:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v8i16_56701234:		; SSE41-LABEL: shuffle_v8i16_56701234:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_56701234:		; AVX-LABEL: shuffle_v8i16_56701234:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_u6uu123u:		; SSE2-LABEL: shuffle_v8i16_u6uu123u:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,0,1]		; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_u6uu123u:		; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v8i16_u6uu123u:		; SSE41-LABEL: shuffle_v8i16_u6uu123u:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_u6uu123u:		; AVX-LABEL: shuffle_v8i16_u6uu123u:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_uuuu123u:		; SSE2-LABEL: shuffle_v8i16_uuuu123u:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_uuuu123u:		; SSSE3-LABEL: shuffle_v8i16_uuuu123u:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]		; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v8i16_uuuu123u:		; SSE41-LABEL: shuffle_v8i16_uuuu123u:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]		; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_uuuu123u:		; AVX-LABEL: shuffle_v8i16_uuuu123u:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]		; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_bcdef012:		; SSE2-LABEL: shuffle_v8i16_bcdef012:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[3,0,1,2,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_bcdef012:		; SSSE3-LABEL: shuffle_v8i16_bcdef012:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v8i16_bcdef012:		; SSE41-LABEL: shuffle_v8i16_bcdef012:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_bcdef012:		; AVX-LABEL: shuffle_v8i16_bcdef012:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:		; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:		; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:		; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_ucdeuu1u:		; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_34567012:		; SSE2-LABEL: shuffle_v8i16_34567012:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]		; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_34567012:		; SSSE3-LABEL: shuffle_v8i16_34567012:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v8i16_34567012:		; SSE41-LABEL: shuffle_v8i16_34567012:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_34567012:		; AVX-LABEL: shuffle_v8i16_34567012:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_u456uu1u:		; SSE2-LABEL: shuffle_v8i16_u456uu1u:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,3]		; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]		; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]		; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_u456uu1u:		; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]		; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v8i16_u456uu1u:		; SSE41-LABEL: shuffle_v8i16_u456uu1u:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]		; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_u456uu1u:		; AVX-LABEL: shuffle_v8i16_u456uu1u:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]		; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_u456uuuu:		; SSE2-LABEL: shuffle_v8i16_u456uuuu:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]		; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_u456uuuu:		; SSSE3-LABEL: shuffle_v8i16_u456uuuu:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]		; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
; SSE41-LABEL: shuffle_v8i16_u456uuuu:		; SSE41-LABEL: shuffle_v8i16_u456uuuu:
; SSE41: # BB#0:		; SSE41: # BB#0:
; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]		; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq		; SSE41-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_u456uuuu:		; AVX-LABEL: shuffle_v8i16_u456uuuu:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]		; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_3456789a:		; SSE2-LABEL: shuffle_v8i16_3456789a:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]		; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]		; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,0,1,2,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_3456789a:		; SSSE3-LABEL: shuffle_v8i16_3456789a:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]		; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; SSSE3-NEXT: movdqa %xmm1, %xmm0		; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
Show All 9 Lines
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_u456uu9u:		; SSE2-LABEL: shuffle_v8i16_u456uu9u:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]		; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]		; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_u456uu9u:		; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]		; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; SSSE3-NEXT: movdqa %xmm1, %xmm0		; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
Show All 9 Lines
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_56789abc:		; SSE2-LABEL: shuffle_v8i16_56789abc:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]		; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]		; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,3,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,2,3,0,4,5,6,7]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_56789abc:		; SSSE3-LABEL: shuffle_v8i16_56789abc:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]		; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: movdqa %xmm1, %xmm0		; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
Show All 9 Lines
; AVX-NEXT: retq		; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>		%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_u6uu9abu:		; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
; SSE2: # BB#0:		; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]		; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]		; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]		; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,5,7]
; SSE2-NEXT: retq		; SSE2-NEXT: retq
;		;
; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:		; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
; SSSE3: # BB#0:		; SSSE3: # BB#0:
; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]		; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: movdqa %xmm1, %xmm0		; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq		; SSSE3-NEXT: retq
;		;
▲ Show 20 Lines • Show All 117 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86][SSE] pslldq/psrldq byte shifts/rotation for SSE2
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 16369

llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

llvm/trunk/test/CodeGen/X86/palignr.ll

llvm/trunk/test/CodeGen/X86/sse3.ll

llvm/trunk/test/CodeGen/X86/vec_insert-5.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86][SSE] pslldq/psrldq byte shifts/rotation for SSE2
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 16369

llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

llvm/trunk/test/CodeGen/X86/palignr.ll

llvm/trunk/test/CodeGen/X86/sse3.ll

llvm/trunk/test/CodeGen/X86/vec_insert-5.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll

[X86][SSE] pslldq/psrldq byte shifts/rotation for SSE2
ClosedPublic