Diff 43524

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 10,353 Lines • ▼ Show 20 Lines	for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0)		if (Mask[i] >= 0)
NewMask[i] = (i / LaneSize) * LaneSize + Mask[i] % LaneSize;		NewMask[i] = (i / LaneSize) * LaneSize + Mask[i] % LaneSize;
assert(!is128BitLaneCrossingShuffleMask(VT, NewMask) &&		assert(!is128BitLaneCrossingShuffleMask(VT, NewMask) &&
"Must not introduce lane crosses at this point!");		"Must not introduce lane crosses at this point!");

return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);		return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
}		}

		/// Lower shuffles where an entire half of a 256-bit vector is UNDEF.
		/// This allows for fast cases such as subvector extraction/insertion
		/// or shuffling smaller vector types which can lower more efficiently.
		///
		/// Three strategies are attempted in order:
		///  1. upper half undef, lower half is V1's upper subvector -> extract/insert;
		///  2. lower half undef, upper half is V1's lower subvector -> extract/insert;
		///  3. the defined half only reads the lower halves of V1/V2 -> perform the
		///     shuffle at half width and insert the result into an UNDEF vector.
		///
		/// \param VT    result type; asserted to be 256 bits wide.
		/// \param Mask  shuffle mask; negative entries are treated as undef and are
		///              copied through unchanged into the half-width mask.
		/// \returns the lowered node, or an empty SDValue() when none of the
		///          strategies applies (neither half undef, an upper input half is
		///          referenced, more than two half vectors are needed, or the AVX2
		///          64-bit element bail-out triggers).
		static SDValue lowerVectorShuffleWithUndefHalf(SDLoc DL, MVT VT, SDValue V1,
		SDValue V2, ArrayRef<int> Mask,
		const X86Subtarget *Subtarget,
		SelectionDAG &DAG) {
		assert(VT.getSizeInBits() == 256 && "Expected 256-bit vector");

		// Half-width type with the same element type and half the element count.
		unsigned NumElts = VT.getVectorNumElements();
		unsigned HalfNumElts = NumElts / 2;
		MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);

		// Nothing to do here unless at least one half of the result is undef.
		bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts);
		bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts);
		if (!UndefLower && !UndefUpper)
		return SDValue();

		// Upper half is undef and lower half is whole upper subvector.
		// e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
		if (UndefUpper &&
		isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
		SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
		DAG.getIntPtrConstant(HalfNumElts, DL));
		return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
		DAG.getIntPtrConstant(0, DL));
		}

		// Lower half is undef and upper half is whole lower subvector.
		// e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
		if (UndefLower &&
		isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
		// NOTE: despite its name, 'Hi' here holds V1's *lower* subvector (extract
		// index 0); it is named for its destination, the upper half of the result.
		SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
		DAG.getIntPtrConstant(0, DL));
		return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
		DAG.getIntPtrConstant(HalfNumElts, DL));
		}

		// AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
		// Return an empty value so the caller falls through to its other lowering
		// paths for v4f64/v4i64 when only the lower half is undef.
		if (UndefLower && Subtarget->hasAVX2() &&
		(VT == MVT::v4f64 \|\| VT == MVT::v4i64))
		return SDValue();

		// If the shuffle only uses the lower halves of the input operands,
		// then extract them and perform the 'half' shuffle at half width.
		// e.g. vector_shuffle <X, X, X, X, u, u, u, u> or <X, X, u, u>
		// HalfIdx1/HalfIdx2 record which source half feeds the first/second operand
		// of the half-width shuffle; -1 means "not assigned yet".
		int HalfIdx1 = -1, HalfIdx2 = -1;
		SmallVector<int, 8> HalfMask;
		// Offset is the position of the defined half within Mask, and is also the
		// element index at which the half-width result is inserted below.
		unsigned Offset = UndefLower ? HalfNumElts : 0;
		for (unsigned i = 0; i != HalfNumElts; ++i) {
		int M = Mask[i + Offset];
		// Undef (negative) entries are carried over as-is.
		if (M < 0) {
		HalfMask.push_back(M);
		continue;
		}

		// Determine which of the 4 half vectors this element is from.
		// i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2.
		int HalfIdx = M / HalfNumElts;

		// Only shuffle using the lower halves of the inputs.
		// TODO: Investigate usefulness of shuffling with upper halves.
		if (HalfIdx != 0 && HalfIdx != 2)
		return SDValue();

		// Determine the element index into its half vector source.
		int HalfElt = M % HalfNumElts;

		// We can shuffle with up to 2 half vectors, set the new 'half'
		// shuffle mask accordingly.
		// Elements from the first recorded half index into [0, HalfNumElts).
		if (-1 == HalfIdx1 \|\| HalfIdx1 == HalfIdx) {
		HalfMask.push_back(HalfElt);
		HalfIdx1 = HalfIdx;
		continue;
		}
		// Elements from the second recorded half are offset by HalfNumElts so they
		// select from the second operand of the half-width shuffle.
		if (-1 == HalfIdx2 \|\| HalfIdx2 == HalfIdx) {
		HalfMask.push_back(HalfElt + HalfNumElts);
		HalfIdx2 = HalfIdx;
		continue;
		}

		// Too many half vectors referenced.
		return SDValue();
		}
		assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");

		// Materialise a half vector from its index: a negative index yields UNDEF,
		// indices 0/1 select V1 and 2/3 select V2, and an odd index selects the upper
		// subvector. With the filter in the loop above only 0 and 2 (or -1) reach
		// this point, so the extract index is currently always 0.
		auto GetHalfVector = [&](int HalfIdx) {
		if (HalfIdx < 0)
		return DAG.getUNDEF(HalfVT);
		SDValue V = (HalfIdx < 2 ? V1 : V2);
		HalfIdx = (HalfIdx % 2) * HalfNumElts;
		return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
		DAG.getIntPtrConstant(HalfIdx, DL));
		};

		// Shuffle at half width, then place the result in the defined half of an
		// otherwise UNDEF full-width vector.
		SDValue Half1 = GetHalfVector(HalfIdx1);
		SDValue Half2 = GetHalfVector(HalfIdx2);
		SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
		return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
		DAG.getIntPtrConstant(Offset, DL));
		}

/// \brief Test whether the specified input (0 or 1) is in-place blended by the		/// \brief Test whether the specified input (0 or 1) is in-place blended by the
/// given mask.		/// given mask.
///		///
/// This returns true if the elements from a particular input are already in the		/// This returns true if the elements from a particular input are already in the
/// slot required by the given mask and require no permutation.		/// slot required by the given mask and require no permutation.
static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) {		static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) {
assert((Input == 0 \|\| Input == 1) && "Only two inputs to shuffles.");		assert((Input == 0 \|\| Input == 1) && "Only two inputs to shuffles.");
int Size = Mask.size();		int Size = Mask.size();
▲ Show 20 Lines • Show All 553 Lines • ▼ Show 20 Lines	int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [NumElts](int M) {
return M >= NumElts;		return M >= NumElts;
});		});

if (NumV2Elements == 1 && Mask[0] >= NumElts)		if (NumV2Elements == 1 && Mask[0] >= NumElts)
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(		if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
DL, VT, V1, V2, Mask, Subtarget, DAG))		DL, VT, V1, V2, Mask, Subtarget, DAG))
return Insertion;		return Insertion;

		// Handle special cases where the lower or upper half is UNDEF.
		if (SDValue V =
		lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
		return V;

// There is a really nice hard cut-over between AVX1 and AVX2 that means we		// There is a really nice hard cut-over between AVX1 and AVX2 that means we
// can check for those subtargets here and avoid much of the subtarget		// can check for those subtargets here and avoid much of the subtarget
// querying in the per-vector-type lowering routines. With AVX1 we have		// querying in the per-vector-type lowering routines. With AVX1 we have
// essentially zero ability to manipulate a 256-bit vector with integer		// essentially zero ability to manipulate a 256-bit vector with integer
// types. Since we'll use floating point types there eventually, just		// types. Since we'll use floating point types there eventually, just
// immediately cast everything to a float and operate entirely in that domain.		// immediately cast everything to a float and operate entirely in that domain.
if (VT.isInteger() && !Subtarget->hasAVX2()) {		if (VT.isInteger() && !Subtarget->hasAVX2()) {
int ElementBits = VT.getScalarSizeInBits();		int ElementBits = VT.getScalarSizeInBits();
▲ Show 20 Lines • Show All 8,922 Lines • ▼ Show 20 Lines	else {
if (IndexVT.getScalarType() == MVT::i32)		if (IndexVT.getScalarType() == MVT::i32)
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);		Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);

// Mask		// Mask
// At this point we have promoted mask operand		// At this point we have promoted mask operand
assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");		assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);		MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
// Use the original mask here, do not modify the mask twice		// Use the original mask here, do not modify the mask twice
Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);		Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);

// The value that should be stored		// The value that should be stored
MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);		MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
Src = ExtendToType(Src, NewVT, DAG);		Src = ExtendToType(Src, NewVT, DAG);
}		}
}		}
// If the mask is "wide" at this point - truncate it to i1 vector		// If the mask is "wide" at this point - truncate it to i1 vector
MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts);		MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts);
▲ Show 20 Lines • Show All 2,726 Lines • ▼ Show 20 Lines	if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();		GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();		Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
return true;		return true;
}		}
}		}
return TargetLowering::isGAPlusOffset(N, GA, Offset);		return TargetLowering::isGAPlusOffset(N, GA, Offset);
}		}

/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
/// same as extracting the high 128-bit part of 256-bit vector and then
/// inserting the result into the low part of a new 256-bit vector
///
/// The check is purely on the mask: the vector width itself is not tested
/// here, so the 256-bit assumption must be guaranteed by the caller.
/// Returns true only if every lower-half mask entry passes
/// isUndefOrEqual against the matching upper-half index and every upper-half
/// mask entry is negative (undef).
static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();

// vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
// i walks the lower half of the mask, j the upper half in lock-step:
// mask[i] must be undef-or-j (per the helper's name), mask[j] must be < 0.
for (unsigned i = 0, j = NumElems/2; i != NumElems/2; ++i, ++j)
if (!isUndefOrEqual(SVOp->getMaskElt(i), j) \|\|
SVOp->getMaskElt(j) >= 0)
return false;

return true;
}

/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
/// same as extracting the low 128-bit part of 256-bit vector and then
/// inserting the result into the high part of a new 256-bit vector
///
/// The check is purely on the mask: the vector width itself is not tested
/// here, so the 256-bit assumption must be guaranteed by the caller.
/// Returns true only if every upper-half mask entry passes
/// isUndefOrEqual against the matching lower-half index and every lower-half
/// mask entry is negative (undef).
static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();

// vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
// i walks the upper half of the mask, j the lower half in lock-step:
// mask[i] must be undef-or-j (per the helper's name), mask[j] must be < 0.
for (unsigned i = NumElems/2, j = 0; i != NumElems; ++i, ++j)
if (!isUndefOrEqual(SVOp->getMaskElt(i), j) \|\|
SVOp->getMaskElt(j) >= 0)
return false;

return true;
}

/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.		/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
		/// FIXME: This could be expanded to support 512 bit vectors as well.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,		static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget* Subtarget) {		const X86Subtarget* Subtarget) {
SDLoc dl(N);		SDLoc dl(N);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);		ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
SDValue V1 = SVOp->getOperand(0);		SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);		SDValue V2 = SVOp->getOperand(1);
MVT VT = SVOp->getSimpleValueType(0);		MVT VT = SVOp->getSimpleValueType(0);
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines	if (V1.getOpcode() == ISD::CONCAT_VECTORS &&

// Emit a zeroed vector and insert the desired subvector on its		// Emit a zeroed vector and insert the desired subvector on its
// first half.		// first half.
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);		SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), 0, DAG, dl);		SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), 0, DAG, dl);
return DCI.CombineTo(N, InsV);		return DCI.CombineTo(N, InsV);
}		}

//===--------------------------------------------------------------------===//
// Combine some shuffles into subvector extracts and inserts:
//

// vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
if (isShuffleHigh128VectorInsertLow(SVOp)) {
SDValue V = Extract128BitVector(V1, NumElems/2, DAG, dl);
SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, 0, DAG, dl);
return DCI.CombineTo(N, InsV);
}

// vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
if (isShuffleLow128VectorInsertHigh(SVOp)) {
SDValue V = Extract128BitVector(V1, 0, DAG, dl);
SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, NumElems/2, DAG, dl);
return DCI.CombineTo(N, InsV);
}

return SDValue();		return SDValue();
}		}

/// \brief Combine an arbitrary chain of shuffles into a single instruction if		/// \brief Combine an arbitrary chain of shuffles into a single instruction if
/// possible.		/// possible.
///		///
/// This is the leaf of the recursive combine below. When we have found some		/// This is the leaf of the recursive combine below. When we have found some
/// chain of single-use x86 shuffle instructions and accumulated the combined		/// chain of single-use x86 shuffle instructions and accumulated the combined
▲ Show 20 Lines • Show All 3,961 Lines • ▼ Show 20 Lines	static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);		SDLoc DL(N);

// Let legalize expand this if it isn't a legal type yet.		// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))		if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();		return SDValue();

// If we're negating a FMUL node on a target with FMA, then we can avoid the		// If we're negating a FMUL node on a target with FMA, then we can avoid the
// use of a constant by performing (-0 - A*B) instead.		// use of a constant by performing (-0 - A*B) instead.
// FIXME: Check rounding control flags as well once it becomes available.		// FIXME: Check rounding control flags as well once it becomes available.
if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 \|\| SVT == MVT::f64) &&		if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 \|\| SVT == MVT::f64) &&
Arg->getFlags()->hasNoSignedZeros() && Subtarget->hasAnyFMA()) {		Arg->getFlags()->hasNoSignedZeros() && Subtarget->hasAnyFMA()) {
SDValue Zero = DAG.getConstantFP(0.0, DL, VT);		SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),		return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
Arg.getOperand(1), Zero);		Arg.getOperand(1), Zero);
}		}

// If we're negating a FMA node, then we can adjust the		// If we're negating a FMA node, then we can adjust the
▲ Show 20 Lines • Show All 1,853 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/avx-splat.ll

Show First 20 Lines • Show All 93 Lines • ▼ Show 20 Lines	__load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
%load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]		%load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
ret <8 x float> %load_broadcast12281250		ret <8 x float> %load_broadcast12281250
}		}

define <8 x float> @funcF(i32 %val) nounwind {		define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:		; CHECK-LABEL: funcF:
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK-NEXT: vmovd %edi, %xmm0		; CHECK-NEXT: vmovd %edi, %xmm0
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,0]		; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0		; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%ret6 = insertelement <8 x i32> undef, i32 %val, i32 6		%ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
%ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7		%ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
%tmp = bitcast <8 x i32> %ret7 to <8 x float>		%tmp = bitcast <8 x i32> %ret7 to <8 x float>
ret <8 x float> %tmp		ret <8 x float> %tmp
}		}

▲ Show 20 Lines • Show All 63 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll

	Show First 20 Lines • Show All 153 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]			; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
	; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]			; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
	; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]			; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:			; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]			; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
	; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]			; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
	; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0			; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]			; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
	; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0			; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>			%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
	ret <16 x i16> %shuffle			ret <16 x i16> %shuffle
	}			}

	define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {			define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
	; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:			; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	▲ Show 20 Lines • Show All 3,070 Lines • ▼ Show 20 Lines
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]			; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
	; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]			; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>			%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
	ret <16 x i16> %shuffle			ret <16 x i16> %shuffle
	}			}

				; Lower eight result lanes are undef; the upper eight interleave elements
				; 0-3 of %a with elements 0-3 of %b (mask indices 16-19). Both prefixes
				; expect a 128-bit vpunpcklwd whose result is inserted into the upper
				; 128 bits of the ymm result.
				define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19(<16 x i16> %a, <16 x i16> %b) {
				; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
				; AVX1: # BB#0:
				; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
				; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
				; AVX1-NEXT: retq
				;
				; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
				; AVX2: # BB#0:
				; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
				; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
				; AVX2-NEXT: retq
				%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19>
				ret <16 x i16> %shuffle
				}

	define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) {			define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) {
	; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:			; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]			; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:			; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
	Show All 18 Lines
	; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0			; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
	; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0			; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
	; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0			; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>			%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
	ret <16 x i16> %shuffle			ret <16 x i16> %shuffle
	}			}

				; Upper eight result lanes are undef; the lower eight interleave elements
				; 4-7 of %a with elements 4-7 of %b (mask indices 20-23). The expected
				; output is a single 128-bit vpunpckhwd with no insert.
				define <16 x i16> @shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
				; ALL-LABEL: shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u:
				; ALL: # BB#0:
				; ALL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
				; ALL-NEXT: retq
				%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				ret <16 x i16> %shuffle
				}

				; Upper eight result lanes are undef; the lower eight all read element 3
				; of %a. The expected output is a single 128-bit vpshufb.
				define <16 x i16> @shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
				; ALL-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
				; ALL: # BB#0:
				; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
				; ALL-NEXT: retq
				%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				ret <16 x i16> %shuffle
				}

	define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {			define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
	; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:			; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
	; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]			; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:			; AVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
	▲ Show 20 Lines • Show All 128 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll

	Show First 20 Lines • Show All 319 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:			; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]			; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
	; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2			; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
	; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1			; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
	; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0			; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]			; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
	; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0			; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>			%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
	ret <32 x i8> %shuffle			ret <32 x i8> %shuffle
	}			}

	define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {			define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
	▲ Show 20 Lines • Show All 1,671 Lines • ▼ Show 20 Lines
	; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0			; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
	; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0			; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
	; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0			; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>			%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
	ret <32 x i8> %shuffle			ret <32 x i8> %shuffle
	}			}

				; Upper sixteen result bytes are undef; the lower sixteen are eight copies
				; of byte 15 of %a followed by eight copies of byte 0 of %b (mask index
				; 32). Every expected instruction operates on xmm registers only.
				define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
				; AVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
				; AVX1: # BB#0:
				; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,12,12,13,13,14,14,15,15]
				; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
				; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
				; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
				; AVX1-NEXT: retq
				;
				; AVX2-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
				; AVX2: # BB#0:
				; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
				; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,12,12,13,13,14,14,15,15]
				; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
				; AVX2-NEXT: retq
				%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				ret <32 x i8> %shuffle
				}

				; Upper sixteen result bytes are undef; the lower sixteen all read byte 15
				; of %a. The expected output is a single 128-bit vpshufb.
				define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
				; ALL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
				; ALL: # BB#0:
				; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
				; ALL-NEXT: retq
				%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
				ret <32 x i8> %shuffle
				}

	define <32 x i8> @shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {			define <32 x i8> @shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
	; AVX1-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:			; AVX1-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
	; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]			; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:			; AVX2-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
	▲ Show 20 Lines • Show All 111 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll

	Show First 20 Lines • Show All 230 Lines • ▼ Show 20 Lines
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]			; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>			%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
	ret <4 x double> %shuffle			ret <4 x double> %shuffle
	}			}

	define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {			define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
	; AVX1-LABEL: shuffle_v4f64_0423:			; ALL-LABEL: shuffle_v4f64_0423:
	; AVX1: # BB#0:			; ALL: # BB#0:
	; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]			; ALL-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
	; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]			; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
	; AVX1-NEXT: retq			; ALL-NEXT: retq
	;
	; AVX2-LABEL: shuffle_v4f64_0423:
	; AVX2: # BB#0:
	; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
	; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
	; AVX2-NEXT: retq
	;
	; AVX512VL-LABEL: shuffle_v4f64_0423:
	; AVX512VL: # BB#0:
	; AVX512VL-NEXT: vbroadcastsd %xmm1, %ymm1
	; AVX512VL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
	; AVX512VL-NEXT: retq
	%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>			%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
	ret <4 x double> %shuffle			ret <4 x double> %shuffle
	}			}

	define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {			define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
	; ALL-LABEL: shuffle_v4f64_0462:			; ALL-LABEL: shuffle_v4f64_0462:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]			; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
	▲ Show 20 Lines • Show All 219 Lines • ▼ Show 20 Lines
	; ALL-LABEL: shuffle_v4f64_u062:			; ALL-LABEL: shuffle_v4f64_u062:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]			; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>			%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
	ret <4 x double> %shuffle			ret <4 x double> %shuffle
	}			}

				define <4 x double> @shuffle_v4f64_15uu(<4 x double> %a, <4 x double> %b) {
				; ALL-LABEL: shuffle_v4f64_15uu:
				; ALL: # BB#0:
				; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
				; ALL-NEXT: retq
				%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
				ret <4 x double> %shuffle
				}

	define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {			define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
	; ALL-LABEL: shuffle_v4f64_11uu:			; ALL-LABEL: shuffle_v4f64_11uu:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,2]			; ALL-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>			%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
	ret <4 x double> %shuffle			ret <4 x double> %shuffle
	}			}

	define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {			define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
	; AVX1-LABEL: shuffle_v4f64_22uu:			; AVX1-LABEL: shuffle_v4f64_22uu:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	▲ Show 20 Lines • Show All 242 Lines • ▼ Show 20 Lines
	; AVX512VL-NEXT: retq			; AVX512VL-NEXT: retq
	%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>			%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
	ret <4 x i64> %shuffle			ret <4 x i64> %shuffle
	}			}

	define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {			define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
	; AVX1-LABEL: shuffle_v4i64_0142:			; AVX1-LABEL: shuffle_v4i64_0142:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1			; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
	; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]			; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
	; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]			; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v4i64_0142:			; AVX2-LABEL: shuffle_v4i64_0142:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1			; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
	; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]			; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512VL-LABEL: shuffle_v4i64_0142:			; AVX512VL-LABEL: shuffle_v4i64_0142:
	; AVX512VL: # BB#0:			; AVX512VL: # BB#0:
	; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm1, %ymm1			; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm1
	; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]			; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
	; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]			; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
	; AVX512VL-NEXT: retq			; AVX512VL-NEXT: retq
	%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>			%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
	ret <4 x i64> %shuffle			ret <4 x i64> %shuffle
	}			}

	define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {			define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
	; AVX1-LABEL: shuffle_v4i64_0412:			; AVX1-LABEL: shuffle_v4i64_0412:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
	; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]			; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
	; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
	; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]			; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
	; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]			; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v4i64_0412:			; AVX2-LABEL: shuffle_v4i64_0412:
	; AVX2: # BB#0:			; AVX2: # BB#0:
				; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
	; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]			; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
	; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
	; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]			; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	;			;
	; AVX512VL-LABEL: shuffle_v4i64_0412:			; AVX512VL-LABEL: shuffle_v4i64_0412:
	; AVX512VL: # BB#0:			; AVX512VL: # BB#0:
				; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
	; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]			; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
	; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
	; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]			; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
	; AVX512VL-NEXT: retq			; AVX512VL-NEXT: retq
	%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>			%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
	ret <4 x i64> %shuffle			ret <4 x i64> %shuffle
	}			}

	define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {			define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
	; AVX1-LABEL: shuffle_v4i64_4012:			; AVX1-LABEL: shuffle_v4i64_4012:
	▲ Show 20 Lines • Show All 325 Lines • ▼ Show 20 Lines
	; AVX512VL-LABEL: shuffle_v4i64_40u2:			; AVX512VL-LABEL: shuffle_v4i64_40u2:
	; AVX512VL: # BB#0:			; AVX512VL: # BB#0:
	; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]			; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
	; AVX512VL-NEXT: retq			; AVX512VL-NEXT: retq
	%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>			%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
	ret <4 x i64> %shuffle			ret <4 x i64> %shuffle
	}			}

				define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) {
				; ALL-LABEL: shuffle_v4i64_15uu:
				; ALL: # BB#0:
				; ALL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
				; ALL-NEXT: retq
				%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
				ret <4 x i64> %shuffle
				}

	define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) {			define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) {
	; AVX1-LABEL: shuffle_v4i64_11uu:			; ALL-LABEL: shuffle_v4i64_11uu:
	; AVX1: # BB#0:			; ALL: # BB#0:
	; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,2]			; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
	; AVX1-NEXT: retq			; ALL-NEXT: retq
	;
	; AVX2-LABEL: shuffle_v4i64_11uu:
	; AVX2: # BB#0:
	; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
	; AVX2-NEXT: retq
	;
	; AVX512VL-LABEL: shuffle_v4i64_11uu:
	; AVX512VL: # BB#0:
	; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
	; AVX512VL-NEXT: retq
	%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>			%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
	ret <4 x i64> %shuffle			ret <4 x i64> %shuffle
	}			}

	define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) {			define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) {
	; AVX1-LABEL: shuffle_v4i64_22uu:			; AVX1-LABEL: shuffle_v4i64_22uu:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
	▲ Show 20 Lines • Show All 352 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll

	Show First 20 Lines • Show All 67 Lines • ▼ Show 20 Lines
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>			%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
	ret <8 x float> %shuffle			ret <8 x float> %shuffle
	}			}

	define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {			define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
	; AVX1-LABEL: shuffle_v8f32_00040000:			; AVX1-LABEL: shuffle_v8f32_00040000:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]			; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
	; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]			; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
	; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]			; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
	; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]			; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8f32_00040000:			; AVX2-LABEL: shuffle_v8f32_00040000:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]			; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
	; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>			%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
	▲ Show 20 Lines • Show All 739 Lines • ▼ Show 20 Lines
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]			; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>			%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
	ret <8 x float> %shuffle			ret <8 x float> %shuffle
	}			}

	define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {			define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
	; AVX1-LABEL: shuffle_v8f32_uuuu1111:			; ALL-LABEL: shuffle_v8f32_uuuu1111:
	; AVX1: # BB#0:			; ALL: # BB#0:
	; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]			; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; AVX1-NEXT: retq			; ALL-NEXT: retq
	;
	; AVX2-LABEL: shuffle_v8f32_uuuu1111:
	; AVX2: # BB#0:
	; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
	; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>			%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
	ret <8 x float> %shuffle			ret <8 x float> %shuffle
	}			}

	define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {			define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
	; AVX1-LABEL: shuffle_v8f32_44444444:			; AVX1-LABEL: shuffle_v8f32_44444444:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
	; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]			; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8f32_44444444:			; AVX2-LABEL: shuffle_v8f32_44444444:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1			; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
	; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>			%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
	ret <8 x float> %shuffle			ret <8 x float> %shuffle
	}			}

				define <8 x float> @shuffle_v8f32_1188uuuu(<8 x float> %a, <8 x float> %b) {
				; ALL-LABEL: shuffle_v8f32_1188uuuu:
				; ALL: # BB#0:
				; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
				; ALL-NEXT: retq
				%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
				ret <8 x float> %shuffle
				}

				define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) {
				; ALL-LABEL: shuffle_v8f32_uuuu3210:
				; ALL: # BB#0:
				; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
				; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
				; ALL-NEXT: retq
				%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0>
				ret <8 x float> %shuffle
				}

				define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) {
				; ALL-LABEL: shuffle_v8f32_uuuu1188:
				; ALL: # BB#0:
				; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
				; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
				; ALL-NEXT: retq
				%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8>
				ret <8 x float> %shuffle
				}

				define <8 x float> @shuffle_v8f32_1111uuuu(<8 x float> %a, <8 x float> %b) {
				; ALL-LABEL: shuffle_v8f32_1111uuuu:
				; ALL: # BB#0:
				; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
				; ALL-NEXT: retq
				%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
				ret <8 x float> %shuffle
				}

	define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {			define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
	; AVX1-LABEL: shuffle_v8f32_5555uuuu:			; AVX1-LABEL: shuffle_v8f32_5555uuuu:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
	; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]			; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8f32_5555uuuu:			; AVX2-LABEL: shuffle_v8f32_5555uuuu:
	▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_00040000:			; AVX1-LABEL: shuffle_v8i32_00040000:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]			; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
	; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]			; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
	; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]			; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
	; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]			; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_00040000:			; AVX2-LABEL: shuffle_v8i32_00040000:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]			; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
	; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
	▲ Show 20 Lines • Show All 979 Lines • ▼ Show 20 Lines
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

	define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_uuuu1111:			; AVX1-LABEL: shuffle_v8i32_uuuu1111:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuffle_v8i32_uuuu1111:			; AVX2-LABEL: shuffle_v8i32_uuuu1111:
	; AVX2: # BB#0:			; AVX2: # BB#0:
	; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1			; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
	; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0			; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>			%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
	ret <8 x i32> %shuffle			ret <8 x i32> %shuffle
	}			}

				define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) {
				; ALL-LABEL: shuffle_v8i32_2222uuuu:
				; ALL: # BB#0:
				; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
				; ALL-NEXT: retq
				%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
				ret <8 x i32> %shuffle
				}

				define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) {
				; ALL-LABEL: shuffle_v8i32_2A3Buuuu:
				; ALL: # BB#0:
				; ALL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
				; ALL-NEXT: retq
				%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
				ret <8 x i32> %shuffle
				}

	define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {			define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
	; AVX1-LABEL: shuffle_v8i32_44444444:			; AVX1-LABEL: shuffle_v8i32_44444444:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0			; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
	; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]			; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	▲ Show 20 Lines • Show All 284 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/vector-zext.ll

	Show First 20 Lines • Show All 1,161 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero			; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
	; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero			; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
	; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero			; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:			; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
	; AVX2: # BB#0: # %entry			; AVX2: # BB#0: # %entry
	; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero			; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
	; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero			; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	entry:			entry:
	%B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>			%B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
	%Z = bitcast <32 x i8> %B to <4 x i64>			%Z = bitcast <32 x i8> %B to <4 x i64>
	ret <4 x i64> %Z			ret <4 x i64> %Z
	}			}

	▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero			; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
	; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]			; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
	; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero			; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:			; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
	; AVX2: # BB#0: # %entry			; AVX2: # BB#0: # %entry
	; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,2,3,5,6,6,7]			; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
	; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero			; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	entry:			entry:
	%B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>			%B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
	%Z = bitcast <16 x i16> %B to <4 x i64>			%Z = bitcast <16 x i16> %B to <4 x i64>
	ret <4 x i64> %Z			ret <4 x i64> %Z
	}			}

	▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2			; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
	; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]			; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
	; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero			; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
	; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:			; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
	; AVX2: # BB#0: # %entry			; AVX2: # BB#0: # %entry
	; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero			; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
	; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero			; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	entry:			entry:
	%B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>			%B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
	%Z = bitcast <16 x i16> %B to <8 x i32>			%Z = bitcast <16 x i16> %B to <8 x i32>
	ret <8 x i32> %Z			ret <8 x i32> %Z
	}			}

	Show All 30 Lines
	; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2			; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
	; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]			; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
	; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero			; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
	; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:			; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
	; AVX2: # BB#0: # %entry			; AVX2: # BB#0: # %entry
	; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]			; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
	; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero			; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	entry:			entry:
	%B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>			%B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
	%Z = bitcast <16 x i16> %B to <8 x i32>			%Z = bitcast <16 x i16> %B to <8 x i32>
	ret <8 x i32> %Z			ret <8 x i32> %Z
	}			}

	▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[3],zero,zero,zero			; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[3],zero,zero,zero
	; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2			; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
	; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]			; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
	; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0			; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:			; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
	; AVX2: # BB#0: # %entry			; AVX2: # BB#0: # %entry
	; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,3,5,6,7,7]			; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
	; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero			; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
	; AVX2-NEXT: retq			; AVX2-NEXT: retq
	entry:			entry:
	%B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>			%B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
	%Z = bitcast <8 x i32> %B to <4 x i64>			%Z = bitcast <8 x i32> %B to <4 x i64>
	ret <4 x i64> %Z			ret <4 x i64> %Z
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

[X86][AVX] Only shuffle the lower half of vectors if the upper half is undefined
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 43524

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

llvm/trunk/test/CodeGen/X86/avx-splat.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll

llvm/trunk/test/CodeGen/X86/vector-zext.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86][AVX] Only shuffle the lower half of vectors if the upper half is undefined
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 43524

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

llvm/trunk/test/CodeGen/X86/avx-splat.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll

llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll

llvm/trunk/test/CodeGen/X86/vector-zext.ll

[X86][AVX] Only shuffle the lower half of vectors if the upper half is undefined
ClosedPublic