Diff 83832

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,084 Lines • ▼ Show 20 Lines	if ((V.getNumOperands() % Size) == 0) {
Zeroable[i] = AllZeroable;		Zeroable[i] = AllZeroable;
continue;		continue;
}		}
}		}

return Zeroable;		return Zeroable;
}		}

		/// Check whether the non-zeroable elements of a shuffle mask form a single
		/// run of consecutive, increasing source indices.
		///
		/// The shuffle result this recognizes has the shape
		///   0* a[0] 0* a[1] ... 0* a[n],  n >= 0
		/// i.e. the elements a[] appear in ascending order with any number of zero
		/// lanes in between. Zeroable[i] describes Mask[i] (see
		/// computeZeroableShuffleElements).
		///
		/// \param Zeroable       One bit per mask element; a set bit marks a lane that
		///                       is known to be zero.
		/// \param Mask           The shuffle mask. Any element equal to -1 makes the
		///                       function return false.
		/// \param VectorType     Type whose element count is the index at which the
		///                       run may start when it does not start at 0.
		/// \param IsZeroSideLeft Written only when a non-zeroable lane is found: false
		///                       if the run starts at index 0, true if it starts at
		///                       VectorType.getVectorNumElements().
		/// \returns true if every non-zeroable lane continues the run; also true when
		///          every lane is zeroable (IsZeroSideLeft is then left untouched).
		static bool isNonZeroElementsInOrder(const SmallBitVector &Zeroable,
		                                     ArrayRef<int> Mask, const EVT &VectorType,
		                                     bool &IsZeroSideLeft) {
		  // Source index the next non-zeroable lane must select; -1 until the first
		  // such lane has been seen.
		  int NextElement = -1;
		  for (int i = 0, e = Zeroable.size(); i < e; i++) {
		    // A -1 mask element is rejected outright: every lane must be either a
		    // known zero or a concrete element of the run.
		    if (Mask[i] == -1)
		      return false;
		    if (Zeroable[i])
		      continue;
		    // First non-zeroable lane: the run has to begin at element 0 or at
		    // element NumElements. Any other start fails the comparison below.
		    if (NextElement == -1) {
		      NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0;
		      IsZeroSideLeft = NextElement != 0;
		    }
		    // Bail out as soon as a lane breaks the increasing sequence.
		    if (NextElement != Mask[i])
		      return false;
		    NextElement++;
		  }
		  return true;
		}

/// Try to lower a shuffle with a single PSHUFB of V1 or V2.		/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,		static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,		ArrayRef<int> Mask, SDValue V1,
SDValue V2,		SDValue V2,
const SmallBitVector &Zeroable,		const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,		const X86Subtarget &Subtarget,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
int Size = Mask.size();		int Size = Mask.size();
Show All 39 Lines	static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
assert(V && "Failed to find a source input");		assert(V && "Failed to find a source input");

MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);		MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);
return DAG.getBitcast(		return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),		VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),
DAG.getBuildVector(I8VT, DL, PSHUFBMask)));		DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
}		}

		// Forward declaration of getMaskNode. lowerVectorShuffleToEXPAND below calls
		// it before its definition, which is not part of this excerpt.
		static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
		const X86Subtarget &Subtarget, SelectionDAG &DAG,
		const SDLoc &dl);

		// Function convertBitVectorToUnsigned - The function gets SmallBitVector
		// as argument and convert him to unsigned.
		// The output of the function is not(zeroable)
		static unsigned convertBitVectorToUnsiged(const SmallBitVector &Zeroable) {
		unsigned convertBit = 0;
		for (int i = 0, e = Zeroable.size(); i < e; i++)
		convertBit \|= !(Zeroable[i]) << i;
		return convertBit;
		}

		/// Try to lower a shuffle to an X86ISD::EXPAND node.
		///
		/// The pattern matches when the non-zeroable lanes of Mask select consecutive
		/// elements of one input (checked by isNonZeroElementsInOrder) and all other
		/// lanes are zeroable. The result is
		///   VSELECT(VMask, EXPAND(Input), ZeroVector)
		/// where VMask has a bit set for every non-zeroable lane.
		///
		/// \param DL        Debug location for the created nodes.
		/// \param VT        Result vector type; must have 4, 8 or 16 elements.
		/// \param Zeroable  Per-lane "known zero" bits for Mask.
		/// \param Mask      The shuffle mask.
		/// \param V1, V2    The shuffle inputs; neither is modified here.
		/// \returns The lowered node, or an empty SDValue if the pattern does not
		///          match.
		static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
		                                          const SmallBitVector &Zeroable,
		                                          ArrayRef<int> Mask, SDValue &V1,
		                                          SDValue &V2, SelectionDAG &DAG,
		                                          const X86Subtarget &Subtarget) {
		  bool IsLeftZeroSide = true;
		  if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
		                                IsLeftZeroSide))
		    return SDValue();

		  unsigned NumElts = VT.getVectorNumElements();
		  assert((NumElts == 4 || NumElts == 8 || NumElts == 16) &&
		         "Unexpected number of vector elements");

		  // Build the lane mask as a scalar constant first. The scalar is never
		  // narrower than 8 bits, even for 4-element vectors.
		  unsigned VEXPANDMask = convertBitVectorToUnsiged(Zeroable);
		  MVT IntegerType = MVT::getIntegerVT(std::max(NumElts, 8u));
		  SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
		  SDValue VMask = getMaskNode(MaskNode, MVT::getVectorVT(MVT::i1, NumElts),
		                              Subtarget, DAG, DL);
		  SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);

		  // When the run of non-zero elements starts at index NumElements the data
		  // comes from V2, otherwise from V1.
		  SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
		  return DAG.getNode(ISD::VSELECT, DL, VT, VMask,
		                     DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector),
		                     ZeroVector);
		}

// X86 has dedicated unpack instructions that can handle specific blend		// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.		// operations: UNPCKH and UNPCKL.
static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,		static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,		ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {		SDValue V2, SelectionDAG &DAG) {
SmallVector<int, 8> Unpckl;		SmallVector<int, 8> Unpckl;
createUnpackShuffleMask(VT, Unpckl, /* Lo = / true, / Unary = */ false);		createUnpackShuffleMask(VT, Unpckl, /* Lo = / true, / Unary = */ false);
if (isShuffleEquivalent(V1, V2, Mask, Unpckl))		if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
▲ Show 20 Lines • Show All 3,998 Lines • ▼ Show 20 Lines	static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// shuffle. However, if we have AVX2 and either inputs are already in place,		// shuffle. However, if we have AVX2 and either inputs are already in place,
// we will be able to shuffle even across lanes the other input in a single		// we will be able to shuffle even across lanes the other input in a single
// instruction so skip this pattern.		// instruction so skip this pattern.
if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) \|\|		if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) \|\|
isShuffleMaskInputInPlace(1, Mask))))		isShuffleMaskInputInPlace(1, Mask))))
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(		if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))		DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return Result;		return Result;
		// If we have VLX support, we can use VEXPAND.
		if (Subtarget.hasVLX())
		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask,
		V1, V2, DAG, Subtarget))
		return V;

// If we have AVX2 then we always want to lower with a blend because an v4 we		// If we have AVX2 then we always want to lower with a blend because an v4 we
// can fully permute the elements.		// can fully permute the elements.
if (Subtarget.hasAVX2())		if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,		return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
Mask, DAG);		Mask, DAG);

// Otherwise fall back on generic lowering.		// Otherwise fall back on generic lowering.
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines	return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));		getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}		}

// Try to use shift instructions.		// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,		if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))		Zeroable, Subtarget, DAG))
return Shift;		return Shift;

// If we have VLX support, we can use VALIGN.		// If we have VLX support, we can use VALIGN or VEXPAND.
if (Subtarget.hasVLX())		if (Subtarget.hasVLX()) {
if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2,		if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2,
Mask, Subtarget, DAG))		Mask, Subtarget, DAG))
return Rotate;		return Rotate;

		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask,
		V1, V2, DAG, Subtarget))
		return V;
		}

// Try to use PALIGNR.		// Try to use PALIGNR.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2,		if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2,
Mask, Subtarget, DAG))		Mask, Subtarget, DAG))
return Rotate;		return Rotate;

// Use dedicated unpack instructions for masks that match their pattern.		// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =		if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))		lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
▲ Show 20 Lines • Show All 84 Lines • ▼ Show 20 Lines	return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
DAG);		DAG);
}		}

// Try to simplify this by merging 128-bit lanes to enable a lane-based		// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.		// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(		if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))		DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
return Result;		return Result;
		// If we have VLX support, we can use VEXPAND.
		if (Subtarget.hasVLX())
		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask,
		V1, V2, DAG, Subtarget))
		return V;

// If we have AVX2 then we always want to lower with a blend because at v8 we		// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.		// can fully permute the elements.
if (Subtarget.hasAVX2())		if (Subtarget.hasAVX2())
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,		return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
Mask, DAG);		Mask, DAG);

// Otherwise fall back on generic lowering.		// Otherwise fall back on generic lowering.
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	if (SDValue V =
return V;		return V;
}		}

// Try to use shift instructions.		// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,		if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))		Zeroable, Subtarget, DAG))
return Shift;		return Shift;

// If we have VLX support, we can use VALIGN.		// If we have VLX support, we can use VALIGN or EXPAND.
if (Subtarget.hasVLX())		if (Subtarget.hasVLX()) {
if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2,		if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2,
Mask, Subtarget, DAG))		Mask, Subtarget, DAG))
return Rotate;		return Rotate;

		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask,
		V1, V2, DAG, Subtarget))
		return V;
		}

// Try to use byte rotation instructions.		// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(		if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))		DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;		return Rotate;

// Try to create an in-lane repeating shuffle mask and then shuffle the		// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.		// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(		if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
▲ Show 20 Lines • Show All 340 Lines • ▼ Show 20 Lines	static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
}		}

return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],		return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
DAG.getConstant(PermMask, DL, MVT::i8));		DAG.getConstant(PermMask, DL, MVT::i8));
}		}

/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.		/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,		static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
		const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,		SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,		const X86Subtarget &Subtarget,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");		assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");		assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");		assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

if (V2.isUndef()) {		if (V2.isUndef()) {
Show All 26 Lines	if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))		lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Unpck;		return Unpck;

// Check if the blend happens to exactly fit that of SHUFPD.		// Check if the blend happens to exactly fit that of SHUFPD.
if (SDValue Op =		if (SDValue Op =
lowerVectorShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))		lowerVectorShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Op;		return Op;

		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1,
		V2, DAG, Subtarget))
		return V;

return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);		return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
}		}

/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.		/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef<int> Mask,		static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef<int> Mask,
		const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,		SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,		const X86Subtarget &Subtarget,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");		assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");		assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");		assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");

// If the shuffle mask is repeated in each 128-bit lane, we have many more		// If the shuffle mask is repeated in each 128-bit lane, we have many more
Show All 15 Lines	if (is128BitLaneRepeatedShuffleMask(MVT::v16f32, Mask, RepeatedMask)) {
// Use dedicated unpack instructions for masks that match their pattern.		// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue Unpck =		if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))		lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
return Unpck;		return Unpck;

// Otherwise, fall back to a SHUFPS sequence.		// Otherwise, fall back to a SHUFPS sequence.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);		return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
}		}
		// If we have AVX512F support, we can use VEXPAND.
		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
		V1, V2, DAG, Subtarget))
		return V;

return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);		return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);
}		}

/// \brief Handle lowering of 8-lane 64-bit integer shuffles.		/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,		static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const SmallBitVector &Zeroable,		const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,		SDValue V1, SDValue V2,
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use PALIGNR.		// Try to use PALIGNR.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2,		if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2,
Mask, Subtarget, DAG))		Mask, Subtarget, DAG))
return Rotate;		return Rotate;

if (SDValue Unpck =		if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))		lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
return Unpck;		return Unpck;
		// If we have AVX512F support, we can use VEXPAND.
		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1,
		V2, DAG, Subtarget))
		return V;

return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);		return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
}		}

/// \brief Handle lowering of 16-lane 32-bit integer shuffles.		/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,		static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const SmallBitVector &Zeroable,		const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,		SDValue V1, SDValue V2,
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If some CPU is harmed by the domain switch, we can fix it in a later pass.		// If some CPU is harmed by the domain switch, we can fix it in a later pass.
if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {		if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1);		SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2);		SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2);
SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,		SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
CastV1, CastV2, DAG);		CastV1, CastV2, DAG);
return DAG.getBitcast(MVT::v16i32, ShufPS);		return DAG.getBitcast(MVT::v16i32, ShufPS);
}		}
		// If we have AVX512F support, we can use VEXPAND.
		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask,
		V1, V2, DAG, Subtarget))
		return V;

return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);		return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}		}

/// \brief Handle lowering of 32-lane 16-bit integer shuffles.		/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,		static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const SmallBitVector &Zeroable,		const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,		SDValue V1, SDValue V2,
▲ Show 20 Lines • Show All 120 Lines • ▼ Show 20 Lines	if (SDValue Broadcast =
return Broadcast;		return Broadcast;

// Dispatch to each element type for lowering. If we don't have support for		// Dispatch to each element type for lowering. If we don't have support for
// specific element type shuffles at 512 bits, immediately split them and		// specific element type shuffles at 512 bits, immediately split them and
// lower them. Each lowering routine of a given type is allowed to assume that		// lower them. Each lowering routine of a given type is allowed to assume that
// the requisite ISA extensions for that element type are available.		// the requisite ISA extensions for that element type are available.
switch (VT.SimpleTy) {		switch (VT.SimpleTy) {
case MVT::v8f64:		case MVT::v8f64:
return lowerV8F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);		return lowerV8F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16f32:		case MVT::v16f32:
return lowerV16F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);		return lowerV16F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i64:		case MVT::v8i64:
return lowerV8I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);		return lowerV8I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i32:		case MVT::v16i32:
return lowerV16I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);		return lowerV16I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v32i16:		case MVT::v32i16:
return lowerV32I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);		return lowerV32I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v64i8:		case MVT::v64i8:
return lowerV64I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);		return lowerV64I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
▲ Show 20 Lines • Show All 21,586 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=skx | FileCheck %s --check-prefix=SKX
				; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=knl | FileCheck %s --check-prefix=KNL

				; Expand 128 -> 256 bits: <4 x float> -> <8 x float> (the <2 x double> case follows below).
				define <8 x float> @expand(<4 x float> %a) {
				; SKX-LABEL: expand:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
				; SKX-NEXT: movb $5, %al
				; SKX-NEXT: kmovb %eax, %k1
				; SKX-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand:
				; KNL: # BB#0:
				; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
				; KNL-NEXT: vxorps %ymm1, %ymm1, %ymm1
				; KNL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4,5,6,7]
				; KNL-NEXT: retq
				%res = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 5, i32 1, i32 5, i32 5, i32 5, i32 5, i32 5>
				ret <8 x float> %res
				}

				define <8 x float> @expand1(<4 x float> %a ) {
				; SKX-LABEL: expand1:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
				; SKX-NEXT: movb $-86, %al
				; SKX-NEXT: kmovb %eax, %k1
				; SKX-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand1:
				; KNL: # BB#0:
				; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
				; KNL-NEXT: vmovaps {{.*#+}} ymm1 = <u,0,u,1,u,2,u,3>
				; KNL-NEXT: vpermps %ymm0, %ymm1, %ymm0
				; KNL-NEXT: vxorps %ymm1, %ymm1, %ymm1
				; KNL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
				; KNL-NEXT: retq
				%res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
				ret <8 x float> %res
				}

				;Expand 128 -> 256 test <2 x double> -> <4 x double>
				define <4 x double> @expand2(<2 x double> %a) {
				; SKX-LABEL: expand2:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
				; SKX-NEXT: movb $9, %al
				; SKX-NEXT: kmovb %eax, %k1
				; SKX-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand2:
				; KNL: # BB#0:
				; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
				; KNL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
				; KNL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
				; KNL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
				; KNL-NEXT: retq
				%res = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 2, i32 1>
				ret <4 x double> %res
				}

				; Expand 128 -> 256 bits: <4 x i32> -> <8 x i32>.
				define <8 x i32> @expand3(<4 x i32> %a ) {
				; SKX-LABEL: expand3:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
				; SKX-NEXT: movb $-127, %al
				; SKX-NEXT: kmovb %eax, %k1
				; SKX-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand3:
				; KNL: # BB#0:
				; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
				; KNL-NEXT: vpbroadcastq %xmm0, %ymm0
				; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
				; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6],ymm0[7]
				; KNL-NEXT: retq
				%res = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <8 x i32> <i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,i32 5>
				ret <8 x i32> %res
				}

				; Expand 128 -> 256 bits: <2 x i64> -> <4 x i64>.
				define <4 x i64> @expand4(<2 x i64> %a ) {
				; SKX-LABEL: expand4:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
				; SKX-NEXT: movb $9, %al
				; SKX-NEXT: kmovb %eax, %k1
				; SKX-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand4:
				; KNL: # BB#0:
				; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
				; KNL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
				; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
				; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
				; KNL-NEXT: retq
				%res = shufflevector <2 x i64> zeroinitializer, <2 x i64> %a, <4 x i32> <i32 2, i32 0, i32 0, i32 3>
				ret <4 x i64> %res
				}

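				; Negative test for 128 -> 256: every non-zero lane selects the same element (index 4),
				; so the non-zero elements are not in increasing order and no expand is emitted.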
				define <8 x float> @expand5(<4 x float> %a ) {
				; SKX-LABEL: expand5:
				; SKX: # BB#0:
				; SKX-NEXT: vbroadcastss %xmm0, %ymm0
				; SKX-NEXT: vxorps %ymm1, %ymm1, %ymm1
				; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand5:
				; KNL: # BB#0:
				; KNL-NEXT: vbroadcastss %xmm0, %ymm0
				; KNL-NEXT: vxorps %ymm1, %ymm1, %ymm1
				; KNL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
				; KNL-NEXT: retq
				%res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
				ret <8 x float> %res
				}

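				; Still 128 -> 256: the input ends up unchanged in the upper half, so the expected
				; code is a 128-bit insert rather than an expand.
				; The 256 -> 512 tests (<8 x float> -> <16 x float>) start at @expand7.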
				define <8 x float> @expand6(<4 x float> %a ) {
				; SKX-LABEL: expand6:
				; SKX: # BB#0:
				; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1
				; SKX-NEXT: vinsertf{{.*}}$1, %xmm0, %ymm1, %ymm0
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand6:
				; KNL: # BB#0:
				; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
				; KNL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
				; KNL-NEXT: retq
				%res = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
				ret <8 x float> %res
				}

				define <16 x float> @expand7(<8 x float> %a) {
				; SKX-LABEL: expand7:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; SKX-NEXT: movw $1285, %ax # imm = 0x505
				; SKX-NEXT: kmovw %eax, %k1
				; SKX-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand7:
				; KNL: # BB#0:
				; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; KNL-NEXT: movw $1285, %ax # imm = 0x505
				; KNL-NEXT: kmovw %eax, %k1
				; KNL-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z}
				; KNL-NEXT: retq
				%res = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 8, i32 8, i32 8, i32 8, i32 2, i32 8, i32 3, i32 8, i32 8, i32 8, i32 8, i32 8>
				ret <16 x float> %res
				}

				define <16 x float> @expand8(<8 x float> %a ) {
				; SKX-LABEL: expand8:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; SKX-NEXT: movw $-21846, %ax # imm = 0xAAAA
				; SKX-NEXT: kmovw %eax, %k1
				; SKX-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand8:
				; KNL: # BB#0:
				; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; KNL-NEXT: movw $-21846, %ax # imm = 0xAAAA
				; KNL-NEXT: kmovw %eax, %k1
				; KNL-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z}
				; KNL-NEXT: retq
				%res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
				ret <16 x float> %res
				}

				;expand 256 -> 512 include <4 x double> <8 x double>
				define <8 x double> @expand9(<4 x double> %a) {
				; SKX-LABEL: expand9:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; SKX-NEXT: movb $-127, %al
				; SKX-NEXT: kmovb %eax, %k1
				; SKX-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand9:
				; KNL: # BB#0:
				; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; KNL-NEXT: movb $-127, %al
				; KNL-NEXT: kmovw %eax, %k1
				; KNL-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z}
				; KNL-NEXT: retq
				%res = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 1>
				ret <8 x double> %res
				}

				define <16 x i32> @expand10(<8 x i32> %a ) {
				; SKX-LABEL: expand10:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; SKX-NEXT: movw $-21846, %ax # imm = 0xAAAA
				; SKX-NEXT: kmovw %eax, %k1
				; SKX-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand10:
				; KNL: # BB#0:
				; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; KNL-NEXT: movw $-21846, %ax # imm = 0xAAAA
				; KNL-NEXT: kmovw %eax, %k1
				; KNL-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
				; KNL-NEXT: retq
				%res = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
				ret <16 x i32> %res
				}

				define <8 x i64> @expand11(<4 x i64> %a) {
				; SKX-LABEL: expand11:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; SKX-NEXT: movb $-127, %al
				; SKX-NEXT: kmovb %eax, %k1
				; SKX-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand11:
				; KNL: # BB#0:
				; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; KNL-NEXT: movb $-127, %al
				; KNL-NEXT: kmovw %eax, %k1
				; KNL-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
				; KNL-NEXT: retq
				%res = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 1>
				ret <8 x i64> %res
				}

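				; Negative test for 256 -> 512: every non-zero lane selects the same element (index 8),
				; so the non-zero elements are not in increasing order and no expand is emitted.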
				define <16 x float> @expand12(<8 x float> %a) {
				; SKX-LABEL: expand12:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [0,16,2,16,4,16,6,16,0,16,1,16,2,16,3,16]
				; SKX-NEXT: vxorps %zmm1, %zmm1, %zmm1
				; SKX-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1
				; SKX-NEXT: vmovaps %zmm1, %zmm0
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand12:
				; KNL: # BB#0:
				; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
				; KNL-NEXT: vmovaps {{.*#+}} zmm2 = [0,16,2,16,4,16,6,16,0,16,1,16,2,16,3,16]
				; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
				; KNL-NEXT: vpermt2ps %zmm0, %zmm2, %zmm1
				; KNL-NEXT: vmovaps %zmm1, %zmm0
				; KNL-NEXT: retq
				%res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8,i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
				ret <16 x float> %res
				}

				define <16 x float> @expand13(<8 x float> %a ) {
				; SKX-LABEL: expand13:
				; SKX: # BB#0:
				; SKX-NEXT: vxorps %ymm1, %ymm1, %ymm1
				; SKX-NEXT: vinsertf32x8 $1, %ymm0, %zmm1, %zmm0
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand13:
				; KNL: # BB#0:
				; KNL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
				; KNL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
				; KNL-NEXT: retq
				%res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
				ret <16 x float> %res
				}

				; Checks the case where one shuffle input is a vector of mixed constant values and the mask selects only the zero elements of that vector.

				define <8 x float> @expand14(<4 x float> %a) {
				; SKX-LABEL: expand14:
				; SKX: # BB#0:
				; SKX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
				; SKX-NEXT: movb $20, %al
				; SKX-NEXT: kmovb %eax, %k1
				; SKX-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand14:
				; KNL: # BB#0:
				; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
				; KNL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
				; KNL-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
				; KNL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,0,0]
				; KNL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
				; KNL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
				; KNL-NEXT: retq
				%addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
				%res = shufflevector <4 x float> %addV, <4 x float> %a, <8 x i32> <i32 3, i32 3, i32 4, i32 0, i32 5, i32 0, i32 0, i32 0>
				ret <8 x float> %res
				}

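				; Negative test: lane 1 selects element 1 of %addV, which is not zero, so the result is not an expansion of %a alone.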
				define <8 x float> @expand15(<4 x float> %a) {
				; SKX-LABEL: expand15:
				; SKX: # BB#0:
				; SKX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,1,3]
				; SKX-NEXT: vmovaps {{.*#+}} ymm0 = <0,2,4,0,u,u,u,u>
				; SKX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,0,0]
				; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
				; SKX-NEXT: vpermi2ps %ymm1, %ymm2, %ymm0
				; SKX-NEXT: retq
				;
				; KNL-LABEL: expand15:
				; KNL: # BB#0:
				; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
				; KNL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
				; KNL-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
				; KNL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,0]
				; KNL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
				; KNL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
				; KNL-NEXT: retq
				%addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
				%res = shufflevector <4 x float> %addV, <4 x float> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 0, i32 5, i32 0, i32 0, i32 0>
				ret <8 x float> %res
				}

This is an archive of the discontinued LLVM Phabricator instance.

Improving shuffle lowering by using AVX-512 EXPAND* instructions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 83832

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll

This is an archive of the discontinued LLVM Phabricator instance.

Improving shuffle lowering by using AVX-512 EXPAND* instructionsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 83832

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll

Improving shuffle lowering by using AVX-512 EXPAND* instructions
ClosedPublic