Diff 209169

lib/Target/PowerPC/PPCISelLowering.h

Show First 20 Lines • Show All 454 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
/// Maps directly to an lxvd2x instruction that will be followed by		/// Maps directly to an lxvd2x instruction that will be followed by
/// an xxswapd.		/// an xxswapd.
LXVD2X,		LXVD2X,

/// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a		/// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
/// v2f32 value into the lower half of a VSR register.		/// v2f32 value into the lower half of a VSR register.
LD_VSX_LH,		LD_VSX_LH,

		/// VSRC, CHAIN = LD_SPLAT CHAIN, Ptr - Splatting load memory
		jsjiUnsubmitted Not Done Reply Inline Actions This ISD has chain, please update the comments to describe it. jsji: This ISD has chain, please update the comments to describe it.
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions Sure, I omitted it since all of these nodes have a chain and all these comments seem superfluous. But you're right, consistency is more important. nemanjai: Sure, I omitted it since all of these nodes have a chain and all these comments seem…
		/// instructions such as LXVDSX, LXVWSX.
		LD_SPLAT,

/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.		/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by		/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.		/// an xxswapd.
STXVD2X,		STXVD2X,

/// Store scalar integers from VSR.		/// Store scalar integers from VSR.
ST_VSR_SCAL_INT,		ST_VSR_SCAL_INT,

▲ Show 20 Lines • Show All 87 Lines • ▼ Show 20 Lines	namespace PPC {
/// shuffle of v4f32/v4i32 vectors that just inserts one element from one		/// shuffle of v4f32/v4i32 vectors that just inserts one element from one
/// vector into the other. This function will also set a couple of		/// vector into the other. This function will also set a couple of
/// output parameters for how much the source vector needs to be shifted and		/// output parameters for how much the source vector needs to be shifted and
/// what byte number needs to be specified for the instruction to put the		/// what byte number needs to be specified for the instruction to put the
/// element in the desired location of the target vector.		/// element in the desired location of the target vector.
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,		bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE);		unsigned &InsertAtByte, bool &Swap, bool IsLE);

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the		/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.		/// appropriate for PPC mnemonics (which have a big endian bias - namely
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);		/// elements are counted from the left of the vector register).
		unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
		SelectionDAG &DAG);

/// get_VSPLTI_elt - If this is a build_vector of constants which can be		/// get_VSPLTI_elt - If this is a build_vector of constants which can be
/// formed by using a vspltis[bhw] instruction of the specified element		/// formed by using a vspltis[bhw] instruction of the specified element
/// size, return the constant being splatted. The ByteSize field indicates		/// size, return the constant being splatted. The ByteSize field indicates
/// the number of bytes of each element [124] -> [bhw].		/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);		SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);

/// If this is a qvaligni shuffle mask, return the shift		/// If this is a qvaligni shuffle mask, return the shift
▲ Show 20 Lines • Show All 625 Lines • Show Last 20 Lines

lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,390 Lines • ▼ Show 20 Lines	const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";		case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
case PPCISD::QBFLT: return "PPCISD::QBFLT";		case PPCISD::QBFLT: return "PPCISD::QBFLT";
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";		case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";		case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";		case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";		case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";		case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";		case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
		case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH";		case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH";
}		}
return nullptr;		return nullptr;
}		}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,		EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
EVT VT) const {		EVT VT) const {
if (!VT.isVector())		if (!VT.isVector())
▲ Show 20 Lines • Show All 357 Lines • ▼ Show 20 Lines	int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
if (isLE)		if (isLE)
ShiftAmt = 16 - ShiftAmt;		ShiftAmt = 16 - ShiftAmt;

return ShiftAmt;		return ShiftAmt;
}		}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand		/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to		/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.		/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {		bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
assert(N->getValueType(0) == MVT::v16i8 &&		assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
(EltSize == 1 \|\| EltSize == 2 \|\| EltSize == 4));		EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
		jsjiUnsubmitted Not Done Reply Inline Actions Looks like you are repurposing this function: instead of using it just for 'VSPLT/VSPLTH/VSPLTW' , use it for `lxvdsx` as well. I don't think it is a great idea to just update the assert here. We should either rename the function, or re-structure the code to two different functions, in another NFC patch? jsji: Looks like you are repurposing this function: instead of using it just for…
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions Updating the comment, I can see. But why do you not feel this function is adequate for 64-bit splats? The name is adequate - we are looking for a splat shuffle mask. And we are in a legalized DAG which will make all shuffles `v16i8`. nemanjai: Updating the comment, I can see. But why do you not feel this function is adequate for 64-bit…

// The consecutive indices need to specify an element, not part of two		// The consecutive indices need to specify an element, not part of two
// different elements. So abandon ship early if this isn't the case.		// different elements. So abandon ship early if this isn't the case.
if (N->getMaskElt(0) % EltSize != 0)		if (N->getMaskElt(0) % EltSize != 0)
return false;		return false;

// This is a splat operation if each element of the permute is the same, and		// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.		// if the value doesn't reference the second vector.
▲ Show 20 Lines • Show All 276 Lines • ▼ Show 20 Lines	if (IsLE) {

// Note: if control flow comes here that means Swap is already set above		// Note: if control flow comes here that means Swap is already set above
DM = (M0 << 1) + (M1 & 1);		DM = (M0 << 1) + (M1 & 1);
return true;		return true;
}		}
}		}


/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the		/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.		/// appropriate for PPC mnemonics (which have a big endian bias - namely
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,		/// elements are counted from the left of the vector register).
		unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);		ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));		assert(isSplatShuffleMask(SVOp, EltSize));
if (DAG.getDataLayout().isLittleEndian())		if (DAG.getDataLayout().isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);		return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else		else
return SVOp->getMaskElt(0) / EltSize;		return SVOp->getMaskElt(0) / EltSize;
}		}

▲ Show 20 Lines • Show All 3,063 Lines • ▼ Show 20 Lines	for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {

SDValue Val;		SDValue Val;

if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {		if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,		SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);		InFlag);
Chain = Lo.getValue(1);		Chain = Lo.getValue(1);
InFlag = Lo.getValue(2);		InFlag = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc		VA = RVLocs[++i]; // skip ahead to next loc
		amykUnsubmitted Not Done Reply Inline Actions Do comments like these need to be full sentences with periods, as well? amyk: Do comments like these need to be full sentences with periods, as well?
		amykUnsubmitted Not Done Reply Inline Actions Sorry, I just noticed that for some reason, it highlighted this as your change. Please disregard my comment. amyk: Sorry, I just noticed that for some reason, it highlighted this as your change. Please…
SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,		SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InFlag);		InFlag);
Chain = Hi.getValue(1);		Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);		InFlag = Hi.getValue(2);
if (!Subtarget.isLittleEndian())		if (!Subtarget.isLittleEndian())
std::swap (Lo, Hi);		std::swap (Lo, Hi);
Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);		Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
} else {		} else {
▲ Show 20 Lines • Show All 2,973 Lines • ▼ Show 20 Lines	if (!EnableQuadPrecision \|\|
(Op0.getOperand(0).getValueType() != MVT::i64) \|\|		(Op0.getOperand(0).getValueType() != MVT::i64) \|\|
(Op0.getOperand(1).getValueType() != MVT::i64))		(Op0.getOperand(1).getValueType() != MVT::i64))
return SDValue();		return SDValue();

return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),		return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
Op0.getOperand(1));		Op0.getOperand(1));
}		}

		const SDValue *getNormalLoadInput(const SDValue &Op) {
		jsjiUnsubmitted Not Done Reply Inline Actions We can't support indexed load as well, and we are also trying to return the `InputLoad`, so maybe something like `getNormalLoadInput` and add comments about returning true for success? jsji: We can't support indexed load as well, and we are also trying to return the `InputLoad`, so…
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions This is a good point. I can actually make it return a `const SDValue *` so success is simply signaled by the returned value not being `nullptr`. nemanjai: This is a good point. I can actually make it return a `const SDValue *` so success is simply…
		const SDValue *InputLoad = &Op;
		if (InputLoad->getOpcode() == ISD::BITCAST)
		InputLoad = &InputLoad->getOperand(0);
		if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR)
		jsjiUnsubmitted Not Done Reply Inline Actions Any other ISDs we can/should peek through? How about `ANY_EXTEND_VECTOR_INREG`/`EXTRACT_SUBVECTOR`? jsji: Any other ISDs we can/should peek through? How about…
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions I don't think we want to peek through any vector operations since we are looking for a scalar non-extending, non-indexed load. nemanjai: I don't think we want to peek through any vector operations since we are looking for a scalar…
		InputLoad = &InputLoad->getOperand(0);
		if (InputLoad->getOpcode() != ISD::LOAD)
		return nullptr;
		LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
		jsjiUnsubmitted Not Done Reply Inline Actions Why not just `LD->isNormalLoad()`? jsji: Why not just `LD->isNormalLoad()`?
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions I think you mean `ISD::isNormalLoad(LD)` and my answer is, there is no good reason. I first added the indexed check and realized later that I also have to ensure it is non-extending :) I will definitely change it, thank you. nemanjai: I think you mean `ISD::isNormalLoad(LD)` and my answer is, there is no good reason. I first…
		return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
		}

// If this is a case we can't handle, return null and let the default		// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it		// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen		// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the		// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.		// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,		SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
SDLoc dl(Op);		SDLoc dl(Op);
▲ Show 20 Lines • Show All 106 Lines • ▼ Show 20 Lines	SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,

// Check if this is a splat of a constant value.		// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;		APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;		unsigned SplatBitSize;
bool HasAnyUndefs;		bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,		if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) \|\|		HasAnyUndefs, 0, !Subtarget.isLittleEndian()) \|\|
SplatBitSize > 32) {		SplatBitSize > 32) {

		const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
		// Handle load-and-splat patterns as we have instructions that will do this
		// in one go.
		if (InputLoad && DAG.isSplatValue(Op, true)) {
		LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
		jsjiUnsubmitted Not Done Reply Inline Actions Do we need to check `hasOneUse` here as well? jsji: Do we need to check `hasOneUse` here as well?
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions I can certainly add it since this wouldn't be profitable if there are other uses of the load. nemanjai: I can certainly add it since this wouldn't be profitable if there are other uses of the load.

		// We have handling for 4 and 8 byte elements.
		unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();

		// Checking for a single use of this load, we have to check for vector
		// width (128 bits) / ElementSize uses (since each operand of the
		// BUILD_VECTOR is a separate use of the value).
		if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
		((Subtarget.hasVSX() && ElementSize == 64) \|\|
		(Subtarget.hasP9Vector() && ElementSize == 32))) {
		SDValue Ops[] = {
		LD->getChain(), // Chain
		LD->getBasePtr(), // Ptr
		DAG.getValueType(Op.getValueType()) // VT
		};
		return
		DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
		DAG.getVTList(Op.getValueType(), MVT::Other),
		Ops, LD->getMemoryVT(), LD->getMemOperand());
		}
		}

// BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be		// BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
// lowered to VSX instructions under certain conditions.		// lowered to VSX instructions under certain conditions.
// Without VSX, there is no pattern more efficient than expanding the node.		// Without VSX, there is no pattern more efficient than expanding the node.
if (Subtarget.hasVSX() &&		if (Subtarget.hasVSX() &&
haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),		haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
Subtarget.hasP8Vector()))		Subtarget.hasP8Vector()))
return Op;		return Op;
return SDValue();		return SDValue();
▲ Show 20 Lines • Show All 469 Lines • ▼ Show 20 Lines	SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V1 = Op.getOperand(0);		SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);		SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);		ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
EVT VT = Op.getValueType();		EVT VT = Op.getValueType();
bool isLittleEndian = Subtarget.isLittleEndian();		bool isLittleEndian = Subtarget.isLittleEndian();

unsigned ShiftElts, InsertAtByte;		unsigned ShiftElts, InsertAtByte;
bool Swap = false;		bool Swap = false;

		// If this is a load-and-splat, we can do that with a single instruction
		// in some cases. However if the load has multiple uses, we don't want to
		// combine it because that will just produce multiple loads.
		const SDValue *InputLoad = getNormalLoadInput(V1);
		if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
		(PPC::isSplatShuffleMask(SVOp, 4) \|\| PPC::isSplatShuffleMask(SVOp, 8)) &&
		InputLoad->hasOneUse()) {
		bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
		int SplatIdx =
		PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);

		jsjiUnsubmitted Not Done Reply Inline Actions Similar to `isSplatShuffleMask`, we are repurposing `getVSPLTImmediate` as well, VSPLT* has only 3 forms (1/2/4), we should either rename this function, or use another wrapper. jsji: Similar to `isSplatShuffleMask`, we are repurposing `getVSPLTImmediate` as well, VSPLT* has…
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions Yeah, you're absolutely right. I should rename the function. I initially left it alone as I thought that `VSPLT` adequately abbreviates `Vector Splat`, but since it is part of a mnemonic and capitalized, I agree with you that it is very misleading. nemanjai: Yeah, you're absolutely right. I should rename the function. I initially left it alone as I…
		LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
		// For 4-byte load-and-splat, we need Power9.
		if ((IsFourByte && Subtarget.hasP9Vector()) \|\| !IsFourByte) {
		uint64_t Offset = 0;
		if (IsFourByte)
		Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
		else
		Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
		SDValue BasePtr = LD->getBasePtr();
		if (Offset != 0)
		BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
		BasePtr, DAG.getIntPtrConstant(Offset, dl));
		SDValue Ops[] = {
		LD->getChain(), // Chain
		BasePtr, // BasePtr
		DAG.getValueType(Op.getValueType()) // VT
		};
		SDVTList VTL =
		DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
		SDValue LdSplt =
		DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
		Ops, LD->getMemoryVT(), LD->getMemOperand());
		if (LdSplt.getValueType() != SVOp->getValueType(0))
		LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
		return LdSplt;
		}
		}
if (Subtarget.hasP9Vector() &&		if (Subtarget.hasP9Vector() &&
PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,		PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
isLittleEndian)) {		isLittleEndian)) {
if (Swap)		if (Swap)
std::swap(V1, V2);		std::swap(V1, V2);
SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);		SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);		SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
if (ShiftElts) {		if (ShiftElts) {
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	if (PPC::isXXBRHShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);		SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);		SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);		return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
}		}
}		}

if (Subtarget.hasVSX()) {		if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {		if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);		int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);

SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);		SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,		SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
DAG.getConstant(SplatIdx, dl, MVT::i32));		DAG.getConstant(SplatIdx, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);		return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
}		}

// Left shifts of 8 bytes are actually swaps. Convert accordingly.		// Left shifts of 8 bytes are actually swaps. Convert accordingly.
▲ Show 20 Lines • Show All 6,466 Lines • Show Last 20 Lines

lib/Target/PowerPC/PPCInstrAltivec.td

	Show First 20 Lines • Show All 209 Lines • ▼ Show 20 Lines
	def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),			def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
	(vector_shuffle node:$lhs, node:$rhs), [{			(vector_shuffle node:$lhs, node:$rhs), [{
	return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1;			return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1;
	}], VSLDOI_get_imm>;			}], VSLDOI_get_imm>;


	// VSPLT_get_imm xform function: convert vector_shuffle mask to VSPLT imm.			// VSPLT_get_imm xform function: convert vector_shuffle mask to VSPLT imm.
	def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{			def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
	return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG), SDLoc(N));			return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 1, *CurDAG), SDLoc(N));
	}]>;			}]>;
	def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),			def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
	(vector_shuffle node:$lhs, node:$rhs), [{			(vector_shuffle node:$lhs, node:$rhs), [{
	return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);			return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
	}], VSPLTB_get_imm>;			}], VSPLTB_get_imm>;
	def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{			def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
	return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG), SDLoc(N));			return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 2, *CurDAG), SDLoc(N));
	}]>;			}]>;
	def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),			def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
	(vector_shuffle node:$lhs, node:$rhs), [{			(vector_shuffle node:$lhs, node:$rhs), [{
	return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);			return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
	}], VSPLTH_get_imm>;			}], VSPLTH_get_imm>;
	def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{			def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
	return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG), SDLoc(N));			return getI32Imm(PPC::getSplatIdxForPPCMnemonics(N, 4, *CurDAG), SDLoc(N));
	}]>;			}]>;
	def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),			def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
	(vector_shuffle node:$lhs, node:$rhs), [{			(vector_shuffle node:$lhs, node:$rhs), [{
	return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);			return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
	}], VSPLTW_get_imm>;			}], VSPLTW_get_imm>;


	// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.			// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
	▲ Show 20 Lines • Show All 1,308 Lines • Show Last 20 Lines

lib/Target/PowerPC/PPCInstrVSX.td

Show First 20 Lines • Show All 52 Lines • ▼ Show 20 Lines
def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {		def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;		let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
}		}

def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [		def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [
SDTCisVT<0, v4f32>, SDTCisPtrTy<1>		SDTCisVT<0, v4f32>, SDTCisPtrTy<1>
]>;		]>;

		def SDT_PPCldsplat : SDTypeProfile<1, 1, [
		SDTCisVec<0>, SDTCisPtrTy<1>
		jsjiUnsubmitted Not Done Reply Inline Actions Why `SDTCisSameAS<0,1>`? Shouldn't it be `SDTCisPtrTy<1>`? jsji: Why `SDTCisSameAS<0,1>`? Shouldn't it be `SDTCisPtrTy<1>`?
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions Wow, that's right. I neither know why I wrote it this way nor why table gen doesn't complain! nemanjai: Wow, that's right. I neither know why I wrote it this way nor why table gen doesn't complain!
		]>;

def SDT_PPCfpextlh : SDTypeProfile<1, 1, [		def SDT_PPCfpextlh : SDTypeProfile<1, 1, [
SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>		SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>
]>;		]>;

// Little-endian-specific nodes.		// Little-endian-specific nodes.
def SDT_PPClxvd2x : SDTypeProfile<1, 1, [		def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
SDTCisVT<0, v2f64>, SDTCisPtrTy<1>		SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
]>;		]>;
Show All 22 Lines
def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;		def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;		def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;		def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;		def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;

def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>;		def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>;
def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,		def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;		[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
		def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
		[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;

multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,		multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,		string asmstr, InstrItinClass itin, Intrinsic Int,
ValueType OutTy, ValueType InTy> {		ValueType OutTy, ValueType InTy> {
let BaseName = asmbase in {		let BaseName = asmbase in {
def NAME : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),		def NAME : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,		!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
[(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>;		[(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>;
▲ Show 20 Lines • Show All 3,746 Lines • ▼ Show 20 Lines	let Predicates = [HasVSX] in {
def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),		def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS		(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;		(XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),		def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS		(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;		(XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),		def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;		(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
		def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
		(v2f64 (LXVDSX xoaddr:$A))>;
		def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
		(v2i64 (LXVDSX xoaddr:$A))>;

// Build vectors of floating point converted to i64.		// Build vectors of floating point converted to i64.
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),		def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
(v2i64 (XXPERMDIs		(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;		(COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),		def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
(v2i64 (XXPERMDIs		(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;		(COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
▲ Show 20 Lines • Show All 228 Lines • ▼ Show 20 Lines	let Predicates = [HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),		def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS		(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddrX4:$A),		(DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;		VSFRC)), 0))>;
def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),		def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS		(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddrX4:$A),		(DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;		VSFRC)), 0))>;
		def : Pat<(v4f32 (PPCldsplat xoaddr:$A)),
		(v4f32 (LXVWSX xoaddr:$A))>;
		def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
		(v4i32 (LXVWSX xoaddr:$A))>;
}		}

let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {		let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
def : Pat<(i64 (extractelt v2i64:$A, 1)),		def : Pat<(i64 (extractelt v2i64:$A, 1)),
(i64 (MFVSRLD $A))>;		(i64 (MFVSRLD $A))>;
// Better way to build integer vectors if we have MTVSRDD. Big endian.		// Better way to build integer vectors if we have MTVSRDD. Big endian.
def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),		def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
(v2i64 (MTVSRDD $rB, $rA))>;		(v2i64 (MTVSRDD $rB, $rA))>;
▲ Show 20 Lines • Show All 82 Lines • Show Last 20 Lines

test/CodeGen/PowerPC/VSX-XForm-Scalars.ll

	Show All 21 Lines
	; CHECK-P8: ld r3, .LC1@toc@l(r3)			; CHECK-P8: ld r3, .LC1@toc@l(r3)
	; CHECK-P8: xscvsxdsp f0, f0			; CHECK-P8: xscvsxdsp f0, f0
	; CHECK-P8: ld r3, 0(r3)			; CHECK-P8: ld r3, 0(r3)
	; CHECK-P8: stfsx f0, r3, r4			; CHECK-P8: stfsx f0, r3, r4
	; CHECK-P8: blr			; CHECK-P8: blr
	;			;
	; CHECK-P9-LABEL: testExpandPostRAPseudo:			; CHECK-P9-LABEL: testExpandPostRAPseudo:
	; CHECK-P9: # %bb.0: # %entry			; CHECK-P9: # %bb.0: # %entry
	; CHECK-P9: lfiwzx f0, 0, r3
	; CHECK-P9: addis r4, r2, .LC0@toc@ha			; CHECK-P9: addis r4, r2, .LC0@toc@ha
				; CHECK-P9: lxvwsx vs0, 0, r3
	; CHECK-P9: ld r4, .LC0@toc@l(r4)			; CHECK-P9: ld r4, .LC0@toc@l(r4)
	; CHECK-P9: xxpermdi vs0, f0, f0, 2
	; CHECK-P9: xxspltw vs0, vs0, 3
	; CHECK-P9: stxvx vs0, 0, r4			; CHECK-P9: stxvx vs0, 0, r4
				; CHECK-P9: lis r4, 1024
	; CHECK-P9: lfiwax f0, 0, r3			; CHECK-P9: lfiwax f0, 0, r3
	; CHECK-P9: addis r3, r2, .LC1@toc@ha			; CHECK-P9: addis r3, r2, .LC1@toc@ha
	; CHECK-P9: ld r3, .LC1@toc@l(r3)			; CHECK-P9: ld r3, .LC1@toc@l(r3)
	; CHECK-P9: xscvsxdsp f0, f0			; CHECK-P9: xscvsxdsp f0, f0
	; CHECK-P9: ld r3, 0(r3)			; CHECK-P9: ld r3, 0(r3)
	; CHECK-P9: lis r4, 1024
	; CHECK-P9: stfsx f0, r3, r4			; CHECK-P9: stfsx f0, r3, r4
	; CHECK-P9: blr			; CHECK-P9: blr
	entry:			entry:
	%0 = load i32, i32* %ptr, align 4			%0 = load i32, i32* %ptr, align 4
	%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0			%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
	%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
	store <4 x i32> %splat.splat, <4 x i32>* @a, align 16			store <4 x i32> %splat.splat, <4 x i32>* @a, align 16
	tail call void asm sideeffect "#Clobber Rigisters", "~{f0},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()			tail call void asm sideeffect "#Clobber Rigisters", "~{f0},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
	%1 = load i32, i32* %ptr, align 4			%1 = load i32, i32* %ptr, align 4
	%conv = sitofp i32 %1 to float			%conv = sitofp i32 %1 to float
	%2 = load float, float* @pb, align 8			%2 = load float, float* @pb, align 8
	%add.ptr = getelementptr inbounds float, float* %2, i64 16777216			%add.ptr = getelementptr inbounds float, float* %2, i64 16777216
	store float %conv, float* %add.ptr, align 4			store float %conv, float* %add.ptr, align 4
	ret void			ret void
	}			}

test/CodeGen/PowerPC/build-vector-tests.ll

Show First 20 Lines • Show All 1,321 Lines • ▼ Show 20 Lines	entry:
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer		%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %splat.splat		ret <4 x i32> %splat.splat
}		}

; Function Attrs: norecurse nounwind readonly		; Function Attrs: norecurse nounwind readonly
define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) {		define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) {
; P9BE-LABEL: spltMemVali:		; P9BE-LABEL: spltMemVali:
; P9BE: # %bb.0: # %entry		; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfiwzx f0, 0, r3		; P9BE-NEXT: lxvwsx v2, 0, r3
; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
; P9BE-NEXT: xxspltw v2, vs0, 0
; P9BE-NEXT: blr		; P9BE-NEXT: blr
;		;
; P9LE-LABEL: spltMemVali:		; P9LE-LABEL: spltMemVali:
; P9LE: # %bb.0: # %entry		; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwzx f0, 0, r3		; P9LE-NEXT: lxvwsx v2, 0, r3
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxspltw v2, vs0, 3
; P9LE-NEXT: blr		; P9LE-NEXT: blr
;		;
; P8BE-LABEL: spltMemVali:		; P8BE-LABEL: spltMemVali:
; P8BE: # %bb.0: # %entry		; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lfiwzx f0, 0, r3		; P8BE-NEXT: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1		; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE-NEXT: xxspltw v2, vs0, 0		; P8BE-NEXT: xxspltw v2, vs0, 0
; P8BE-NEXT: blr		; P8BE-NEXT: blr
▲ Show 20 Lines • Show All 1,558 Lines • ▼ Show 20 Lines	entry:
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer		%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %splat.splat		ret <4 x i32> %splat.splat
}		}

; Function Attrs: norecurse nounwind readonly		; Function Attrs: norecurse nounwind readonly
define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) {		define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) {
; P9BE-LABEL: spltMemValui:		; P9BE-LABEL: spltMemValui:
; P9BE: # %bb.0: # %entry		; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfiwzx f0, 0, r3		; P9BE-NEXT: lxvwsx v2, 0, r3
; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
; P9BE-NEXT: xxspltw v2, vs0, 0
; P9BE-NEXT: blr		; P9BE-NEXT: blr
;		;
; P9LE-LABEL: spltMemValui:		; P9LE-LABEL: spltMemValui:
; P9LE: # %bb.0: # %entry		; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwzx f0, 0, r3		; P9LE-NEXT: lxvwsx v2, 0, r3
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxspltw v2, vs0, 3
; P9LE-NEXT: blr		; P9LE-NEXT: blr
;		;
; P8BE-LABEL: spltMemValui:		; P8BE-LABEL: spltMemValui:
; P8BE: # %bb.0: # %entry		; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lfiwzx f0, 0, r3		; P8BE-NEXT: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1		; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE-NEXT: xxspltw v2, vs0, 0		; P8BE-NEXT: xxspltw v2, vs0, 0
; P8BE-NEXT: blr		; P8BE-NEXT: blr
▲ Show 20 Lines • Show All 3,461 Lines • Show Last 20 Lines

test/CodeGen/PowerPC/load-and-splat.ll

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
				; RUN: -mtriple=powerpc64-unknown-unknown < %s \| FileCheck %s \
				; RUN: -check-prefix=P9
				jsjiUnsubmitted Not Done Reply Inline Actions Any specific reason that we want to use `powerpc64` instead of `powerpc64le` for `pwr9`? jsji: Any specific reason that we want to use `powerpc64` instead of `powerpc64le` for `pwr9`?
				nemanjaiAuthorUnsubmitted Done Reply Inline Actions I wanted at least one of them to be big endian and I figured why not the one that has more hits in the test case. I don't think we really need four RUN lines for P8/P9 and LE/BE. nemanjai: I wanted at least one of them to be big endian and I figured why not the one that has more hits…
				; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
				; RUN: -mtriple=powerpc64le-unknown-unknown < %s \| FileCheck %s \
				; RUN: -check-prefix=P8
				define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr {
				; P9-LABEL: test:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r4, r4, 24
				; P9-NEXT: lxvdsx vs0, 0, r4
				; P9-NEXT: stxv vs0, 0(r3)
				; P9-NEXT: blr
				;
				; P8-LABEL: test:
				; P8: # %bb.0: # %entry
				; P8-NEXT: addi r4, r4, 24
				; P8-NEXT: lxvdsx vs0, 0, r4
				; P8-NEXT: stxvd2x vs0, 0, r3
				; P8-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds double, double* %a, i64 3
				%0 = load double, double* %arrayidx, align 8
				%splat.splatinsert.i = insertelement <2 x double> undef, double %0, i32 0
				%splat.splat.i = shufflevector <2 x double> %splat.splatinsert.i, <2 x double> undef, <2 x i32> zeroinitializer
				store <2 x double> %splat.splat.i, <2 x double>* %c, align 16
				ret void
				}

				define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonly %a) local_unnamed_addr {
				; P9-LABEL: test2:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r4, r4, 12
				; P9-NEXT: lxvwsx vs0, 0, r4
				; P9-NEXT: stxv vs0, 0(r3)
				; P9-NEXT: blr
				;
				; P8-LABEL: test2:
				; P8: # %bb.0: # %entry
				; P8-NEXT: addi r4, r4, 12
				; P8-NEXT: lfiwzx f0, 0, r4
				; P8-NEXT: xxpermdi vs0, f0, f0, 2
				; P8-NEXT: xxspltw v2, vs0, 3
				; P8-NEXT: stvx v2, 0, r3
				; P8-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds float, float* %a, i64 3
				%0 = load float, float* %arrayidx, align 4
				%splat.splatinsert.i = insertelement <4 x float> undef, float %0, i32 0
				%splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> undef, <4 x i32> zeroinitializer
				store <4 x float> %splat.splat.i, <4 x float>* %c, align 16
				ret void
				}

				define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a) local_unnamed_addr {
				; P9-LABEL: test3:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r4, r4, 12
				; P9-NEXT: lxvwsx vs0, 0, r4
				; P9-NEXT: stxv vs0, 0(r3)
				; P9-NEXT: blr
				;
				; P8-LABEL: test3:
				; P8: # %bb.0: # %entry
				; P8-NEXT: addi r4, r4, 12
				; P8-NEXT: lfiwzx f0, 0, r4
				; P8-NEXT: xxpermdi vs0, f0, f0, 2
				; P8-NEXT: xxspltw v2, vs0, 3
				; P8-NEXT: stvx v2, 0, r3
				; P8-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds i32, i32* %a, i64 3
				%0 = load i32, i32* %arrayidx, align 4
				%splat.splatinsert.i = insertelement <4 x i32> undef, i32 %0, i32 0
				%splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
				store <4 x i32> %splat.splat.i, <4 x i32>* %c, align 16
				ret void
				}

				define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr {
				; P9-LABEL: test4:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r4, r4, 24
				; P9-NEXT: lxvdsx vs0, 0, r4
				; P9-NEXT: stxv vs0, 0(r3)
				; P9-NEXT: blr
				;
				; P8-LABEL: test4:
				; P8: # %bb.0: # %entry
				; P8-NEXT: addi r4, r4, 24
				; P8-NEXT: lxvdsx vs0, 0, r4
				; P8-NEXT: stxvd2x vs0, 0, r3
				; P8-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds i64, i64* %a, i64 3
				%0 = load i64, i64* %arrayidx, align 8
				%splat.splatinsert.i = insertelement <2 x i64> undef, i64 %0, i32 0
				%splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
				store <2 x i64> %splat.splat.i, <2 x i64>* %c, align 16
				ret void
				}

				define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
				; P9-LABEL: unadjusted_lxvwsx:
				; P9: # %bb.0: # %entry
				; P9-NEXT: lxvwsx v2, 0, r3
				; P9-NEXT: blr
				;
				; P8-LABEL: unadjusted_lxvwsx:
				; P8: # %bb.0: # %entry
				; P8-NEXT: lfiwzx f0, 0, r3
				; P8-NEXT: xxpermdi vs0, f0, f0, 2
				; P8-NEXT: xxspltw v2, vs0, 3
				; P8-NEXT: blr
				entry:
				%0 = bitcast i32* %s to <4 x i8>*
				%1 = load <4 x i8>, <4 x i8>* %0, align 4
				%2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
				ret <16 x i8> %2
				}

				define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
				; P9-LABEL: adjusted_lxvwsx:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r3, r3, 4
				; P9-NEXT: lxvwsx v2, 0, r3
				; P9-NEXT: blr
				;
				; P8-LABEL: adjusted_lxvwsx:
				; P8: # %bb.0: # %entry
				; P8-NEXT: ld r3, 0(r3)
				; P8-NEXT: mtvsrd f0, r3
				; P8-NEXT: xxswapd v2, vs0
				; P8-NEXT: xxspltw v2, v2, 2
				; P8-NEXT: blr
				entry:
				%0 = bitcast i64* %s to <8 x i8>*
				%1 = load <8 x i8>, <8 x i8>* %0, align 8
				%2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
				ret <16 x i8> %2
				}

				define <16 x i8> @unadjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
				; P9-LABEL: unadjusted_lxvwsx_v16i8:
				; P9: # %bb.0: # %entry
				; P9-NEXT: lxvwsx v2, 0, r3
				; P9-NEXT: blr
				;
				; P8-LABEL: unadjusted_lxvwsx_v16i8:
				; P8: # %bb.0: # %entry
				; P8-NEXT: lvx v2, 0, r3
				; P8-NEXT: xxspltw v2, v2, 3
				; P8-NEXT: blr
				entry:
				%0 = load <16 x i8>, <16 x i8>* %s, align 16
				%1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
				ret <16 x i8> %1
				}

				define <16 x i8> @adjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
				; P9-LABEL: adjusted_lxvwsx_v16i8:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r3, r3, 4
				; P9-NEXT: lxvwsx v2, 0, r3
				; P9-NEXT: blr
				;
				; P8-LABEL: adjusted_lxvwsx_v16i8:
				; P8: # %bb.0: # %entry
				; P8-NEXT: lvx v2, 0, r3
				; P8-NEXT: xxspltw v2, v2, 2
				; P8-NEXT: blr
				entry:
				%0 = load <16 x i8>, <16 x i8>* %s, align 16
				%1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
				ret <16 x i8> %1
				}

				define <16 x i8> @adjusted_lxvwsx_v16i8_2(<16 x i8> *%s, <16 x i8> %t) {
				; P9-LABEL: adjusted_lxvwsx_v16i8_2:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r3, r3, 8
				; P9-NEXT: lxvwsx v2, 0, r3
				; P9-NEXT: blr
				;
				; P8-LABEL: adjusted_lxvwsx_v16i8_2:
				; P8: # %bb.0: # %entry
				; P8-NEXT: lvx v2, 0, r3
				; P8-NEXT: xxspltw v2, v2, 1
				; P8-NEXT: blr
				entry:
				%0 = load <16 x i8>, <16 x i8>* %s, align 16
				%1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
				ret <16 x i8> %1
				}

				define <16 x i8> @adjusted_lxvwsx_v16i8_3(<16 x i8> *%s, <16 x i8> %t) {
				; P9-LABEL: adjusted_lxvwsx_v16i8_3:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r3, r3, 12
				; P9-NEXT: lxvwsx v2, 0, r3
				; P9-NEXT: blr
				;
				; P8-LABEL: adjusted_lxvwsx_v16i8_3:
				; P8: # %bb.0: # %entry
				; P8-NEXT: lvx v2, 0, r3
				; P8-NEXT: xxspltw v2, v2, 0
				; P8-NEXT: blr
				entry:
				%0 = load <16 x i8>, <16 x i8>* %s, align 16
				%1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
				ret <16 x i8> %1
				}

				define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
				; P9-LABEL: unadjusted_lxvdsx:
				; P9: # %bb.0: # %entry
				; P9-NEXT: lxvdsx v2, 0, r3
				; P9-NEXT: blr
				;
				; P8-LABEL: unadjusted_lxvdsx:
				; P8: # %bb.0: # %entry
				; P8-NEXT: lxvdsx v2, 0, r3
				; P8-NEXT: blr
				entry:
				%0 = bitcast i64* %s to <8 x i8>*
				%1 = load <8 x i8>, <8 x i8>* %0, align 8
				%2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
				ret <16 x i8> %2
				}

				define <16 x i8> @unadjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
				; P9-LABEL: unadjusted_lxvdsx_v16i8:
				; P9: # %bb.0: # %entry
				; P9-NEXT: lxvdsx v2, 0, r3
				; P9-NEXT: blr
				;
				; P8-LABEL: unadjusted_lxvdsx_v16i8:
				; P8: # %bb.0: # %entry
				; P8-NEXT: lxvdsx v2, 0, r3
				; P8-NEXT: blr
				entry:
				%0 = load <16 x i8>, <16 x i8>* %s, align 16
				%1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
				ret <16 x i8> %1
				}

				define <16 x i8> @adjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
				; P9-LABEL: adjusted_lxvdsx_v16i8:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r3, r3, 8
				; P9-NEXT: lxvdsx v2, 0, r3
				; P9-NEXT: blr
				;
				; P8-LABEL: adjusted_lxvdsx_v16i8:
				; P8: # %bb.0: # %entry
				; P8-NEXT: addi r3, r3, 8
				; P8-NEXT: lxvdsx v2, 0, r3
				; P8-NEXT: blr
				entry:
				%0 = load <16 x i8>, <16 x i8>* %s, align 16
				%1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
				ret <16 x i8> %1
				}

test/CodeGen/PowerPC/power9-moves-and-splats.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \			; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
	; RUN: -ppc-asm-full-reg-names < %s \| FileCheck %s			; RUN: -ppc-asm-full-reg-names < %s \| FileCheck %s
	; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \			; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \
	; RUN: -ppc-asm-full-reg-names < %s \| FileCheck %s --check-prefix=CHECK-BE			; RUN: -ppc-asm-full-reg-names < %s \| FileCheck %s --check-prefix=CHECK-BE

	@Globi = external global i32, align 4			@Globi = external global i32, align 4
	@Globf = external global float, align 4			@Globf = external global float, align 4

	define <2 x i64> @test1(i64 %a, i64 %b) {			define <2 x i64> @test1(i64 %a, i64 %b) {
	; CHECK-LABEL: test1:			; CHECK-LABEL: test1:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: mtvsrdd v2, r4, r3			; CHECK-NEXT: mtvsrdd v2, r4, r3
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	;			;
				nemanjaiAuthorUnsubmitted Done Reply Inline Actions Please forgive the trivial changes in this test case. The script that produces the checks apparently behaves slightly differently now and I would prefer to leave the test case exactly as produced by the script. nemanjai: Please forgive the trivial changes in this test case. The script that produces the checks…
				lebedev.riUnsubmitted Not Done Reply Inline Actions You can just regenerate all the affected files in a preparatory commit and rebase the patch. lebedev.ri: You can just regenerate all the affected files in a preparatory commit and rebase the patch.
				nemanjaiAuthorUnsubmitted Done Reply Inline Actions Ah, yeah. That's a good idea. I don't know why I didn't think of that. I'll definitely do that next time I see this issue. nemanjai: Ah, yeah. That's a good idea. I don't know why I didn't think of that. I'll definitely do that…
				jsjiUnsubmitted Not Done Reply Inline Actions I have committed https://reviews.llvm.org/rL365330 to include the new ';', your patch should auto-merge when you rebase. jsji: I have committed https://reviews.llvm.org/rL365330 to include the new ';', your patch should…
	; CHECK-BE-LABEL: test1:			; CHECK-BE-LABEL: test1:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: mtvsrdd v2, r3, r4			; CHECK-BE-NEXT: mtvsrdd v2, r3, r4
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp			; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
	; which will happen in a subsequent patch.			; which will happen in a subsequent patch.
	Show All 32 Lines
	entry:			entry:
	%0 = extractelement <2 x i64> %a, i32 1			%0 = extractelement <2 x i64> %a, i32 1
	ret i64 %0			ret i64 %0
	}			}

	define <4 x i32> @test4(i32* nocapture readonly %in) {			define <4 x i32> @test4(i32* nocapture readonly %in) {
	; CHECK-LABEL: test4:			; CHECK-LABEL: test4:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: lfiwzx f0, 0, r3			; CHECK-NEXT: lxvwsx v2, 0, r3
	; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
	; CHECK-NEXT: xxspltw v2, vs0, 3
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	;			;
	; CHECK-BE-LABEL: test4:			; CHECK-BE-LABEL: test4:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: lfiwzx f0, 0, r3			; CHECK-BE-NEXT: lxvwsx v2, 0, r3
	; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
	; CHECK-BE-NEXT: xxspltw v2, vs0, 0
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	%0 = load i32, i32* %in, align 4			%0 = load i32, i32* %in, align 4
	%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0			%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
	%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
	ret <4 x i32> %splat.splat			ret <4 x i32> %splat.splat
	}			}

	define <4 x float> @test5(float* nocapture readonly %in) {			define <4 x float> @test5(float* nocapture readonly %in) {
	; CHECK-LABEL: test5:			; CHECK-LABEL: test5:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: lfiwzx f0, 0, r3			; CHECK-NEXT: lxvwsx v2, 0, r3
	; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
	; CHECK-NEXT: xxspltw v2, vs0, 3
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	;			;
	; CHECK-BE-LABEL: test5:			; CHECK-BE-LABEL: test5:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: lfiwzx f0, 0, r3			; CHECK-BE-NEXT: lxvwsx v2, 0, r3
	; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
	; CHECK-BE-NEXT: xxspltw v2, vs0, 0
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	%0 = load float, float* %in, align 4			%0 = load float, float* %in, align 4
	%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0			%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
	%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
	ret <4 x float> %splat.splat			ret <4 x float> %splat.splat
	}			}

	define <4 x i32> @test6() {			define <4 x i32> @test6() {
	; CHECK-LABEL: test6:			; CHECK-LABEL: test6:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: addis r3, r2, .LC0@toc@ha			; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
	; CHECK-NEXT: ld r3, .LC0@toc@l(r3)			; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
	; CHECK-NEXT: lfiwzx f0, 0, r3			; CHECK-NEXT: lxvwsx v2, 0, r3
	; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
	; CHECK-NEXT: xxspltw v2, vs0, 3
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	;			;
	; CHECK-BE-LABEL: test6:			; CHECK-BE-LABEL: test6:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha			; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha
	; CHECK-BE-NEXT: ld r3, .LC0@toc@l(r3)			; CHECK-BE-NEXT: ld r3, .LC0@toc@l(r3)
	; CHECK-BE-NEXT: lfiwzx f0, 0, r3			; CHECK-BE-NEXT: lxvwsx v2, 0, r3
	; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
	; CHECK-BE-NEXT: xxspltw v2, vs0, 0
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	%0 = load i32, i32* @Globi, align 4			%0 = load i32, i32* @Globi, align 4
	%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0			%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
	%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
	ret <4 x i32> %splat.splat			ret <4 x i32> %splat.splat
	}			}

	define <4 x float> @test7() {			define <4 x float> @test7() {
	; CHECK-LABEL: test7:			; CHECK-LABEL: test7:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: addis r3, r2, .LC1@toc@ha			; CHECK-NEXT: addis r3, r2, .LC1@toc@ha
	; CHECK-NEXT: ld r3, .LC1@toc@l(r3)			; CHECK-NEXT: ld r3, .LC1@toc@l(r3)
	; CHECK-NEXT: lfiwzx f0, 0, r3			; CHECK-NEXT: lxvwsx v2, 0, r3
	; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
	; CHECK-NEXT: xxspltw v2, vs0, 3
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	;			;
	; CHECK-BE-LABEL: test7:			; CHECK-BE-LABEL: test7:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: addis r3, r2, .LC1@toc@ha			; CHECK-BE-NEXT: addis r3, r2, .LC1@toc@ha
	; CHECK-BE-NEXT: ld r3, .LC1@toc@l(r3)			; CHECK-BE-NEXT: ld r3, .LC1@toc@l(r3)
	; CHECK-BE-NEXT: lfiwzx f0, 0, r3			; CHECK-BE-NEXT: lxvwsx v2, 0, r3
	; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
	; CHECK-BE-NEXT: xxspltw v2, vs0, 0
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	%0 = load float, float* @Globf, align 4			%0 = load float, float* @Globf, align 4
	%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0			%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
	%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
	ret <4 x float> %splat.splat			ret <4 x float> %splat.splat
	}			}
	▲ Show 20 Lines • Show All 131 Lines • Show Last 20 Lines

test/CodeGen/PowerPC/qpx-load-splat.ll

		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \		; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s \| FileCheck %s		; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s \| FileCheck %s

; Function Attrs: norecurse nounwind readonly		; Function Attrs: norecurse nounwind readonly
define <4 x double> @foo(double* nocapture readonly %a) #0 {		define <4 x double> @foo(double* nocapture readonly %a) #0 {
; CHECK-LABEL: foo:		; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry		; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvdsx v2, 0, r3		; CHECK-NEXT: lxvdsx v2, 0, r3
Show All 20 Lines	entry:
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer		%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %shuffle.i		ret <4 x double> %shuffle.i
}		}

define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {		define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {
; CHECK-LABEL: fooxu:		; CHECK-LABEL: fooxu:
; CHECK: # %bb.0: # %entry		; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 3		; CHECK-NEXT: sldi r4, r4, 3
; CHECK-NEXT: lfdux f0, r3, r4		; CHECK-NEXT: add r6, r3, r4
; CHECK-NEXT: xxspltd v2, vs0, 0		; CHECK-NEXT: std r6, 0(r5)
; CHECK-NEXT: std r3, 0(r5)		; CHECK-NEXT: lxvdsx v2, r3, r4
; CHECK-NEXT: vmr v3, v2		; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: blr		; CHECK-NEXT: blr
entry:		entry:
%p = getelementptr double, double* %a, i64 %idx		%p = getelementptr double, double* %a, i64 %idx
%0 = load double, double* %p, align 8		%0 = load double, double* %p, align 8
%vecinit.i = insertelement <4 x double> undef, double %0, i32 0		%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer		%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
store double* %p, double** %pptr, align 8		store double* %p, double** %pptr, align 8
Show All 34 Lines

test/CodeGen/PowerPC/swaps-le-7.ll

	; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s \| FileCheck %s			; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s \| FileCheck %s
	;			;
	; This is a regression test based on https://llvm.org/bugs/show_bug.cgi?id=27735			; This is a regression test based on https://llvm.org/bugs/show_bug.cgi?id=27735
	;			;

	@G1 = global <2 x double> <double 2.0, double -10.0>			@G1 = global <2 x double> <double 2.0, double -10.0>
	@G2 = global <2 x double> <double 3.0, double 4.0>			@G2 = global <2 x double> <double 3.0, double 4.0>
	@G3 = global <2 x double> <double 5.0, double 6.0>			@G3 = global <2 x double> <double 5.0, double 6.0>
	@G4 = global <2 x double> <double 7.0, double 8.0>			@G4 = global <2 x double> <double 7.0, double 8.0>

	; CHECK-LABEL: @zg			; CHECK-LABEL: @zg
	; CHECK: xxspltd			; CHECK: lxvdsx
	; CHECK-NEXT: xxspltd			; CHECK-NEXT: lxvdsx
	; CHECK-NEXT: xvmuldp			; CHECK-NEXT: xvmuldp
	; CHECK-DAG: xvmuldp			; CHECK-DAG: xvmuldp
	; CHECK-DAG: xvsubdp			; CHECK-DAG: xvsubdp
	; CHECK-DAG: xvadddp			; CHECK-DAG: xvadddp
	; CHECK-DAG: xxswapd			; CHECK-DAG: xxswapd
	; CHECK-DAG: xxpermdi			; CHECK-DAG: xxpermdi
	; CHECK-DAG: xvsubdp			; CHECK-DAG: xvsubdp
	; CHECK: xxswapd			; CHECK: xxswapd
	Show All 34 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] Exploit single instruction load-and-splat for word and doubleword
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 209169

lib/Target/PowerPC/PPCISelLowering.h

lib/Target/PowerPC/PPCISelLowering.cpp

lib/Target/PowerPC/PPCInstrAltivec.td

lib/Target/PowerPC/PPCInstrVSX.td

test/CodeGen/PowerPC/VSX-XForm-Scalars.ll

test/CodeGen/PowerPC/build-vector-tests.ll

test/CodeGen/PowerPC/load-and-splat.ll

test/CodeGen/PowerPC/power9-moves-and-splats.ll

test/CodeGen/PowerPC/qpx-load-splat.ll

test/CodeGen/PowerPC/swaps-le-7.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] Exploit single instruction load-and-splat for word and doubleword
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 209169

lib/Target/PowerPC/PPCISelLowering.h

lib/Target/PowerPC/PPCISelLowering.cpp

lib/Target/PowerPC/PPCInstrAltivec.td

lib/Target/PowerPC/PPCInstrVSX.td

test/CodeGen/PowerPC/VSX-XForm-Scalars.ll

test/CodeGen/PowerPC/build-vector-tests.ll

test/CodeGen/PowerPC/load-and-splat.ll

test/CodeGen/PowerPC/power9-moves-and-splats.ll

test/CodeGen/PowerPC/qpx-load-splat.ll

test/CodeGen/PowerPC/swaps-le-7.ll

[PowerPC] Exploit single instruction load-and-splat for word and doubleword
ClosedPublic