Diff 205898

lib/Target/PowerPC/PPCISelLowering.h

Show First 20 Lines • Show All 445 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
/// Maps directly to an lxvd2x instruction that will be followed by		/// Maps directly to an lxvd2x instruction that will be followed by
/// an xxswapd.		/// an xxswapd.
LXVD2X,		LXVD2X,

/// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a		/// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
/// v2f32 value into the lower half of a VSR register.		/// v2f32 value into the lower half of a VSR register.
LD_VSX_LH,		LD_VSX_LH,

		/// LD_SPLAT - a splatting load memory instruction (LXVDSX, LXVWSX).
		jsjiUnsubmitted Not Done Reply Inline Actions This ISD has chain, please update the comments to describe it. jsji: This ISD has chain, please update the comments to describe it.
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions Sure, I omitted it since all of these nodes have a chain and all these comments seem superfluous. But you're right, consistency is more important. nemanjai: Sure, I omitted it since all of these nodes have a chain and all these comments seem…
		LD_SPLAT,

/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.		/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by		/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.		/// an xxswapd.
STXVD2X,		STXVD2X,

/// Store scalar integers from VSR.		/// Store scalar integers from VSR.
ST_VSR_SCAL_INT,		ST_VSR_SCAL_INT,

▲ Show 20 Lines • Show All 737 Lines • Show Last 20 Lines

lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,395 Lines • ▼ Show 20 Lines	const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::QVGPCI: return "PPCISD::QVGPCI";		case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";		case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";		case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
case PPCISD::QBFLT: return "PPCISD::QBFLT";		case PPCISD::QBFLT: return "PPCISD::QBFLT";
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";		case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";		case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";		case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";		case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
		case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH";		case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH";
}		}
return nullptr;		return nullptr;
}		}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,		EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
EVT VT) const {		EVT VT) const {
if (!VT.isVector())		if (!VT.isVector())
▲ Show 20 Lines • Show All 360 Lines • ▼ Show 20 Lines	int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
return ShiftAmt;		return ShiftAmt;
}		}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand		/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to		/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.		/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {		bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
assert(N->getValueType(0) == MVT::v16i8 &&		assert(N->getValueType(0) == MVT::v16i8 &&
(EltSize == 1 \|\| EltSize == 2 \|\| EltSize == 4));		(EltSize == 1 \|\| EltSize == 2 \|\| EltSize == 4));
		jsjiUnsubmitted Not Done Reply Inline Actions Looks like you are repurposing this function: instead of using it just for 'VSPLTB/VSPLTH/VSPLTW', use it for `lxvdsx` as well. I don't think it is a great idea to just update the assert here. We should either rename the function, or re-structure the code to two different functions, in another NFC patch? jsji: Looks like you are repurposing this function: instead of using it just for…
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions Updating the comment, I can see. But why do you not feel this function is adequate for 64-bit splats? The name is adequate - we are looking for a splat shuffle mask. And we are in a legalized DAG which will make all shuffles `v16i8`. nemanjai: Updating the comment, I can see. But why do you not feel this function is adequate for 64-bit…

// The consecutive indices need to specify an element, not part of two		// The consecutive indices need to specify an element, not part of two
// different elements. So abandon ship early if this isn't the case.		// different elements. So abandon ship early if this isn't the case.
if (N->getMaskElt(0) % EltSize != 0)		if (N->getMaskElt(0) % EltSize != 0)
return false;		return false;

// This is a splat operation if each element of the permute is the same, and		// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.		// if the value doesn't reference the second vector.
▲ Show 20 Lines • Show All 3,337 Lines • ▼ Show 20 Lines	CCRetInfo.AnalyzeCallResult(
Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)		Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
? RetCC_PPC_Cold		? RetCC_PPC_Cold
: RetCC_PPC);		: RetCC_PPC);

// Copy all of the result registers out of their specified physreg.		// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {		for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];		CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");		assert(VA.isRegLoc() && "Can only return in registers!");

SDValue Val = DAG.getCopyFromReg(Chain, dl,		SDValue Val = DAG.getCopyFromReg(Chain, dl,
		amykUnsubmitted Not Done Reply Inline Actions Do comments like these need to be full sentences with periods, as well? amyk: Do comments like these need to be full sentences with periods, as well?
		amykUnsubmitted Not Done Reply Inline Actions Sorry, I just noticed that for some reason, it highlighted this as your change. Please disregard my comment. amyk: Sorry, I just noticed that for some reason, it highlighted this as your change. Please…
VA.getLocReg(), VA.getLocVT(), InFlag);		VA.getLocReg(), VA.getLocVT(), InFlag);
Chain = Val.getValue(1);		Chain = Val.getValue(1);
InFlag = Val.getValue(2);		InFlag = Val.getValue(2);

switch (VA.getLocInfo()) {		switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");		default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;		case CCValAssign::Full: break;
case CCValAssign::AExt:		case CCValAssign::AExt:
▲ Show 20 Lines • Show All 2,937 Lines • ▼ Show 20 Lines	if (!EnableQuadPrecision \|\|
(Op0.getOperand(0).getValueType() != MVT::i64) \|\|		(Op0.getOperand(0).getValueType() != MVT::i64) \|\|
(Op0.getOperand(1).getValueType() != MVT::i64))		(Op0.getOperand(1).getValueType() != MVT::i64))
return SDValue();		return SDValue();

return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),		return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
Op0.getOperand(1));		Op0.getOperand(1));
}		}

// If this is a case we can't handle, return null and let the default		// If this is a case we can't handle, return null and let the default
		jsjiUnsubmitted Not Done Reply Inline Actions We can't support indexed load as well, and we are also trying to return the `InputLoad`, so maybe something like `getNormalLoadInput` and add comments about returning true for success? jsji: We can't support indexed load as well, and we are also trying to return the `InputLoad`, so…
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions This is a good point. I can actually make it return a `const SDValue *` so success is simply signaled by the returned value not being `nullptr`. nemanjai: This is a good point. I can actually make it return a `const SDValue *` so success is simply…
// expansion code take care of it. If we CAN select this case, and if it		// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen		// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the		// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.		// sequence of ops that should be used.
		jsjiUnsubmitted Not Done Reply Inline Actions Any other ISDs we can/should peek through? How about `ANY_EXTEND_VECTOR_INREG`/`EXTRACT_SUBVECTOR`? jsji: Any other ISDs we can/should peek through? How about…
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions I don't think we want to peek through any vector operations since we are looking for a scalar non-extending, non-indexed load. nemanjai: I don't think we want to peek through any vector operations since we are looking for a scalar…
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,		SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
SDLoc dl(Op);		SDLoc dl(Op);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());		BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
		jsjiUnsubmitted Not Done Reply Inline Actions Why not just `LD->isNormalLoad()`? jsji: Why not just `LD->isNormalLoad()`?
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions I think you mean `ISD::isNormalLoad(LD)` and my answer is, there is no good reason. I first added the indexed check and realized later that I also have to ensure it is non-extending :) I will definitely change it, thank you. nemanjai: I think you mean `ISD::isNormalLoad(LD)` and my answer is, there is no good reason. I first…
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");		assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {		if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
// We first build an i32 vector, load it into a QPX register,		// We first build an i32 vector, load it into a QPX register,
// then convert it to a floating-point vector and compare it		// then convert it to a floating-point vector and compare it
// to a zero vector to get the boolean result.		// to a zero vector to get the boolean result.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();		MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = MFI.CreateStackObject(16, 16, false);		int FrameIdx = MFI.CreateStackObject(16, 16, false);
▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines	SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,

// Check if this is a splat of a constant value.		// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;		APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;		unsigned SplatBitSize;
bool HasAnyUndefs;		bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,		if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) \|\|		HasAnyUndefs, 0, !Subtarget.isLittleEndian()) \|\|
SplatBitSize > 32) {		SplatBitSize > 32) {

		// Handle load-and-splat patterns as we have instructions that will do this
		// in one go.
		if (DAG.isSplatValue(Op, true) &&
		Op.getOperand(0).getOpcode() == ISD::LOAD) {

		jsjiUnsubmitted Not Done Reply Inline Actions Do we need to check `hasOneUse` here as well? jsji: Do we need to check `hasOneUse` here as well?
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions I can certainly add it since this wouldn't be profitable if there are other uses of the load. nemanjai: I can certainly add it since this wouldn't be profitable if there are other uses of the load.
		// Can't handle indexed or extending loads here.
		LoadSDNode *LD = cast<LoadSDNode>(Op.getOperand(0));
		if (LD->isIndexed() \|\| !ISD::isNON_EXTLoad(LD))
		return SDValue();

		// We have handling for 4 and 8 byte elements.
		unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
		if ((Subtarget.hasVSX() && ElementSize == 64) \|\|
		(Subtarget.hasP9Vector() && ElementSize == 32)) {
		SDValue Ops[] = {
		LD->getChain(), // Chain
		LD->getBasePtr(), // Ptr
		DAG.getValueType(Op.getValueType()) // VT
		};
		return
		DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
		DAG.getVTList(Op.getValueType(), MVT::Other),
		Ops, LD->getMemoryVT(), LD->getMemOperand());
		}
		}

// BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be		// BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
// lowered to VSX instructions under certain conditions.		// lowered to VSX instructions under certain conditions.
// Without VSX, there is no pattern more efficient than expanding the node.		// Without VSX, there is no pattern more efficient than expanding the node.
if (Subtarget.hasVSX() &&		if (Subtarget.hasVSX() &&
haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),		haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
Subtarget.hasP8Vector()))		Subtarget.hasP8Vector()))
return Op;		return Op;
return SDValue();		return SDValue();
▲ Show 20 Lines • Show All 480 Lines • ▼ Show 20 Lines	if (Subtarget.hasP9Vector() &&
if (Swap)		if (Swap)
std::swap(V1, V2);		std::swap(V1, V2);
SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);		SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);		SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
if (ShiftElts) {		if (ShiftElts) {
SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,		SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
DAG.getConstant(ShiftElts, dl, MVT::i32));		DAG.getConstant(ShiftElts, dl, MVT::i32));
SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,		SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
DAG.getConstant(InsertAtByte, dl, MVT::i32));		DAG.getConstant(InsertAtByte, dl, MVT::i32));
		jsjiUnsubmitted Not Done Reply Inline Actions Similar to `isSplatShuffleMask`, we are repurposing `getVSPLTImmediate` as well, VSPLT* has only 3 forms (1/2/4), we should either rename this function, or use another wrapper. jsji: Similar to `isSplatShuffleMask`, we are repurposing `getVSPLTImmediate` as well, VSPLT* has…
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions Yeah, you're absolutely right. I should rename the function. I initially left it alone as I thought that `VSPLT` adequately abbreviates `Vector Splat`, but since it is part of a mnemonic and capitalized, I agree with you that it is very misleading. nemanjai: Yeah, you're absolutely right. I should rename the function. I initially left it alone as I…
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);		return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}		}
SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,		SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
DAG.getConstant(InsertAtByte, dl, MVT::i32));		DAG.getConstant(InsertAtByte, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);		return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}		}

if (Subtarget.hasP9Altivec()) {		if (Subtarget.hasP9Altivec()) {
▲ Show 20 Lines • Show All 6,510 Lines • Show Last 20 Lines

lib/Target/PowerPC/PPCInstrVSX.td

Show First 20 Lines • Show All 52 Lines • ▼ Show 20 Lines
def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {		def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;		let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
}		}

def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [		def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [
SDTCisVT<0, v4f32>, SDTCisPtrTy<1>		SDTCisVT<0, v4f32>, SDTCisPtrTy<1>
]>;		]>;

		def SDT_PPCldsplat : SDTypeProfile<1, 1, [
		SDTCisVec<0>, SDTCisSameAs<0, 1>
		jsjiUnsubmitted Not Done Reply Inline Actions Why `SDTCisSameAs<0,1>`? Shouldn't it be `SDTCisPtrTy<1>`? jsji: Why `SDTCisSameAs<0,1>`? Shouldn't it be `SDTCisPtrTy<1>`?
		nemanjaiAuthorUnsubmitted Done Reply Inline Actions Wow, that's right. I neither know why I wrote it this way nor why table gen doesn't complain! nemanjai: Wow, that's right. I neither know why I wrote it this way nor why table gen doesn't complain!
		]>;

def SDT_PPCfpextlh : SDTypeProfile<1, 1, [		def SDT_PPCfpextlh : SDTypeProfile<1, 1, [
SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>		SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>
]>;		]>;

// Little-endian-specific nodes.		// Little-endian-specific nodes.
def SDT_PPClxvd2x : SDTypeProfile<1, 1, [		def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
SDTCisVT<0, v2f64>, SDTCisPtrTy<1>		SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
]>;		]>;
Show All 22 Lines
def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;		def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;		def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;		def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;		def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;

def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>;		def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>;
def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,		def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;		[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
		def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
		[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;

multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,		multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,		string asmstr, InstrItinClass itin, Intrinsic Int,
ValueType OutTy, ValueType InTy> {		ValueType OutTy, ValueType InTy> {
let BaseName = asmbase in {		let BaseName = asmbase in {
def NAME : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),		def NAME : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,		!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
[(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>;		[(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>;
▲ Show 20 Lines • Show All 3,742 Lines • ▼ Show 20 Lines	let Predicates = [HasVSX] in {
def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),		def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS		(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;		(XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),		def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS		(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;		(XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),		def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;		(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
		def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
		(v2f64 (LXVDSX xoaddr:$A))>;
		def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
		(v2i64 (LXVDSX xoaddr:$A))>;

// Build vectors of floating point converted to i64.		// Build vectors of floating point converted to i64.
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),		def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
(v2i64 (XXPERMDIs		(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;		(COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),		def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
(v2i64 (XXPERMDIs		(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;		(COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
▲ Show 20 Lines • Show All 228 Lines • ▼ Show 20 Lines	let Predicates = [HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),		def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS		(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddrX4:$A),		(DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;		VSFRC)), 0))>;
def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),		def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS		(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddrX4:$A),		(DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;		VSFRC)), 0))>;
		def : Pat<(v4f32 (PPCldsplat xoaddr:$A)),
		(v4f32 (LXVWSX xoaddr:$A))>;
		def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
		(v4i32 (LXVWSX xoaddr:$A))>;
}		}

let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {		let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
def : Pat<(i64 (extractelt v2i64:$A, 1)),		def : Pat<(i64 (extractelt v2i64:$A, 1)),
(i64 (MFVSRLD $A))>;		(i64 (MFVSRLD $A))>;
// Better way to build integer vectors if we have MTVSRDD. Big endian.		// Better way to build integer vectors if we have MTVSRDD. Big endian.
def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),		def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
(v2i64 (MTVSRDD $rB, $rA))>;		(v2i64 (MTVSRDD $rB, $rA))>;
▲ Show 20 Lines • Show All 82 Lines • Show Last 20 Lines

test/CodeGen/PowerPC/VSX-XForm-Scalars.ll

	Show All 21 Lines
	; CHECK-P8: ld r3, .LC1@toc@l(r3)			; CHECK-P8: ld r3, .LC1@toc@l(r3)
	; CHECK-P8: xscvsxdsp f0, f0			; CHECK-P8: xscvsxdsp f0, f0
	; CHECK-P8: ld r3, 0(r3)			; CHECK-P8: ld r3, 0(r3)
	; CHECK-P8: stfsx f0, r3, r4			; CHECK-P8: stfsx f0, r3, r4
	; CHECK-P8: blr			; CHECK-P8: blr
	;			;
	; CHECK-P9-LABEL: testExpandPostRAPseudo:			; CHECK-P9-LABEL: testExpandPostRAPseudo:
	; CHECK-P9: # %bb.0: # %entry			; CHECK-P9: # %bb.0: # %entry
	; CHECK-P9: lfiwzx f0, 0, r3
	; CHECK-P9: addis r4, r2, .LC0@toc@ha			; CHECK-P9: addis r4, r2, .LC0@toc@ha
				; CHECK-P9: lxvwsx vs0, 0, r3
	; CHECK-P9: ld r4, .LC0@toc@l(r4)			; CHECK-P9: ld r4, .LC0@toc@l(r4)
	; CHECK-P9: xxpermdi vs0, f0, f0, 2
	; CHECK-P9: xxspltw vs0, vs0, 3
	; CHECK-P9: stxvx vs0, 0, r4			; CHECK-P9: stxvx vs0, 0, r4
				; CHECK-P9: lis r4, 1024
	; CHECK-P9: lfiwax f0, 0, r3			; CHECK-P9: lfiwax f0, 0, r3
	; CHECK-P9: addis r3, r2, .LC1@toc@ha			; CHECK-P9: addis r3, r2, .LC1@toc@ha
	; CHECK-P9: ld r3, .LC1@toc@l(r3)			; CHECK-P9: ld r3, .LC1@toc@l(r3)
	; CHECK-P9: xscvsxdsp f0, f0			; CHECK-P9: xscvsxdsp f0, f0
	; CHECK-P9: ld r3, 0(r3)			; CHECK-P9: ld r3, 0(r3)
	; CHECK-P9: lis r4, 1024
	; CHECK-P9: stfsx f0, r3, r4			; CHECK-P9: stfsx f0, r3, r4
	; CHECK-P9: blr			; CHECK-P9: blr
	entry:			entry:
	%0 = load i32, i32* %ptr, align 4			%0 = load i32, i32* %ptr, align 4
	%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0			%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
	%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
	store <4 x i32> %splat.splat, <4 x i32>* @a, align 16			store <4 x i32> %splat.splat, <4 x i32>* @a, align 16
	tail call void asm sideeffect "#Clobber Rigisters", "~{f0},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()			tail call void asm sideeffect "#Clobber Rigisters", "~{f0},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
	%1 = load i32, i32* %ptr, align 4			%1 = load i32, i32* %ptr, align 4
	%conv = sitofp i32 %1 to float			%conv = sitofp i32 %1 to float
	%2 = load float, float* @pb, align 8			%2 = load float, float* @pb, align 8
	%add.ptr = getelementptr inbounds float, float* %2, i64 16777216			%add.ptr = getelementptr inbounds float, float* %2, i64 16777216
	store float %conv, float* %add.ptr, align 4			store float %conv, float* %add.ptr, align 4
	ret void			ret void
	}			}

test/CodeGen/PowerPC/build-vector-tests.ll

Show First 20 Lines • Show All 1,321 Lines • ▼ Show 20 Lines	entry:
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer		%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %splat.splat		ret <4 x i32> %splat.splat
}		}

; Function Attrs: norecurse nounwind readonly		; Function Attrs: norecurse nounwind readonly
define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) {		define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) {
; P9BE-LABEL: spltMemVali:		; P9BE-LABEL: spltMemVali:
; P9BE: # %bb.0: # %entry		; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfiwzx f0, 0, r3		; P9BE-NEXT: lxvwsx v2, 0, r3
; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
; P9BE-NEXT: xxspltw v2, vs0, 0
; P9BE-NEXT: blr		; P9BE-NEXT: blr
;		;
; P9LE-LABEL: spltMemVali:		; P9LE-LABEL: spltMemVali:
; P9LE: # %bb.0: # %entry		; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwzx f0, 0, r3		; P9LE-NEXT: lxvwsx v2, 0, r3
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxspltw v2, vs0, 3
; P9LE-NEXT: blr		; P9LE-NEXT: blr
;		;
; P8BE-LABEL: spltMemVali:		; P8BE-LABEL: spltMemVali:
; P8BE: # %bb.0: # %entry		; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lfiwzx f0, 0, r3		; P8BE-NEXT: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1		; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE-NEXT: xxspltw v2, vs0, 0		; P8BE-NEXT: xxspltw v2, vs0, 0
; P8BE-NEXT: blr		; P8BE-NEXT: blr
▲ Show 20 Lines • Show All 1,558 Lines • ▼ Show 20 Lines	entry:
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer		%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %splat.splat		ret <4 x i32> %splat.splat
}		}

; Function Attrs: norecurse nounwind readonly		; Function Attrs: norecurse nounwind readonly
define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) {		define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) {
; P9BE-LABEL: spltMemValui:		; P9BE-LABEL: spltMemValui:
; P9BE: # %bb.0: # %entry		; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfiwzx f0, 0, r3		; P9BE-NEXT: lxvwsx v2, 0, r3
; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
; P9BE-NEXT: xxspltw v2, vs0, 0
; P9BE-NEXT: blr		; P9BE-NEXT: blr
;		;
; P9LE-LABEL: spltMemValui:		; P9LE-LABEL: spltMemValui:
; P9LE: # %bb.0: # %entry		; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwzx f0, 0, r3		; P9LE-NEXT: lxvwsx v2, 0, r3
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxspltw v2, vs0, 3
; P9LE-NEXT: blr		; P9LE-NEXT: blr
;		;
; P8BE-LABEL: spltMemValui:		; P8BE-LABEL: spltMemValui:
; P8BE: # %bb.0: # %entry		; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lfiwzx f0, 0, r3		; P8BE-NEXT: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1		; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE-NEXT: xxspltw v2, vs0, 0		; P8BE-NEXT: xxspltw v2, vs0, 0
; P8BE-NEXT: blr		; P8BE-NEXT: blr
▲ Show 20 Lines • Show All 3,461 Lines • Show Last 20 Lines

test/CodeGen/PowerPC/load-and-splat.ll

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
				; RUN: -mtriple=powerpc64-unknown-unknown < %s \| FileCheck %s \
				; RUN: -check-prefix=P9
				jsjiUnsubmitted Not Done Reply Inline Actions Any specific reason that we want to use `powerpc64` instead of `powerpc64le` for `pwr9`? jsji: Any specific reason that we want to use `powerpc64` instead of `powerpc64le` for `pwr9`?
				nemanjaiAuthorUnsubmitted Done Reply Inline Actions I wanted at least one of them to be big endian and I figured why not the one that has more hits in the test case. I don't think we really need four RUN lines for P8/P9 and LE/BE. nemanjai: I wanted at least one of them to be big endian and I figured why not the one that has more hits…
				; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
				; RUN: -mtriple=powerpc64le-unknown-unknown < %s \| FileCheck %s \
				; RUN: -check-prefix=P8
				; Load-and-splat of a double: element 3 of %a is loaded, splatted across a
				; <2 x double> and stored to %c. Under both check prefixes the load and the
				; splat are expected to be a single lxvdsx; only the vector store differs.
				define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr {
				; P9-LABEL: test:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r4, r4, 24
				; P9-NEXT: lxvdsx vs0, 0, r4
				; P9-NEXT: stxv vs0, 0(r3)
				; P9-NEXT: blr
				;
				; P8-LABEL: test:
				; P8: # %bb.0: # %entry
				; P8-NEXT: addi r4, r4, 24
				; P8-NEXT: lxvdsx vs0, 0, r4
				; P8-NEXT: stxvd2x vs0, 0, r3
				; P8-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds double, double* %a, i64 3
				%0 = load double, double* %arrayidx, align 8
				%splat.splatinsert.i = insertelement <2 x double> undef, double %0, i32 0
				%splat.splat.i = shufflevector <2 x double> %splat.splatinsert.i, <2 x double> undef, <2 x i32> zeroinitializer
				store <2 x double> %splat.splat.i, <2 x double>* %c, align 16
				ret void
				}

				; Load-and-splat of a float: element 3 of %a is loaded, splatted across a
				; <4 x float> and stored to %c. The P9 prefix expects a single lxvwsx, while
				; the P8 prefix expects the lfiwzx + xxpermdi + xxspltw sequence instead.
				define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonly %a) local_unnamed_addr {
				; P9-LABEL: test2:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r4, r4, 12
				; P9-NEXT: lxvwsx vs0, 0, r4
				; P9-NEXT: stxv vs0, 0(r3)
				; P9-NEXT: blr
				;
				; P8-LABEL: test2:
				; P8: # %bb.0: # %entry
				; P8-NEXT: addi r4, r4, 12
				; P8-NEXT: lfiwzx f0, 0, r4
				; P8-NEXT: xxpermdi vs0, f0, f0, 2
				; P8-NEXT: xxspltw v2, vs0, 3
				; P8-NEXT: stvx v2, 0, r3
				; P8-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds float, float* %a, i64 3
				%0 = load float, float* %arrayidx, align 4
				%splat.splatinsert.i = insertelement <4 x float> undef, float %0, i32 0
				%splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> undef, <4 x i32> zeroinitializer
				store <4 x float> %splat.splat.i, <4 x float>* %c, align 16
				ret void
				}

				; Load-and-splat of an i32: element 3 of %a is loaded, splatted across a
				; <4 x i32> and stored to %c. The P9 prefix expects a single lxvwsx, while
				; the P8 prefix expects the lfiwzx + xxpermdi + xxspltw sequence instead.
				define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a) local_unnamed_addr {
				; P9-LABEL: test3:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r4, r4, 12
				; P9-NEXT: lxvwsx vs0, 0, r4
				; P9-NEXT: stxv vs0, 0(r3)
				; P9-NEXT: blr
				;
				; P8-LABEL: test3:
				; P8: # %bb.0: # %entry
				; P8-NEXT: addi r4, r4, 12
				; P8-NEXT: lfiwzx f0, 0, r4
				; P8-NEXT: xxpermdi vs0, f0, f0, 2
				; P8-NEXT: xxspltw v2, vs0, 3
				; P8-NEXT: stvx v2, 0, r3
				; P8-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds i32, i32* %a, i64 3
				%0 = load i32, i32* %arrayidx, align 4
				%splat.splatinsert.i = insertelement <4 x i32> undef, i32 %0, i32 0
				%splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
				store <4 x i32> %splat.splat.i, <4 x i32>* %c, align 16
				ret void
				}

				; Load-and-splat of an i64: element 3 of %a is loaded, splatted across a
				; <2 x i64> and stored to %c. Under both check prefixes the load and the
				; splat are expected to be a single lxvdsx; only the vector store differs.
				define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr {
				; P9-LABEL: test4:
				; P9: # %bb.0: # %entry
				; P9-NEXT: addi r4, r4, 24
				; P9-NEXT: lxvdsx vs0, 0, r4
				; P9-NEXT: stxv vs0, 0(r3)
				; P9-NEXT: blr
				;
				; P8-LABEL: test4:
				; P8: # %bb.0: # %entry
				; P8-NEXT: addi r4, r4, 24
				; P8-NEXT: lxvdsx vs0, 0, r4
				; P8-NEXT: stxvd2x vs0, 0, r3
				; P8-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds i64, i64* %a, i64 3
				%0 = load i64, i64* %arrayidx, align 8
				%splat.splatinsert.i = insertelement <2 x i64> undef, i64 %0, i32 0
				%splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
				store <2 x i64> %splat.splat.i, <2 x i64>* %c, align 16
				ret void
				}

test/CodeGen/PowerPC/power9-moves-and-splats.ll

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \			; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
	; RUN: -ppc-asm-full-reg-names < %s \| FileCheck %s			; RUN: -ppc-asm-full-reg-names < %s \| FileCheck %s
	; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \			; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \
	; RUN: -ppc-asm-full-reg-names < %s \| FileCheck %s --check-prefix=CHECK-BE			; RUN: -ppc-asm-full-reg-names < %s \| FileCheck %s --check-prefix=CHECK-BE

	@Globi = external global i32, align 4			@Globi = external global i32, align 4
	@Globf = external global float, align 4			@Globf = external global float, align 4

	define <2 x i64> @test1(i64 %a, i64 %b) {			define <2 x i64> @test1(i64 %a, i64 %b) {
	; CHECK-LABEL: test1:			; CHECK-LABEL: test1:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: mtvsrdd v2, r4, r3			; CHECK-NEXT: mtvsrdd v2, r4, r3
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
				nemanjaiAuthorUnsubmitted Done Reply Inline Actions Please forgive the trivial changes in this test case. The script that produces the checks apparently behaves slightly differently now and I would prefer to leave the test case exactly as produced by the script. nemanjai: Please forgive the trivial changes in this test case. The script that produces the checks…
				lebedev.riUnsubmitted Not Done Reply Inline Actions You can just regenerate all the affected files in a preparatory commit and rebase the patch. lebedev.ri: You can just regenerate all the affected files in a preparatory commit and rebase the patch.
				nemanjaiAuthorUnsubmitted Done Reply Inline Actions Ah, yeah. That's a good idea. I don't know why I didn't think of that. I'll definitely do that next time I see this issue. nemanjai: Ah, yeah. That's a good idea. I don't know why I didn't think of that. I'll definitely do that…
				jsjiUnsubmitted Not Done Reply Inline Actions I have committed https://reviews.llvm.org/rL365330 to include the new ';', your patch should auto-merge when you rebase. jsji: I have committed https://reviews.llvm.org/rL365330 to include the new ';', your patch should…
	; CHECK-BE-LABEL: test1:			; CHECK-BE-LABEL: test1:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: mtvsrdd v2, r3, r4			; CHECK-BE-NEXT: mtvsrdd v2, r3, r4
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp			; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
	; which will happen in a subsequent patch.			; which will happen in a subsequent patch.
	%vecins = insertelement <2 x i64> undef, i64 %a, i32 0			%vecins = insertelement <2 x i64> undef, i64 %a, i32 0
	%vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1			%vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
	ret <2 x i64> %vecins1			ret <2 x i64> %vecins1
	}			}

	define i64 @test2(<2 x i64> %a) {			define i64 @test2(<2 x i64> %a) {
	; CHECK-LABEL: test2:			; CHECK-LABEL: test2:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: mfvsrld r3, v2			; CHECK-NEXT: mfvsrld r3, v2
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test2:			; CHECK-BE-LABEL: test2:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: mfvsrd r3, v2			; CHECK-BE-NEXT: mfvsrd r3, v2
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	%0 = extractelement <2 x i64> %a, i32 0			%0 = extractelement <2 x i64> %a, i32 0
	ret i64 %0			ret i64 %0
	}			}

	define i64 @test3(<2 x i64> %a) {			define i64 @test3(<2 x i64> %a) {
	; CHECK-LABEL: test3:			; CHECK-LABEL: test3:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: mfvsrd r3, v2			; CHECK-NEXT: mfvsrd r3, v2
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test3:			; CHECK-BE-LABEL: test3:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: mfvsrld r3, v2			; CHECK-BE-NEXT: mfvsrld r3, v2
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	%0 = extractelement <2 x i64> %a, i32 1			%0 = extractelement <2 x i64> %a, i32 1
	ret i64 %0			ret i64 %0
	}			}

	define <4 x i32> @test4(i32* nocapture readonly %in) {			define <4 x i32> @test4(i32* nocapture readonly %in) {
	; CHECK-LABEL: test4:			; CHECK-LABEL: test4:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: lfiwzx f0, 0, r3			; CHECK-NEXT: lxvwsx v2, 0, r3
	; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
	; CHECK-NEXT: xxspltw v2, vs0, 3
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test4:			; CHECK-BE-LABEL: test4:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: lfiwzx f0, 0, r3			; CHECK-BE-NEXT: lxvwsx v2, 0, r3
	; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
	; CHECK-BE-NEXT: xxspltw v2, vs0, 0
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	%0 = load i32, i32* %in, align 4			%0 = load i32, i32* %in, align 4
	%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0			%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
	%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
	ret <4 x i32> %splat.splat			ret <4 x i32> %splat.splat
	}			}

	define <4 x float> @test5(float* nocapture readonly %in) {			define <4 x float> @test5(float* nocapture readonly %in) {
	; CHECK-LABEL: test5:			; CHECK-LABEL: test5:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: lfiwzx f0, 0, r3			; CHECK-NEXT: lxvwsx v2, 0, r3
	; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
	; CHECK-NEXT: xxspltw v2, vs0, 3
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test5:			; CHECK-BE-LABEL: test5:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: lfiwzx f0, 0, r3			; CHECK-BE-NEXT: lxvwsx v2, 0, r3
	; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
	; CHECK-BE-NEXT: xxspltw v2, vs0, 0
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	%0 = load float, float* %in, align 4			%0 = load float, float* %in, align 4
	%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0			%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
	%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
	ret <4 x float> %splat.splat			ret <4 x float> %splat.splat
	}			}

	define <4 x i32> @test6() {			define <4 x i32> @test6() {
	; CHECK-LABEL: test6:			; CHECK-LABEL: test6:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: addis r3, r2, .LC0@toc@ha			; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
	; CHECK-NEXT: ld r3, .LC0@toc@l(r3)			; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
	; CHECK-NEXT: lfiwzx f0, 0, r3			; CHECK-NEXT: lxvwsx v2, 0, r3
	; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
	; CHECK-NEXT: xxspltw v2, vs0, 3
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test6:			; CHECK-BE-LABEL: test6:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha			; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha
	; CHECK-BE-NEXT: ld r3, .LC0@toc@l(r3)			; CHECK-BE-NEXT: ld r3, .LC0@toc@l(r3)
	; CHECK-BE-NEXT: lfiwzx f0, 0, r3			; CHECK-BE-NEXT: lxvwsx v2, 0, r3
	; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
	; CHECK-BE-NEXT: xxspltw v2, vs0, 0
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	%0 = load i32, i32* @Globi, align 4			%0 = load i32, i32* @Globi, align 4
	%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0			%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
	%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
	ret <4 x i32> %splat.splat			ret <4 x i32> %splat.splat
	}			}

	define <4 x float> @test7() {			define <4 x float> @test7() {
	; CHECK-LABEL: test7:			; CHECK-LABEL: test7:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: addis r3, r2, .LC1@toc@ha			; CHECK-NEXT: addis r3, r2, .LC1@toc@ha
	; CHECK-NEXT: ld r3, .LC1@toc@l(r3)			; CHECK-NEXT: ld r3, .LC1@toc@l(r3)
	; CHECK-NEXT: lfiwzx f0, 0, r3			; CHECK-NEXT: lxvwsx v2, 0, r3
	; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
	; CHECK-NEXT: xxspltw v2, vs0, 3
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test7:			; CHECK-BE-LABEL: test7:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: addis r3, r2, .LC1@toc@ha			; CHECK-BE-NEXT: addis r3, r2, .LC1@toc@ha
	; CHECK-BE-NEXT: ld r3, .LC1@toc@l(r3)			; CHECK-BE-NEXT: ld r3, .LC1@toc@l(r3)
	; CHECK-BE-NEXT: lfiwzx f0, 0, r3			; CHECK-BE-NEXT: lxvwsx v2, 0, r3
	; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
	; CHECK-BE-NEXT: xxspltw v2, vs0, 0
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	%0 = load float, float* @Globf, align 4			%0 = load float, float* @Globf, align 4
	%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0			%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
	%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
	ret <4 x float> %splat.splat			ret <4 x float> %splat.splat
	}			}

	define <16 x i8> @test8() {			define <16 x i8> @test8() {
	; CHECK-LABEL: test8:			; CHECK-LABEL: test8:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: xxlxor v2, v2, v2			; CHECK-NEXT: xxlxor v2, v2, v2
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test8:			; CHECK-BE-LABEL: test8:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: xxlxor v2, v2, v2			; CHECK-BE-NEXT: xxlxor v2, v2, v2
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	ret <16 x i8> zeroinitializer			ret <16 x i8> zeroinitializer
	}			}

	define <16 x i8> @test9() {			define <16 x i8> @test9() {
	; CHECK-LABEL: test9:			; CHECK-LABEL: test9:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: xxspltib v2, 1			; CHECK-NEXT: xxspltib v2, 1
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test9:			; CHECK-BE-LABEL: test9:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: xxspltib v2, 1			; CHECK-BE-NEXT: xxspltib v2, 1
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>			ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
	}			}

	define <16 x i8> @test10() {			define <16 x i8> @test10() {
	; CHECK-LABEL: test10:			; CHECK-LABEL: test10:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: xxspltib v2, 127			; CHECK-NEXT: xxspltib v2, 127
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test10:			; CHECK-BE-LABEL: test10:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: xxspltib v2, 127			; CHECK-BE-NEXT: xxspltib v2, 127
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	ret <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>			ret <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>
	}			}

	define <16 x i8> @test11() {			define <16 x i8> @test11() {
	; CHECK-LABEL: test11:			; CHECK-LABEL: test11:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: xxspltib v2, 128			; CHECK-NEXT: xxspltib v2, 128
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test11:			; CHECK-BE-LABEL: test11:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: xxspltib v2, 128			; CHECK-BE-NEXT: xxspltib v2, 128
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	ret <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>			ret <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
	}			}

	define <16 x i8> @test12() {			define <16 x i8> @test12() {
	; CHECK-LABEL: test12:			; CHECK-LABEL: test12:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: xxspltib v2, 255			; CHECK-NEXT: xxspltib v2, 255
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test12:			; CHECK-BE-LABEL: test12:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: xxspltib v2, 255			; CHECK-BE-NEXT: xxspltib v2, 255
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>			ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
	}			}

	define <16 x i8> @test13() {			define <16 x i8> @test13() {
	; CHECK-LABEL: test13:			; CHECK-LABEL: test13:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: xxspltib v2, 129			; CHECK-NEXT: xxspltib v2, 129
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test13:			; CHECK-BE-LABEL: test13:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: xxspltib v2, 129			; CHECK-BE-NEXT: xxspltib v2, 129
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	ret <16 x i8> <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>			ret <16 x i8> <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
	}			}

	define <16 x i8> @test13E127() {			define <16 x i8> @test13E127() {
	; CHECK-LABEL: test13E127:			; CHECK-LABEL: test13E127:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: xxspltib v2, 200			; CHECK-NEXT: xxspltib v2, 200
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test13E127:			; CHECK-BE-LABEL: test13E127:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: xxspltib v2, 200			; CHECK-BE-NEXT: xxspltib v2, 200
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	ret <16 x i8> <i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200>			ret <16 x i8> <i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200>
	}			}

	define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) {			define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) {
	; CHECK-LABEL: test14:			; CHECK-LABEL: test14:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: lwz r3, 0(r5)			; CHECK-NEXT: lwz r3, 0(r5)
	; CHECK-NEXT: mtvsrws v2, r3			; CHECK-NEXT: lxvwsx v2, 0, r5
	; CHECK-NEXT: addi r3, r3, 5			; CHECK-NEXT: addi r3, r3, 5
	; CHECK-NEXT: stw r3, 0(r5)			; CHECK-NEXT: stw r3, 0(r5)
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
				;
	; CHECK-BE-LABEL: test14:			; CHECK-BE-LABEL: test14:
	; CHECK-BE: # %bb.0: # %entry			; CHECK-BE: # %bb.0: # %entry
	; CHECK-BE-NEXT: lwz r3, 0(r5)			; CHECK-BE-NEXT: lwz r3, 0(r5)
	; CHECK-BE-NEXT: mtvsrws v2, r3			; CHECK-BE-NEXT: lxvwsx v2, 0, r5
	; CHECK-BE-NEXT: addi r3, r3, 5			; CHECK-BE-NEXT: addi r3, r3, 5
	; CHECK-BE-NEXT: stw r3, 0(r5)			; CHECK-BE-NEXT: stw r3, 0(r5)
	; CHECK-BE-NEXT: blr			; CHECK-BE-NEXT: blr

	entry:			entry:
	%0 = load i32, i32* %b, align 4			%0 = load i32, i32* %b, align 4
	%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0			%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
	%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
	%1 = add i32 %0, 5			%1 = add i32 %0, 5
	store i32 %1, i32* %b, align 4			store i32 %1, i32* %b, align 4
	ret <4 x i32> %splat.splat			ret <4 x i32> %splat.splat
	}			}

test/CodeGen/PowerPC/qpx-load-splat.ll

		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \		; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s \| FileCheck %s		; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s \| FileCheck %s

; Function Attrs: norecurse nounwind readonly		; Function Attrs: norecurse nounwind readonly
define <4 x double> @foo(double* nocapture readonly %a) #0 {		define <4 x double> @foo(double* nocapture readonly %a) #0 {
; CHECK-LABEL: foo:		; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry		; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvdsx v2, 0, r3		; CHECK-NEXT: lxvdsx v2, 0, r3
Show All 20 Lines	entry:
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer		%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %shuffle.i		ret <4 x double> %shuffle.i
}		}

define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {		define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {
; CHECK-LABEL: fooxu:		; CHECK-LABEL: fooxu:
; CHECK: # %bb.0: # %entry		; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 3		; CHECK-NEXT: sldi r4, r4, 3
; CHECK-NEXT: lfdux f0, r3, r4		; CHECK-NEXT: add r6, r3, r4
; CHECK-NEXT: xxspltd v2, vs0, 0		; CHECK-NEXT: std r6, 0(r5)
; CHECK-NEXT: std r3, 0(r5)		; CHECK-NEXT: lxvdsx v2, r3, r4
; CHECK-NEXT: vmr v3, v2		; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: blr		; CHECK-NEXT: blr
entry:		entry:
%p = getelementptr double, double* %a, i64 %idx		%p = getelementptr double, double* %a, i64 %idx
%0 = load double, double* %p, align 8		%0 = load double, double* %p, align 8
%vecinit.i = insertelement <4 x double> undef, double %0, i32 0		%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer		%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
store double* %p, double** %pptr, align 8		store double* %p, double** %pptr, align 8
Show All 34 Lines

test/CodeGen/PowerPC/swaps-le-7.ll

	; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s \| FileCheck %s			; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s \| FileCheck %s
	;			;
	; This is a regression test based on https://llvm.org/bugs/show_bug.cgi?id=27735			; This is a regression test based on https://llvm.org/bugs/show_bug.cgi?id=27735
	;			;

	@G1 = global <2 x double> <double 2.0, double -10.0>			@G1 = global <2 x double> <double 2.0, double -10.0>
	@G2 = global <2 x double> <double 3.0, double 4.0>			@G2 = global <2 x double> <double 3.0, double 4.0>
	@G3 = global <2 x double> <double 5.0, double 6.0>			@G3 = global <2 x double> <double 5.0, double 6.0>
	@G4 = global <2 x double> <double 7.0, double 8.0>			@G4 = global <2 x double> <double 7.0, double 8.0>

	; CHECK-LABEL: @zg			; CHECK-LABEL: @zg
	; CHECK: xxspltd			; CHECK: lxvdsx
	; CHECK-NEXT: xxspltd			; CHECK-NEXT: lxvdsx
	; CHECK-NEXT: xvmuldp			; CHECK-NEXT: xvmuldp
	; CHECK-DAG: xvmuldp			; CHECK-DAG: xvmuldp
	; CHECK-DAG: xvsubdp			; CHECK-DAG: xvsubdp
	; CHECK-DAG: xvadddp			; CHECK-DAG: xvadddp
	; CHECK-DAG: xxswapd			; CHECK-DAG: xxswapd
	; CHECK-DAG: xxpermdi			; CHECK-DAG: xxpermdi
	; CHECK-DAG: xvsubdp			; CHECK-DAG: xvsubdp
	; CHECK: xxswapd			; CHECK: xxswapd
	Show All 34 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] Exploit single instruction load-and-splat for word and doubleword
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 205898

lib/Target/PowerPC/PPCISelLowering.h

lib/Target/PowerPC/PPCISelLowering.cpp

lib/Target/PowerPC/PPCInstrVSX.td

test/CodeGen/PowerPC/VSX-XForm-Scalars.ll

test/CodeGen/PowerPC/build-vector-tests.ll

test/CodeGen/PowerPC/load-and-splat.ll

test/CodeGen/PowerPC/power9-moves-and-splats.ll

test/CodeGen/PowerPC/qpx-load-splat.ll

test/CodeGen/PowerPC/swaps-le-7.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] Exploit single instruction load-and-splat for word and doubleword
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 205898

lib/Target/PowerPC/PPCISelLowering.h

lib/Target/PowerPC/PPCISelLowering.cpp

lib/Target/PowerPC/PPCInstrVSX.td

test/CodeGen/PowerPC/VSX-XForm-Scalars.ll

test/CodeGen/PowerPC/build-vector-tests.ll

test/CodeGen/PowerPC/load-and-splat.ll

test/CodeGen/PowerPC/power9-moves-and-splats.ll

test/CodeGen/PowerPC/qpx-load-splat.ll

test/CodeGen/PowerPC/swaps-le-7.ll

[PowerPC] Exploit single instruction load-and-splat for word and doubleword
ClosedPublic