Diff 159729

llvm/trunk/lib/Target/PowerPC/P9InstrResources.td

Show First 20 Lines • Show All 586 Lines • ▼ Show 20 Lines	def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
XXBRW,		XXBRW,
XXEXTRACTUW,		XXEXTRACTUW,
XXINSERTW,		XXINSERTW,
XXMRGHW,		XXMRGHW,
XXMRGLW,		XXMRGLW,
XXPERM,		XXPERM,
XXPERMR,		XXPERMR,
XXSLDWI,		XXSLDWI,
		XXSLDWIs,
XXSPLTIB,		XXSPLTIB,
XXSPLTW,		XXSPLTW,
XXSPLTWs,		XXSPLTWs,
XXPERMDI,		XXPERMDI,
XXPERMDIs,		XXPERMDIs,
VADDCUQ,		VADDCUQ,
VADDECUQ,		VADDECUQ,
VADDEUQM,		VADDEUQM,
▲ Show 20 Lines • Show All 817 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,448 Lines • ▼ Show 20 Lines	if (PPC::isXXBRHShuffleMask(SVOp)) {
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);		return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
}		}
}		}

if (Subtarget.hasVSX()) {		if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {		if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);		int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

// If the source for the shuffle is a scalar_to_vector that came from a
// 32-bit load, it will have used LXVWSX so we don't need to splat again.
if (Subtarget.hasP9Vector() &&
((isLittleEndian && SplatIdx == 3) \|\|
(!isLittleEndian && SplatIdx == 0))) {
SDValue Src = V1.getOperand(0);
if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
Src.getOperand(0).getOpcode() == ISD::LOAD &&
Src.getOperand(0).hasOneUse())
return V1;
}
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);		SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,		SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
DAG.getConstant(SplatIdx, dl, MVT::i32));		DAG.getConstant(SplatIdx, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);		return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
}		}

// Left shifts of 8 bytes are actually swaps. Convert accordingly.		// Left shifts of 8 bytes are actually swaps. Convert accordingly.
if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {		if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
▲ Show 20 Lines • Show All 5,715 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td

Show First 20 Lines • Show All 871 Lines • ▼ Show 20 Lines	def XXSEL : XX4Form<60, 3,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),		(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
"xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;		"xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;

def XXSLDWI : XX3Form_2<60, 2,		def XXSLDWI : XX3Form_2<60, 2,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),		(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
"xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm,		"xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm,
[(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB,		[(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB,
imm32SExt16:$SHW))]>;		imm32SExt16:$SHW))]>;

		let isCodeGenOnly = 1 in
		def XXSLDWIs : XX3Form_2s<60, 2,
		(outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$SHW),
		"xxsldwi $XT, $XA, $XA, $SHW", IIC_VecPerm, []>;

def XXSPLTW : XX2Form_2<60, 164,		def XXSPLTW : XX2Form_2<60, 164,
(outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),		(outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm,		"xxspltw $XT, $XB, $UIM", IIC_VecPerm,
[(set v4i32:$XT,		[(set v4i32:$XT,
(PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;		(PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;
let isCodeGenOnly = 1 in		let isCodeGenOnly = 1 in
def XXSPLTWs : XX2Form_2<60, 164,		def XXSPLTWs : XX2Form_2<60, 164,
(outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),		(outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;		"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;

} // hasSideEffects		} // hasSideEffects
} // UseVSXReg = 1		} // UseVSXReg = 1

// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after		// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.		// instruction selection into a branch sequence.
let usesCustomInserter = 1, // Expanded after instruction selection.		let usesCustomInserter = 1, // Expanded after instruction selection.
PPC970_Single = 1 in {		PPC970_Single = 1 in {

▲ Show 20 Lines • Show All 564 Lines • ▼ Show 20 Lines	def : Pat<(f32 (PPCfcfids
(f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;		(f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
def : Pat<(f32 (PPCfcfidus		def : Pat<(f32 (PPCfcfidus
(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),		(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
(f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;		(f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
def : Pat<(f32 (PPCfcfidus		def : Pat<(f32 (PPCfcfidus
(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),		(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
(f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;		(f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
}		}
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)),
(v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>;

// Instructions for converting float to i64 feeding a store.		// Instructions for converting float to i64 feeding a store.
let Predicates = [NoP9Vector] in {		let Predicates = [NoP9Vector] in {
def : Pat<(PPCstore_scal_int_from_vsr		def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8),		(f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8),
(STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>;		(STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr		def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8),		(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8),
▲ Show 20 Lines • Show All 1,566 Lines • ▼ Show 20 Lines	let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),		def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;		(STXVX $rS, xoaddr:$dst)>;
def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),		def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;		(STXVX $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),		def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;		(STXVX $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),		def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;		(STXVX $rS, xoaddr:$dst)>;

		let AddedComplexity = 400 in {
		// LIWAX - This instruction is used for sign extending i32 -> i64.
		// LIWZX - This instruction will be emitted for i32, f32, and when
		// zero-extending i32 to i64 (zext i32 -> i64).
		let Predicates = [IsLittleEndian] in {

		def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
		(v2i64 (XXPERMDIs
		(COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>;

		def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
		(v2i64 (XXPERMDIs
		(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;

def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),		def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
(v4i32 (LXVWSX xoaddr:$src))>;		(v4i32 (XXPERMDIs
		(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;

def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),		def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
(v4f32 (LXVWSX xoaddr:$src))>;		(v4f32 (XXPERMDIs
def : Pat<(v4f32 (scalar_to_vector		(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
(f32 (fpround (f64 (extloadf32 xoaddr:$src)))))),		}
(v4f32 (LXVWSX xoaddr:$src))>;
		let Predicates = [IsBigEndian] in {
		def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
		(v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;

		def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
		(v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;

		def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
		(v4i32 (XXSLDWIs
		(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;

		def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
		(v4f32 (XXSLDWIs
		(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
		}

		}

// Build vectors from i8 loads		// Build vectors from i8 loads
def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),		def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
(v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>;		(v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>;
def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)),		def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)),
(v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>;		(v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>;
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),		def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
(v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;		(v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
▲ Show 20 Lines • Show All 145 Lines • ▼ Show 20 Lines	def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
"#DFSTOREf64",		"#DFSTOREf64",
[(store f64:$XT, ixaddr:$dst)]>;		[(store f64:$XT, ixaddr:$dst)]>;
}		}
def : Pat<(f64 (extloadf32 ixaddr:$src)),		def : Pat<(f64 (extloadf32 ixaddr:$src)),
(COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;		(COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),		def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
(f32 (DFLOADf32 ixaddr:$src))>;		(f32 (DFLOADf32 ixaddr:$src))>;


		let AddedComplexity = 400 in {
		// The following pseudoinstructions are used to ensure the utilization
		// of all 64 VSX registers.
		let Predicates = [IsLittleEndian, HasP9Vector] in {
		def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
		(v2i64 (XXPERMDIs
		(COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
		def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
		(v2i64 (XXPERMDIs
		(COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;

		def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
		(v2f64 (XXPERMDIs
		(COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
		def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
		(v2f64 (XXPERMDIs
		(COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
		}

		let Predicates = [IsBigEndian, HasP9Vector] in {
		def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
		(v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
		def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
		(v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;

		def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
		(v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
		def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
		(v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
		}
		}

let Predicates = [IsBigEndian, HasP9Vector] in {		let Predicates = [IsBigEndian, HasP9Vector] in {

// (Un)Signed DWord vector extract -> QP		// (Un)Signed DWord vector extract -> QP
def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),		def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
(f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;		(f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),		def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
(f128 (XSCVSDQP		(f128 (XSCVSDQP
(EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;		(EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
▲ Show 20 Lines • Show All 698 Lines • ▼ Show 20 Lines	let Predicates = [HasP9Altivec] in {
def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)),		def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)),
(v2i64 (VEXTSW2D $A))>;		(v2i64 (VEXTSW2D $A))>;
def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)),		def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)),
(v4i32 (VEXTSB2W $A))>;		(v4i32 (VEXTSB2W $A))>;
def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)),		def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)),
(v4i32 (VEXTSH2W $A))>;		(v4i32 (VEXTSH2W $A))>;
}		}
}		}

llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll

	; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \			; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
	; RUN: -verify-machineinstrs \| FileCheck %s --check-prefix=CHECK-P8			; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
				; RUN: \| FileCheck %s --check-prefix=CHECK-P8
	; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \			; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
	; RUN: -verify-machineinstrs \| FileCheck %s --check-prefix=CHECK-P9			; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
				; RUN: \| FileCheck %s --check-prefix=CHECK-P9

	@a = external local_unnamed_addr global <4 x i32>, align 16			@a = external local_unnamed_addr global <4 x i32>, align 16
	@pb = external local_unnamed_addr global float*, align 8			@pb = external local_unnamed_addr global float*, align 8

	define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) {			define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) {
	; CHECK-P8-LABEL: testExpandPostRAPseudo:			; CHECK-P8-LABEL: testExpandPostRAPseudo:
	; CHECK-P8: lxsiwax 34, 0, 3			; CHECK-P8: # %bb.0: # %entry
	; CHECK-P8-NEXT: xxspltw 34, 34, 1			; CHECK-P8: lfiwzx f0, 0, r3
	; CHECK-P8-NEXT: stvx 2, 0, 4			; CHECK-P8: ld r4, .LC0@toc@l(r4)
	; CHECK-P8: #APP			; CHECK-P8: xxpermdi vs0, f0, f0, 2
	; CHECK-P8-NEXT: #Clobber Rigisters			; CHECK-P8: xxspltw v2, vs0, 3
	; CHECK-P8-NEXT: #NO_APP			; CHECK-P8: stvx v2, 0, r4
	; CHECK-P8-NEXT: lis 4, 1024			; CHECK-P8: lis r4, 1024
	; CHECK-P8-NEXT: lfiwax 0, 0, 3			; CHECK-P8: lfiwax f0, 0, r3
	; CHECK-P8: stfsx 0, 3, 4			; CHECK-P8: addis r3, r2, .LC1@toc@ha
	; CHECK-P8-NEXT: blr			; CHECK-P8: ld r3, .LC1@toc@l(r3)
				; CHECK-P8: xscvsxdsp f0, f0
				; CHECK-P8: ld r3, 0(r3)
				; CHECK-P8: stfsx f0, r3, r4
				; CHECK-P8: blr
				;
	; CHECK-P9-LABEL: testExpandPostRAPseudo:			; CHECK-P9-LABEL: testExpandPostRAPseudo:
	; CHECK-P9: lxvwsx 0, 0, 3			; CHECK-P9: # %bb.0: # %entry
	; CHECK-P9: stxvx 0, 0, 4			; CHECK-P9: lfiwzx f0, 0, r3
	; CHECK-P9: #APP			; CHECK-P9: addis r4, r2, .LC0@toc@ha
	; CHECK-P9-NEXT: #Clobber Rigisters			; CHECK-P9: ld r4, .LC0@toc@l(r4)
	; CHECK-P9-NEXT: #NO_APP			; CHECK-P9: xxpermdi vs0, f0, f0, 2
	; CHECK-P9-NEXT: lis 4, 1024			; CHECK-P9: xxspltw vs0, vs0, 3
	; CHECK-P9-NEXT: lfiwax 0, 0, 3			; CHECK-P9: stxvx vs0, 0, r4
	; CHECK-P9: stfsx 0, 3, 4			; CHECK-P9: lis r4, 1024
	; CHECK-P9-NEXT: blr			; CHECK-P9: lfiwax f0, 0, r3
				; CHECK-P9: addis r3, r2, .LC1@toc@ha
				; CHECK-P9: ld r3, .LC1@toc@l(r3)
				; CHECK-P9: xscvsxdsp f0, f0
				; CHECK-P9: ld r3, 0(r3)
				; CHECK-P9: stfsx f0, r3, r4
				; CHECK-P9: blr
	entry:			entry:
	%0 = load i32, i32* %ptr, align 4			%0 = load i32, i32* %ptr, align 4
	%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0			%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
	%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
	store <4 x i32> %splat.splat, <4 x i32>* @a, align 16			store <4 x i32> %splat.splat, <4 x i32>* @a, align 16
	tail call void asm sideeffect "#Clobber Rigisters", "~{f0},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()			tail call void asm sideeffect "#Clobber Rigisters", "~{f0},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
	%1 = load i32, i32* %ptr, align 4			%1 = load i32, i32* %ptr, align 4
	%conv = sitofp i32 %1 to float			%conv = sitofp i32 %1 to float
	%2 = load float, float* @pb, align 8			%2 = load float, float* @pb, align 8
	%add.ptr = getelementptr inbounds float, float* %2, i64 16777216			%add.ptr = getelementptr inbounds float, float* %2, i64 16777216
	store float %conv, float* %add.ptr, align 4			store float %conv, float* %add.ptr, align 4
	ret void			ret void
	}			}

llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll

Show First 20 Lines • Show All 103 Lines • ▼ Show 20 Lines
;vector int fromRandMemVari(int *arr, int elem) { //		;vector int fromRandMemVari(int *arr, int elem) { //
; return (vector int) { arr[elem+4], arr[elem+1], arr[elem+2], arr[elem+8] };//		; return (vector int) { arr[elem+4], arr[elem+1], arr[elem+2], arr[elem+8] };//
;} //		;} //
;// P8: mtvsrwz, xxspltw //		;// P8: mtvsrwz, xxspltw //
;// P9: mtvsrws //		;// P9: mtvsrws //
;vector int spltRegVali(int val) { //		;vector int spltRegVali(int val) { //
; return (vector int) val; //		; return (vector int) val; //
;} //		;} //
;// P8: lxsiwax, xxspltw //		;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
;// P9: lxvwsx //		;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
;vector int spltMemVali(int *ptr) { //		;vector int spltMemVali(int *ptr) { //
; return (vector int)*ptr; //		; return (vector int)*ptr; //
;} //		;} //
;// P8: vspltisw //		;// P8: vspltisw //
;// P9: vspltisw //		;// P9: vspltisw //
;vector int spltCnstConvftoi() { //		;vector int spltCnstConvftoi() { //
; return (vector int) 4.74f; //		; return (vector int) 4.74f; //
;} //		;} //
▲ Show 20 Lines • Show All 157 Lines • ▼ Show 20 Lines
; return (vector unsigned int) { arr[elem+4], arr[elem+1], //		; return (vector unsigned int) { arr[elem+4], arr[elem+1], //
; arr[elem+2], arr[elem+8] }; //		; arr[elem+2], arr[elem+8] }; //
;} //		;} //
;// P8: mtvsrwz, xxspltw //		;// P8: mtvsrwz, xxspltw //
;// P9: mtvsrws //		;// P9: mtvsrws //
;vector unsigned int spltRegValui(unsigned int val) { //		;vector unsigned int spltRegValui(unsigned int val) { //
; return (vector unsigned int) val; //		; return (vector unsigned int) val; //
;} //		;} //
;// P8: lxsiwax, xxspltw //		;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
;// P9: lxvwsx //		;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
;vector unsigned int spltMemValui(unsigned int *ptr) { //		;vector unsigned int spltMemValui(unsigned int *ptr) { //
; return (vector unsigned int)*ptr; //		; return (vector unsigned int)*ptr; //
;} //		;} //
;// P8: vspltisw //		;// P8: vspltisw //
;// P9: vspltisw //		;// P9: vspltisw //
;vector unsigned int spltCnstConvftoui() { //		;vector unsigned int spltCnstConvftoui() { //
; return (vector unsigned int) 4.74f; //		; return (vector unsigned int) 4.74f; //
;} //		;} //
▲ Show 20 Lines • Show All 900 Lines • ▼ Show 20 Lines	entry:
%0 = load i32, i32* %ptr, align 4		%0 = load i32, i32* %ptr, align 4
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0		%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer		%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %splat.splat		ret <4 x i32> %splat.splat
; P9BE-LABEL: spltMemVali		; P9BE-LABEL: spltMemVali
; P9LE-LABEL: spltMemVali		; P9LE-LABEL: spltMemVali
; P8BE-LABEL: spltMemVali		; P8BE-LABEL: spltMemVali
; P8LE-LABEL: spltMemVali		; P8LE-LABEL: spltMemVali
; P9BE: lxvwsx v2, 0, r3		; P9BE: lfiwzx f0, 0, r3
; P9BE: blr		; P9BE: xxsldwi vs0, f0, f0, 1
; P9LE: lxvwsx v2, 0, r3		; P9BE: xxspltw v2, vs0, 0
; P9LE: blr		; P9BE: blr
; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3		; P9LE: lfiwzx f0, 0, r3
; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1		; P9LE: xxpermdi vs0, f0, f0, 2
; P8BE: blr		; P9LE: xxspltw v2, vs0, 3
; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3		; P9LE: blr
; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1		; P8BE: lfiwzx f0, 0, r3
		; P8BE: xxsldwi vs0, f0, f0, 1
		; P8BE: xxspltw v2, vs0, 0
		; P8BE: blr
		; P8LE: lfiwzx f0, 0, r3
		; P8LE: xxpermdi vs0, f0, f0, 2
		; P8LE: xxspltw v2, vs0, 3
; P8LE: blr		; P8LE: blr
}		}

; Function Attrs: norecurse nounwind readnone		; Function Attrs: norecurse nounwind readnone
define <4 x i32> @spltCnstConvftoi() {		define <4 x i32> @spltCnstConvftoi() {
entry:		entry:
ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>		ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
; P9BE-LABEL: spltCnstConvftoi		; P9BE-LABEL: spltCnstConvftoi
▲ Show 20 Lines • Show All 1,111 Lines • ▼ Show 20 Lines	entry:
%0 = load i32, i32* %ptr, align 4		%0 = load i32, i32* %ptr, align 4
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0		%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer		%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %splat.splat		ret <4 x i32> %splat.splat
; P9BE-LABEL: spltMemValui		; P9BE-LABEL: spltMemValui
; P9LE-LABEL: spltMemValui		; P9LE-LABEL: spltMemValui
; P8BE-LABEL: spltMemValui		; P8BE-LABEL: spltMemValui
; P8LE-LABEL: spltMemValui		; P8LE-LABEL: spltMemValui
; P9BE: lxvwsx v2, 0, r3		; P9BE: lfiwzx f0, 0, r3
; P9BE: blr		; P9BE: xxsldwi vs0, f0, f0, 1
; P9LE: lxvwsx v2, 0, r3		; P9BE: xxspltw v2, vs0, 0
; P9LE: blr		; P9BE: blr
; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3		; P9LE: lfiwzx f0, 0, r3
; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1		; P9LE: xxpermdi vs0, f0, f0, 2
; P8BE: blr		; P9LE: xxspltw v2, vs0, 3
; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3		; P9LE: blr
; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1		; P8BE: lfiwzx f0, 0, r3
		; P8BE: xxsldwi vs0, f0, f0, 1
		; P8BE: xxspltw v2, vs0, 0
		; P8BE: blr
		; P8LE: lfiwzx f0, 0, r3
		; P8LE: xxpermdi vs0, f0, f0, 2
		; P8LE: xxspltw v2, vs0, 3
; P8LE: blr		; P8LE: blr
}		}

; Function Attrs: norecurse nounwind readnone		; Function Attrs: norecurse nounwind readnone
define <4 x i32> @spltCnstConvftoui() {		define <4 x i32> @spltCnstConvftoui() {
entry:		entry:
ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>		ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
; P9BE-LABEL: spltCnstConvftoui		; P9BE-LABEL: spltCnstConvftoui
▲ Show 20 Lines • Show All 2,450 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll

	; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck \			; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s \
				; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \| FileCheck --check-prefix=CHECK-LE \
	; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s			; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s
	; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s \| FileCheck \			; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s \
				; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \| FileCheck \
	; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s			; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s

	define <16 x i8> @test(i32* %s, i32* %t) {			define <16 x i8> @test(i32* %s, i32* %t) {
				; CHECK-LE-LABEL: test:
				; CHECK-LE: # %bb.0: # %entry
				; CHECK-LE-NEXT: lfiwzx f0, 0, r3
				; CHECK-LE-NEXT: xxpermdi vs0, f0, f0, 2
				; CHECK-LE-NEXT: xxspltw v2, vs0, 3
				; CHECK-LE-NEXT: blr

				; CHECK-LABEL: test:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: lfiwzx f0, 0, r3
				; CHECK-NEXT: xxsldwi vs0, f0, f0, 1
				; CHECK-NEXT: xxspltw v2, vs0, 0
				; CHECK-NEXT: blr
	entry:			entry:
	%0 = bitcast i32* %s to <4 x i8>*			%0 = bitcast i32* %s to <4 x i8>*
	%1 = load <4 x i8>, <4 x i8>* %0, align 4			%1 = load <4 x i8>, <4 x i8>* %0, align 4
	%2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>			%2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
	ret <16 x i8> %2			ret <16 x i8> %2
	; CHECK-LABEL: test
	; CHECK: lxsiwax 34, 0, 3
	; CHECK: xxspltw 34, 34, 1
	}			}

llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll

	; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s			; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
	; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s \| FileCheck %s \			; RUN: -ppc-asm-full-reg-names < %s \| FileCheck %s
	; RUN: --check-prefix=CHECK-BE			; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \
				; RUN: -ppc-asm-full-reg-names < %s \| FileCheck %s --check-prefix=CHECK-BE

	@Globi = external global i32, align 4			@Globi = external global i32, align 4
	@Globf = external global float, align 4			@Globf = external global float, align 4

	define <2 x i64> @test1(i64 %a, i64 %b) {			define <2 x i64> @test1(i64 %a, i64 %b) {
				; CHECK-LABEL: test1:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: mtvsrdd v2, r4, r3
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test1:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: mtvsrdd v2, r3, r4
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp			; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
	; which will happen in a subsequent patch.			; which will happen in a subsequent patch.
	; CHECK-LABEL: test1
	; CHECK: mtvsrdd 34, 4, 3
	; CHECK-BE-LABEL: test1
	; CHECK-BE: mtvsrdd 34, 3, 4
	%vecins = insertelement <2 x i64> undef, i64 %a, i32 0			%vecins = insertelement <2 x i64> undef, i64 %a, i32 0
	%vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1			%vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
	ret <2 x i64> %vecins1			ret <2 x i64> %vecins1
	}			}

	define i64 @test2(<2 x i64> %a) {			define i64 @test2(<2 x i64> %a) {
				; CHECK-LABEL: test2:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: mfvsrld r3, v2
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test2:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: mfvsrd r3, v2
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test2
	; CHECK: mfvsrld 3, 34
	%0 = extractelement <2 x i64> %a, i32 0			%0 = extractelement <2 x i64> %a, i32 0
	ret i64 %0			ret i64 %0
	}			}

	define i64 @test3(<2 x i64> %a) {			define i64 @test3(<2 x i64> %a) {
				; CHECK-LABEL: test3:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: mfvsrd r3, v2
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test3:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: mfvsrld r3, v2
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-BE-LABEL: test3
	; CHECK-BE: mfvsrld 3, 34
	%0 = extractelement <2 x i64> %a, i32 1			%0 = extractelement <2 x i64> %a, i32 1
	ret i64 %0			ret i64 %0
	}			}

	define <4 x i32> @test4(i32* nocapture readonly %in) {			define <4 x i32> @test4(i32* nocapture readonly %in) {
				; CHECK-LABEL: test4:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: lfiwzx f0, 0, r3
				; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
				; CHECK-NEXT: xxspltw v2, vs0, 3
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test4:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: lfiwzx f0, 0, r3
				; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
				; CHECK-BE-NEXT: xxspltw v2, vs0, 0
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test4
	; CHECK: lxvwsx 34, 0, 3
	; CHECK-NOT: xxspltw
	; CHECK-BE-LABEL: test4
	; CHECK-BE: lxvwsx 34, 0, 3
	; CHECK-BE-NOT: xxspltw
	%0 = load i32, i32* %in, align 4			%0 = load i32, i32* %in, align 4
	%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0			%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
	%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
	ret <4 x i32> %splat.splat			ret <4 x i32> %splat.splat
	}			}

	define <4 x float> @test5(float* nocapture readonly %in) {			define <4 x float> @test5(float* nocapture readonly %in) {
				; CHECK-LABEL: test5:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: lfiwzx f0, 0, r3
				; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
				; CHECK-NEXT: xxspltw v2, vs0, 3
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test5:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: lfiwzx f0, 0, r3
				; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
				; CHECK-BE-NEXT: xxspltw v2, vs0, 0
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test5
	; CHECK: lxvwsx 34, 0, 3
	; CHECK-NOT: xxspltw
	; CHECK-BE-LABEL: test5
	; CHECK-BE: lxvwsx 34, 0, 3
	; CHECK-BE-NOT: xxspltw
	%0 = load float, float* %in, align 4			%0 = load float, float* %in, align 4
	%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0			%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
	%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
	ret <4 x float> %splat.splat			ret <4 x float> %splat.splat
	}			}

	define <4 x i32> @test6() {			define <4 x i32> @test6() {
				; CHECK-LABEL: test6:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
				; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
				; CHECK-NEXT: lfiwzx f0, 0, r3
				; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
				; CHECK-NEXT: xxspltw v2, vs0, 3
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test6:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha
				; CHECK-BE-NEXT: ld r3, .LC0@toc@l(r3)
				; CHECK-BE-NEXT: lfiwzx f0, 0, r3
				; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
				; CHECK-BE-NEXT: xxspltw v2, vs0, 0
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test6
	; CHECK: addis
	; CHECK: ld [[TOC:[0-9]+]], .LC0
	; CHECK: lxvwsx 34, 0, 3
	; CHECK-NOT: xxspltw
	; CHECK-BE-LABEL: test6
	; CHECK-BE: addis
	; CHECK-BE: ld [[TOC:[0-9]+]], .LC0
	; CHECK-BE: lxvwsx 34, 0, 3
	; CHECK-BE-NOT: xxspltw
	%0 = load i32, i32* @Globi, align 4			%0 = load i32, i32* @Globi, align 4
	%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0			%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
	%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
	ret <4 x i32> %splat.splat			ret <4 x i32> %splat.splat
	}			}

	define <4 x float> @test7() {			define <4 x float> @test7() {
				; CHECK-LABEL: test7:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: addis r3, r2, .LC1@toc@ha
				; CHECK-NEXT: ld r3, .LC1@toc@l(r3)
				; CHECK-NEXT: lfiwzx f0, 0, r3
				; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
				; CHECK-NEXT: xxspltw v2, vs0, 3
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test7:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: addis r3, r2, .LC1@toc@ha
				; CHECK-BE-NEXT: ld r3, .LC1@toc@l(r3)
				; CHECK-BE-NEXT: lfiwzx f0, 0, r3
				; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
				; CHECK-BE-NEXT: xxspltw v2, vs0, 0
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test7
	; CHECK: addis
	; CHECK: ld [[TOC:[0-9]+]], .LC1
	; CHECK: lxvwsx 34, 0, 3
	; CHECK-NOT: xxspltw
	; CHECK-BE-LABEL: test7
	; CHECK-BE: addis
	; CHECK-BE: ld [[TOC:[0-9]+]], .LC1
	; CHECK-BE: lxvwsx 34, 0, 3
	; CHECK-BE-NOT: xxspltw
	%0 = load float, float* @Globf, align 4			%0 = load float, float* @Globf, align 4
	%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0			%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
	%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
	ret <4 x float> %splat.splat			ret <4 x float> %splat.splat
	}			}

	define <16 x i8> @test8() {			define <16 x i8> @test8() {
				; CHECK-LABEL: test8:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: xxlxor v2, v2, v2
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test8:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxlxor v2, v2, v2
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test8
	; CHECK: xxlxor 34, 34, 34
	; CHECK-BE-LABEL: test8
	; CHECK-BE: xxlxor 34, 34, 34
	ret <16 x i8> zeroinitializer			ret <16 x i8> zeroinitializer
	}			}

	define <16 x i8> @test9() {			define <16 x i8> @test9() {
				; CHECK-LABEL: test9:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: xxspltib v2, 1
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test9:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxspltib v2, 1
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test9
	; CHECK: xxspltib 34, 1
	; CHECK-BE-LABEL: test9
	; CHECK-BE: xxspltib 34, 1
	ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>			ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
	}			}

	define <16 x i8> @test10() {			define <16 x i8> @test10() {
				; CHECK-LABEL: test10:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: xxspltib v2, 127
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test10:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxspltib v2, 127
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test10
	; CHECK: xxspltib 34, 127
	; CHECK-BE-LABEL: test10
	; CHECK-BE: xxspltib 34, 127
	ret <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>			ret <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>
	}			}

	define <16 x i8> @test11() {			define <16 x i8> @test11() {
				; CHECK-LABEL: test11:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: xxspltib v2, 128
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test11:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxspltib v2, 128
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test11
	; CHECK: xxspltib 34, 128
	; CHECK-BE-LABEL: test11
	; CHECK-BE: xxspltib 34, 128
	ret <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>			ret <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
	}			}

	define <16 x i8> @test12() {			define <16 x i8> @test12() {
				; CHECK-LABEL: test12:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: xxspltib v2, 255
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test12:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxspltib v2, 255
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test12
	; CHECK: xxspltib 34, 255
	; CHECK-BE-LABEL: test12
	; CHECK-BE: xxspltib 34, 255
	ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>			ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
	}			}

	define <16 x i8> @test13() {			define <16 x i8> @test13() {
				; CHECK-LABEL: test13:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: xxspltib v2, 129
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test13:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxspltib v2, 129
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test13
	; CHECK: xxspltib 34, 129
	; CHECK-BE-LABEL: test13
	; CHECK-BE: xxspltib 34, 129
	ret <16 x i8> <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>			ret <16 x i8> <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
	}			}

	define <16 x i8> @test13E127() {			define <16 x i8> @test13E127() {
				; CHECK-LABEL: test13E127:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: xxspltib v2, 200
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test13E127:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxspltib v2, 200
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test13E127
	; CHECK: xxspltib 34, 200
	; CHECK-BE-LABEL: test13E127
	; CHECK-BE: xxspltib 34, 200
	ret <16 x i8> <i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200>			ret <16 x i8> <i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200>
	}			}

	define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) {			define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) {
				; CHECK-LABEL: test14:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: lwz r3, 0(r5)
				; CHECK-NEXT: mtvsrws v2, r3
				; CHECK-NEXT: addi r3, r3, 5
				; CHECK-NEXT: stw r3, 0(r5)
				; CHECK-NEXT: blr

				; CHECK-BE-LABEL: test14:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: lwz r3, 0(r5)
				; CHECK-BE-NEXT: mtvsrws v2, r3
				; CHECK-BE-NEXT: addi r3, r3, 5
				; CHECK-BE-NEXT: stw r3, 0(r5)
				; CHECK-BE-NEXT: blr
	entry:			entry:
	; CHECK-LABEL: test14
	; CHECK: lwz [[LD:[0-9]+]],
	; CHECK: mtvsrws 34, [[LD]]
	; CHECK-BE-LABEL: test14
	; CHECK-BE: lwz [[LD:[0-9]+]],
	; CHECK-BE: mtvsrws 34, [[LD]]
	%0 = load i32, i32* %b, align 4			%0 = load i32, i32* %b, align 4
	%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0			%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
	%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer			%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
	%1 = add i32 %0, 5			%1 = add i32 %0, 5
	store i32 %1, i32* %b, align 4			store i32 %1, i32* %b, align 4
	ret <4 x i32> %splat.splat			ret <4 x i32> %splat.splat
	}			}

llvm/trunk/test/CodeGen/PowerPC/qpx-load-splat.ll

	; RUN: llc -verify-machineinstrs < %s \| FileCheck %s			; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
	target datalayout = "E-m:e-i64:64-n32:64"			; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s \| FileCheck %s
	target triple = "powerpc64-bgq-linux"

	; Function Attrs: norecurse nounwind readonly			; Function Attrs: norecurse nounwind readonly
	define <4 x double> @foo(double* nocapture readonly %a) #0 {			define <4 x double> @foo(double* nocapture readonly %a) #0 {
				; CHECK-LABEL: foo:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: lxvdsx v2, 0, r3
				; CHECK-NEXT: vmr v3, v2
				; CHECK-NEXT: blr
	entry:			entry:
	%0 = load double, double* %a, align 8			%0 = load double, double* %a, align 8
	%vecinit.i = insertelement <4 x double> undef, double %0, i32 0			%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
	%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer			%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
	ret <4 x double> %shuffle.i			ret <4 x double> %shuffle.i

	; CHECK-LABEL: @foo
	; CHECK: lfd 1, 0(3)
	; CHECK: blr
	}			}

	define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 {			define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 {
				; CHECK-LABEL: foox:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: sldi r4, r4, 3
				; CHECK-NEXT: lxvdsx v2, r3, r4
				; CHECK-NEXT: vmr v3, v2
				; CHECK-NEXT: blr
	entry:			entry:
	%p = getelementptr double, double* %a, i64 %idx			%p = getelementptr double, double* %a, i64 %idx
	%0 = load double, double* %p, align 8			%0 = load double, double* %p, align 8
	%vecinit.i = insertelement <4 x double> undef, double %0, i32 0			%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
	%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer			%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
	ret <4 x double> %shuffle.i			ret <4 x double> %shuffle.i

	; CHECK-LABEL: @foox
	; CHECK: sldi [[REG1:[0-9]+]], 4, 3
	; CHECK: lfdx 1, 3, [[REG1]]
	; CHECK: blr
	}			}

	define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {			define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {
				; CHECK-LABEL: fooxu:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: sldi r4, r4, 3
				; CHECK-NEXT: lfdux f0, r3, r4
				; CHECK-NEXT: xxspltd v2, vs0, 0
				; CHECK-NEXT: std r3, 0(r5)
				; CHECK-NEXT: vmr v3, v2
				; CHECK-NEXT: blr
	entry:			entry:
	%p = getelementptr double, double* %a, i64 %idx			%p = getelementptr double, double* %a, i64 %idx
	%0 = load double, double* %p, align 8			%0 = load double, double* %p, align 8
	%vecinit.i = insertelement <4 x double> undef, double %0, i32 0			%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
	%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer			%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
	store double* %p, double** %pptr, align 8			store double* %p, double** %pptr, align 8
	ret <4 x double> %shuffle.i			ret <4 x double> %shuffle.i

	; CHECK-LABEL: @foox
	; CHECK: sldi [[REG1:[0-9]+]], 4, 3
	; CHECK: lfdux 1, 3, [[REG1]]
	; CHECK: std 3, 0(5)
	; CHECK: blr
	}			}

	define <4 x float> @foof(float* nocapture readonly %a) #0 {			define <4 x float> @foof(float* nocapture readonly %a) #0 {
				; CHECK-LABEL: foof:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: lfiwzx f0, 0, r3
				; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
				; CHECK-NEXT: xxspltw v2, vs0, 3
				; CHECK-NEXT: blr
	entry:			entry:
	%0 = load float, float* %a, align 4			%0 = load float, float* %a, align 4
	%vecinit.i = insertelement <4 x float> undef, float %0, i32 0			%vecinit.i = insertelement <4 x float> undef, float %0, i32 0
	%shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer			%shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
	ret <4 x float> %shuffle.i			ret <4 x float> %shuffle.i

	; CHECK-LABEL: @foof
	; CHECK: lfs 1, 0(3)
	; CHECK: blr
	}			}

	define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 {			define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 {
				; CHECK-LABEL: foofx:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: sldi r4, r4, 2
				; CHECK-NEXT: lfiwzx f0, r3, r4
				; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
				; CHECK-NEXT: xxspltw v2, vs0, 3
				; CHECK-NEXT: blr
	entry:			entry:
	%p = getelementptr float, float* %a, i64 %idx			%p = getelementptr float, float* %a, i64 %idx
	%0 = load float, float* %p, align 4			%0 = load float, float* %p, align 4
	%vecinit.i = insertelement <4 x float> undef, float %0, i32 0			%vecinit.i = insertelement <4 x float> undef, float %0, i32 0
	%shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer			%shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
	ret <4 x float> %shuffle.i			ret <4 x float> %shuffle.i

	; CHECK-LABEL: @foofx
	; CHECK: sldi [[REG1:[0-9]+]], 4, 2
	; CHECK: lfsx 1, 3, [[REG1]]
	; CHECK: blr
	}			}

	attributes #0 = { norecurse nounwind readonly "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" }

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_1.ll

				; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P9LE
				; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P9BE
				; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P8LE
				; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P8BE

				; Function Attrs: norecurse nounwind readonly
				; Loads one i64 from %int64 and inserts it into element 0 of %vec.
				; NOTE(review): only the pwr9 runs have expectations for this function; the
				; pwr8 configurations listed at the top of the file check nothing here.
				; Confirm that this is intentional.
				define <2 x i64> @s2v_test1(i64* nocapture readonly %int64, <2 x i64> %vec) {
				; P9LE-LABEL: s2v_test1:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfd f0, 0(r3)
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, v3, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test1:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfd f0, 0(r3)
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr
				entry:
				%0 = load i64, i64* %int64, align 8
				%vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
				ret <2 x i64> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the i64 at index 1 of %int64 (constant offset) and inserts it into
				; element 0 of %vec.
				; NOTE(review): the IR body is identical to @s2v_test4 in this file, and only
				; the pwr9 runs have expectations here. Confirm both are intentional.
				define <2 x i64> @s2v_test2(i64* nocapture readonly %int64, <2 x i64> %vec) {
				; P9LE-LABEL: s2v_test2:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfd f0, 8(r3)
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, v3, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test2:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfd f0, 8(r3)
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds i64, i64* %int64, i64 1
				%0 = load i64, i64* %arrayidx, align 8
				%vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
				ret <2 x i64> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the i64 at a run-time index (%Idx, sign-extended to i64) of %int64 and
				; inserts it into element 0 of %vec.
				; NOTE(review): only the pwr9 runs have expectations for this function; the
				; pwr8 configurations listed at the top of the file check nothing here.
				; Confirm that this is intentional.
				define <2 x i64> @s2v_test3(i64* nocapture readonly %int64, <2 x i64> %vec, i32 signext %Idx) {
				; P9LE-LABEL: s2v_test3:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: sldi r4, r7, 3
				; P9LE-NEXT: lfdx f0, r3, r4
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, v3, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test3:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: sldi r4, r7, 3
				; P9BE-NEXT: lfdx f0, r3, r4
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr
				entry:
				%idxprom = sext i32 %Idx to i64
				%arrayidx = getelementptr inbounds i64, i64* %int64, i64 %idxprom
				%0 = load i64, i64* %arrayidx, align 8
				%vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
				ret <2 x i64> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the i64 at index 1 of %int64 (constant offset) and inserts it into
				; element 0 of %vec.
				; NOTE(review): the IR body is identical to @s2v_test2 in this file, and only
				; the pwr9 runs have expectations here. Confirm both are intentional.
				define <2 x i64> @s2v_test4(i64* nocapture readonly %int64, <2 x i64> %vec) {
				; P9LE-LABEL: s2v_test4:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfd f0, 8(r3)
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, v3, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test4:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfd f0, 8(r3)
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds i64, i64* %int64, i64 1
				%0 = load i64, i64* %arrayidx, align 8
				%vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
				ret <2 x i64> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Same insert-at-element-0 pattern as the earlier tests, but the vector is the
				; first argument and the pointer the second, so the expected load address
				; register is r5 instead of r3.
				; NOTE(review): only the pwr9 runs have expectations for this function; the
				; pwr8 configurations listed at the top of the file check nothing here.
				; Confirm that this is intentional.
				define <2 x i64> @s2v_test5(<2 x i64> %vec, i64* nocapture readonly %ptr1) {
				; P9LE-LABEL: s2v_test5:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfd f0, 0(r5)
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, v3, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test5:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfd f0, 0(r5)
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr
				entry:
				%0 = load i64, i64* %ptr1, align 8
				%vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
				ret <2 x i64> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads one double from %f64 and inserts it into element 0 of %vec.
				; Expectations are given for all four pwr9/pwr8, little/big-endian runs.
				define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> %vec) {
				; P9LE-LABEL: s2v_test_f1:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfd f0, 0(r3)
				; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test_f1:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfd f0, 0(r3)
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test_f1:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: lfdx f0, 0, r3
				; P8LE-NEXT: xxspltd vs0, vs0, 0
				; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test_f1:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: lfdx f0, 0, r3
				; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P8BE-NEXT: blr
				entry:
				%0 = load double, double* %f64, align 8
				%vecins = insertelement <2 x double> %vec, double %0, i32 0
				ret <2 x double> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the double at index 1 of %f64 (constant offset) and inserts it into
				; element 0 of %vec.
				; NOTE(review): the IR body is identical to @s2v_test_f4 in this file.
				; Confirm the duplication is intentional.
				define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> %vec) {
				; P9LE-LABEL: s2v_test_f2:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfd f0, 8(r3)
				; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test_f2:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfd f0, 8(r3)
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test_f2:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: addi r3, r3, 8
				; P8LE-NEXT: lfdx f0, 0, r3
				; P8LE-NEXT: xxspltd vs0, vs0, 0
				; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test_f2:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: addi r3, r3, 8
				; P8BE-NEXT: lfdx f0, 0, r3
				; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P8BE-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds double, double* %f64, i64 1
				%0 = load double, double* %arrayidx, align 8
				%vecins = insertelement <2 x double> %vec, double %0, i32 0
				ret <2 x double> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the double at a run-time index (%Idx, sign-extended to i64) of %f64
				; and inserts it into element 0 of %vec.
				define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> %vec, i32 signext %Idx) {
				; P9LE-LABEL: s2v_test_f3:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: sldi r4, r7, 3
				; P9LE-NEXT: lfdx f0, r3, r4
				; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test_f3:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: sldi r4, r7, 3
				; P9BE-NEXT: lfdx f0, r3, r4
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test_f3:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: sldi r4, r7, 3
				; P8LE-NEXT: lfdx f0, r3, r4
				; P8LE-NEXT: xxspltd vs0, vs0, 0
				; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test_f3:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: sldi r4, r7, 3
				; P8BE-NEXT: lfdx f0, r3, r4
				; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P8BE-NEXT: blr
				entry:
				%idxprom = sext i32 %Idx to i64
				%arrayidx = getelementptr inbounds double, double* %f64, i64 %idxprom
				%0 = load double, double* %arrayidx, align 8
				%vecins = insertelement <2 x double> %vec, double %0, i32 0
				ret <2 x double> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the double at index 1 of %f64 (constant offset) and inserts it into
				; element 0 of %vec.
				; NOTE(review): the IR body is identical to @s2v_test_f2 in this file.
				; Confirm the duplication is intentional.
				define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> %vec) {
				; P9LE-LABEL: s2v_test_f4:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfd f0, 8(r3)
				; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test_f4:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfd f0, 8(r3)
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test_f4:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: addi r3, r3, 8
				; P8LE-NEXT: lfdx f0, 0, r3
				; P8LE-NEXT: xxspltd vs0, vs0, 0
				; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test_f4:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: addi r3, r3, 8
				; P8BE-NEXT: lfdx f0, 0, r3
				; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P8BE-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds double, double* %f64, i64 1
				%0 = load double, double* %arrayidx, align 8
				%vecins = insertelement <2 x double> %vec, double %0, i32 0
				ret <2 x double> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Same insert-at-element-0 pattern as @s2v_test_f1, but the vector is the
				; first argument and the pointer the second, so the expected load address
				; register is r5 instead of r3.
				define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly %ptr1) {
				; P9LE-LABEL: s2v_test_f5:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfd f0, 0(r5)
				; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test_f5:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfd f0, 0(r5)
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test_f5:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: lfdx f0, 0, r5
				; P8LE-NEXT: xxspltd vs0, vs0, 0
				; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test_f5:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: lfdx f0, 0, r5
				; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P8BE-NEXT: blr
				entry:
				%0 = load double, double* %ptr1, align 8
				%vecins = insertelement <2 x double> %vec, double %0, i32 0
				ret <2 x double> %vecins
				}

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll

				; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P9LE
				; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P9BE
				; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P8LE
				; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P8BE

				; Loads a <1 x float> from each of %A and %B, adds them, and stores the sum
				; to %C. The block has no explicit label, so the expected block comment is
				; just "# %bb.0" with no "# %entry" suffix.
				define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
				; P9LE-LABEL: test_liwzx1:
				; P9LE: # %bb.0:
				; P9LE-NEXT: lfiwzx f0, 0, r3
				; P9LE-NEXT: lfiwzx f1, 0, r4
				; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
				; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
				; P9LE-NEXT: xvaddsp vs0, vs0, vs1
				; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
				; P9LE-NEXT: xscvspdpn f0, vs0
				; P9LE-NEXT: stfs f0, 0(r5)
				; P9LE-NEXT: blr

				; P9BE-LABEL: test_liwzx1:
				; P9BE: # %bb.0:
				; P9BE-NEXT: lfiwzx f0, 0, r3
				; P9BE-NEXT: lfiwzx f1, 0, r4
				; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
				; P9BE-NEXT: xxsldwi vs1, f1, f1, 1
				; P9BE-NEXT: xvaddsp vs0, vs0, vs1
				; P9BE-NEXT: xscvspdpn f0, vs0
				; P9BE-NEXT: stfs f0, 0(r5)
				; P9BE-NEXT: blr

				; P8LE-LABEL: test_liwzx1:
				; P8LE: # %bb.0:
				; P8LE-NEXT: lfiwzx f0, 0, r3
				; P8LE-NEXT: lfiwzx f1, 0, r4
				; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
				; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
				; P8LE-NEXT: xvaddsp vs0, vs0, vs1
				; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3
				; P8LE-NEXT: xscvspdpn f0, vs0
				; P8LE-NEXT: stfsx f0, 0, r5
				; P8LE-NEXT: blr

				; P8BE-LABEL: test_liwzx1:
				; P8BE: # %bb.0:
				; P8BE-NEXT: lfiwzx f0, 0, r3
				; P8BE-NEXT: lfiwzx f1, 0, r4
				; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
				; P8BE-NEXT: xxsldwi vs1, f1, f1, 1
				; P8BE-NEXT: xvaddsp vs0, vs0, vs1
				; P8BE-NEXT: xscvspdpn f0, vs0
				; P8BE-NEXT: stfsx f0, 0, r5
				; P8BE-NEXT: blr
				%a = load <1 x float>, <1 x float>* %A
				%b = load <1 x float>, <1 x float>* %B
				%X = fadd <1 x float> %a, %b
				store <1 x float> %X, <1 x float>* %C
				ret void
				}

				; Loads a <1 x float> from each of %A and %B, subtracts %b from %a, stores the
				; difference to %C and returns %C. The expectations therefore include a
				; "mr r3, r5" that moves the returned pointer into the return register.
				define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
				; P9LE-LABEL: test_liwzx2:
				; P9LE: # %bb.0:
				; P9LE-NEXT: lfiwzx f0, 0, r3
				; P9LE-NEXT: lfiwzx f1, 0, r4
				; P9LE-NEXT: mr r3, r5
				; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
				; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
				; P9LE-NEXT: xvsubsp vs0, vs0, vs1
				; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
				; P9LE-NEXT: xscvspdpn f0, vs0
				; P9LE-NEXT: stfs f0, 0(r5)
				; P9LE-NEXT: blr

				; P9BE-LABEL: test_liwzx2:
				; P9BE: # %bb.0:
				; P9BE-NEXT: lfiwzx f0, 0, r3
				; P9BE-NEXT: lfiwzx f1, 0, r4
				; P9BE-NEXT: mr r3, r5
				; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
				; P9BE-NEXT: xxsldwi vs1, f1, f1, 1
				; P9BE-NEXT: xvsubsp vs0, vs0, vs1
				; P9BE-NEXT: xscvspdpn f0, vs0
				; P9BE-NEXT: stfs f0, 0(r5)
				; P9BE-NEXT: blr

				; P8LE-LABEL: test_liwzx2:
				; P8LE: # %bb.0:
				; P8LE-NEXT: lfiwzx f0, 0, r3
				; P8LE-NEXT: lfiwzx f1, 0, r4
				; P8LE-NEXT: mr r3, r5
				; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
				; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
				; P8LE-NEXT: xvsubsp vs0, vs0, vs1
				; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3
				; P8LE-NEXT: xscvspdpn f0, vs0
				; P8LE-NEXT: stfsx f0, 0, r5
				; P8LE-NEXT: blr

				; P8BE-LABEL: test_liwzx2:
				; P8BE: # %bb.0:
				; P8BE-NEXT: lfiwzx f0, 0, r3
				; P8BE-NEXT: lfiwzx f1, 0, r4
				; P8BE-NEXT: mr r3, r5
				; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
				; P8BE-NEXT: xxsldwi vs1, f1, f1, 1
				; P8BE-NEXT: xvsubsp vs0, vs0, vs1
				; P8BE-NEXT: xscvspdpn f0, vs0
				; P8BE-NEXT: stfsx f0, 0, r5
				; P8BE-NEXT: blr
				%a = load <1 x float>, <1 x float>* %A
				%b = load <1 x float>, <1 x float>* %B
				%X = fsub <1 x float> %a, %b
				store <1 x float> %X, <1 x float>* %C
				ret <1 x float>* %C
				}

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_3.ll

				; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P9LE
				; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P9BE
				; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P8LE
				; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P8BE

				; Function Attrs: norecurse nounwind readonly
				; Loads one i32 from %int32, sign-extends it to i64 and inserts the result
				; into element 0 of %vec.
				define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) {
				; P9LE-LABEL: s2v_test1:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfiwax f0, 0, r3
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, v3, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test1:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfiwax f0, 0, r3
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test1:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: lfiwax f0, 0, r3
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: xxpermdi v2, v2, v3, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test1:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: lfiwax f0, 0, r3
				; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P8BE-NEXT: blr
				entry:
				%0 = load i32, i32* %int32, align 4
				%conv = sext i32 %0 to i64
				%vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
				ret <2 x i64> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the i32 at index 1 of %int32 (constant offset), sign-extends it to
				; i64 and inserts the result into element 0 of %vec.
				; NOTE(review): the IR body is identical to @s2v_test4 in this file.
				; Confirm the duplication is intentional.
				define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) {
				; P9LE-LABEL: s2v_test2:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: addi r3, r3, 4
				; P9LE-NEXT: lfiwax f0, 0, r3
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, v3, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test2:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: addi r3, r3, 4
				; P9BE-NEXT: lfiwax f0, 0, r3
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test2:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: addi r3, r3, 4
				; P8LE-NEXT: lfiwax f0, 0, r3
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: xxpermdi v2, v2, v3, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test2:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: addi r3, r3, 4
				; P8BE-NEXT: lfiwax f0, 0, r3
				; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P8BE-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
				%0 = load i32, i32* %arrayidx, align 4
				%conv = sext i32 %0 to i64
				%vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
				ret <2 x i64> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the i32 at a run-time index (%Idx, sign-extended to i64) of %int32,
				; sign-extends the loaded value to i64 and inserts it into element 0 of %vec.
				define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 signext %Idx) {
				; P9LE-LABEL: s2v_test3:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: sldi r4, r7, 2
				; P9LE-NEXT: lfiwax f0, r3, r4
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, v3, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test3:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: sldi r4, r7, 2
				; P9BE-NEXT: lfiwax f0, r3, r4
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test3:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: sldi r4, r7, 2
				; P8LE-NEXT: lfiwax f0, r3, r4
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: xxpermdi v2, v2, v3, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test3:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: sldi r4, r7, 2
				; P8BE-NEXT: lfiwax f0, r3, r4
				; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P8BE-NEXT: blr
				entry:
				%idxprom = sext i32 %Idx to i64
				%arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
				%0 = load i32, i32* %arrayidx, align 4
				%conv = sext i32 %0 to i64
				%vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
				ret <2 x i64> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the i32 at index 1 of %int32 (constant offset), sign-extends it to
				; i64 and inserts the result into element 0 of %vec.
				; NOTE(review): the IR body is identical to @s2v_test2 in this file.
				; Confirm the duplication is intentional.
				define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) {
				; P9LE-LABEL: s2v_test4:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: addi r3, r3, 4
				; P9LE-NEXT: lfiwax f0, 0, r3
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, v3, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test4:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: addi r3, r3, 4
				; P9BE-NEXT: lfiwax f0, 0, r3
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test4:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: addi r3, r3, 4
				; P8LE-NEXT: lfiwax f0, 0, r3
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: xxpermdi v2, v2, v3, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test4:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: addi r3, r3, 4
				; P8BE-NEXT: lfiwax f0, 0, r3
				; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P8BE-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
				%0 = load i32, i32* %arrayidx, align 4
				%conv = sext i32 %0 to i64
				%vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
				ret <2 x i64> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Same sign-extending insert-at-element-0 pattern as @s2v_test1, but the
				; vector is the first argument and the pointer the second, so the expected
				; load address register is r5 instead of r3.
				define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) {
				; P9LE-LABEL: s2v_test5:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfiwax f0, 0, r5
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: xxpermdi v2, v2, v3, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test5:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfiwax f0, 0, r5
				; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test5:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: lfiwax f0, 0, r5
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: xxpermdi v2, v2, v3, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test5:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: lfiwax f0, 0, r5
				; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
				; P8BE-NEXT: blr
				entry:
				%0 = load i32, i32* %ptr1, align 4
				%conv = sext i32 %0 to i64
				%vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
				ret <2 x i64> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads an i32 from %ptr, sign-extends it to i64, inserts it into element 0 of
				; an undef <2 x i64>, then shuffles with an all-zero mask so that both result
				; elements hold the loaded value (a splat).
				define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) {
				; P9LE-LABEL: s2v_test6:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfiwax f0, 0, r3
				; P9LE-NEXT: xxpermdi v2, f0, f0, 2
				; P9LE-NEXT: xxspltd v2, v2, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test6:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfiwax f0, 0, r3
				; P9BE-NEXT: xxspltd v2, vs0, 0
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test6:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: lfiwax f0, 0, r3
				; P8LE-NEXT: xxpermdi v2, f0, f0, 2
				; P8LE-NEXT: xxspltd v2, v2, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test6:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: lfiwax f0, 0, r3
				; P8BE-NEXT: xxspltd v2, vs0, 0
				; P8BE-NEXT: blr
				entry:
				%0 = load i32, i32* %ptr, align 4
				%conv = sext i32 %0 to i64
				%splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
				%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
				ret <2 x i64> %splat.splat
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads an i32 from %ptr, sign-extends it to i64 and splats it across a
				; <2 x i64> via insertelement into undef plus an all-zero shuffle mask.
				; NOTE(review): the IR body and expected assembly match @s2v_test6 in this
				; file line for line -- confirm whether a distinct case was intended.
				define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) {
				; P9LE-LABEL: s2v_test7:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfiwax f0, 0, r3
				; P9LE-NEXT: xxpermdi v2, f0, f0, 2
				; P9LE-NEXT: xxspltd v2, v2, 1
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test7:
				; P9BE: # %bb.0: # %entry
				; P9BE-NEXT: lfiwax f0, 0, r3
				; P9BE-NEXT: xxspltd v2, vs0, 0
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test7:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: lfiwax f0, 0, r3
				; P8LE-NEXT: xxpermdi v2, f0, f0, 2
				; P8LE-NEXT: xxspltd v2, v2, 1
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test7:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: lfiwax f0, 0, r3
				; P8BE-NEXT: xxspltd v2, vs0, 0
				; P8BE-NEXT: blr
				entry:
				%0 = load i32, i32* %ptr, align 4
				%conv = sext i32 %0 to i64
				%splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
				%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
				ret <2 x i64> %splat.splat
				}

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll

				; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P9LE
				; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P9BE
				; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P8LE
				; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
				; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s \| FileCheck %s --check-prefix=P8BE

				; Function Attrs: norecurse nounwind readonly
				; Loads an i32 from %int32 and inserts it into element 0 of the <4 x i32> %vec.
				; Only P8LE and P8BE expectations are given for this function. The P8BE block
				; mixes plain and -NEXT directives, so some of its lines need not be adjacent
				; in the output.
				define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
				; P8LE-LABEL: s2v_test1:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: lfiwzx f0, 0, r3
				; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
				; P8LE-NEXT: addi r3, r4, .LCPI0_0@toc@l
				; P8LE-NEXT: lvx v4, 0, r3
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: vperm v2, v3, v2, v4
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test1:
				; P8BE: # %bb.0: # %entry
				; P8BE: lfiwzx f0, 0, r3
				; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
				; P8BE: xxsldwi vs0, v2, vs0, 1
				; P8BE: xxsldwi v2, vs0, vs0, 3
				; P8BE-NEXT: blr
				entry:
				%0 = load i32, i32* %int32, align 4
				%vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
				ret <4 x i32> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the i32 at %int32[1] and inserts it into element 0 of the <4 x i32>
				; %vec. Only P8LE and P8BE expectations are given for this function.
				define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) {
				; P8LE-LABEL: s2v_test2:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: addi r3, r3, 4
				; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
				; P8LE-NEXT: lfiwzx f0, 0, r3
				; P8LE-NEXT: addi r3, r4, .LCPI1_0@toc@l
				; P8LE-NEXT: lvx v4, 0, r3
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: vperm v2, v3, v2, v4
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test2:
				; P8BE: # %bb.0: # %entry
				; P8BE: addi r3, r3, 4
				; P8BE: lfiwzx f0, 0, r3
				; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
				; P8BE: xxsldwi vs0, v2, vs0, 1
				; P8BE: xxsldwi v2, vs0, vs0, 3
				; P8BE-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
				%0 = load i32, i32* %arrayidx, align 4
				%vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
				ret <4 x i32> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the i32 at %int32[%Idx] (the index is sign-extended to i64 first) and
				; inserts it into element 0 of the <4 x i32> %vec. Only P8LE and P8BE
				; expectations are given for this function.
				define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) {
				; P8LE-LABEL: s2v_test3:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: sldi r5, r7, 2
				; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
				; P8LE-NEXT: lfiwzx f0, r3, r5
				; P8LE-NEXT: addi r3, r4, .LCPI2_0@toc@l
				; P8LE-NEXT: lvx v4, 0, r3
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: vperm v2, v3, v2, v4
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test3:
				; P8BE: # %bb.0: # %entry
				; P8BE: sldi r4, r7, 2
				; P8BE: lfiwzx f0, r3, r4
				; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
				; P8BE: xxsldwi vs0, v2, vs0, 1
				; P8BE: xxsldwi v2, vs0, vs0, 3
				; P8BE-NEXT: blr
				entry:
				%idxprom = sext i32 %Idx to i64
				%arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
				%0 = load i32, i32* %arrayidx, align 4
				%vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
				ret <4 x i32> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the i32 at %int32[1] and inserts it into element 0 of the <4 x i32>
				; %vec. Only P8LE and P8BE expectations are given for this function.
				; NOTE(review): the IR body matches the <4 x i32> @s2v_test2 in this file --
				; confirm whether a distinct case was intended.
				define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) {
				; P8LE-LABEL: s2v_test4:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: addi r3, r3, 4
				; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
				; P8LE-NEXT: lfiwzx f0, 0, r3
				; P8LE-NEXT: addi r3, r4, .LCPI3_0@toc@l
				; P8LE-NEXT: lvx v4, 0, r3
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: vperm v2, v3, v2, v4
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test4:
				; P8BE: # %bb.0: # %entry
				; P8BE: addi r3, r3, 4
				; P8BE: lfiwzx f0, 0, r3
				; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
				; P8BE: xxsldwi vs0, v2, vs0, 1
				; P8BE: xxsldwi v2, vs0, vs0, 3
				; P8BE-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
				%0 = load i32, i32* %arrayidx, align 4
				%vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
				ret <4 x i32> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads an i32 directly from %ptr1 and inserts it into element 0 of the
				; <4 x i32> %vec. The vector is the first parameter and the pointer the
				; second; the check lines expect the load address in r5. Only P8LE and P8BE
				; expectations are given for this function.
				define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) {
				; P8LE-LABEL: s2v_test5:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: lfiwzx f0, 0, r5
				; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
				; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l
				; P8LE-NEXT: lvx v4, 0, r3
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: vperm v2, v3, v2, v4
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test5:
				; P8BE: # %bb.0: # %entry
				; P8BE: lfiwzx f0, 0, r5
				; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
				; P8BE: xxsldwi vs0, v2, vs0, 1
				; P8BE: xxsldwi v2, vs0, vs0, 3
				; P8BE-NEXT: blr
				entry:
				%0 = load i32, i32* %ptr1, align 4
				%vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
				ret <4 x i32> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads a float from the pointer argument and inserts it into element 0 of the
				; <4 x float> %vec. Only P8LE and P8BE expectations are given for this function.
				; NOTE(review): the pointer parameter is named %f64 although its type is
				; float* -- confirm the name is intentional.
				define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) {
				; P8LE-LABEL: s2v_test_f1:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: lfiwzx f0, 0, r3
				; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
				; P8LE-NEXT: addi r3, r4, .LCPI5_0@toc@l
				; P8LE-NEXT: lvx v4, 0, r3
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: vperm v2, v3, v2, v4
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test_f1:
				; P8BE: # %bb.0: # %entry
				; P8BE: lfiwzx f0, 0, r3
				; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
				; P8BE: xxsldwi vs0, v2, vs0, 1
				; P8BE: xxsldwi v2, vs0, vs0, 3
				; P8BE-NEXT: blr
				entry:
				%0 = load float, float* %f64, align 4
				%vecins = insertelement <4 x float> %vec, float %0, i32 0
				ret <4 x float> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the float at index 1 from the pointer argument and inserts it into
				; element 0 of the <2 x float> %vec. Expectations are given for the P9LE, P9BE,
				; P8LE and P8BE prefixes.
				; NOTE(review): the pointer parameter is named %f64 although its type is
				; float* -- confirm the name is intentional.
				define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec) {
				; P9LE-LABEL: s2v_test_f2:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: addi r3, r3, 4
				; P9LE-NEXT: xxspltw v2, v2, 2
				; P9LE-NEXT: lfiwzx f0, 0, r3
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: vmrglw v2, v2, v3
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test_f2:
				; P9BE: # %bb.0: # %entry
				; P9BE: addi r3, r3, 4
				; P9BE: xxspltw v2, v2, 1
				; P9BE: lfiwzx f0, 0, r3
				; P9BE-NEXT: xxsldwi v3, f0, f0, 1
				; P9BE: vmrghw v2, v3, v2
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test_f2:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: addi r3, r3, 4
				; P8LE-NEXT: xxspltw v2, v2, 2
				; P8LE-NEXT: lfiwzx f0, 0, r3
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: vmrglw v2, v2, v3
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test_f2:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: addi r3, r3, 4
				; P8BE-NEXT: xxspltw v2, v2, 1
				; P8BE-NEXT: lfiwzx f0, 0, r3
				; P8BE-NEXT: xxsldwi v3, f0, f0, 1
				; P8BE-NEXT: vmrghw v2, v3, v2
				; P8BE-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds float, float* %f64, i64 1
				; NOTE(review): the load states align 8 for a 4-byte float at element index 1
				; -- confirm this alignment is intended.
				%0 = load float, float* %arrayidx, align 8
				%vecins = insertelement <2 x float> %vec, float %0, i32 0
				ret <2 x float> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the float at index %Idx (sign-extended to i64) from the pointer
				; argument and inserts it into element 0 of the <2 x float> %vec. Expectations
				; are given for the P9LE, P9BE, P8LE and P8BE prefixes.
				; NOTE(review): the pointer parameter is named %f64 although its type is
				; float* -- confirm the name is intentional.
				define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx) {
				; P9LE-LABEL: s2v_test_f3:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: sldi r4, r7, 2
				; P9LE-NEXT: xxspltw v2, v2, 2
				; P9LE-NEXT: lfiwzx f0, r3, r4
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: vmrglw v2, v2, v3
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test_f3:
				; P9BE: # %bb.0: # %entry
				; P9BE: sldi r4, r7, 2
				; P9BE: xxspltw v2, v2, 1
				; P9BE: lfiwzx f0, r3, r4
				; P9BE-NEXT: xxsldwi v3, f0, f0, 1
				; P9BE: vmrghw v2, v3, v2
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test_f3:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: sldi r4, r7, 2
				; P8LE-NEXT: xxspltw v2, v2, 2
				; P8LE-NEXT: lfiwzx f0, r3, r4
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: vmrglw v2, v2, v3
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test_f3:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: sldi r4, r7, 2
				; P8BE-NEXT: xxspltw v2, v2, 1
				; P8BE-NEXT: lfiwzx f0, r3, r4
				; P8BE-NEXT: xxsldwi v3, f0, f0, 1
				; P8BE-NEXT: vmrghw v2, v3, v2
				; P8BE-NEXT: blr
				entry:
				%idxprom = sext i32 %Idx to i64
				%arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom
				; NOTE(review): the load states align 8 for a float at a variable element
				; index -- confirm this alignment is intended.
				%0 = load float, float* %arrayidx, align 8
				%vecins = insertelement <2 x float> %vec, float %0, i32 0
				ret <2 x float> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads the float at index 1 from the pointer argument and inserts it into
				; element 0 of the <2 x float> %vec. Expectations are given for the P9LE, P9BE,
				; P8LE and P8BE prefixes.
				; NOTE(review): the IR body matches @s2v_test_f2 in this file, and the pointer
				; parameter is named %f64 although its type is float* -- confirm both are
				; intentional.
				define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec) {
				; P9LE-LABEL: s2v_test_f4:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: addi r3, r3, 4
				; P9LE-NEXT: xxspltw v2, v2, 2
				; P9LE-NEXT: lfiwzx f0, 0, r3
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: vmrglw v2, v2, v3
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test_f4:
				; P9BE: # %bb.0: # %entry
				; P9BE: addi r3, r3, 4
				; P9BE: xxspltw v2, v2, 1
				; P9BE: lfiwzx f0, 0, r3
				; P9BE-NEXT: xxsldwi v3, f0, f0, 1
				; P9BE: vmrghw v2, v3, v2
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test_f4:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: addi r3, r3, 4
				; P8LE-NEXT: xxspltw v2, v2, 2
				; P8LE-NEXT: lfiwzx f0, 0, r3
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: vmrglw v2, v2, v3
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test_f4:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: addi r3, r3, 4
				; P8BE-NEXT: xxspltw v2, v2, 1
				; P8BE-NEXT: lfiwzx f0, 0, r3
				; P8BE-NEXT: xxsldwi v3, f0, f0, 1
				; P8BE-NEXT: vmrghw v2, v3, v2
				; P8BE-NEXT: blr
				entry:
				%arrayidx = getelementptr inbounds float, float* %f64, i64 1
				; NOTE(review): the load states align 8 for a 4-byte float at element index 1
				; -- confirm this alignment is intended.
				%0 = load float, float* %arrayidx, align 8
				%vecins = insertelement <2 x float> %vec, float %0, i32 0
				ret <2 x float> %vecins
				}

				; Function Attrs: norecurse nounwind readonly
				; Loads a float directly from %ptr1 and inserts it into element 0 of the
				; <2 x float> %vec. The vector is the first parameter and the pointer the
				; second; the check lines expect the load address in r5.
				define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) {
				; P9LE-LABEL: s2v_test_f5:
				; P9LE: # %bb.0: # %entry
				; P9LE-NEXT: lfiwzx f0, 0, r5
				; P9LE-NEXT: xxspltw v2, v2, 2
				; P9LE-NEXT: xxpermdi v3, f0, f0, 2
				; P9LE-NEXT: vmrglw v2, v2, v3
				; P9LE-NEXT: blr

				; P9BE-LABEL: s2v_test_f5:
				; P9BE: # %bb.0: # %entry
				; P9BE: lfiwzx f0, 0, r5
				; P9BE: xxspltw v2, v2, 1
				; P9BE-NEXT: xxsldwi v3, f0, f0, 1
				; P9BE: vmrghw v2, v3, v2
				; P9BE-NEXT: blr

				; P8LE-LABEL: s2v_test_f5:
				; P8LE: # %bb.0: # %entry
				; P8LE-NEXT: lfiwzx f0, 0, r5
				; P8LE-NEXT: xxspltw v2, v2, 2
				; P8LE-NEXT: xxpermdi v3, f0, f0, 2
				; P8LE-NEXT: vmrglw v2, v2, v3
				; P8LE-NEXT: blr

				; P8BE-LABEL: s2v_test_f5:
				; P8BE: # %bb.0: # %entry
				; P8BE-NEXT: lfiwzx f0, 0, r5
				; P8BE-NEXT: xxspltw v2, v2, 1
				; P8BE-NEXT: xxsldwi v3, f0, f0, 1
				; P8BE-NEXT: vmrghw v2, v3, v2
				; P8BE-NEXT: blr
				entry:
				%0 = load float, float* %ptr1, align 8
				%vecins = insertelement <2 x float> %vec, float %0, i32 0
				ret <2 x float> %vecins
				}

llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll

	; RUN: llc -verify-machineinstrs -mcpu=pwr8 \			; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
	; RUN: -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s \| FileCheck %s			; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
				; RUN: -O3 < %s \| FileCheck %s

	; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \			; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
	; RUN: -verify-machineinstrs < %s \| FileCheck %s --check-prefix=CHECK-P9 \			; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
				; RUN: < %s \| FileCheck %s --check-prefix=CHECK-P9 \
	; RUN: --implicit-check-not xxswapd			; RUN: --implicit-check-not xxswapd

	; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \			; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
	; RUN: -verify-machineinstrs -mattr=-power9-vector < %s \| FileCheck %s			; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
				; RUN: -mattr=-power9-vector < %s \| FileCheck %s

	; These tests verify that VSX swap optimization works when loading a scalar			; These tests verify that VSX swap optimization works when loading a scalar
	; into a vector register.			; into a vector register.


	@x = global <2 x double> <double 9.970000e+01, double -1.032220e+02>, align 16			@x = global <2 x double> <double 9.970000e+01, double -1.032220e+02>, align 16
	@z = global <2 x double> <double 2.332000e+01, double 3.111111e+01>, align 16			@z = global <2 x double> <double 2.332000e+01, double 3.111111e+01>, align 16
	@y = global double 1.780000e+00, align 8			@y = global double 1.780000e+00, align 8

	define void @bar0() {			define void @bar0() {
				; CHECK-LABEL: bar0:
				; CHECK: # %bb.0: # %entry
				; CHECK: addis r3, r2, .LC0@toc@ha
				; CHECK: addis r4, r2, .LC1@toc@ha
				; CHECK: ld r3, .LC0@toc@l(r3)
				; CHECK: addis r3, r2, .LC2@toc@ha
				; CHECK: ld r3, .LC2@toc@l(r3)
				; CHECK: xxpermdi vs0, vs0, vs1, 1
				; CHECK: stxvd2x vs0, 0, r3
				; CHECK: blr
				;
				; CHECK-P9-LABEL: bar0:
				; CHECK-P9: # %bb.0: # %entry
				; CHECK-P9: addis r3, r2, .LC0@toc@ha
				; CHECK-P9: addis r4, r2, .LC1@toc@ha
				; CHECK-P9: ld r3, .LC0@toc@l(r3)
				; CHECK-P9: ld r4, .LC1@toc@l(r4)
				; CHECK-P9: lfd f0, 0(r3)
				; CHECK-P9: lxvx vs1, 0, r4
				; CHECK-P9: addis r3, r2, .LC2@toc@ha
				; CHECK-P9: ld r3, .LC2@toc@l(r3)
				; CHECK-P9: xxpermdi vs0, f0, f0, 2
				; CHECK-P9: xxpermdi vs0, vs1, vs0, 1
				; CHECK-P9: stxvx vs0, 0, r3
				; CHECK-P9: blr
	entry:			entry:
	%0 = load <2 x double>, <2 x double>* @x, align 16			%0 = load <2 x double>, <2 x double>* @x, align 16
	%1 = load double, double* @y, align 8			%1 = load double, double* @y, align 8
	%vecins = insertelement <2 x double> %0, double %1, i32 0			%vecins = insertelement <2 x double> %0, double %1, i32 0
	store <2 x double> %vecins, <2 x double>* @z, align 16			store <2 x double> %vecins, <2 x double>* @z, align 16
	ret void			ret void
	}			}

	; CHECK-LABEL: @bar0
	; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
	; CHECK-DAG: lfdx [[REG2:[0-9]+]]
	; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
	; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
	; CHECK: stxvd2x [[REG5]]

	; CHECK-P9-LABEL: @bar0
	; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
	; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
	; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
	; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1
	; CHECK-P9: stxvx [[REG5]]

	define void @bar1() {			define void @bar1() {
				; CHECK-LABEL: bar1:
				; CHECK: # %bb.0: # %entry
				; CHECK: addis r3, r2, .LC0@toc@ha
				; CHECK: addis r4, r2, .LC1@toc@ha
				; CHECK: ld r3, .LC0@toc@l(r3)
				; CHECK: addis r3, r2, .LC2@toc@ha
				; CHECK: ld r3, .LC2@toc@l(r3)
				; CHECK: xxmrghd vs0, vs1, vs0
				; CHECK: stxvd2x vs0, 0, r3
				; CHECK: blr
				;
				; CHECK-P9-LABEL: bar1:
				; CHECK-P9: # %bb.0: # %entry
				; CHECK-P9: addis r3, r2, .LC0@toc@ha
				; CHECK-P9: addis r4, r2, .LC1@toc@ha
				; CHECK-P9: ld r3, .LC0@toc@l(r3)
				; CHECK-P9: ld r4, .LC1@toc@l(r4)
				; CHECK-P9: lfd f0, 0(r3)
				; CHECK-P9: lxvx vs1, 0, r4
				; CHECK-P9: addis r3, r2, .LC2@toc@ha
				; CHECK-P9: ld r3, .LC2@toc@l(r3)
				; CHECK-P9: xxpermdi vs0, f0, f0, 2
				; CHECK-P9: xxmrgld vs0, vs0, vs1
				; CHECK-P9: stxvx vs0, 0, r3
				; CHECK-P9: blr
	entry:			entry:
	%0 = load <2 x double>, <2 x double>* @x, align 16			%0 = load <2 x double>, <2 x double>* @x, align 16
	%1 = load double, double* @y, align 8			%1 = load double, double* @y, align 8
	%vecins = insertelement <2 x double> %0, double %1, i32 1			%vecins = insertelement <2 x double> %0, double %1, i32 1
	store <2 x double> %vecins, <2 x double>* @z, align 16			store <2 x double> %vecins, <2 x double>* @z, align 16
	ret void			ret void
	}			}

	; CHECK-LABEL: @bar1
	; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
	; CHECK-DAG: lfdx [[REG2:[0-9]+]]
	; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
	; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
	; CHECK: stxvd2x [[REG5]]

	; CHECK-P9-LABEL: @bar1
	; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
	; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
	; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
	; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]]
	; CHECK-P9: stxvx [[REG5]]

llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll

	; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \			; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -ppc-vsr-nums-as-vr \
	; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s			; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
				; RUN: \| FileCheck %s

				; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -ppc-vsr-nums-as-vr \
				; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
				; RUN: \| FileCheck --check-prefix=CHECK-P9-VECTOR %s

	; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \			; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
	; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s

	; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
	; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s \			; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s \| FileCheck %s \
	; RUN: --check-prefix=CHECK-P9 --implicit-check-not xxswapd			; RUN: --check-prefix=CHECK-P9 --implicit-check-not xxswapd

	define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {			define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
				; CHECK-LABEL: testi0:
				; CHECK: # %bb.0:
				; CHECK-NEXT: lxvd2x vs0, 0, r3
				; CHECK-NEXT: lfdx f1, 0, r4
				; CHECK-NEXT: xxswapd vs0, vs0
				; CHECK-NEXT: xxspltd vs1, vs1, 0
				; CHECK-NEXT: xxpermdi v2, vs0, vs1, 1
				; CHECK-NEXT: blr
				;
				; CHECK-P9-VECTOR-LABEL: testi0:
				; CHECK-P9-VECTOR: # %bb.0:
				; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
				; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4
				; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0
				; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0
				; CHECK-P9-VECTOR-NEXT: xxpermdi v2, vs0, vs1, 1
				; CHECK-P9-VECTOR-NEXT: blr
				;
				; CHECK-P9-LABEL: testi0:
				; CHECK-P9: # %bb.0:
				; CHECK-P9-NEXT: lfd f0, 0(r4)
				; CHECK-P9-NEXT: lxv vs1, 0(r3)
				; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2
				; CHECK-P9-NEXT: xxpermdi v2, vs1, vs0, 1
				; CHECK-P9-NEXT: blr
	%v = load <2 x double>, <2 x double>* %p1			%v = load <2 x double>, <2 x double>* %p1
	%s = load double, double* %p2			%s = load double, double* %p2
	%r = insertelement <2 x double> %v, double %s, i32 0			%r = insertelement <2 x double> %v, double %s, i32 0
	ret <2 x double> %r			ret <2 x double> %r

	; CHECK-LABEL: testi0
	; CHECK: lxvd2x 0, 0, 3
	; CHECK: lfdx 1, 0, 4
	; CHECK-DAG: xxspltd 1, 1, 0
	; CHECK-DAG: xxswapd 0, 0
	; CHECK: xxpermdi 34, 0, 1, 1

	; CHECK-P9-LABEL: testi0
	; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
	; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3)
	; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
	; CHECK-P9: xxpermdi 34, [[REG2]], [[REG3]], 1
	}			}

	define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {			define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
				; CHECK-LABEL: testi1:
				; CHECK: # %bb.0:
				; CHECK-NEXT: lxvd2x vs0, 0, r3
				; CHECK-NEXT: lfdx f1, 0, r4
				; CHECK-NEXT: xxswapd vs0, vs0
				; CHECK-NEXT: xxspltd vs1, vs1, 0
				; CHECK-NEXT: xxmrgld v2, vs1, vs0
				; CHECK-NEXT: blr
				;
				; CHECK-P9-VECTOR-LABEL: testi1:
				; CHECK-P9-VECTOR: # %bb.0:
				; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
				; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4
				; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0
				; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0
				; CHECK-P9-VECTOR-NEXT: xxmrgld v2, vs1, vs0
				; CHECK-P9-VECTOR-NEXT: blr
				;
				; CHECK-P9-LABEL: testi1:
				; CHECK-P9: # %bb.0:
				; CHECK-P9-NEXT: lfd f0, 0(r4)
				; CHECK-P9-NEXT: lxv vs1, 0(r3)
				; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2
				; CHECK-P9-NEXT: xxmrgld v2, vs0, vs1
				; CHECK-P9-NEXT: blr
	%v = load <2 x double>, <2 x double>* %p1			%v = load <2 x double>, <2 x double>* %p1
	%s = load double, double* %p2			%s = load double, double* %p2
	%r = insertelement <2 x double> %v, double %s, i32 1			%r = insertelement <2 x double> %v, double %s, i32 1
	ret <2 x double> %r			ret <2 x double> %r

	; CHECK-LABEL: testi1
	; CHECK: lxvd2x 0, 0, 3
	; CHECK: lfdx 1, 0, 4
	; CHECK-DAG: xxspltd 1, 1, 0
	; CHECK-DAG: xxswapd 0, 0
	; CHECK: xxmrgld 34, 1, 0

	; CHECK-P9-LABEL: testi1
	; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
	; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3)
	; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
	; CHECK-P9: xxmrgld 34, [[REG3]], [[REG2]]
	}			}

	define double @teste0(<2 x double>* %p1) {			define double @teste0(<2 x double>* %p1) {
				; CHECK-LABEL: teste0:
				; CHECK: # %bb.0:
				; CHECK-NEXT: lxvd2x vs1, 0, r3
				; CHECK: blr
				;
				; CHECK-P9-VECTOR-LABEL: teste0:
				; CHECK-P9-VECTOR: # %bb.0:
				; CHECK-P9-VECTOR-NEXT: lxvd2x vs1, 0, r3
				; CHECK-P9-VECTOR: blr
				;
				; CHECK-P9-LABEL: teste0:
				; CHECK-P9: # %bb.0:
				; CHECK-P9-NEXT: lfd f1, 0(r3)
				; CHECK-P9-NEXT: blr
	%v = load <2 x double>, <2 x double>* %p1			%v = load <2 x double>, <2 x double>* %p1
	%r = extractelement <2 x double> %v, i32 0			%r = extractelement <2 x double> %v, i32 0
	ret double %r			ret double %r

	; CHECK-LABEL: teste0
	; CHECK: lxvd2x 1, 0, 3

	; CHECK-P9-LABEL: teste0
	; CHECK-P9: lfd 1, 0(3)
	}			}

	define double @teste1(<2 x double>* %p1) {			define double @teste1(<2 x double>* %p1) {
				; CHECK-LABEL: teste1:
				; CHECK: # %bb.0:
				; CHECK-NEXT: lxvd2x vs0, 0, r3
				; CHECK-NEXT: xxswapd vs1, vs0
				; CHECK: blr
				;
				; CHECK-P9-VECTOR-LABEL: teste1:
				; CHECK-P9-VECTOR: # %bb.0:
				; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
				; CHECK-P9-VECTOR-NEXT: xxswapd vs1, vs0
				; CHECK-P9-VECTOR: blr
				;
				; CHECK-P9-LABEL: teste1:
				; CHECK-P9: # %bb.0:
				; CHECK-P9-NEXT: lfd f1, 8(r3)
				; CHECK-P9-NEXT: blr
	%v = load <2 x double>, <2 x double>* %p1			%v = load <2 x double>, <2 x double>* %p1
	%r = extractelement <2 x double> %v, i32 1			%r = extractelement <2 x double> %v, i32 1
	ret double %r			ret double %r

	; CHECK-LABEL: teste1
	; CHECK: lxvd2x 0, 0, 3
	; CHECK: xxswapd 1, 0

	; CHECK-P9-LABEL: teste1
	; CHECK-P9: lfd 1, 8(3)
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] Improve codegen for vector loads using scalar_to_vector
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 159729

llvm/trunk/lib/Target/PowerPC/P9InstrResources.td

llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td

llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll

llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll

llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll

llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll

llvm/trunk/test/CodeGen/PowerPC/qpx-load-splat.ll

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_1.ll

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_3.ll

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll

llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll

llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] Improve codegen for vector loads using scalar_to_vector (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 159729

llvm/trunk/lib/Target/PowerPC/P9InstrResources.td

llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td

llvm/trunk/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll

llvm/trunk/test/CodeGen/PowerPC/build-vector-tests.ll

llvm/trunk/test/CodeGen/PowerPC/load-v4i8-improved.ll

llvm/trunk/test/CodeGen/PowerPC/power9-moves-and-splats.ll

llvm/trunk/test/CodeGen/PowerPC/qpx-load-splat.ll

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_1.ll

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_2.ll

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_3.ll

llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_4.ll

llvm/trunk/test/CodeGen/PowerPC/swaps-le-6.ll

llvm/trunk/test/CodeGen/PowerPC/vsx_insert_extract_le.ll

[PowerPC] Improve codegen for vector loads using scalar_to_vector
ClosedPublic