Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -451,6 +451,10 @@
       /// v2f32 value into the lower half of a VSR register.
       LD_VSX_LH,
 
+      /// VSRC, CHAIN = LD_SPLAT CHAIN, Ptr, VT - a splatting load memory
+      /// instruction (LXVDSX for doublewords, LXVWSX for words).
+      LD_SPLAT,
+
       /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
       /// Maps directly to an stxvd2x instruction that will be preceded by
       /// an xxswapd.
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1401,6 +1401,7 @@
   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
+  case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
   case PPCISD::FP_EXTEND_LH:    return "PPCISD::FP_EXTEND_LH";
   }
   return nullptr;
@@ -8208,6 +8209,33 @@
   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                              HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
       SplatBitSize > 32) {
+
+    // Handle load-and-splat patterns as we have instructions that will do this
+    // in one go. Only plain loads qualify: indexed and extending loads, and
+    // element sizes without a splatting load, fall through to the generic
+    // BUILD_VECTOR handling below rather than abandoning the lowering.
+    if (DAG.isSplatValue(Op, true) &&
+        Op.getOperand(0).getOpcode() == ISD::LOAD) {
+      LoadSDNode *LD = cast<LoadSDNode>(Op.getOperand(0));
+
+      // We have handling for 4 and 8 byte elements.
+      unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
+      bool IsPlainLoad = !LD->isIndexed() && ISD::isNON_EXTLoad(LD);
+      bool HasSplatLoad = (Subtarget.hasVSX() && ElementSize == 64) ||
+                          (Subtarget.hasP9Vector() && ElementSize == 32);
+      if (IsPlainLoad && HasSplatLoad) {
+        SDValue Ops[] = {
+          LD->getChain(),    // Chain
+          LD->getBasePtr(),  // Ptr
+          DAG.getValueType(Op.getValueType()) // VT
+        };
+        return
+          DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
+                                  DAG.getVTList(Op.getValueType(), MVT::Other),
+                                  Ops, LD->getMemoryVT(), LD->getMemOperand());
+      }
+    }
+
     // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
     // lowered to VSX instructions under certain conditions.
     // Without VSX, there is no pattern more efficient than expanding the node.
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -58,6 +58,10 @@
   SDTCisVT<0, v4f32>, SDTCisPtrTy<1>
 ]>;
 
+def SDT_PPCldsplat : SDTypeProfile<1, 1, [
+  SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
+
 def SDT_PPCfpextlh : SDTypeProfile<1, 1, [
   SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>
 ]>;
@@ -96,6 +100,8 @@
 def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>;
 def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 
 multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
                     string asmstr, InstrItinClass itin, Intrinsic Int,
@@ -3854,6 +3860,10 @@
                           (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
   def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
             (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
+  def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
+            (v2f64 (LXVDSX xoaddr:$A))>;
+  def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
+            (v2i64 (LXVDSX xoaddr:$A))>;
 
   // Build vectors of floating point converted to i64.
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), @@ -4098,6 +4108,10 @@ (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$A), VSFRC)), 0))>; + def : Pat<(v4f32 (PPCldsplat xoaddr:$A)), + (v4f32 (LXVWSX xoaddr:$A))>; + def : Pat<(v4i32 (PPCldsplat xoaddr:$A)), + (v4i32 (LXVWSX xoaddr:$A))>; } let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in { Index: test/CodeGen/PowerPC/VSX-XForm-Scalars.ll =================================================================== --- test/CodeGen/PowerPC/VSX-XForm-Scalars.ll +++ test/CodeGen/PowerPC/VSX-XForm-Scalars.ll @@ -27,18 +27,16 @@ ; ; CHECK-P9-LABEL: testExpandPostRAPseudo: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9: lfiwzx f0, 0, r3 ; CHECK-P9: addis r4, r2, .LC0@toc@ha +; CHECK-P9: lxvwsx vs0, 0, r3 ; CHECK-P9: ld r4, .LC0@toc@l(r4) -; CHECK-P9: xxpermdi vs0, f0, f0, 2 -; CHECK-P9: xxspltw vs0, vs0, 3 ; CHECK-P9: stxvx vs0, 0, r4 +; CHECK-P9: lis r4, 1024 ; CHECK-P9: lfiwax f0, 0, r3 ; CHECK-P9: addis r3, r2, .LC1@toc@ha ; CHECK-P9: ld r3, .LC1@toc@l(r3) ; CHECK-P9: xscvsxdsp f0, f0 ; CHECK-P9: ld r3, 0(r3) -; CHECK-P9: lis r4, 1024 ; CHECK-P9: stfsx f0, r3, r4 ; CHECK-P9: blr entry: Index: test/CodeGen/PowerPC/build-vector-tests.ll =================================================================== --- test/CodeGen/PowerPC/build-vector-tests.ll +++ test/CodeGen/PowerPC/build-vector-tests.ll @@ -1327,16 +1327,12 @@ define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) { ; P9BE-LABEL: spltMemVali: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: lfiwzx f0, 0, r3 -; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P9BE-NEXT: xxspltw v2, vs0, 0 +; P9BE-NEXT: lxvwsx v2, 0, r3 ; P9BE-NEXT: blr ; ; P9LE-LABEL: spltMemVali: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: lfiwzx f0, 0, r3 -; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 -; P9LE-NEXT: xxspltw v2, vs0, 3 +; P9LE-NEXT: lxvwsx v2, 0, r3 ; P9LE-NEXT: blr ; ; P8BE-LABEL: spltMemVali: @@ -2911,16 +2907,12 @@ define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) { ; 
P9BE-LABEL: spltMemValui: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: lfiwzx f0, 0, r3 -; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P9BE-NEXT: xxspltw v2, vs0, 0 +; P9BE-NEXT: lxvwsx v2, 0, r3 ; P9BE-NEXT: blr ; ; P9LE-LABEL: spltMemValui: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: lfiwzx f0, 0, r3 -; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 -; P9LE-NEXT: xxspltw v2, vs0, 3 +; P9LE-NEXT: lxvwsx v2, 0, r3 ; P9LE-NEXT: blr ; ; P8BE-LABEL: spltMemValui: Index: test/CodeGen/PowerPC/load-and-splat.ll =================================================================== --- test/CodeGen/PowerPC/load-and-splat.ll +++ test/CodeGen/PowerPC/load-and-splat.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \ +; RUN: -check-prefix=P9 +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ +; RUN: -check-prefix=P8 +define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr { +; P9-LABEL: test: +; P9: # %bb.0: # %entry +; P9-NEXT: addi r4, r4, 24 +; P9-NEXT: lxvdsx vs0, 0, r4 +; P9-NEXT: stxv vs0, 0(r3) +; P9-NEXT: blr +; +; P8-LABEL: test: +; P8: # %bb.0: # %entry +; P8-NEXT: addi r4, r4, 24 +; P8-NEXT: lxvdsx vs0, 0, r4 +; P8-NEXT: stxvd2x vs0, 0, r3 +; P8-NEXT: blr +entry: + %arrayidx = getelementptr inbounds double, double* %a, i64 3 + %0 = load double, double* %arrayidx, align 8 + %splat.splatinsert.i = insertelement <2 x double> undef, double %0, i32 0 + %splat.splat.i = shufflevector <2 x double> %splat.splatinsert.i, <2 x double> undef, <2 x i32> zeroinitializer + store <2 x double> %splat.splat.i, <2 x double>* %c, align 16 + ret void +} + +define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonly %a) local_unnamed_addr { +; P9-LABEL: test2: +; P9: # %bb.0: # 
%entry +; P9-NEXT: addi r4, r4, 12 +; P9-NEXT: lxvwsx vs0, 0, r4 +; P9-NEXT: stxv vs0, 0(r3) +; P9-NEXT: blr +; +; P8-LABEL: test2: +; P8: # %bb.0: # %entry +; P8-NEXT: addi r4, r4, 12 +; P8-NEXT: lfiwzx f0, 0, r4 +; P8-NEXT: xxpermdi vs0, f0, f0, 2 +; P8-NEXT: xxspltw v2, vs0, 3 +; P8-NEXT: stvx v2, 0, r3 +; P8-NEXT: blr +entry: + %arrayidx = getelementptr inbounds float, float* %a, i64 3 + %0 = load float, float* %arrayidx, align 4 + %splat.splatinsert.i = insertelement <4 x float> undef, float %0, i32 0 + %splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %splat.splat.i, <4 x float>* %c, align 16 + ret void +} + +define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a) local_unnamed_addr { +; P9-LABEL: test3: +; P9: # %bb.0: # %entry +; P9-NEXT: addi r4, r4, 12 +; P9-NEXT: lxvwsx vs0, 0, r4 +; P9-NEXT: stxv vs0, 0(r3) +; P9-NEXT: blr +; +; P8-LABEL: test3: +; P8: # %bb.0: # %entry +; P8-NEXT: addi r4, r4, 12 +; P8-NEXT: lfiwzx f0, 0, r4 +; P8-NEXT: xxpermdi vs0, f0, f0, 2 +; P8-NEXT: xxspltw v2, vs0, 3 +; P8-NEXT: stvx v2, 0, r3 +; P8-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %a, i64 3 + %0 = load i32, i32* %arrayidx, align 4 + %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %0, i32 0 + %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer + store <4 x i32> %splat.splat.i, <4 x i32>* %c, align 16 + ret void +} + +define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr { +; P9-LABEL: test4: +; P9: # %bb.0: # %entry +; P9-NEXT: addi r4, r4, 24 +; P9-NEXT: lxvdsx vs0, 0, r4 +; P9-NEXT: stxv vs0, 0(r3) +; P9-NEXT: blr +; +; P8-LABEL: test4: +; P8: # %bb.0: # %entry +; P8-NEXT: addi r4, r4, 24 +; P8-NEXT: lxvdsx vs0, 0, r4 +; P8-NEXT: stxvd2x vs0, 0, r3 +; P8-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i64, i64* %a, i64 3 + %0 
= load i64, i64* %arrayidx, align 8 + %splat.splatinsert.i = insertelement <2 x i64> undef, i64 %0, i32 0 + %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer + store <2 x i64> %splat.splat.i, <2 x i64>* %c, align 16 + ret void +} Index: test/CodeGen/PowerPC/power9-moves-and-splats.ll =================================================================== --- test/CodeGen/PowerPC/power9-moves-and-splats.ll +++ test/CodeGen/PowerPC/power9-moves-and-splats.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \ ; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \ @@ -11,11 +12,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mtvsrdd v2, r4, r3 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: blr + entry: ; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp ; which will happen in a subsequent patch. 
@@ -29,11 +31,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mfvsrld r3, v2 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr + entry: %0 = extractelement <2 x i64> %a, i32 0 ret i64 %0 @@ -44,11 +47,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mfvsrd r3, v2 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test3: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mfvsrld r3, v2 ; CHECK-BE-NEXT: blr + entry: %0 = extractelement <2 x i64> %a, i32 1 ret i64 %0 @@ -57,17 +61,14 @@ define <4 x i32> @test4(i32* nocapture readonly %in) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lfiwzx f0, 0, r3 -; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 -; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: lxvwsx v2, 0, r3 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test4: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: lxvwsx v2, 0, r3 ; CHECK-BE-NEXT: blr + entry: %0 = load i32, i32* %in, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 @@ -78,17 +79,14 @@ define <4 x float> @test5(float* nocapture readonly %in) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lfiwzx f0, 0, r3 -; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 -; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: lxvwsx v2, 0, r3 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: lxvwsx v2, 0, r3 ; CHECK-BE-NEXT: blr + entry: %0 = load float, float* %in, align 4 %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 @@ -101,19 +99,16 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r3, r2, .LC0@toc@ha ; CHECK-NEXT: ld r3, .LC0@toc@l(r3) -; CHECK-NEXT: lfiwzx f0, 0, r3 -; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 -; CHECK-NEXT: xxspltw v2, 
vs0, 3 +; CHECK-NEXT: lxvwsx v2, 0, r3 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test6: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha ; CHECK-BE-NEXT: ld r3, .LC0@toc@l(r3) -; CHECK-BE-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: lxvwsx v2, 0, r3 ; CHECK-BE-NEXT: blr + entry: %0 = load i32, i32* @Globi, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 @@ -126,19 +121,16 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis r3, r2, .LC1@toc@ha ; CHECK-NEXT: ld r3, .LC1@toc@l(r3) -; CHECK-NEXT: lfiwzx f0, 0, r3 -; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 -; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: lxvwsx v2, 0, r3 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test7: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r3, r2, .LC1@toc@ha ; CHECK-BE-NEXT: ld r3, .LC1@toc@l(r3) -; CHECK-BE-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: lxvwsx v2, 0, r3 ; CHECK-BE-NEXT: blr + entry: %0 = load float, float* @Globf, align 4 %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 @@ -151,11 +143,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v2, v2, v2 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test8: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxlxor v2, v2, v2 ; CHECK-BE-NEXT: blr + entry: ret <16 x i8> zeroinitializer } @@ -165,11 +158,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v2, 1 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test9: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxspltib v2, 1 ; CHECK-BE-NEXT: blr + entry: ret <16 x i8> } @@ -179,11 +173,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v2, 127 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test10: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxspltib v2, 127 ; CHECK-BE-NEXT: blr + entry: ret <16 x i8> } @@ -193,11 +188,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v2, 128 ; 
CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test11: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxspltib v2, 128 ; CHECK-BE-NEXT: blr + entry: ret <16 x i8> } @@ -207,11 +203,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v2, 255 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test12: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxspltib v2, 255 ; CHECK-BE-NEXT: blr + entry: ret <16 x i8> } @@ -221,11 +218,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v2, 129 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test13: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxspltib v2, 129 ; CHECK-BE-NEXT: blr + entry: ret <16 x i8> } @@ -235,11 +233,12 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v2, 200 ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test13E127: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxspltib v2, 200 ; CHECK-BE-NEXT: blr + entry: ret <16 x i8> } @@ -248,18 +247,19 @@ ; CHECK-LABEL: test14: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lwz r3, 0(r5) -; CHECK-NEXT: mtvsrws v2, r3 +; CHECK-NEXT: lxvwsx v2, 0, r5 ; CHECK-NEXT: addi r3, r3, 5 ; CHECK-NEXT: stw r3, 0(r5) ; CHECK-NEXT: blr - +; ; CHECK-BE-LABEL: test14: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lwz r3, 0(r5) -; CHECK-BE-NEXT: mtvsrws v2, r3 +; CHECK-BE-NEXT: lxvwsx v2, 0, r5 ; CHECK-BE-NEXT: addi r3, r3, 5 ; CHECK-BE-NEXT: stw r3, 0(r5) ; CHECK-BE-NEXT: blr + entry: %0 = load i32, i32* %b, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 Index: test/CodeGen/PowerPC/qpx-load-splat.ll =================================================================== --- test/CodeGen/PowerPC/qpx-load-splat.ll +++ test/CodeGen/PowerPC/qpx-load-splat.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \ ; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s @@ -34,9 +35,9 @@ ; CHECK-LABEL: fooxu: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sldi r4, r4, 
3 -; CHECK-NEXT: lfdux f0, r3, r4 -; CHECK-NEXT: xxspltd v2, vs0, 0 -; CHECK-NEXT: std r3, 0(r5) +; CHECK-NEXT: add r6, r3, r4 +; CHECK-NEXT: std r6, 0(r5) +; CHECK-NEXT: lxvdsx v2, r3, r4 ; CHECK-NEXT: vmr v3, v2 ; CHECK-NEXT: blr entry: Index: test/CodeGen/PowerPC/swaps-le-7.ll =================================================================== --- test/CodeGen/PowerPC/swaps-le-7.ll +++ test/CodeGen/PowerPC/swaps-le-7.ll @@ -9,8 +9,8 @@ @G4 = global <2 x double> ; CHECK-LABEL: @zg -; CHECK: xxspltd -; CHECK-NEXT: xxspltd +; CHECK: lxvdsx +; CHECK-NEXT: lxvdsx ; CHECK-NEXT: xvmuldp ; CHECK-DAG: xvmuldp ; CHECK-DAG: xvsubdp