Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -671,6 +671,10 @@
       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
 
+      if (Subtarget.hasDirectMove())
+        setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+
       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
     }
 
@@ -683,9 +687,6 @@
       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
     }
-
-    if (Subtarget.isISA3_0() && Subtarget.hasDirectMove())
-      setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
   }
 
   if (Subtarget.hasQPX()) {
@@ -7075,16 +7076,6 @@
   return DAG.getNode(ISD::BITCAST, dl, VT, T);
 }
 
-static bool isNonConstSplatBV(BuildVectorSDNode *BVN, EVT Type) {
-  if (BVN->isConstant() || BVN->getValueType(0) != Type)
-    return false;
-  auto OpZero = BVN->getOperand(0);
-  for (int i = 1, e = BVN->getNumOperands(); i < e; i++)
-    if (BVN->getOperand(i) != OpZero)
-      return false;
-  return true;
-}
-
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@@ -7207,14 +7198,47 @@
   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                              HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
       SplatBitSize > 32) {
-    // We can splat a non-const value on CPU's that implement ISA 3.0
-    // in two ways: LXVWSX (load and splat) and MTVSRWS(move and splat).
-    auto OpZero = BVN->getOperand(0);
-    bool CanLoadAndSplat = OpZero.getOpcode() == ISD::LOAD &&
-      BVN->isOnlyUserOf(OpZero.getNode());
-    if (Subtarget.isISA3_0() && !CanLoadAndSplat &&
-        (isNonConstSplatBV(BVN, MVT::v4i32) ||
-         isNonConstSplatBV(BVN, MVT::v2i64)))
+    if (!Subtarget.hasVSX())
+      return SDValue();
+
+    // SDAG patterns are provided for building vectors out of values that are
+    // in registers.
+    bool RightType = Op.getValueType() == MVT::v2f64 ||
+      Op.getValueType() == MVT::v4f32 ||
+      (Op.getValueType() == MVT::v2i64 && Subtarget.hasDirectMove()) ||
+      (Op.getValueType() == MVT::v4i32 && Subtarget.hasDirectMove());
+
+    // Returns true when V should be left to the SDAG patterns: V is not
+    // constant, has no undef operand, and is not a load-and-splat (which is
+    // better handled through (splat (scalar_to_vector))).
+    auto haveEfficientPattern = [&](BuildVectorSDNode *V) -> bool {
+      bool IsSplat = true;
+      bool IsLoad = false;
+      SDValue Op0 = V->getOperand(0);
+      if (V->isConstant())
+        return false;
+      for (int i = 0, e = V->getNumOperands(); i < e; i++) {
+        if (V->getOperand(i).isUndef())
+          return false;
+        // A LOAD, or an FP_ROUND/FP_TO_SINT/FP_TO_UINT of a LOAD, counts as a
+        // loaded operand. IsLoad is never reset once it has been set.
+        if (V->getOperand(i).getOpcode() == ISD::LOAD ||
+            (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
+             V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
+            (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
+             V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
+            (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
+             V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
+          IsLoad = true;
+        // Not a splat if this operand differs from operand 0, or if no load
+        // has been seen and the operand has users other than this BV node.
+        if (V->getOperand(i) != Op0 ||
+            (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
+          IsSplat = false;
+      }
+      return !(IsSplat && IsLoad);
+    };
+    if (RightType && haveEfficientPattern(BVN))
       return Op;
     return SDValue();
   }
@@ -7236,8 +7260,20 @@
   }
 
   // We have XXSPLTIB for constant splats one byte wide
-  if (Subtarget.isISA3_0() && Op.getValueType() == MVT::v16i8)
+  if (Subtarget.hasP9Vector() && SplatSize == 1) {
+    // This is a splat of 1-byte elements with some elements potentially undef.
+    // Rather than trying to match undef in the SDAG patterns, ensure that all
+    // elements are the same constant.
+    if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
+      SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
+                                                       dl, MVT::i32));
+      SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
+      if (Op.getValueType() != MVT::v16i8)
+        return DAG.getBitcast(Op.getValueType(), NewBV);
+      return NewBV;
+    }
     return Op;
+  }
 
   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
@@ -7485,7 +7521,7 @@
 
       // If the source for the shuffle is a scalar_to_vector that came from a
       // 32-bit load, it will have used LXVWSX so we don't need to splat again.
-      if (Subtarget.isISA3_0() &&
+      if (Subtarget.hasP9Vector() &&
           ((isLittleEndian && SplatIdx == 3) ||
            (!isLittleEndian && SplatIdx == 0))) {
         SDValue Src = V1.getOperand(0);
Index: lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.td
+++ lib/Target/PowerPC/PPCInstrInfo.td
@@ -327,6 +327,7 @@
   return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
 }], LO16>;
 def immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
+def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>;
 
 // imm16Shifted* - These match immediates where the low 16-bits are zero.  There
 // are two forms: imm16ShiftedSExt and imm16ShiftedZExt.  These two forms are
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -570,18 +570,38 @@
                       (outs vsfrc:$XT), (ins vsfrc:$XB),
                       "xscvdpsxds $XT, $XB", IIC_VecFP,
                       [(set f64:$XT, (PPCfctidz f64:$XB))]>;
+  let isCodeGenOnly = 1 in
+  def XSCVDPSXDSs : XX2Form<60, 344,
+                      (outs vssrc:$XT), (ins vssrc:$XB),
+                      "xscvdpsxds $XT, $XB", IIC_VecFP,
+                      [(set f32:$XT, (PPCfctidz f32:$XB))]>;
   def XSCVDPSXWS : XX2Form<60, 88,
                       (outs vsfrc:$XT), (ins vsfrc:$XB),
                       "xscvdpsxws $XT, $XB", IIC_VecFP,
                       [(set f64:$XT, (PPCfctiwz f64:$XB))]>;
+  let isCodeGenOnly = 1 in
+  def XSCVDPSXWSs : XX2Form<60, 88,
+                      (outs vssrc:$XT), (ins vssrc:$XB),
+                      "xscvdpsxws $XT, $XB", IIC_VecFP,
+                      [(set f32:$XT, (PPCfctiwz f32:$XB))]>;
   def XSCVDPUXDS : XX2Form<60, 328,
                       (outs vsfrc:$XT), (ins vsfrc:$XB),
                       "xscvdpuxds $XT, $XB", IIC_VecFP,
                       [(set f64:$XT, (PPCfctiduz f64:$XB))]>;
+  let isCodeGenOnly = 1 in
+  def XSCVDPUXDSs : XX2Form<60, 328,
+                      (outs vssrc:$XT), (ins vssrc:$XB),
+                      "xscvdpuxds $XT, $XB", IIC_VecFP,
+                      [(set f32:$XT, (PPCfctiduz f32:$XB))]>;
   def XSCVDPUXWS : XX2Form<60, 72,
                       (outs vsfrc:$XT), (ins vsfrc:$XB),
                       "xscvdpuxws $XT, $XB", IIC_VecFP,
                       [(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
+  let isCodeGenOnly = 1 in
+  def XSCVDPUXWSs : XX2Form<60, 72,
+                      (outs vssrc:$XT), (ins vssrc:$XB),
+                      "xscvdpuxws $XT, $XB", IIC_VecFP,
+                      [(set f32:$XT, (PPCfctiwuz f32:$XB))]>;
   def XSCVSPDP : XX2Form<60, 329,
                       (outs vsfrc:$XT), (ins vsfrc:$XB),
                       "xscvspdp $XT, $XB", IIC_VecFP, []>;
@@ -620,13 +640,15 @@
                       "xvcvspsxds $XT, $XB", IIC_VecFP, []>;
   def XVCVSPSXWS : XX2Form<60, 152,
                       (outs vsrc:$XT), (ins vsrc:$XB),
-                      "xvcvspsxws $XT, $XB", IIC_VecFP, []>;
+                      "xvcvspsxws $XT, $XB", IIC_VecFP,
+                      [(set v4i32:$XT, (fp_to_sint v4f32:$XB))]>;
   def XVCVSPUXDS : XX2Form<60, 392,
                       (outs vsrc:$XT), (ins vsrc:$XB),
                       "xvcvspuxds $XT, $XB", IIC_VecFP, []>;
   def XVCVSPUXWS : XX2Form<60, 136,
                       (outs vsrc:$XT), (ins vsrc:$XB),
-                      "xvcvspuxws $XT, $XB", IIC_VecFP, []>;
+                      "xvcvspuxws $XT, $XB", IIC_VecFP,
+                      [(set v4i32:$XT, (fp_to_uint v4f32:$XB))]>;
   def XVCVSXDDP : XX2Form<60, 504,
                       (outs vsrc:$XT), (ins vsrc:$XB),
                       "xvcvsxddp $XT, $XB", IIC_VecFP,
@@ -653,7 +675,8 @@
                       "xvcvuxwdp $XT, $XB", IIC_VecFP, []>;
   def XVCVUXWSP : XX2Form<60, 168,
                       (outs vsrc:$XT), (ins vsrc:$XB),
-                      "xvcvuxwsp $XT, $XB", IIC_VecFP, []>;
+                      "xvcvuxwsp $XT, $XB", IIC_VecFP,
+                      [(set v4f32:$XT, (uint_to_fp v4i32:$XB))]>;
 
   // Rounding Instructions
   def XSRDPI : XX2Form<60, 73,
@@ -1178,6 +1201,8 @@
 
   def : Pat<(f64 (extloadf32 xoaddr:$src)),
             (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
+  def : Pat<(f32 (fpround (extloadf32 xoaddr:$src))),
+            (f32 (LXSSPX xoaddr:$src))>;
   def : Pat<(f64 (fpextend f32:$src)),
             (COPY_TO_REGCLASS $src, VSFRC)>;
 
@@ -1355,7 +1380,7 @@
 } // AddedComplexity = 400
 } // HasP8Vector
 
-let UseVSXReg = 1 in {
+let UseVSXReg = 1, AddedComplexity = 400 in {
 let Predicates = [HasDirectMove] in {
   // VSX direct move instructions
   def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
@@ -1701,6 +1726,7 @@
   dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
 }
 
+let AddedComplexity = 400 in {
 // v4f32 scalar <-> vector conversions (BE)
 let Predicates = [IsBigEndian, HasP8Vector] in {
   def : Pat<(v4f32 (scalar_to_vector f32:$A)),
@@ -1939,15 +1965,16 @@
           (f64 (MTVSRD $S))>;
 }
 
+// Materialize a zero-vector of long long
+def : Pat<(v2i64 immAllZerosV),
+          (v2i64 (XXLXORz))>;
+}
+
 def AlignValues {
   dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
   dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
 }
 
-// Materialize a zero-vector of long long
-def : Pat<(v2i64 immAllZerosV),
-          (v2i64 (XXLXORz))>;
-
 // The following VSX instructions were introduced in Power ISA 3.0
 def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
 let AddedComplexity = 400, Predicates = [HasP9Vector] in {
@@ -2407,23 +2434,8 @@
             (v4i32 (LXVWSX xoaddr:$src))>;
   def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
             (v4f32 (LXVWSX xoaddr:$src))>;
-  def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
-            (v4i32 (MTVSRWS $A))>;
-  def : Pat<(v16i8 (build_vector immSExt8:$A, immSExt8:$A, immSExt8:$A,
-                                 immSExt8:$A, immSExt8:$A, immSExt8:$A,
-                                 immSExt8:$A, immSExt8:$A, immSExt8:$A,
-                                 immSExt8:$A, immSExt8:$A, immSExt8:$A,
-                                 immSExt8:$A, immSExt8:$A, immSExt8:$A,
-                                 immSExt8:$A)),
-            (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
-  def : Pat<(v16i8 immAllOnesV),
-            (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
-  def : Pat<(v8i16 immAllOnesV),
-            (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
-  def : Pat<(v4i32 immAllOnesV),
-            (v4i32 (XXSPLTIB 255))>;
-  def : Pat<(v2i64 immAllOnesV),
-            (v2i64 (XXSPLTIB 255))>;
+  def : Pat<(v4f32 (scalar_to_vector (f32 (fpround (extloadf32 xoaddr:$src))))),
+            (v4f32 (LXVWSX xoaddr:$src))>;
 
   // Build vectors from i8 loads
   def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
@@ -2564,6 +2576,7 @@
             (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
   def : Pat<(f64 (PPCVexts f64:$A, 2)),
             (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
+
   let isPseudo = 1 in {
     def DFLOADf32  : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
                             "#DFLOADf32",
@@ -2580,18 +2593,261 @@
   }
   def : Pat<(f64 (extloadf32 iaddr:$src)),
             (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>;
+  def : Pat<(f32 (fpround (extloadf32 iaddr:$src))),
+            (f32 (DFLOADf32 iaddr:$src))>;
 } // end HasP9Vector, AddedComplexity
 
-let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
-def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
-          (v2i64 (MTVSRDD $rB, $rA))>;
-def : Pat<(i64 (extractelt v2i64:$A, 0)),
-          (i64 (MFVSRLD $A))>;
+// Integer extend helper dags 32 -> 64
+def AnyExts {
+  dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32);
+  dag B = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $B, sub_32);
+  dag C = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $C, sub_32);
+  dag D = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $D, sub_32);
 }
 
-let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
-def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
-          (v2i64 (MTVSRDD $rB, $rA))>;
-def : Pat<(i64 (extractelt v2i64:$A, 1)),
-          (i64 (MFVSRLD $A))>;
+def DblToFlt {
+  dag A0 = (f32 (fpround (f64 (extractelt v2f64:$A, 0))));
+  dag A1 = (f32 (fpround (f64 (extractelt v2f64:$A, 1))));
+  dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
+  dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
+}
+def FltToIntLoad {
+  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A)))));
+}
+def FltToUIntLoad {
+  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 xoaddr:$A)))));
+}
+def FltToLongLoad {
+  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
+}
+def FltToULongLoad {
+  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
+}
+def FltToLong {
+  dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A))));
+}
+def FltToULong {
+  dag A = (i64 (PPCmfvsr (PPCfctiduz (fpextend f32:$A))));
+}
+def DblToInt {
+  dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A))));
+}
+def DblToUInt {
+  dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A))));
+}
+def DblToLong {
+  dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A))));
+}
+def DblToULong {
+  dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz f64:$A))));
+}
+def DblToIntLoad {
+  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
+}
+def DblToUIntLoad {
+  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
+}
+def DblToLongLoad {
+  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
+}
+def DblToULongLoad {
+  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A)))));
+}
+
+// FP merge dags (for f32 -> v4f32)
+def MrgFP {
+  dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC),
+                               (COPY_TO_REGCLASS $C, VSRC), 0));
+  dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC),
+                               (COPY_TO_REGCLASS $D, VSRC), 0));
+  dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0));
+  dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3));
+  dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0));
+  dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3));
+}
+
+// Patterns for BUILD_VECTOR nodes.
+def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
+let AddedComplexity = 400 in {
+
+  let Predicates = [HasVSX] in {
+    // Build vectors of floating point converted to i32.
+    def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
+                                   DblToInt.A, DblToInt.A)),
+              (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>;
+    def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
+                                   DblToUInt.A, DblToUInt.A)),
+              (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>;
+    def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
+              (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC),
+                               (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>;
+    def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
+              (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
+                               (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
+    def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
+              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+                                (XSCVDPSXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>;
+    def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
+              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+                                (XSCVDPUXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>;
+    def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
+              (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
+
+    // Build vectors of floating point converted to i64.
+    def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
+              (v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
+    def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
+              (v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
+    def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)),
+              (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>;
+    def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)),
+              (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>;
+  }
+
+  let Predicates = [HasVSX, NoP9Vector] in {
+    // Load-and-splat with fp-to-int conversion (using X-Form VSX loads).
+    def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
+              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+                                (XSCVDPSXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
+    def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
+              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+                                (XSCVDPUXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
+    def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
+              (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
+                                              (LXSSPX xoaddr:$A), VSFRC)), 0))>;
+    def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
+              (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
+                                              (LXSSPX xoaddr:$A), VSFRC)), 0))>;
+  }
+
+  // Big endian, available on all targets with VSX
+  let Predicates = [IsBigEndian, HasVSX] in {
+    def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
+              (v2f64 (XXPERMDI
+                        (COPY_TO_REGCLASS $A, VSRC),
+                        (COPY_TO_REGCLASS $B, VSRC), 0))>;
+
+    def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
+              (VMRGEW MrgFP.AC, MrgFP.BD)>;
+    def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
+                                   DblToFlt.B0, DblToFlt.B1)),
+              (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
+  }
+
+  // Little endian, available on all targets with VSX
+  let Predicates = [IsLittleEndian, HasVSX] in {
+    def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
+              (v2f64 (XXPERMDI
+                        (COPY_TO_REGCLASS $B, VSRC),
+                        (COPY_TO_REGCLASS $A, VSRC), 0))>;
+
+    def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
+              (VMRGEW MrgFP.AC, MrgFP.BD)>;
+    def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
+                                   DblToFlt.B0, DblToFlt.B1)),
+              (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
+  }
+
+  let Predicates = [HasDirectMove] in {
+    /* Endianness-neutral constant splat on P8 and newer targets. The reason
+       for this pattern is that on targets with direct moves, we don't expand
+       BUILD_VECTOR nodes for v4i32.
+    */
+    def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
+                                   immSExt5NonZero:$A, immSExt5NonZero:$A)),
+              (v4i32 (VSPLTISW imm:$A))>;
+  }
+
+  let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in {
+    // Big endian integer vectors using direct moves.
+    def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+              (v2i64 (XXPERMDI
+                        (COPY_TO_REGCLASS (MTVSRD $A), VSRC),
+                        (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
+    def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+              (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC),
+                                   (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0),
+                      (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC),
+                                   (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>;
+    def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+              (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+  }
+
+  let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in {
+    // Little endian integer vectors using direct moves.
+    def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+              (v2i64 (XXPERMDI
+                        (COPY_TO_REGCLASS (MTVSRD $B), VSRC),
+                        (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
+    def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+              (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC),
+                                   (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0),
+                      (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC),
+                                   (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>;
+    def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+              (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+  }
+
+  let Predicates = [HasP9Vector] in {
+    // Endianness-neutral patterns for splats with ISA 3.0 instructions.
+    def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+              (v4i32 (MTVSRWS $A))>;
+    def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+              (v4i32 (MTVSRWS $A))>;
+    def : Pat<(v16i8 (build_vector immSExt8:$A, immSExt8:$A, immSExt8:$A,
+                                   immSExt8:$A, immSExt8:$A, immSExt8:$A,
+                                   immSExt8:$A, immSExt8:$A, immSExt8:$A,
+                                   immSExt8:$A, immSExt8:$A, immSExt8:$A,
+                                   immSExt8:$A, immSExt8:$A, immSExt8:$A,
+                                   immSExt8:$A)),
+              (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
+    def : Pat<(v16i8 immAllOnesV),
+              (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
+    def : Pat<(v8i16 immAllOnesV),
+              (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
+    def : Pat<(v4i32 immAllOnesV),
+              (v4i32 (XXSPLTIB 255))>;
+    def : Pat<(v2i64 immAllOnesV),
+              (v2i64 (XXSPLTIB 255))>;
+    def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
+              (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
+    def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
+              (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
+    // The *Load dags bind $A through xoaddr, so the result must use an X-Form
+    // load; feeding that address to a D-Form (iaddr) load would be malformed.
+    def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
+              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+                                (XSCVDPSXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
+    def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
+              (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+                                (XSCVDPUXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
+    def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
+              (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
+                                              (LXSSPX xoaddr:$A), VSFRC)), 0))>;
+    def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
+              (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
+                                              (LXSSPX xoaddr:$A), VSFRC)), 0))>;
+  }
+
+  let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
+    def : Pat<(i64 (extractelt v2i64:$A, 1)),
+              (i64 (MFVSRLD $A))>;
+    // Better way to build integer vectors if we have MTVSRDD. Big endian.
+    def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
+              (v2i64 (MTVSRDD $rB, $rA))>;
+    def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+              (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC),
+                      (COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC))>;
+  }
+
+  let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
+    def : Pat<(i64 (extractelt v2i64:$A, 0)),
+              (i64 (MFVSRLD $A))>;
+    // Better way to build integer vectors if we have MTVSRDD. Little endian.
+    def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
+              (v2i64 (MTVSRDD $rB, $rA))>;
+    def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+              (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC),
+                      (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>;
+  }
 }
Index: test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
===================================================================
--- test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -46,10 +46,10 @@
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %splat.splat
-; CHECK: sldi [[REG1:[0-9]+]], 3, 32
-; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
-; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
-; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+; CHECK: mtvsrwz [[REG1:[0-9]+]], 3
+; CHECK: xxspltw 34, [[REG1]]
+; CHECK-LE: mtvsrwz [[REG1:[0-9]+]], 3
+; CHECK-LE: xxspltw 34, [[REG1]]
 }
 
 ; Function Attrs: nounwind
Index: test/CodeGen/PowerPC/power9-moves-and-splats.ll
===================================================================
--- test/CodeGen/PowerPC/power9-moves-and-splats.ll
+++ test/CodeGen/PowerPC/power9-moves-and-splats.ll
@@ -10,15 +10,9 @@
 ; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
 ; which will happen in a subsequent patch.
 ; CHECK-LABEL: test1
-; FIXME: mtvsrdd 34, 4, 3
-; CHECK: mtvsrd {{[0-9]+}}, 3
-; CHECK: mtvsrd {{[0-9]+}}, 4
-; CHECK: xxmrgld
+; CHECK: mtvsrdd 34, 4, 3
 ; CHECK-BE-LABEL: test1
-; FIXME-BE: mtvsrdd 34, 3, 4
-; CHECK-BE: mtvsrd {{[0-9]+}}, 4
-; CHECK-BE: mtvsrd {{[0-9]+}}, 3
-; CHECK-BE: xxmrghd
+; CHECK-BE: mtvsrdd 34, 3, 4
   %vecins = insertelement <2 x i64> undef, i64 %a, i32 0
   %vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
   ret <2 x i64> %vecins1
@@ -162,10 +156,14 @@
 entry:
 ; CHECK-LABEL: test14
 ; CHECK: lwz [[LD:[0-9]+]],
-; CHECK: mtvsrws 34, [[LD]]
+; FIXME: mtvsrws 34, [[LD]]
+; CHECK: mtvsrws [[SPLT:[0-9]+]], [[LD]]
+; CHECK: xxspltw 34, [[SPLT]], 3
 ; CHECK-BE-LABEL: test14
 ; CHECK-BE: lwz [[LD:[0-9]+]],
-; CHECK-BE: mtvsrws 34, [[LD]]
+; FIXME: mtvsrws 34, [[LD]]
+; CHECK-BE: mtvsrws [[SPLT:[0-9]+]], [[LD]]
+; CHECK-BE: xxspltw 34, [[SPLT]], 0
   %0 = load i32, i32* %b, align 4
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
Index: test/CodeGen/PowerPC/tail-dup-analyzable-fallthrough.ll
===================================================================
--- test/CodeGen/PowerPC/tail-dup-analyzable-fallthrough.ll
+++ test/CodeGen/PowerPC/tail-dup-analyzable-fallthrough.ll
@@ -5,7 +5,7 @@
 ; Check that the conditional return block of fmax_double3.exit was not
 ; duplicated into the if.then.i block
 ; CHECK: # %if.then.i
-; CHECK: lxvd2x
+; CHECK: xxlxor
 ; CHECK: stxvd2x
 ; CHECK-NOT: bclr
 ; CHECK: {{^}}.LBB{{[0-9_]+}}:
Index: test/CodeGen/PowerPC/vsx.ll
===================================================================
--- test/CodeGen/PowerPC/vsx.ll
+++ test/CodeGen/PowerPC/vsx.ll
@@ -1087,9 +1087,7 @@
 ; CHECK-LE: mtvsrwa
 ; CHECK-LE: xscvsxddp
 ; CHECK-LE: xscvsxddp
-; CHECK-LE: xxspltd
-; CHECK-LE: xxspltd
-; CHECK-LE: xxmrgld
+; CHECK-LE: xxmrghd
 ; CHECK-LE: blr
 }
 
@@ -1112,9 +1110,7 @@
 ; CHECK-LE: mtvsrwa
 ; CHECK-LE: xscvsxddp
 ; CHECK-LE: xscvsxddp
-; CHECK-LE: xxspltd
-; CHECK-LE: xxspltd
-; CHECK-LE: xxmrgld
+; CHECK-LE: xxmrghd
 ; CHECK-LE: blr
 }