diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12649,6 +12649,100 @@
   return SDValue();
 }
 
+// Convert a build_vector of int-to-fp conversions into an int-to-fp conversion
+// of a build_vector of ints. All operands must be the same conversion (all
+// signed or all unsigned); narrower inputs are extended accordingly, and the
+// combine is abandoned (empty SDValue) if an input is wider than the element.
+// FIXME: If the values originally come from up to two vectors of integers, we
+// can emit an int-to-fp conversion of a shuffle.
+static SDValue combineBVOfFpConversions(SDNode *N,
+                                        SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+         "Should be called with a BUILD_VECTOR node");
+
+  // One vector conversion is either signed or unsigned, so every operand
+  // must use the same opcode as the first; otherwise bail out.
+  unsigned ConvOpc = N->getOperand(0).getOpcode();
+  if (ConvOpc != ISD::SINT_TO_FP && ConvOpc != ISD::UINT_TO_FP)
+    return SDValue();
+
+  SDLoc dl(N);
+  bool Signed = ConvOpc == ISD::SINT_TO_FP;
+  EVT FinalVecType = N->getValueType(0);
+  EVT IntermVecType = FinalVecType.changeVectorElementTypeToInteger();
+  EVT ScalarVT = IntermVecType.getScalarType();
+  unsigned OutputWidth = ScalarVT.getSizeInBits();
+  SmallVector<SDValue, 4> Ops;
+  for (SDValue Conv : N->op_values()) {
+    if (Conv.getOpcode() != ConvOpc)
+      return SDValue();
+    SDValue InOp = Conv.getOperand(0);
+    unsigned InputWidth = InOp.getValueType().getSizeInBits();
+    // Inputs wider than the integer element are not handled.
+    if (InputWidth > OutputWidth)
+      return SDValue();
+    if (InputWidth < OutputWidth)
+      InOp = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                         ScalarVT, InOp);
+    Ops.push_back(InOp);
+  }
+  SDValue BV = DAG.getBuildVector(IntermVecType, dl, Ops);
+  return DAG.getNode(ConvOpc, dl, FinalVecType, BV);
+}
+
+// Fold a v2f64 build_vector whose two operands are the same kind of int-to-fp
+// conversion of adjacent elements (0,1 or 2,3) extracted from one v4i32 vector
+// into a single PPCISD::[SU]INT_VEC_TO_FP node on that vector.
+static SDValue combineBVOfExtractFpConvert(SDNode *N,
+                                           SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+         "Should be called with a BUILD_VECTOR node");
+  if (N->getValueType(0) != MVT::v2f64)
+    return SDValue();
+
+  // Looking for:
+  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
+  SDValue Conv0 = N->getOperand(0);
+  SDValue Conv1 = N->getOperand(1);
+  unsigned ConvOpc = Conv0.getOpcode();
+  if ((ConvOpc != ISD::SINT_TO_FP && ConvOpc != ISD::UINT_TO_FP) ||
+      Conv1.getOpcode() != ConvOpc)
+    return SDValue();
+
+  SDValue Extract0 = Conv0.getOperand(0);
+  SDValue Extract1 = Conv1.getOperand(0);
+  if (Extract0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      Extract1.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+
+  // Both extracts must read the same v4i32 vector at constant indices.
+  SDValue SrcVec = Extract0.getOperand(0);
+  if (SrcVec.getValueType() != MVT::v4i32 || SrcVec != Extract1.getOperand(0))
+    return SDValue();
+  auto *Idx0 = dyn_cast<ConstantSDNode>(Extract0.getOperand(1));
+  auto *Idx1 = dyn_cast<ConstantSDNode>(Extract1.getOperand(1));
+  if (!Idx0 || !Idx1)
+    return SDValue();
+
+  // Pick which half of SrcVec to convert; the index flips on little endian.
+  bool IsLE = static_cast<const PPCSubtarget &>(DAG.getSubtarget())
+                  .isLittleEndian();
+  int FirstElem = Idx0->getZExtValue();
+  int SecondElem = Idx1->getZExtValue();
+  int SubvecIdx;
+  if (FirstElem == 0 && SecondElem == 1)
+    SubvecIdx = IsLE ? 1 : 0;
+  else if (FirstElem == 2 && SecondElem == 3)
+    SubvecIdx = IsLE ? 0 : 1;
+  else
+    return SDValue();
+  SDLoc dl(N);
+  unsigned NodeType = ConvOpc == ISD::SINT_TO_FP ? PPCISD::SINT_VEC_TO_FP
+                                                 : PPCISD::UINT_VEC_TO_FP;
+  return DAG.getNode(NodeType, dl, MVT::v2f64, SrcVec,
+                     DAG.getIntPtrConstant(SubvecIdx, dl));
+}
+
 /// Reduce the number of loads when building a vector.
 ///
 /// Building a vector out of multiple loads can be converted to a load
@@ -12910,50 +13004,14 @@
       return Reduced;
   }
 
-
-  if (N->getValueType(0) != MVT::v2f64)
-    return SDValue();
-
-  // Looking for:
   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
-  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
-      FirstInput.getOpcode() != ISD::UINT_TO_FP)
-    return SDValue();
-  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
-      N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
-    return SDValue();
-  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
-    return SDValue();
-
-  SDValue Ext1 = FirstInput.getOperand(0);
-  SDValue Ext2 = N->getOperand(1).getOperand(0);
-  if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-     Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
-    return SDValue();
-
-  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
-  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
-  if (!Ext1Op || !Ext2Op)
-    return SDValue();
-  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
-      Ext1.getOperand(0) != Ext2.getOperand(0))
-    return SDValue();
-
-  int FirstElem = Ext1Op->getZExtValue();
-  int SecondElem = Ext2Op->getZExtValue();
-  int SubvecIdx;
-  if (FirstElem == 0 && SecondElem == 1)
-    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
-  else if (FirstElem == 2 && SecondElem == 3)
-    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
-  else
-    return SDValue();
+  Reduced = combineBVOfExtractFpConvert(N, DAG);
+  if (Reduced)
+    return Reduced;
 
-  SDValue SrcVec = Ext1.getOperand(0);
-  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
-    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
-  return DAG.getNode(NodeType, dl, MVT::v2f64,
-                     SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
+  // Combine (build_vector [su]int_to_fp*) to ([su]int_to_fp (build_vector)).
+  Reduced = combineBVOfFpConversions(N, DAG);
+  return Reduced;
 }
 
 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
@@ -12977,18 +13035,24 @@
       Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
     return SDValue();
 
+  // If we are loading a subword value and converting it to FP, we can avoid the
+  // move and load directly to a VSR. However, that is only profitable if there
+  // are no other uses of the loaded value.
   SDValue FirstOperand(Op.getOperand(0));
-  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
-    (FirstOperand.getValueType() == MVT::i8 ||
-     FirstOperand.getValueType() == MVT::i16);
-  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
+  LoadSDNode *LdInput = dyn_cast<LoadSDNode>(FirstOperand);
+  bool SubWordLoad = LdInput &&
+    (LdInput->getMemoryVT() == MVT::i8 ||
+     LdInput->getMemoryVT() == MVT::i16);
+  if (!DCI.isBeforeLegalize() && Subtarget.hasP9Vector() &&
+      Subtarget.hasP9Altivec() && SubWordLoad &&
+      FirstOperand.hasOneUse()) {
     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
     bool DstDouble = Op.getValueType() == MVT::f64;
     unsigned ConvOp = Signed ?
       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
     SDValue WidthConst =
-      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
+      DAG.getIntPtrConstant(LdInput->getMemoryVT() == MVT::i8 ? 1 : 2,
                             dl, false);
     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
diff --git a/test/CodeGen/PowerPC/int-to-fp-build-vectors.ll b/test/CodeGen/PowerPC/int-to-fp-build-vectors.ll
--- a/test/CodeGen/PowerPC/int-to-fp-build-vectors.ll
+++ b/test/CodeGen/PowerPC/int-to-fp-build-vectors.ll
@@ -0,0 +1,517 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-PWR9 %s
+; Function Attrs: norecurse nounwind readnone
+define <2 x double> @test_sc_to_d(<16 x i8> %a) {
+; CHECK-LABEL: test_sc_to_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    mfvsrd 3, 34
+; CHECK-NEXT:    rldicl 3, 3, 8, 56
+; CHECK-NEXT:    mfvsrd 4, 0
+; CHECK-NEXT:    extsb 3, 3
+; CHECK-NEXT:    mtvsrd 0, 3
+; CHECK-NEXT:    rldicl 4, 4, 8, 56
+; CHECK-NEXT:    extsb 3, 4
+; CHECK-NEXT:    mtvsrd 1, 3
+; CHECK-NEXT:    xxmrghd 34, 0, 1
+; CHECK-NEXT:    xvcvsxddp 34, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_sc_to_d:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    vsldoi 2, 2, 2, 9
+; CHECK-PWR9-NEXT:    vextsb2d 2, 2
+; CHECK-PWR9-NEXT:    xvcvsxddp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 7
+  %conv = sitofp i8 %vecext to double
+  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 15
+  %conv2 = sitofp i8 %vecext1 to double
+  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+  ret <2 x double> %vecinit3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <2 x double> @test_uc_to_d(<16 x i8> %a) {
+; CHECK-LABEL: test_uc_to_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mfvsrd 3, 34
+; CHECK-NEXT:    clrldi 4, 3, 56
+; CHECK-NEXT:    rldicl 3, 3, 32, 56
+; CHECK-NEXT:    clrldi 4, 4, 56
+; CHECK-NEXT:    clrldi 3, 3, 56
+; CHECK-NEXT:    mtvsrd 0, 4
+; CHECK-NEXT:    mtvsrd 1, 3
+; CHECK-NEXT:    xxmrghd 34, 1, 0
+; CHECK-NEXT:    xvcvuxddp 34, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_uc_to_d:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    li 3, 8
+; CHECK-PWR9-NEXT:    li 4, 12
+; CHECK-PWR9-NEXT:    vextubrx 3, 3, 2
+; CHECK-PWR9-NEXT:    vextubrx 4, 4, 2
+; CHECK-PWR9-NEXT:    clrldi 3, 3, 56
+; CHECK-PWR9-NEXT:    clrldi 4, 4, 56
+; CHECK-PWR9-NEXT:    mtvsrdd 34, 4, 3
+; CHECK-PWR9-NEXT:    xvcvuxddp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 8
+  %conv = uitofp i8 %vecext to double
+  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 12
+  %conv2 = uitofp i8 %vecext1 to double
+  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+  ret <2 x double> %vecinit3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x float> @test_sc_to_f(<16 x i8> %a) {
+; CHECK-LABEL: test_sc_to_f:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    mfvsrd 3, 34
+; CHECK-NEXT:    rldicl 3, 3, 40, 56
+; CHECK-NEXT:    mfvsrd 4, 0
+; CHECK-NEXT:    extsb 3, 3
+; CHECK-NEXT:    clrldi 5, 4, 56
+; CHECK-NEXT:    rldicl 6, 4, 40, 56
+; CHECK-NEXT:    rldicl 4, 4, 8, 56
+; CHECK-NEXT:    extsb 6, 6
+; CHECK-NEXT:    extsb 5, 5
+; CHECK-NEXT:    extsb 4, 4
+; CHECK-NEXT:    rldimi 5, 6, 32, 0
+; CHECK-NEXT:    rldimi 4, 3, 32, 0
+; CHECK-NEXT:    mtvsrd 0, 5
+; CHECK-NEXT:    mtvsrd 1, 4
+; CHECK-NEXT:    xxmrghd 0, 1, 0
+; CHECK-NEXT:    xvcvsxwsp 34, 0
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_sc_to_f:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-PWR9-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; CHECK-PWR9-NEXT:    lxvx 35, 0, 3
+; CHECK-PWR9-NEXT:    vperm 2, 2, 2, 3
+; CHECK-PWR9-NEXT:    vextsb2w 2, 2
+; CHECK-PWR9-NEXT:    xvcvsxwsp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sitofp i8 %vecext to float
+  %vecinit = insertelement <4 x float> undef, float %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 3
+  %conv2 = sitofp i8 %vecext1 to float
+  %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1
+  %vecext4 = extractelement <16 x i8> %a, i32 7
+  %conv5 = sitofp i8 %vecext4 to float
+  %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2
+  %vecext7 = extractelement <16 x i8> %a, i32 11
+  %conv8 = sitofp i8 %vecext7 to float
+  %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3
+  ret <4 x float> %vecinit9
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x float> @test_uc_to_f(<16 x i8> %a) {
+; CHECK-LABEL: test_uc_to_f:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    mfvsrd 3, 34
+; CHECK-NEXT:    rldicl 3, 3, 40, 56
+; CHECK-NEXT:    mfvsrd 4, 0
+; CHECK-NEXT:    rlwinm 3, 3, 0, 24, 31
+; CHECK-NEXT:    clrldi 5, 4, 56
+; CHECK-NEXT:    rldicl 6, 4, 40, 56
+; CHECK-NEXT:    rldicl 4, 4, 8, 56
+; CHECK-NEXT:    rlwinm 6, 6, 0, 24, 31
+; CHECK-NEXT:    rlwinm 5, 5, 0, 24, 31
+; CHECK-NEXT:    rlwinm 4, 4, 0, 24, 31
+; CHECK-NEXT:    rldimi 5, 6, 32, 0
+; CHECK-NEXT:    rldimi 4, 3, 32, 0
+; CHECK-NEXT:    mtvsrd 0, 5
+; CHECK-NEXT:    mtvsrd 1, 4
+; CHECK-NEXT:    xxmrghd 0, 1, 0
+; CHECK-NEXT:    xvcvuxwsp 34, 0
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_uc_to_f:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    li 3, 0
+; CHECK-PWR9-NEXT:    li 4, 3
+; CHECK-PWR9-NEXT:    vextubrx 4, 4, 2
+; CHECK-PWR9-NEXT:    vextubrx 3, 3, 2
+; CHECK-PWR9-NEXT:    li 5, 7
+; CHECK-PWR9-NEXT:    li 6, 11
+; CHECK-PWR9-NEXT:    vextubrx 5, 5, 2
+; CHECK-PWR9-NEXT:    vextubrx 6, 6, 2
+; CHECK-PWR9-NEXT:    rlwinm 4, 4, 0, 24, 31
+; CHECK-PWR9-NEXT:    rlwinm 3, 3, 0, 24, 31
+; CHECK-PWR9-NEXT:    rldimi 3, 4, 32, 0
+; CHECK-PWR9-NEXT:    rlwinm 4, 6, 0, 24, 31
+; CHECK-PWR9-NEXT:    rlwinm 5, 5, 0, 24, 31
+; CHECK-PWR9-NEXT:    rldimi 5, 4, 32, 0
+; CHECK-PWR9-NEXT:    mtvsrdd 0, 5, 3
+; CHECK-PWR9-NEXT:    xvcvuxwsp 34, 0
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = uitofp i8 %vecext to float
+  %vecinit = insertelement <4 x float> undef, float %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 3
+  %conv2 = uitofp i8 %vecext1 to float
+  %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1
+  %vecext4 = extractelement <16 x i8> %a, i32 7
+  %conv5 = uitofp i8 %vecext4 to float
+  %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2
+  %vecext7 = extractelement <16 x i8> %a, i32 11
+  %conv8 = uitofp i8 %vecext7 to float
+  %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3
+  ret <4 x float> %vecinit9
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <2 x double> @test_ss_to_d(<8 x i16> %a) {
+; CHECK-LABEL: test_ss_to_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    mfvsrd 3, 34
+; CHECK-NEXT:    clrldi 3, 3, 48
+; CHECK-NEXT:    mfvsrd 4, 0
+; CHECK-NEXT:    extsh 3, 3
+; CHECK-NEXT:    mtvsrd 0, 3
+; CHECK-NEXT:    clrldi 4, 4, 48
+; CHECK-NEXT:    extsh 3, 4
+; CHECK-NEXT:    mtvsrd 1, 3
+; CHECK-NEXT:    xxmrghd 34, 0, 1
+; CHECK-NEXT:    xvcvsxddp 34, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_ss_to_d:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    vextsh2d 2, 2
+; CHECK-PWR9-NEXT:    xvcvsxddp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = sitofp i16 %vecext to double
+  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 4
+  %conv2 = sitofp i16 %vecext1 to double
+  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+  ret <2 x double> %vecinit3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <2 x double> @test_us_to_d(<8 x i16> %a) {
+; CHECK-LABEL: test_us_to_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    mfvsrd 3, 34
+; CHECK-NEXT:    clrldi 3, 3, 48
+; CHECK-NEXT:    mfvsrd 4, 0
+; CHECK-NEXT:    clrldi 3, 3, 48
+; CHECK-NEXT:    mtvsrd 0, 3
+; CHECK-NEXT:    clrldi 4, 4, 48
+; CHECK-NEXT:    clrldi 3, 4, 48
+; CHECK-NEXT:    mtvsrd 1, 3
+; CHECK-NEXT:    xxmrghd 34, 0, 1
+; CHECK-NEXT:    xvcvuxddp 34, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_us_to_d:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    li 3, 0
+; CHECK-PWR9-NEXT:    li 4, 8
+; CHECK-PWR9-NEXT:    vextuhrx 3, 3, 2
+; CHECK-PWR9-NEXT:    vextuhrx 4, 4, 2
+; CHECK-PWR9-NEXT:    clrldi 3, 3, 48
+; CHECK-PWR9-NEXT:    clrldi 4, 4, 48
+; CHECK-PWR9-NEXT:    mtvsrdd 34, 4, 3
+; CHECK-PWR9-NEXT:    xvcvuxddp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = uitofp i16 %vecext to double
+  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 4
+  %conv2 = uitofp i16 %vecext1 to double
+  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+  ret <2 x double> %vecinit3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x float> @test_ss_to_f(<8 x i16> %a) {
+; CHECK-LABEL: test_ss_to_f:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    mfvsrd 3, 34
+; CHECK-NEXT:    clrldi 5, 3, 48
+; CHECK-NEXT:    rldicl 3, 3, 32, 48
+; CHECK-NEXT:    mfvsrd 4, 0
+; CHECK-NEXT:    extsh 3, 3
+; CHECK-NEXT:    extsh 5, 5
+; CHECK-NEXT:    rldimi 5, 3, 32, 0
+; CHECK-NEXT:    clrldi 6, 4, 48
+; CHECK-NEXT:    rldicl 4, 4, 32, 48
+; CHECK-NEXT:    mtvsrd 0, 5
+; CHECK-NEXT:    extsh 3, 4
+; CHECK-NEXT:    extsh 4, 6
+; CHECK-NEXT:    rldimi 4, 3, 32, 0
+; CHECK-NEXT:    mtvsrd 1, 4
+; CHECK-NEXT:    xxmrghd 0, 0, 1
+; CHECK-NEXT:    xvcvsxwsp 34, 0
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_ss_to_f:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    vextsh2w 2, 2
+; CHECK-PWR9-NEXT:    xvcvsxwsp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = sitofp i16 %vecext to float
+  %vecinit = insertelement <4 x float> undef, float %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 2
+  %conv2 = sitofp i16 %vecext1 to float
+  %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1
+  %vecext4 = extractelement <8 x i16> %a, i32 4
+  %conv5 = sitofp i16 %vecext4 to float
+  %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2
+  %vecext7 = extractelement <8 x i16> %a, i32 6
+  %conv8 = sitofp i16 %vecext7 to float
+  %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3
+  ret <4 x float> %vecinit9
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x float> @test_us_to_f(<8 x i16> %a) {
+; CHECK-LABEL: test_us_to_f:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    mfvsrd 3, 34
+; CHECK-NEXT:    clrldi 5, 3, 48
+; CHECK-NEXT:    rldicl 3, 3, 32, 48
+; CHECK-NEXT:    mfvsrd 4, 0
+; CHECK-NEXT:    rlwinm 3, 3, 0, 16, 31
+; CHECK-NEXT:    rlwinm 5, 5, 0, 16, 31
+; CHECK-NEXT:    rldimi 5, 3, 32, 0
+; CHECK-NEXT:    clrldi 6, 4, 48
+; CHECK-NEXT:    rldicl 4, 4, 32, 48
+; CHECK-NEXT:    mtvsrd 0, 5
+; CHECK-NEXT:    rlwinm 3, 4, 0, 16, 31
+; CHECK-NEXT:    rlwinm 4, 6, 0, 16, 31
+; CHECK-NEXT:    rldimi 4, 3, 32, 0
+; CHECK-NEXT:    mtvsrd 1, 4
+; CHECK-NEXT:    xxmrghd 0, 0, 1
+; CHECK-NEXT:    xvcvuxwsp 34, 0
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_us_to_f:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    li 3, 0
+; CHECK-PWR9-NEXT:    li 4, 4
+; CHECK-PWR9-NEXT:    li 5, 8
+; CHECK-PWR9-NEXT:    vextuhrx 3, 3, 2
+; CHECK-PWR9-NEXT:    vextuhrx 4, 4, 2
+; CHECK-PWR9-NEXT:    li 6, 12
+; CHECK-PWR9-NEXT:    vextuhrx 5, 5, 2
+; CHECK-PWR9-NEXT:    vextuhrx 6, 6, 2
+; CHECK-PWR9-NEXT:    rlwinm 4, 4, 0, 16, 31
+; CHECK-PWR9-NEXT:    rlwinm 3, 3, 0, 16, 31
+; CHECK-PWR9-NEXT:    rldimi 3, 4, 32, 0
+; CHECK-PWR9-NEXT:    rlwinm 4, 6, 0, 16, 31
+; CHECK-PWR9-NEXT:    rlwinm 5, 5, 0, 16, 31
+; CHECK-PWR9-NEXT:    rldimi 5, 4, 32, 0
+; CHECK-PWR9-NEXT:    mtvsrdd 0, 5, 3
+; CHECK-PWR9-NEXT:    xvcvuxwsp 34, 0
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = uitofp i16 %vecext to float
+  %vecinit = insertelement <4 x float> undef, float %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 2
+  %conv2 = uitofp i16 %vecext1 to float
+  %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1
+  %vecext4 = extractelement <8 x i16> %a, i32 4
+  %conv5 = uitofp i16 %vecext4 to float
+  %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2
+  %vecext7 = extractelement <8 x i16> %a, i32 6
+  %conv8 = uitofp i16 %vecext7 to float
+  %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3
+  ret <4 x float> %vecinit9
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <2 x double> @test_si_to_d(<4 x i32> %a) {
+; CHECK-LABEL: test_si_to_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    mfvsrwz 4, 34
+; CHECK-NEXT:    extsw 4, 4
+; CHECK-NEXT:    mfvsrwz 3, 0
+; CHECK-NEXT:    mtvsrd 1, 4
+; CHECK-NEXT:    extsw 3, 3
+; CHECK-NEXT:    mtvsrd 0, 3
+; CHECK-NEXT:    xxmrghd 34, 1, 0
+; CHECK-NEXT:    xvcvsxddp 34, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_si_to_d:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    vextsw2d 2, 2
+; CHECK-PWR9-NEXT:    xvcvsxddp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %conv = sitofp i32 %vecext to double
+  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+  %vecext1 = extractelement <4 x i32> %a, i32 2
+  %conv2 = sitofp i32 %vecext1 to double
+  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+  ret <2 x double> %vecinit3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <2 x double> @test_ui_to_d(<4 x i32> %a) {
+; CHECK-LABEL: test_ui_to_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    xxlxor 1, 1, 1
+; CHECK-NEXT:    mfvsrwz 3, 0
+; CHECK-NEXT:    mtfprwz 0, 3
+; CHECK-NEXT:    xscvuxddp 0, 0
+; CHECK-NEXT:    xxmrghd 34, 1, 0
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_ui_to_d:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    xxextractuw 0, 34, 12
+; CHECK-PWR9-NEXT:    xscvuxddp 0, 0
+; CHECK-PWR9-NEXT:    xxlxor 1, 1, 1
+; CHECK-PWR9-NEXT:    xxmrghd 34, 1, 0
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %conv = uitofp i32 %vecext to double
+  %vecinit3 = insertelement <2 x double> <double undef, double 0.000000e+00>, double %conv, i32 0
+  ret <2 x double> %vecinit3
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x float> @test_si_to_f(<4 x i32> %a) {
+; CHECK-LABEL: test_si_to_f:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; CHECK-NEXT:    addi 3, 3, .LCPI10_0@toc@l
+; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    vperm 2, 2, 2, 3
+; CHECK-NEXT:    xvcvsxwsp 34, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_si_to_f:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; CHECK-PWR9-NEXT:    addi 3, 3, .LCPI10_0@toc@l
+; CHECK-PWR9-NEXT:    lxvx 35, 0, 3
+; CHECK-PWR9-NEXT:    vperm 2, 2, 2, 3
+; CHECK-PWR9-NEXT:    xvcvsxwsp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %conv = sitofp i32 %vecext to float
+  %vecinit = insertelement <4 x float> undef, float %conv, i32 0
+  %vecext1 = extractelement <4 x i32> %a, i32 3
+  %conv2 = sitofp i32 %vecext1 to float
+  %vecinit3 = insertelement <4 x float> %vecinit, float %conv2, i32 1
+  %vecext4 = extractelement <4 x i32> %a, i32 1
+  %conv5 = sitofp i32 %vecext4 to float
+  %vecinit6 = insertelement <4 x float> %vecinit3, float %conv5, i32 2
+  %vecext7 = extractelement <4 x i32> %a, i32 2
+  %conv8 = sitofp i32 %vecext7 to float
+  %vecinit9 = insertelement <4 x float> %vecinit6, float %conv8, i32 3
+  ret <4 x float> %vecinit9
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <4 x float> @test_ui_to_f(<4 x i32> %a) {
+; CHECK-LABEL: test_ui_to_f:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvcvuxwsp 34, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_ui_to_f:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    xvcvuxwsp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %0 = uitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %0
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <2 x double> @test_sl_to_d(<2 x i64> %a) {
+; CHECK-LABEL: test_sl_to_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvcvsxddp 34, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_sl_to_d:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    xvcvsxddp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %0 = sitofp <2 x i64> %a to <2 x double>
+  ret <2 x double> %0
+}
+
+; Function Attrs: norecurse nounwind readnone
+define <2 x double> @test_ul_to_d(<2 x i64> %a) {
+; CHECK-LABEL: test_ul_to_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltd 34, 34, 1
+; CHECK-NEXT:    xvcvuxddp 34, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_ul_to_d:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    xxspltd 34, 34, 1
+; CHECK-PWR9-NEXT:    xvcvuxddp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <2 x i64> %a, i32 0
+  %conv = uitofp i64 %vecext to double
+  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+  %vecinit3 = shufflevector <2 x double> %vecinit, <2 x double> undef, <2 x i32> zeroinitializer
+  ret <2 x double> %vecinit3
+}
+; NOTE: despite "ul" in the name, test_2ul_to_d converts with sitofp (signed).
+define dso_local <2 x double> @test_2ul_to_d(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_2ul_to_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxpermdi 34, 35, 34, 1
+; CHECK-NEXT:    xvcvsxddp 34, 34
+; CHECK-NEXT:    blr
+;
+; CHECK-PWR9-LABEL: test_2ul_to_d:
+; CHECK-PWR9:       # %bb.0: # %entry
+; CHECK-PWR9-NEXT:    xxpermdi 34, 35, 34, 1
+; CHECK-PWR9-NEXT:    xvcvsxddp 34, 34
+; CHECK-PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <2 x i64> %a, i32 0
+  %conv = sitofp i64 %vecext to double
+  %vecinit = insertelement <2 x double> undef, double %conv, i32 0
+  %vecext1 = extractelement <2 x i64> %b, i32 1
+  %conv2 = sitofp i64 %vecext1 to double
+  %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
+  ret <2 x double> %vecinit3
+}
diff --git a/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll b/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
--- a/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
+++ b/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
@@ -14,61 +14,42 @@
 define dso_local <2 x double> @test1(<8 x i16> %a) {
 ; P9BE-LABEL: test1:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    li r3, 0
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    rlwinm r3, r3, 0, 16, 31
-; P9BE-NEXT:    mtfprwz f0, r3
-; P9BE-NEXT:    li r3, 2
-; P9BE-NEXT:    vextuhlx r3, r3, v2
-; P9BE-NEXT:    rlwinm r3, r3, 0, 16, 31
-; P9BE-NEXT:    mtfprwz f1, r3
-; P9BE-NEXT:    xscvuxddp f0, f0
-; P9BE-NEXT:    xscvuxddp f1, f1
-; P9BE-NEXT:    xxmrghd v2, vs0, vs1
+; P9BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; P9BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; P9BE-NEXT:    lxvx v3, 0, r3
+; P9BE-NEXT:    xxlxor v4, v4, v4
+; P9BE-NEXT:    vperm v2, v4, v2, v3
+; P9BE-NEXT:    xvcvuxddp v2, v2
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: test1:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    li r3, 0
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    rlwinm r3, r3, 0, 16, 31
-; P9LE-NEXT:    mtfprwz f0, r3
-; P9LE-NEXT:    li r3, 2
-; P9LE-NEXT:    vextuhrx r3, r3, v2
-; P9LE-NEXT:    rlwinm r3, r3, 0, 16, 31
-; P9LE-NEXT:    mtfprwz f1, r3
-; P9LE-NEXT:    xscvuxddp f0, f0
-; P9LE-NEXT:    xscvuxddp f1, f1
-; P9LE-NEXT:    xxmrghd v2, vs1, vs0
+; P9LE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; P9LE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; P9LE-NEXT:    lxvx v3, 0, r3
+; P9LE-NEXT:    xxlxor v4, v4, v4
+; P9LE-NEXT:    vperm v2, v2, v4, v3
+; P9LE-NEXT:    xvcvuxddp v2, v2
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: test1:
 ; P8BE:       # %bb.0: # %entry
-; P8BE-NEXT:    mfvsrd r3, v2
-; P8BE-NEXT:    rldicl r4, r3, 16, 48
-; P8BE-NEXT:    rldicl r3, r3, 32, 48
-; P8BE-NEXT:    rlwinm r4, r4, 0, 16, 31
-; P8BE-NEXT:    rlwinm r3, r3, 0, 16, 31
-; P8BE-NEXT:    mtfprwz f0, r4
-; P8BE-NEXT:    mtfprwz f1, r3
-; P8BE-NEXT:    xscvuxddp f0, f0
-; P8BE-NEXT:    xscvuxddp f1, f1
-; P8BE-NEXT:    xxmrghd v2, vs0, vs1
+; P8BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; P8BE-NEXT:    xxlxor v4, v4, v4
+; P8BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; P8BE-NEXT:    lxvw4x v3, 0, r3
+; P8BE-NEXT:    vperm v2, v4, v2, v3
+; P8BE-NEXT:    xvcvuxddp v2, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: test1:
 ; P8LE:       # %bb.0: # %entry
-; P8LE-NEXT:    xxswapd vs0, v2
-; P8LE-NEXT:    mfvsrd r3, f0
-; P8LE-NEXT:    clrldi r4, r3, 48
-; P8LE-NEXT:    rldicl r3, r3, 48, 48
-; P8LE-NEXT:    rlwinm r4, r4, 0, 16, 31
-; P8LE-NEXT:    rlwinm r3, r3, 0, 16, 31
-; P8LE-NEXT:    mtfprwz f0, r4
-; P8LE-NEXT:    mtfprwz f1, r3
-; P8LE-NEXT:    xscvuxddp f0, f0
-; P8LE-NEXT:    xscvuxddp f1, f1
-; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; P8LE-NEXT:    xxlxor v4, v4, v4
+; P8LE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; P8LE-NEXT:    lvx v3, 0, r3
+; P8LE-NEXT:    vperm v2, v2, v4, v3
+; P8LE-NEXT:    xvcvuxddp v2, v2
 ; P8LE-NEXT:    blr
 entry:
   %vecext = extractelement <8 x i16> %a, i32 0
@@ -83,32 +64,34 @@
 define dso_local <2 x double> @test2(<4 x i32> %a, <4 x i32> %b) {
 ; P9BE-LABEL: test2:
 ; P9BE:       # %bb.0: # %entry
-; P9BE-NEXT:    xxextractuw f0, v2, 0
-; P9BE-NEXT:    xxextractuw f1, v3, 4
-; P9BE-NEXT:    xscvuxddp f0, f0
-; P9BE-NEXT:    xscvuxddp f1, f1
-; P9BE-NEXT:    xxmrghd v2, vs0, vs1
+; P9BE-NEXT:    li r3, 0
+; P9BE-NEXT:    vextuwlx r3, r3, v2
+; P9BE-NEXT:    mfvsrwz r4, v3
+; P9BE-NEXT:    mtvsrdd v2, r3, r4
+; P9BE-NEXT:    xvcvuxddp v2, v2
 ; P9BE-NEXT:    blr
 ;
 ; P9LE-LABEL: test2:
 ; P9LE:       # %bb.0: # %entry
-; P9LE-NEXT:    xxextractuw f0, v2, 12
-; P9LE-NEXT:    xxextractuw f1, v3, 8
-; P9LE-NEXT:    xscvuxddp f0, f0
-; P9LE-NEXT:    xscvuxddp f1, f1
-; P9LE-NEXT:    xxmrghd v2, vs1, vs0
+; P9LE-NEXT:    li r3, 0
+; P9LE-NEXT:    li r4, 4
+; P9LE-NEXT:    vextuwrx r3, r3, v2
+; P9LE-NEXT:    vextuwrx r4, r4, v3
+; P9LE-NEXT:    mtvsrdd v2, r4, r3
+; P9LE-NEXT:    xvcvuxddp v2, v2
 ; P9LE-NEXT:    blr
 ;
 ; P8BE-LABEL: test2:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    xxsldwi vs0, v2, v2, 3
-; P8BE-NEXT:    mfvsrwz r4, v3
-; P8BE-NEXT:    mtfprwz f1, r4
-; P8BE-NEXT:    mfvsrwz r3, f0
-; P8BE-NEXT:    xscvuxddp f1, f1
-; P8BE-NEXT:    mtfprwz f0, r3
-; P8BE-NEXT:    xscvuxddp f0, f0
-; P8BE-NEXT:    xxmrghd v2, vs0, vs1
+; P8BE-NEXT:    mfvsrwz r3, v3
+; P8BE-NEXT:    clrldi r3, r3, 32
+; P8BE-NEXT:    mfvsrwz r4, f0
+; P8BE-NEXT:    mtvsrd f0, r3
+; P8BE-NEXT:    clrldi r3, r4, 32
+; P8BE-NEXT:    mtvsrd f1, r3
+; P8BE-NEXT:    xxmrghd v2, vs1, vs0
+; P8BE-NEXT:    xvcvuxddp v2, v2
 ; P8BE-NEXT:    blr
 ;
 ; P8LE-LABEL: test2:
@@ -117,11 +100,12 @@
 ; P8LE-NEXT:    xxsldwi vs1, v3, v3, 1
 ; P8LE-NEXT:    mfvsrwz r3, f0
 ; P8LE-NEXT:    mfvsrwz r4, f1
-; P8LE-NEXT:    mtfprwz f0, r3
-; P8LE-NEXT:    mtfprwz f1, r4
-; P8LE-NEXT:    xscvuxddp f0, f0
-; P8LE-NEXT:    xscvuxddp f1, f1
+; P8LE-NEXT:    clrldi r3, r3, 32
+; P8LE-NEXT:    clrldi r4, r4, 32
+; P8LE-NEXT:    mtvsrd f0, r3
+; P8LE-NEXT:    mtvsrd f1, r4
 ; P8LE-NEXT:    xxmrghd v2, vs1, vs0
+; P8LE-NEXT:    xvcvuxddp v2, v2
 ; P8LE-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %a, i32 0
diff --git a/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll b/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll
--- a/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll
+++ b/test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll
@@ -137,14 +137,12 @@
   ret <4 x float> %splat.splat
 ; CHECK-LABEL: vecfuc
 ; CHECK: lxsibzx [[LD:[0-9]+]], 0, 3
-; CHECK-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
-; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
-; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-NEXT: xxspltw [[SPLT:[0-9]+]], [[LD]], 1
+; CHECK-NEXT: xvcvuxwsp 34, [[SPLT]]
 ; CHECK-BE-LABEL: vecfuc
 ; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3
-; CHECK-BE-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
-; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
-; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-BE-NEXT: xxspltw [[SPLT:[0-9]+]], [[LD]], 1
+; CHECK-BE-NEXT: xvcvuxwsp 34, [[SPLT]]
 }
 
 ; Function Attrs: norecurse nounwind readonly
@@ -157,12 +155,12 @@
   ret <2 x double> %splat.splat
 ; CHECK-LABEL: vecduc
 ; CHECK: lxsibzx [[LD:[0-9]+]], 0, 3
-; CHECK-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
-; CHECK-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-NEXT: xxspltd [[SPLT:[0-9]+]], [[LD]], 0
+; CHECK-NEXT: xvcvuxddp 34, [[SPLT]]
 ; CHECK-BE-LABEL: vecduc
 ; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3
-; CHECK-BE-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
-; CHECK-BE-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-BE-NEXT: xxspltd [[SPLT:[0-9]+]], [[LD]], 0
+; CHECK-BE-NEXT: xvcvuxddp 34, [[SPLT]]
 }
 
 ; Function Attrs: norecurse nounwind readonly
@@ -277,16 +275,14 @@
   ret <4 x float> %splat.splat
 ; CHECK-LABEL: vecfsc
 ; CHECK: lxsibzx
-; CHECK-NEXT: vextsb2d
-; CHECK-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
-; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
-; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-NEXT: vextsb2w
+; CHECK-NEXT: xxspltw [[SPLT:[0-9]+]],
+; CHECK-NEXT: xvcvsxwsp 34, [[SPLT]]
 ; CHECK-BE-LABEL: vecfsc
 ; CHECK-BE: lxsibzx [[LD:[0-9]+]], 0, 3
-; CHECK-BE-NEXT: vextsb2d
-; CHECK-BE-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
-; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
-; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-BE-NEXT: vextsb2w
+; CHECK-BE-NEXT: xxspltw [[SPLT:[0-9]+]],
+; CHECK-BE-NEXT: xvcvsxwsp 34, [[SPLT]]
 }
 
 ; Function Attrs: norecurse nounwind readonly
@@ -300,13 +296,13 @@
 ; CHECK-LABEL: vecdsc
 ; CHECK: lxsibzx
 ; CHECK-NEXT: vextsb2d
-; CHECK-NEXT: xscvsxddp [[CONVD:[0-9]+]],
-; CHECK-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-NEXT: xxspltd [[SPLT:[0-9]+]],
+; CHECK-NEXT: xvcvsxddp 34, [[SPLT]]
 ; CHECK-BE-LABEL: vecdsc
 ; CHECK-BE: lxsibzx
 ; CHECK-BE-NEXT: vextsb2d
-; CHECK-BE-NEXT: xscvsxddp [[CONVD:[0-9]+]],
-; CHECK-BE-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-BE-NEXT: xxspltd [[SPLT:[0-9]+]],
+; CHECK-BE-NEXT: xvcvsxddp 34, [[SPLT]]
 }
 
 ; Function Attrs: norecurse nounwind readonly
@@ -447,14 +443,12 @@
   ret <4 x float> %splat.splat
 ; CHECK-LABEL: vecfus
 ; CHECK: lxsihzx [[LD:[0-9]+]], 0, 3
-; CHECK-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
-; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
-; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-NEXT: xxspltw [[SPLT:[0-9]+]], [[LD]], 1
+; CHECK-NEXT: xvcvuxwsp 34, [[SPLT]]
 ; CHECK-BE-LABEL: vecfus
 ; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3
-; CHECK-BE-NEXT: xscvuxdsp [[CONVD:[0-9]+]], [[LD]]
-; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
-; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-BE-NEXT: xxspltw [[SPLT:[0-9]+]], [[LD]], 1
+; CHECK-BE-NEXT: xvcvuxwsp 34, [[SPLT]]
 }
 
 ; Function Attrs: norecurse nounwind readonly
@@ -467,12 +461,12 @@
   ret <2 x double> %splat.splat
 ; CHECK-LABEL: vecdus
 ; CHECK: lxsihzx [[LD:[0-9]+]], 0, 3
-; CHECK-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
-; CHECK-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-NEXT: xxspltd [[SPLT:[0-9]+]], [[LD]], 0
+; CHECK-NEXT: xvcvuxddp 34, [[SPLT]]
 ; CHECK-BE-LABEL: vecdus
 ; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3
-; CHECK-BE-NEXT: xscvuxddp [[CONVD:[0-9]+]], [[LD]]
-; CHECK-BE-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-BE-NEXT: xxspltd [[SPLT:[0-9]+]], [[LD]], 0
+; CHECK-BE-NEXT: xvcvuxddp 34, [[SPLT]]
 }
 
 ; Function Attrs: norecurse nounwind readonly
@@ -591,16 +585,14 @@
   ret <4 x float> %splat.splat
 ; CHECK-LABEL: vecfss
 ; CHECK: lxsihzx
-; CHECK-NEXT: vextsh2d
-; CHECK-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
-; CHECK-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
-; CHECK-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-NEXT: vextsh2w
+; CHECK-NEXT: xxspltw [[CONVS:[0-9]+]], {{[0-9]+}}, 1
+; CHECK-NEXT: xvcvsxwsp 34, [[CONVS]]
 ; CHECK-BE-LABEL: vecfss
 ; CHECK-BE: lxsihzx [[LD:[0-9]+]], 0, 3
-; CHECK-BE-NEXT: vextsh2d
-; CHECK-BE-NEXT: xscvsxdsp [[CONVD:[0-9]+]],
-; CHECK-BE-NEXT: xscvdpspn [[CONVS:[0-9]+]], [[CONVD]]
-; CHECK-BE-NEXT: xxspltw 34, [[CONVS]], 0
+; CHECK-BE-NEXT: vextsh2w
+; CHECK-BE-NEXT: xxspltw [[CONVS:[0-9]+]], {{[0-9]+}}, 1
+; CHECK-BE-NEXT: xvcvsxwsp 34, [[CONVS]]
 }
 
 ; Function Attrs: norecurse nounwind readonly
@@ -614,13 +606,13 @@
 ; CHECK-LABEL: vecdss
 ; CHECK: lxsihzx
 ; CHECK-NEXT: vextsh2d
-; CHECK-NEXT: xscvsxddp [[CONVD:[0-9]+]],
-; CHECK-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-NEXT: xxspltd [[SPLT:[0-9]+]],
+; CHECK-NEXT: xvcvsxddp 34, [[SPLT]]
 ; CHECK-BE-LABEL: vecdss
 ; CHECK-BE: lxsihzx
 ; CHECK-BE-NEXT: vextsh2d
-; CHECK-BE-NEXT: xscvsxddp [[CONVD:[0-9]+]],
-; CHECK-BE-NEXT: xxspltd  34, [[CONVD]], 0
+; CHECK-BE-NEXT: xxspltd [[SPLT:[0-9]+]],
+; CHECK-BE-NEXT: xvcvsxddp 34, [[SPLT]]
 }
 
 ; Function Attrs: norecurse nounwind