Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -1129,6 +1129,10 @@
     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
     bool hasBitPreservingFPLogic(EVT VT) const override;
     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+    /// Whether a constant-index vector extract can be folded into its store
+    /// (e.g. PPC::STIWX) rather than direct-moved; sets \p Cost on success.
+    bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+                                   unsigned &Cost) const override;
   }; // end class PPCTargetLowering
 
   namespace PPC {
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14288,6 +14288,38 @@
   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
 }
 
+/// Report whether a store of the element at \p Idx of \p VectorTy can absorb
+/// the extract; on success \p Cost is 0 for the natural element, 3 otherwise.
+bool PPCTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+                                                  unsigned &Cost) const {
+  if (!Subtarget.hasDirectMove() || !Subtarget.hasAltivec() ||
+      !Subtarget.hasVSX())
+    return false;
+
+  // A non-constant index is very expensive to lower and cannot be combined.
+  const auto *CI = dyn_cast<ConstantInt>(Idx);
+  if (!CI)
+    return false;
+
+  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
+  const unsigned BitWidth = VectorTy->getScalarSizeInBits();
+
+  // Only have combined stores for sub-word types on Power9.
+  if (BitWidth > 32 || (!Subtarget.hasP9Vector() && BitWidth != 32))
+    return false;
+
+  uint64_t NaturalIdx;
+  switch (BitWidth) {
+  default: return false;
+  case 8: NaturalIdx = Subtarget.isLittleEndian() ? 8 : 7; break;
+  case 16: NaturalIdx = Subtarget.isLittleEndian() ? 4 : 3; break;
+  case 32: NaturalIdx = Subtarget.isLittleEndian() ? 2 : 1; break;
+  }
+  // The natural element needs no shuffle; equalsInt() tolerates wide indices.
+  Cost = CI->equalsInt(NaturalIdx) ? 0 : 3;
+  return true;
+}
+
 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
   if (!Subtarget.hasVSX())
     return false;
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -1430,7 +1430,7 @@
                           "xscvspdpn $XT, $XB", IIC_VecFP, []>;
   } // UseVSXReg = 1
 
-  let Predicates = [IsLittleEndian] in {
+  let Predicates = [HasP8Vector, IsLittleEndian] in {
   def : Pat<(f32 (PPCfcfids
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
             (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
@@ -1445,9 +1445,32 @@
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
             (f32 (XSCVUXDSP (COPY_TO_REGCLASS
                               (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
-  }
+  def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+            (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+            (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  foreach Idx = [ [0,2], [1,1], [3,3] ] in { // [element, XXSLDWI shift]
+    def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                     sub_64), xoaddr:$src)>;
+    def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                     sub_64), xoaddr:$src)>;
+  }
+  } // HasP8Vector, IsLittleEndian
+
+  let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
+  def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), xoaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), xoaddr:$src)>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  } // HasP8Vector, IsLittleEndian, NoP9Vector
 
-  let Predicates = [IsBigEndian] in {
+  let Predicates = [HasP8Vector, IsBigEndian] in {
   def : Pat<(f32 (PPCfcfids
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
             (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
@@ -1460,6 +1483,29 @@
   def : Pat<(f32 (PPCfcfidus
                    (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
             (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+  def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
+            (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
+            (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  foreach Idx = [ [0,3], [2,1], [3,2] ] in { // [element, XXSLDWI shift]
+    def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                     sub_64), xoaddr:$src)>;
+    def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+              (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+                                     sub_64), xoaddr:$src)>;
+  }
+  } // HasP8Vector, IsBigEndian
+
+  let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
+  def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), xoaddr:$src)>;
+  def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+            (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64), xoaddr:$src)>;
   }
 
   // Instructions for converting float to i64 feeding a store.
@@ -3270,6 +3316,27 @@
       def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
                 (v2f64 (XXPERMDIs
                 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
+      // Elt 0 is swapped into sub_64 by XXPERMDI first; elt 1 is stored as-is.
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src),
+                (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                        sub_64), xaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src),
+                (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                         sub_64), xaddr:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src),
+                (STXSDX (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src),
+                (STXSDX (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                            sub_64), ixaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+                            ixaddr:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
     }
 
     let Predicates = [IsBigEndian, HasP9Vector] in {
@@ -3282,6 +3349,26 @@
                 (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
       def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
                 (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddr:$src),
+                (STXSDX (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>; // no swap
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddr:$src),
+                (STXSDX (EXTRACT_SUBREG $A, sub_64), xaddr:$src)>; // no swap
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddr:$src),
+                (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                        sub_64), xaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddr:$src),
+                (STXSDX (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                        sub_64), xaddr:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 0)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ixaddr:$src)>;
+      def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                        sub_64), ixaddr:$src)>;
+      def : Pat<(store (f64 (extractelt v2f64:$A, 1)), ixaddr:$src),
+                (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+                        sub_64), ixaddr:$src)>;
     }
   }
 
@@ -3467,7 +3554,7 @@
             (STXSDX (XSCVDPSXDS f64:$src), xaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ixaddr:$dst, 8),
-            (STXSD (XSCVDPSXDS f64:$src), ixaddr:$dst)>;
+            (DFSTOREf64 (XSCVDPSXDS f64:$src), ixaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
             (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
@@ -3498,7 +3585,7 @@
             (STXSDX (XSCVDPUXDS f64:$src), xaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ixaddr:$dst, 8),
-            (STXSD (XSCVDPUXDS f64:$src), ixaddr:$dst)>;
+            (DFSTOREf64 (XSCVDPUXDS f64:$src), ixaddr:$dst)>;
   def : Pat<(PPCstore_scal_int_from_vsr
               (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
             (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
Index: test/CodeGen/PowerPC/combine-extract-store.ll
===================================================================
--- test/CodeGen/PowerPC/combine-extract-store.ll
+++ test/CodeGen/PowerPC/combine-extract-store.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs -O2 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown \
+; RUN:   -verify-machineinstrs -O2 < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; i32 element 2 (+4) stored to %IP; LE expects a vector add then stxsiwx.
+define void @test(<4 x i32>* noalias nocapture readonly %VP, <4 x i32>* noalias nocapture %VP2, i32* noalias nocapture %IP) local_unnamed_addr #0 {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vspltisw 2, 4
+; CHECK-NEXT:    lvx 3, 0, 3
+; CHECK-NEXT:    vadduwm 2, 3, 2
+; CHECK-NEXT:    stxsiwx 34, 0, 5
+; CHECK-NEXT:    stvx 3, 0, 4
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <4 x i32>, <4 x i32>* %VP, align 16
+  %vecext = extractelement <4 x i32> %0, i32 2
+  %add = add nsw i32 %vecext, 4
+  store i32 %add, i32* %IP, align 4
+  store <4 x i32> %0, <4 x i32>* %VP2, align 16
+  ret void
+}
+
+; float element 2 (+4.0) stored to %IP; LE expects xvaddsp then stfiwx.
+define void @testf(<4 x float>* noalias nocapture readonly %VP, <4 x float>* noalias nocapture %VP2, float* noalias nocapture %IP) local_unnamed_addr #0 {
+; CHECK-LABEL: testf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 6, 2, .LCPI1_0@toc@ha
+; CHECK-NEXT:    lvx 2, 0, 3
+; CHECK-NEXT:    addi 6, 6, .LCPI1_0@toc@l
+; CHECK-NEXT:    lvx 3, 0, 6
+; CHECK-NEXT:    stvx 2, 0, 4
+; CHECK-NEXT:    xvaddsp 0, 34, 35
+; CHECK-NEXT:    stfiwx 0, 0, 5
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <4 x float>, <4 x float>* %VP, align 16
+  %vecext = extractelement <4 x float> %0, i32 2
+  %add = fadd float %vecext, 4.000000e+00
+  store float %add, float* %IP, align 4
+  store <4 x float> %0, <4 x float>* %VP2, align 16
+  ret void
+}
+
+; i32 element 1 (+4) stored to %IP; BE expects a vector add then stxsiwx.
+define void @testBE(<4 x i32>* noalias nocapture readonly %VP, <4 x i32>* noalias nocapture %VP2, i32* noalias nocapture %IP) local_unnamed_addr #0 {
+; CHECK-BE-LABEL: testBE:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    vspltisw 2, 4
+; CHECK-BE-NEXT:    lxvw4x 35, 0, 3
+; CHECK-BE-NEXT:    vadduwm 2, 3, 2
+; CHECK-BE-NEXT:    stxsiwx 34, 0, 5
+; CHECK-BE-NEXT:    stxvw4x 35, 0, 4
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = load <4 x i32>, <4 x i32>* %VP, align 16
+  %vecext = extractelement <4 x i32> %0, i32 1
+  %add = add nsw i32 %vecext, 4
+  store i32 %add, i32* %IP, align 4
+  store <4 x i32> %0, <4 x i32>* %VP2, align 16
+  ret void
+}
+
+; float element 1 (+4.0) stored to %IP; BE expects xvaddsp then stfiwx.
+define void @testBEf(<4 x float>* noalias nocapture readonly %VP, <4 x float>* noalias nocapture %VP2, float* noalias nocapture %IP) local_unnamed_addr #0 {
+; CHECK-BE-LABEL: testBEf:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    addis 6, 2, .LCPI3_0@toc@ha
+; CHECK-BE-NEXT:    lxvw4x 0, 0, 3
+; CHECK-BE-NEXT:    addi 6, 6, .LCPI3_0@toc@l
+; CHECK-BE-NEXT:    lxvw4x 1, 0, 6
+; CHECK-BE-NEXT:    stxvw4x 0, 0, 4
+; CHECK-BE-NEXT:    xvaddsp 1, 0, 1
+; CHECK-BE-NEXT:    stfiwx 1, 0, 5
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = load <4 x float>, <4 x float>* %VP, align 16
+  %vecext = extractelement <4 x float> %0, i32 1
+  %add = fadd float %vecext, 4.000000e+00
+  store float %add, float* %IP, align 4
+  store <4 x float> %0, <4 x float>* %VP2, align 16
+  ret void
+}
Index: test/CodeGen/PowerPC/extract-and-store.ll
===================================================================
--- test/CodeGen/PowerPC/extract-and-store.ll
+++ test/CodeGen/PowerPC/extract-and-store.ll
@@ -12,20 +12,20 @@
 ; CHECK-LABEL: testll0:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxswapd vs0, vs34
-; CHECK-NEXT:    mfvsrd r3, f0
-; CHECK-NEXT:    std r3, 24(r7)
+; CHECK-NEXT:    addi r3, r7, 24
+; CHECK-NEXT:    stxsdx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testll0:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mfvsrd r3, vs34
-; CHECK-BE-NEXT:    std r3, 24(r7)
+; CHECK-BE-NEXT:    addi r3, r7, 24
+; CHECK-BE-NEXT:    stxsdx vs34, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testll0:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mfvsrld r3, vs34
-; CHECK-P9-NEXT:    std r3, 24(r7)
+; CHECK-P9-NEXT:    xxswapd vs0, vs34
+; CHECK-P9-NEXT:    stfd f0, 24(r7)
 ; CHECK-P9-NEXT:    blr
 entry:
   %vecext = extractelement <2 x i64> %a, i32 0
@@ -38,21 +38,20 @@
 define <2 x i64> @testll1(<2 x i64> returned %a, i64 %b, i64* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testll1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mfvsrd r3, vs34
-; CHECK-NEXT:    std r3, 24(r6)
+; CHECK-NEXT:    addi r3, r6, 24
+; CHECK-NEXT:    stxsdx vs34, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testll1:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxswapd vs0, vs34
-; CHECK-BE-NEXT:    mfvsrd r3, f0
-; CHECK-BE-NEXT:    std r3, 24(r6)
+; CHECK-BE-NEXT:    addi r3, r6, 24
+; CHECK-BE-NEXT:    stxsdx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testll1:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mfvsrd r3, vs34
-; CHECK-P9-NEXT:    std r3, 24(r6)
+; CHECK-P9-NEXT:    stxsd v2, 24(r6)
 ; CHECK-P9-NEXT:    blr
 entry:
   %vecext = extractelement <2 x i64> %a, i32 1
@@ -66,7 +65,8 @@
 ; CHECK-LABEL: testd0:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxswapd vs0, vs34
-; CHECK-NEXT:    stfd f0, 24(r7)
+; CHECK-NEXT:    addi r3, r7, 24
+; CHECK-NEXT:    stxsdx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testd0:
@@ -98,7 +98,8 @@
 ; CHECK-BE-LABEL: testd1:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxswapd vs0, vs34
-; CHECK-BE-NEXT:    stfd f0, 24(r7)
+; CHECK-BE-NEXT:    addi r3, r7, 24
+; CHECK-BE-NEXT:    stxsdx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testd1:
@@ -116,22 +117,23 @@
 define <4 x float> @testf0(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testf0:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; CHECK-NEXT:    xscvspdpn f0, vs0
-; CHECK-NEXT:    stfs f0, 12(r7)
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testf0:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xscvspdpn f0, vs34
-; CHECK-BE-NEXT:    stfs f0, 12(r7)
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testf0:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    stfs f0, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %a, i32 0
@@ -144,23 +146,22 @@
 define <4 x float> @testf1(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testf1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxswapd vs0, vs34
-; CHECK-NEXT:    xscvspdpn f0, vs0
-; CHECK-NEXT:    stfs f0, 12(r7)
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testf1:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    stfs f0, 12(r7)
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testf1:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxswapd vs0, vs34
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    stfs f0, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %a, i32 1
@@ -173,23 +174,21 @@
 define <4 x float> @testf2(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testf2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-NEXT:    xscvspdpn f0, vs0
-; CHECK-NEXT:    stfs f0, 12(r7)
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testf2:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxswapd vs0, vs34
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    stfs f0, 12(r7)
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testf2:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    stfs f0, 12(r7)
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-P9-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %a, i32 2
@@ -202,21 +201,23 @@
 define <4 x float> @testf3(<4 x float> returned %a, <4 x float> %b, float* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testf3:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xscvspdpn f0, vs34
-; CHECK-NEXT:    stfs f0, 12(r7)
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testf3:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; CHECK-BE-NEXT:    xscvspdpn f0, vs0
-; CHECK-BE-NEXT:    stfs f0, 12(r7)
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testf3:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    xscvspdpn f0, vs34
-; CHECK-P9-NEXT:    stfs f0, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
 entry:
   %vecext = extractelement <4 x float> %a, i32 3
@@ -229,23 +230,23 @@
 define <4 x i32> @testi0(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testi0:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxswapd vs0, vs34
-; CHECK-NEXT:    mfvsrwz r3, f0
-; CHECK-NEXT:    stw r3, 12(r7)
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testi0:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; CHECK-BE-NEXT:    mfvsrwz r3, f0
-; CHECK-BE-NEXT:    stw r3, 12(r7)
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testi0:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r3, 0
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
-; CHECK-P9-NEXT:    stw r3, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %a, i32 0
@@ -259,21 +260,21 @@
 ; CHECK-LABEL: testi1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-NEXT:    mfvsrwz r3, f0
-; CHECK-NEXT:    stw r3, 12(r7)
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testi1:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mfvsrwz r3, vs34
-; CHECK-BE-NEXT:    stw r3, 12(r7)
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testi1:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r3, 4
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
-; CHECK-P9-NEXT:    stw r3, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %a, i32 1
@@ -286,21 +287,21 @@
 define <4 x i32> @testi2(<4 x i32> returned %a, <4 x i32> %b, i32* nocapture %ap) local_unnamed_addr #0 {
 ; CHECK-LABEL: testi2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mfvsrwz r3, vs34
-; CHECK-NEXT:    stw r3, 12(r7)
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testi2:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
-; CHECK-BE-NEXT:    mfvsrwz r3, f0
-; CHECK-BE-NEXT:    stw r3, 12(r7)
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testi2:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    mfvsrwz r3, vs34
-; CHECK-P9-NEXT:    stw r3, 12(r7)
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stxsiwx vs34, 0, r3
 ; CHECK-P9-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %a, i32 2
@@ -314,22 +315,22 @@
 ; CHECK-LABEL: testi3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 3
-; CHECK-NEXT:    mfvsrwz r3, f0
-; CHECK-NEXT:    stw r3, 12(r7)
+; CHECK-NEXT:    addi r3, r7, 12
+; CHECK-NEXT:    stfiwx f0, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: testi3:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxswapd vs0, vs34
-; CHECK-BE-NEXT:    mfvsrwz r3, f0
-; CHECK-BE-NEXT:    stw r3, 12(r7)
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-BE-NEXT:    addi r3, r7, 12
+; CHECK-BE-NEXT:    stfiwx f0, 0, r3
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: testi3:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    li r3, 12
-; CHECK-P9-NEXT:    vextuwrx r3, r3, v2
-; CHECK-P9-NEXT:    stw r3, 12(r7)
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-NEXT:    addi r3, r7, 12
+; CHECK-P9-NEXT:    stfiwx f0, 0, r3
 ; CHECK-P9-NEXT:    blr
 entry:
   %vecext = extractelement <4 x i32> %a, i32 3
Index: test/CodeGen/PowerPC/scalar_vector_test_2.ll
===================================================================
--- test/CodeGen/PowerPC/scalar_vector_test_2.ll
+++ test/CodeGen/PowerPC/scalar_vector_test_2.ll
@@ -15,9 +15,8 @@
 ; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
 ; P9LE-NEXT:    xxpermdi vs1, f1, f1, 2
 ; P9LE-NEXT:    xvaddsp vs0, vs0, vs1
-; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
-; P9LE-NEXT:    xscvspdpn f0, vs0
-; P9LE-NEXT:    stfs f0, 0(r5)
+; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 2
+; P9LE-NEXT:    stfiwx f0, 0, r5
 ; P9LE-NEXT:    blr
 
 ; P9BE-LABEL: test_liwzx1:
@@ -27,8 +26,8 @@
 ; P9BE-NEXT:    xxsldwi vs0, f0, f0, 1
 ; P9BE-NEXT:    xxsldwi vs1, f1, f1, 1
 ; P9BE-NEXT:    xvaddsp vs0, vs0, vs1
-; P9BE-NEXT:    xscvspdpn f0, vs0
-; P9BE-NEXT:    stfs f0, 0(r5)
+; P9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; P9BE-NEXT:    stfiwx f0, 0, r5
 ; P9BE-NEXT:    blr
 
 ; P8LE-LABEL: test_liwzx1:
@@ -38,9 +37,8 @@
 ; P8LE-NEXT:    xxpermdi vs0, f0, f0, 2
 ; P8LE-NEXT:    xxpermdi vs1, f1, f1, 2
 ; P8LE-NEXT:    xvaddsp vs0, vs0, vs1
-; P8LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
-; P8LE-NEXT:    xscvspdpn f0, vs0
-; P8LE-NEXT:    stfsx f0, 0, r5
+; P8LE-NEXT:    xxsldwi vs0, vs0, vs0, 2
+; P8LE-NEXT:    stfiwx f0, 0, r5
 ; P8LE-NEXT:    blr
 
 ; P8BE-LABEL: test_liwzx1:
@@ -50,8 +48,8 @@
 ; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
 ; P8BE-NEXT:    xxsldwi vs1, f1, f1, 1
 ; P8BE-NEXT:    xvaddsp vs0, vs0, vs1
-; P8BE-NEXT:    xscvspdpn f0, vs0
-; P8BE-NEXT:    stfsx f0, 0, r5
+; P8BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; P8BE-NEXT:    stfiwx f0, 0, r5
 ; P8BE-NEXT:    blr
   %a = load <1 x float>, <1 x float>* %A
   %b = load <1 x float>, <1 x float>* %B
@@ -69,9 +67,8 @@
 ; P9LE-NEXT:    xxpermdi vs0, f0, f0, 2
 ; P9LE-NEXT:    xxpermdi vs1, f1, f1, 2
 ; P9LE-NEXT:    xvsubsp vs0, vs0, vs1
-; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
-; P9LE-NEXT:    xscvspdpn f0, vs0
-; P9LE-NEXT:    stfs f0, 0(r5)
+; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 2
+; P9LE-NEXT:    stfiwx f0, 0, r5
 ; P9LE-NEXT:    blr
 
 ; P9BE-LABEL: test_liwzx2:
@@ -82,8 +79,8 @@
 ; P9BE-NEXT:    xxsldwi vs0, f0, f0, 1
 ; P9BE-NEXT:    xxsldwi vs1, f1, f1, 1
 ; P9BE-NEXT:    xvsubsp vs0, vs0, vs1
-; P9BE-NEXT:    xscvspdpn f0, vs0
-; P9BE-NEXT:    stfs f0, 0(r5)
+; P9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; P9BE-NEXT:    stfiwx f0, 0, r5
 ; P9BE-NEXT:    blr
 
 ; P8LE-LABEL: test_liwzx2:
@@ -94,9 +91,8 @@
 ; P8LE-NEXT:    xxpermdi vs0, f0, f0, 2
 ; P8LE-NEXT:    xxpermdi vs1, f1, f1, 2
 ; P8LE-NEXT:    xvsubsp vs0, vs0, vs1
-; P8LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
-; P8LE-NEXT:    xscvspdpn f0, vs0
-; P8LE-NEXT:    stfsx f0, 0, r5
+; P8LE-NEXT:    xxsldwi vs0, vs0, vs0, 2
+; P8LE-NEXT:    stfiwx f0, 0, r5
 ; P8LE-NEXT:    blr
 
 ; P8BE-LABEL: test_liwzx2:
@@ -107,8 +103,8 @@
 ; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
 ; P8BE-NEXT:    xxsldwi vs1, f1, f1, 1
 ; P8BE-NEXT:    xvsubsp vs0, vs0, vs1
-; P8BE-NEXT:    xscvspdpn f0, vs0
-; P8BE-NEXT:    stfsx f0, 0, r5
+; P8BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
+; P8BE-NEXT:    stfiwx f0, 0, r5
 ; P8BE-NEXT:    blr
   %a = load <1 x float>, <1 x float>* %A
   %b = load <1 x float>, <1 x float>* %B
Index: test/CodeGen/PowerPC/store_fptoi.ll
===================================================================
--- test/CodeGen/PowerPC/store_fptoi.ll
+++ test/CodeGen/PowerPC/store_fptoi.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: dpConv2sdw
 ; CHECK: lfd [[LD:[0-9]+]], 0(3)
 ; CHECK: xscvdpsxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsd [[CONV]], 0(4)
+; CHECK-NEXT: stfd [[CONV]], 0(4)
 ; CHECK-NEXT: blr
 
 ; CHECK-PWR8-LABEL: dpConv2sdw
@@ -104,7 +104,7 @@
 ; CHECK-LABEL: spConv2sdw
 ; CHECK: lfs [[LD:[0-9]+]], 0(3)
 ; CHECK-NEXT: xscvdpsxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsd [[CONV]], 0(4)
+; CHECK-NEXT: stfd [[CONV]], 0(4)
 ; CHECK-NEXT: blr
 
 ; CHECK-PWR8-LABEL: spConv2sdw
@@ -402,7 +402,7 @@
 ; CHECK-LABEL: dpConv2udw
 ; CHECK: lfd [[LD:[0-9]+]], 0(3)
 ; CHECK: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsd [[CONV]], 0(4)
+; CHECK-NEXT: stfd [[CONV]], 0(4)
 ; CHECK-NEXT: blr
 
 ; CHECK-PWR8-LABEL: dpConv2udw
@@ -488,7 +488,7 @@
 ; CHECK-LABEL: spConv2udw
 ; CHECK: lfs [[LD:[0-9]+]], 0(3)
 ; CHECK-NEXT: xscvdpuxds [[CONV:[0-9]+]], [[LD]]
-; CHECK-NEXT: stxsd [[CONV]], 0(4)
+; CHECK-NEXT: stfd [[CONV]], 0(4)
 ; CHECK-NEXT: blr
 
 ; CHECK-PWR8-LABEL: spConv2udw