Index: lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
===================================================================
--- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -83,6 +83,16 @@
   PPC::F24, PPC::F25, PPC::F26, PPC::F27,
   PPC::F28, PPC::F29, PPC::F30, PPC::F31
 };
+static const MCPhysReg VFRegs[32] = { // VFRegs[i] is PPC::VF<i>
+  PPC::VF0,  PPC::VF1,  PPC::VF2,  PPC::VF3,
+  PPC::VF4,  PPC::VF5,  PPC::VF6,  PPC::VF7,
+  PPC::VF8,  PPC::VF9,  PPC::VF10, PPC::VF11,
+  PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+  PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+  PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+  PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+  PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
 static const MCPhysReg VRegs[32] = {
   PPC::V0,  PPC::V1,  PPC::V2,  PPC::V3,
   PPC::V4,  PPC::V5,  PPC::V6,  PPC::V7,
@@ -590,6 +600,11 @@
     Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
   }
 
+  void addRegVFRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(VFRegs[getReg()]));
+  }
+
   void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(VRegs[getReg()]));
Index: lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
===================================================================
--- lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -89,6 +89,17 @@
   PPC::F28, PPC::F29, PPC::F30, PPC::F31
 };
 
+static const unsigned VFRegs[] = { // VFRegs[i] is PPC::VF<i>
+  PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+  PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+  PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+  PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+  PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+  PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+  PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+  PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
 static const unsigned VRegs[] = {
   PPC::V0, PPC::V1, PPC::V2, PPC::V3,
   PPC::V4, PPC::V5, PPC::V6, PPC::V7,
@@ -242,6 +253,12 @@
   return decodeRegisterClass(Inst, RegNo, FRegs);
 }
 
+static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder) {
+  return decodeRegisterClass(Inst, RegNo, VFRegs);
+}
+
 static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                             uint64_t Address,
                                             const void *Decoder) {
Index: lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
===================================================================
--- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -10,8 +10,8 @@
 // This class prints an PPC MCInst to a .s file.
 //
 //===----------------------------------------------------------------------===//
-
 #include "PPCInstPrinter.h"
+#include "PPCInstrInfo.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCPredicates.h"
 #include "llvm/MC/MCExpr.h"
@@ -429,11 +429,28 @@
   return RegName;
 }
 
+static bool isVFRegister(unsigned Reg) {
+  return PPC::VF0 <= Reg && Reg <= PPC::VF31; // Reg in [VF0, VF31]
+}
+
 void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
                                   raw_ostream &O) {
   const MCOperand &Op = MI->getOperand(OpNo);
   if (Op.isReg()) {
-    const char *RegName = getRegisterName(Op.getReg());
+    unsigned Reg = Op.getReg();
+
+    // If this is a VSX instruction that uses a 64-bit Altivec register (we use
+    // VFRC to represent 64-bit Altivec registers), we need to upgrade VFRC to
+    // VRRC (the full 128-bit Altivec register).
+    // (Please keep this in sync with PPCAsmPrinter::printOperand.)
+    if (isVFRegister(Reg) &&
+        MII.get(MI->getOpcode()).TSFlags & PPCII::VsxUseAltivecReg) {
+      const MCRegisterClass *VRRC = &MRI.getRegClass(PPC::VRRCRegClassID);
+      Reg = MRI.getMatchingSuperReg(Reg, PPC::sub_64, VRRC);
+      assert(Reg && "VFRC's super register should include VRRC");
+    }
+
+    const char *RegName = getRegisterName(Reg);
     // The linux and AIX assembler does not take register prefixes.
     if (!isDarwinSyntax())
       RegName = stripRegisterPrefix(RegName);
Index: lib/Target/PowerPC/PPCAsmPrinter.cpp
===================================================================
--- lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -164,6 +164,10 @@
   return RegName;
 }
 
+static bool isVFRegister(unsigned Reg) {
+  return PPC::VF0 <= Reg && Reg <= PPC::VF31; // Reg in [VF0, VF31]
+}
+
 void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
                                  raw_ostream &O) {
   const DataLayout &DL = getDataLayout();
@@ -171,7 +175,22 @@
 
   switch (MO.getType()) {
   case MachineOperand::MO_Register: {
-    const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+    unsigned Reg = MO.getReg();
+
+    // If this is a VSX instruction that uses a 64-bit Altivec register (we use
+    // VFRC to represent 64-bit Altivec registers), we need to upgrade VFRC to
+    // VRRC (the full 128-bit Altivec register).
+    // (Please keep this in sync with PPCInstPrinter::printOperand.)
+    if (isVFRegister(Reg) &&
+        MI->getDesc().TSFlags & PPCII::VsxUseAltivecReg) {
+      const TargetRegisterClass *VRRC =
+        Subtarget->getRegisterInfo()->getRegClass(PPC::VRRCRegClassID);
+      Reg = Subtarget->getRegisterInfo()
+                     ->getMatchingSuperReg(Reg, PPC::sub_64, VRRC);
+      assert(Reg && "VFRC's super register should include VRRC");
+    }
+    const char *RegName = PPCInstPrinter::getRegisterName(Reg);
+
     // Linux assembler (Others?) does not take register mnemonics.
     // FIXME - What about special registers used in mfspr/mtspr?
     if (!Subtarget->isDarwin())
Index: lib/Target/PowerPC/PPCInstrFormats.td
===================================================================
--- lib/Target/PowerPC/PPCInstrFormats.td
+++ lib/Target/PowerPC/PPCInstrFormats.td
@@ -38,6 +38,10 @@
   let TSFlags{2}   = PPC970_Cracked;
   let TSFlags{5-3} = PPC970_Unit;
 
+  /// Vsx instruction which uses altivec register
+  bits<1> VsxUseAltivecReg = 0;
+  let TSFlags{6}   = VsxUseAltivecReg;
+
   // Fields used for relation models.
   string BaseName = "";
 
Index: lib/Target/PowerPC/PPCInstrInfo.h
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.h
+++ lib/Target/PowerPC/PPCInstrInfo.h
@@ -61,6 +61,14 @@
   PPC970_VPERM  = 6 << PPC970_Shift,   // Vector Permute Unit
   PPC970_BRU    = 7 << PPC970_Shift    // Branch Unit
 };
+
+enum {
+  // Shift count to skip past the PPC970 flag bits
+  NewDef_Shift = 6,
+
+  // VSX instruction that uses an Altivec register (TSFlags bit 6)
+  VsxUseAltivecReg = 0x1 << NewDef_Shift
+};
 } // end namespace PPCII
 
 class PPCSubtarget;
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -47,6 +47,13 @@
   let ParserMatchClass = PPCRegVSSRCAsmOperand;
 }
 
+def PPCRegVFRCAsmOperand : AsmOperandClass {
+  let Name = "RegVFRC"; let PredicateMethod = "isRegNumber";
+}
+def vfrc : RegisterOperand<VFRC> {
+  let ParserMatchClass = PPCRegVFRCAsmOperand;
+}
+
 // Little-endian-specific nodes.
 def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
   SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
@@ -2090,12 +2097,14 @@
   // Load Vector
   def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
                             "lxv $XT, $src", IIC_LdStLFD, []>;
+  let VsxUseAltivecReg = 1 in {
   // Load DWord
-  def LXSD  : DSForm_1<57, 2, (outs vrrc:$vD), (ins memrix:$src),
+  def LXSD  : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
                        "lxsd $vD, $src", IIC_LdStLFD, []>;
   // Load SP from src, convert it to DP, and place in dword[0]
-  def LXSSP : DSForm_1<57, 3, (outs vrrc:$vD), (ins memrix:$src),
+  def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src),
                        "lxssp $vD, $src", IIC_LdStLFD, []>;
+  } // end VsxUseAltivecReg
 
   // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different
   // "out" and "in" dag
@@ -2127,12 +2136,14 @@
   // Store Vector
   def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
                              "stxv $XT, $dst", IIC_LdStSTFD, []>;
+  let VsxUseAltivecReg = 1 in {
   // Store DWord
-  def STXSD  : DSForm_1<61, 2, (outs), (ins vrrc:$vS, memrix:$dst),
+  def STXSD  : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
                         "stxsd $vS, $dst", IIC_LdStSTFD, []>;
   // Convert DP of dword[0] to SP, and Store to dst
-  def STXSSP : DSForm_1<61, 3, (outs), (ins vrrc:$vS, memrix:$dst),
+  def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst),
                         "stxssp $vS, $dst", IIC_LdStSTFD, []>;
+  } // end VsxUseAltivecReg
 
   // [PO S RA RB XO SX]
   class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
@@ -2155,4 +2166,16 @@
   def STXVL    : X_XS6_RA5_RB5<31,  397, "stxvl"   , vsrc, []>;
   def STXVLL   : X_XS6_RA5_RB5<31,  429, "stxvll"  , vsrc, []>;
   } // end mayStore
+
+  // Prefer Power9 (aka Power ISA v3.0) instructions
+  let AddedComplexity = 500 in {
+    def : Pat<(f64 (load iaddr:$src)), (LXSD  iaddr:$src)>;
+    def : Pat<(f32 (load iaddr:$src)),
+            (COPY_TO_REGCLASS (LXSSP iaddr:$src), VFRC)>;
+    def : Pat<(f64 (extloadf32 iaddr:$src)),
+            (COPY_TO_REGCLASS (LXSSP iaddr:$src), VFRC)>;
+    def : Pat<(store f64:$vS, iaddr:$dst), (STXSD $vS, iaddr:$dst)>;
+    def : Pat<(store f32:$vS, iaddr:$dst),
+            (STXSSP (COPY_TO_REGCLASS $vS, VFRC), iaddr:$dst)>;
+  }
 } // end HasP9Vector
Index: test/CodeGen/PowerPC/dform-test.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/dform-test.ll
@@ -0,0 +1,201 @@
+; RUN: llc < %s -march=ppc64 -mcpu=pwr9 -o - | FileCheck %s --check-prefix=PWR9 --check-prefix=CHECK
+; RUN: llc < %s -march=ppc64 -mcpu=pwr8 -o - | FileCheck %s --check-prefix=PWR8 --check-prefix=CHECK
+
+; CHECK-LABEL: LXSD:
+define void @LXSD(i32 zeroext %N) {
+entry:
+  %cmp17 = icmp eq i32 %N, 0
+  br i1 %cmp17, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+; v0 = vsx32
+; PWR9-DAG: lxsd 0
+; PWR9-DAG: lxsd 2
+; PWR9-DAG: lxsd 3
+; PWR9-DAG: lxsd 4
+; PWR9-DAG: lxssp 5
+; PWR9-DAG: xxmrghd 34, 37, 34
+; PWR9-DAG: xxmrghd 35, 37, 35
+; PWR9-DAG: xxmrghd 36, 37, 36
+; PWR9-DAG: xxmrghd 37, 37, 32
+; PWR8-NOT: lxsd{{[^x]}}
+; PWR8-NOT: lxssp{{[^x]}}
+  %i.018 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %call = tail call double* @getDoublePtr()
+  %0 = load double, double* %call, align 8
+  %vecinit = insertelement <2 x double> <double 0.000000e+00, double undef>, double %0, i32 1
+  %arrayidx1 = getelementptr inbounds double, double* %call, i64 1
+  %1 = load double, double* %arrayidx1, align 8
+  %vecinit2 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %1, i32 1
+  %arrayidx3 = getelementptr inbounds double, double* %call, i64 2
+  %2 = load double, double* %arrayidx3, align 8
+  %vecinit4 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %2, i32 1
+  %arrayidx5 = getelementptr inbounds double, double* %call, i64 3
+  %3 = load double, double* %arrayidx5, align 8
+  %vecinit6 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %3, i32 1
+  tail call void @passVSX(<2 x double> %vecinit, <2 x double> %vecinit2, <2 x double> %vecinit4, <2 x double> %vecinit6)
+  %inc = add nuw nsw i32 %i.018, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare double* @getDoublePtr()
+
+; CHECK-LABEL: LXSSP:
+define void @LXSSP(i32 zeroext %N) #0 {
+entry:
+  %cmp20 = icmp eq i32 %N, 0
+  br i1 %cmp20, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+
+; PWR9-DAG: lxssp 0
+; PWR9-DAG: lxssp 2
+; PWR9-DAG: lxssp 3
+; PWR9-DAG: lxssp 4
+; PWR9-DAG: lxssp 5
+; PWR9-DAG: xxmrghd 34, 37, 34
+; PWR9-DAG: xxmrghd 35, 37, 35
+; PWR9-DAG: xxmrghd 36, 37, 36
+; PWR9-DAG: xxmrghd 37, 37, 32
+; PWR8-NOT: lxssp{{[^x]}}
+  %i.021 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %call = tail call float* @getFloatPtr()
+  %0 = load float, float* %call, align 4
+  %conv = fpext float %0 to double
+  %vecinit = insertelement <2 x double> <double 0.000000e+00, double undef>, double %conv, i32 1
+  %arrayidx1 = getelementptr inbounds float, float* %call, i64 1
+  %1 = load float, float* %arrayidx1, align 4
+  %conv2 = fpext float %1 to double
+  %vecinit3 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %conv2, i32 1
+  %arrayidx4 = getelementptr inbounds float, float* %call, i64 2
+  %2 = load float, float* %arrayidx4, align 4
+  %conv5 = fpext float %2 to double
+  %vecinit6 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %conv5, i32 1
+  %arrayidx7 = getelementptr inbounds float, float* %call, i64 3
+  %3 = load float, float* %arrayidx7, align 4
+  %conv8 = fpext float %3 to double
+  %vecinit9 = insertelement <2 x double> <double 0.000000e+00, double undef>, double %conv8, i32 1
+  tail call void @passVSX(<2 x double> %vecinit, <2 x double> %vecinit3, <2 x double> %vecinit6, <2 x double> %vecinit9)
+  %inc = add nuw nsw i32 %i.021, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare float* @getFloatPtr()
+
+; CHECK-LABEL: STXSD:
+define void @STXSD(i32 zeroext %N) {
+entry:
+  %cmp17 = icmp eq i32 %N, 0
+  br i1 %cmp17, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+; PWR9: stxsd{{[^x]}}
+; PWR9: stxsd{{[^x]}}
+; PWR9: stxsd{{[^x]}}
+; PWR9: stxsd{{[^x]}}
+; PWR8-NOT: stxsd{{[^x]}}
+  %i.018 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %call = tail call double* @getDoublePtr()
+  %call1 = tail call <2 x double> @getVSX()
+  %vecext = extractelement <2 x double> %call1, i32 0
+  store double %vecext, double* %call, align 8
+  %call2 = tail call <2 x double> @getVSX()
+  %vecext3 = extractelement <2 x double> %call2, i32 0
+  %arrayidx4 = getelementptr inbounds double, double* %call, i64 1
+  store double %vecext3, double* %arrayidx4, align 8
+  %call5 = tail call <2 x double> @getVSX()
+  %vecext6 = extractelement <2 x double> %call5, i32 0
+  %arrayidx7 = getelementptr inbounds double, double* %call, i64 2
+  store double %vecext6, double* %arrayidx7, align 8
+  %call8 = tail call <2 x double> @getVSX()
+  %vecext9 = extractelement <2 x double> %call8, i32 0
+  %arrayidx10 = getelementptr inbounds double, double* %call, i64 3
+  store double %vecext9, double* %arrayidx10, align 8
+  %inc = add nuw nsw i32 %i.018, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare <2 x double> @getVSX()
+; CHECK-LABEL: STXSSP:
+define void @STXSSP(i32 zeroext %N) {
+entry:
+  %cmp20 = icmp eq i32 %N, 0
+  br i1 %cmp20, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+; v19 = vsx51
+; PWR9: xxlor 51
+; PWR9: stxssp 19
+; PWR9: xxlor 51
+; PWR9: stxssp 19
+; PWR9: xxlor 51
+; PWR9: stxssp 19
+; PWR9: xxlor 51
+; PWR9: stxssp 19
+; PWR8-NOT: stxssp{{[^x]}}
+  %i.021 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %call = tail call float* @getFloatPtr()
+  %call1 = tail call <2 x double> @getVSX()
+  %vecext = extractelement <2 x double> %call1, i32 0
+  %conv = fptrunc double %vecext to float
+  store float %conv, float* %call, align 4
+  %call2 = tail call <2 x double> @getVSX()
+  %vecext3 = extractelement <2 x double> %call2, i32 0
+  %conv4 = fptrunc double %vecext3 to float
+  %arrayidx5 = getelementptr inbounds float, float* %call, i64 1
+  store float %conv4, float* %arrayidx5, align 4
+  %call6 = tail call <2 x double> @getVSX()
+  %vecext7 = extractelement <2 x double> %call6, i32 0
+  %conv8 = fptrunc double %vecext7 to float
+  %arrayidx9 = getelementptr inbounds float, float* %call, i64 2
+  store float %conv8, float* %arrayidx9, align 4
+  %call10 = tail call <2 x double> @getVSX()
+  %vecext11 = extractelement <2 x double> %call10, i32 0
+  %conv12 = fptrunc double %vecext11 to float
+  %arrayidx13 = getelementptr inbounds float, float* %call, i64 3
+  store float %conv12, float* %arrayidx13, align 4
+  %inc = add nuw nsw i32 %i.021, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare void @passVSX(<2 x double>, <2 x double>, <2 x double>, <2 x double>)