Index: lib/Target/Mips/MipsSEISelLowering.cpp
===================================================================
--- lib/Target/Mips/MipsSEISelLowering.cpp
+++ lib/Target/Mips/MipsSEISelLowering.cpp
@@ -3592,9 +3592,18 @@
   const bool UsingMips32 = RC == &Mips::GPR32RegClass;
-  unsigned Rs = RegInfo.createVirtualRegister(RC);
+  unsigned Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
 
   BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
+  if (!UsingMips32) {
+    // SH64 stores from a GPR64, so widen the GPR32 written by COPY_U_H.
+    unsigned Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
+    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
+        .addImm(0)
+        .addReg(Rs)
+        .addImm(Mips::sub_32);
+    Rs = Tmp;
+  }
   BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
       .addReg(Rs)
       .addReg(Rt)
       .addImm(Imm)
       .addMemOperand(BB->getParent()->getMachineMemOperand(
@@ -3642,8 +3651,12 @@
       BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
   for (unsigned i = 1; i < MI.getNumOperands(); i++)
     MIB.add(MI.getOperand(i));
 
-  BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
+  unsigned Tmp =
+      UsingMips32 ? Rt : RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
+  if (!UsingMips32) // FILL_H reads a GPR32: hand it the low half of Rt.
+    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
+  BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Tmp);
 
   MI.eraseFromParent();
   return BB;
@@ -3710,6 +3723,7 @@
   assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
 
   bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
+  bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
 
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL = MI.getDebugLoc();
@@ -3725,11 +3739,20 @@
 
   // Perform the register class copy as mentioned above.
   unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
-  BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
+  unsigned MFC1Src = Fs;
+  if (IsFGR64onMips32) {
+    // MFC1 is given an FGR32 source here rather than the FGR64 Fs.
+    // NOTE(review): CVT_S_D64 is a double-to-single value conversion, not a
+    // move of the low 32 bits of Fs, so the word filled into Wtemp is no
+    // longer the low half of the double. Confirm this is intended.
+    MFC1Src = RegInfo.createVirtualRegister(&Mips::FGR32RegClass);
+    BuildMI(*BB, MI, DL, TII->get(Mips::CVT_S_D64), MFC1Src).addReg(Fs);
+  }
+  BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(MFC1Src);
   BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
   unsigned WPHI = Wtemp;
 
-  if (!Subtarget.hasMips64() && IsFGR64) {
+  if (IsFGR64onMips32) {
     unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
     BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
     unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
@@ -3838,7 +3861,7 @@
   // Perform the safety regclass copy mentioned above.
   unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
   unsigned FPRPHI = IsFGR64onMips32
-                        ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
+                        ? RegInfo.createVirtualRegister(&Mips::FGR32RegClass)
                         : Fd;
   BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
   BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
@@ -3848,8 +3871,14 @@
     BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
         .addReg(WPHI)
         .addImm(1);
+    // MTHC1_D64 is given an FGR64 input here, derived from the FGR32 FPRPHI.
+    // NOTE(review): CVT_D64_S is a single-to-double value conversion, so the
+    // low word of Rtemp3 is that of the converted value, not necessarily the
+    // word MTC1 moved into FPRPHI. Confirm this is intended.
+    unsigned Rtemp3 = RegInfo.createVirtualRegister(&Mips::FGR64RegClass);
+    BuildMI(*BB, MI, DL, TII->get(Mips::CVT_D64_S), Rtemp3).addReg(FPRPHI);
     BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
-        .addReg(FPRPHI)
+        .addReg(Rtemp3)
         .addReg(Rtemp2);
   }
 
Index: test/CodeGen/Mips/msa/f16-llvm-ir.ll
===================================================================
--- test/CodeGen/Mips/msa/f16-llvm-ir.ll
+++ test/CodeGen/Mips/msa/f16-llvm-ir.ll
@@ -1,21 +1,21 @@
 ; RUN: llc -relocation-model=pic -march=mipsel -mcpu=mips32r5 \
-; RUN:     -mattr=+fp64,+msa < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS32,MIPSR5,MIPS32-O32,MIPS32R5-O32
 ; RUN: llc -relocation-model=pic -march=mips64el -mcpu=mips64r5 \
-; RUN:     -mattr=+fp64,+msa -target-abi n32 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N32,MIPS64R5-N32
 ; RUN: llc -relocation-model=pic -march=mips64el -mcpu=mips64r5 \
-; RUN:     -mattr=+fp64,+msa -target-abi n64 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N64,MIPS64R5-N64
 
 ; RUN: llc -relocation-model=pic -march=mipsel -mcpu=mips32r6 \
-; RUN:     -mattr=+fp64,+msa < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS32,MIPSR6,MIPSR6-O32
 ; RUN: llc -relocation-model=pic -march=mips64el -mcpu=mips64r6 \
-; RUN:     -mattr=+fp64,+msa -target-abi n32 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N32,MIPSR6-N32
 ; RUN: llc -relocation-model=pic -march=mips64el -mcpu=mips64r6 \
-; RUN:     -mattr=+fp64,+msa -target-abi n64 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N64,MIPSR6-N64
 
 
@@ -83,7 +83,8 @@
 ; MIPS32: copy_s.w $[[R1:[0-9]+]], $w[[W2]][0]
 ; MIPS32: mtc1 $[[R1]], $f[[F0:[0-9]+]]
 ; MIPS32: copy_s.w $[[R2:[0-9]+]], $w[[W2]][1]
-; MIPS32: mthc1 $[[R2]], $f[[F0]]
+; MIPS32: cvt.d.s $f[[F1:[0-9]+]], $f[[F0]]
+; MIPS32: mthc1 $[[R2]], $f[[F1]]
 ; MIPS64: copy_s.d $[[R2:[0-9]+]], $w[[W2]][0]
 ; MIPS64: dmtc1 $[[R2]], $f[[F0:[0-9]+]]
 
@@ -91,11 +92,13 @@
   %3 = fpext half %2 to double
   %add = fadd double %1, %3
 
-; ALL: add.d $f[[F1:[0-9]+]], $f[[F0]], $f[[F0]]
+; MIPS32: add.d $f[[F2:[0-9]+]], $f[[F1]], $f[[F1]]
+; MIPS64: add.d $f[[F1:[0-9]+]], $f[[F0]], $f[[F0]]
 
   %4 = fptrunc double %add to half
 
-; MIPS32: mfc1 $[[R2:[0-9]+]], $f[[F1]]
+; MIPS32: cvt.s.d $f[[F3:[0-9]+]], $f[[F2]]
+; MIPS32: mfc1 $[[R2:[0-9]+]], $f[[F3]]
 ; MIPS32: fill.w $w[[W2:[0-9]+]], $[[R2]]
-; MIPS32: mfhc1 $[[R3:[0-9]+]], $f[[F1]]
+; MIPS32: mfhc1 $[[R3:[0-9]+]], $f[[F2]]
 ; MIPS32: insert.w $w[[W2]][1], $[[R3]]
@@ -138,25 +141,31 @@
 ; MIPS32:       copy_s.w $[[R3:[0-9]+]], $w[[W5]][0]
 ; MIPS32:       mtc1 $[[R3]], $f[[F3:[0-9]+]]
 ; MIPS32:       copy_s.w $[[R4:[0-9]+]], $w[[W5]][1]
-; MIPS32:       mthc1 $[[R3]], $f[[F3]]
+; MIPS32:       cvt.d.s $f[[F4:[0-9]+]], $f[[F3]]
+; MIPS32:       mthc1 $[[R4]], $f[[F4]]
 
 ; MIPS64:       copy_s.d $[[R2:[0-9]+]], $w[[W2]][0]
 ; MIPS64:       dmtc1 $[[R2]], $f[[F3:[0-9]+]]
 
-; ALL:          trunc.w.d $f[[F4:[0-9]+]], $f[[F3]]
-; ALL:          mfc1 $[[R4:[0-9]+]], $f[[F4]]
+; MIPS32:       trunc.w.d $f[[F5:[0-9]+]], $f[[F4]]
+; MIPS32:       mfc1 $[[R4:[0-9]+]], $f[[F5]]
+; MIPS64:       trunc.w.d $f[[F4:[0-9]+]], $f[[F3]]
+; MIPS64:       mfc1 $[[R4:[0-9]+]], $f[[F4]]
 ; ALL:          fexupr.d $w[[W6:[0-9]+]], $w[[W1]]
 
 ; MIPS32:       copy_s.w $[[R5:[0-9]+]], $w[[W6]][0]
-; MIPS32:       mtc1 $[[R5]], $f[[F5:[0-9]+]]
+; MIPS32:       mtc1 $[[R5]], $f[[F6:[0-9]+]]
 ; MIPS32:       copy_s.w $[[R6:[0-9]+]], $w[[W6]][1]
-; MIPS32:       mthc1 $[[R6]], $f[[F5]]
+; MIPS32:       cvt.d.s $f[[F7:[0-9]+]], $f[[F6]]
+; MIPS32:       mthc1 $[[R6]], $f[[F7]]
 
 ; MIPS64:       copy_s.d $[[R2:[0-9]+]], $w[[W2]][0]
 ; MIPS64:       dmtc1 $[[R2]], $f[[F5:[0-9]+]]
 
-; ALL:          trunc.w.d $f[[F6:[0-9]]], $f[[F5]]
-; ALL:          mfc1 $[[R7:[0-9]]], $f[[F6]]
+; MIPS32:       trunc.w.d $f[[F8:[0-9]+]], $f[[F7]]
+; MIPS32:       mfc1 $[[R7:[0-9]+]], $f[[F8]]
+; MIPS64:       trunc.w.d $f[[F6:[0-9]+]], $f[[F5]]
+; MIPS64:       mfc1 $[[R7:[0-9]+]], $f[[F6]]
 
 ; MIPS32R5-O32: lw $[[R13:[0-9]+]], %got($CPI{{[0-9]+}}_{{[0-9]+}})
 ; MIPS32R5-O32: addiu $[[R14:[0-9]+]], $[[R13]], %lo($CPI{{[0-9]+}}_{{[0-9]+}})
@@ -201,13 +210,16 @@
 ; MIPS32: copy_s.w $[[R2:[0-9]+]], $w[[W2]][0]
 ; MIPS32: mtc1 $[[R2]], $f[[F0:[0-9]+]]
 ; MIPS32: copy_s.w $[[R3:[0-9]+]], $w[[W2]][1]
-; MIPS32: mthc1 $[[R3]], $f[[F0]]
+; MIPS32: cvt.d.s $f[[F1:[0-9]+]], $f[[F0]]
+; MIPS32: mthc1 $[[R3]], $f[[F1]]
 
 ; MIPS64: copy_s.d $[[R2:[0-9]+]], $w[[W2]][0]
 ; MIPS64: dmtc1 $[[R2]], $f[[F0:[0-9]+]]
 
-; ALL:    trunc.w.d $f[[F1:[0-9]+]], $f[[F0]]
-; ALL:    mfc1 $2, $f[[F1]]
+; MIPS32:    trunc.w.d $f[[F2:[0-9]+]], $f[[F1]]
+; MIPS32:    mfc1 $2, $f[[F2]]
+; MIPS64:    trunc.w.d $f[[F1:[0-9]+]], $f[[F0]]
+; MIPS64:    mfc1 $2, $f[[F1]]
 }
 
 define void @uitofp(i32 %a) {
@@ -228,7 +240,8 @@
 ; MIPSR6-N32: sub.d $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]]
 ; MIPSR6-N64: sub.d $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]]
 
-; MIPS32:     mfc1 $[[R0:[0-9]+]], $f[[F2]]
+; MIPS32:     cvt.s.d $f[[F3:[0-9]+]], $f[[F2]]
+; MIPS32:     mfc1 $[[R0:[0-9]+]], $f[[F3]]
 ; MIPS32:     fill.w $w[[W0:[0-9]+]], $[[R0]]
 ; MIPS32:     mfhc1 $[[R1:[0-9]+]], $f[[F2]]
 ; MIPS32:     insert.w $w[[W0]][1], $[[R1]]