Index: lib/Target/Mips/MipsISelLowering.h
===================================================================
--- lib/Target/Mips/MipsISelLowering.h
+++ lib/Target/Mips/MipsISelLowering.h
@@ -90,6 +90,9 @@
       // Thread Pointer
       ThreadPointer,
 
+      // MSA vector FP multiply-subtract, wd - (ws * wt), as a single node
+      FMS,
+
       // Floating Point Branch Conditional
       FPBrcond,
 
Index: lib/Target/Mips/MipsISelLowering.cpp
===================================================================
--- lib/Target/Mips/MipsISelLowering.cpp
+++ lib/Target/Mips/MipsISelLowering.cpp
@@ -200,6 +200,7 @@
   case MipsISD::Ret:               return "MipsISD::Ret";
   case MipsISD::ERet:              return "MipsISD::ERet";
   case MipsISD::EH_RETURN:         return "MipsISD::EH_RETURN";
+  case MipsISD::FMS:               return "MipsISD::FMS";
   case MipsISD::FPBrcond:          return "MipsISD::FPBrcond";
   case MipsISD::FPCmp:             return "MipsISD::FPCmp";
   case MipsISD::FSELECT:           return "MipsISD::FSELECT";
Index: lib/Target/Mips/MipsInstrInfo.td
===================================================================
--- lib/Target/Mips/MipsInstrInfo.td
+++ lib/Target/Mips/MipsInstrInfo.td
@@ -248,6 +248,10 @@
                            AssemblerPredicate<"!FeatureUseIndirectJumpsHazard">;
 def HasCRC   : Predicate<"Subtarget->hasCRC()">,
                AssemblerPredicate<"FeatureCRC">;
+
+// True only for -fp-contract=fast. TODO: Support FPOpFusion::Standard.
+def AllowFPOpFusion : Predicate<"(TM.Options.AllowFPOpFusion =="
+                                " FPOpFusion::Fast)">;
 //===----------------------------------------------------------------------===//
 // Mips GPR size adjectives.
 // They are mutually exclusive.
Index: lib/Target/Mips/MipsMSAInstrInfo.td
===================================================================
--- lib/Target/Mips/MipsMSAInstrInfo.td
+++ lib/Target/Mips/MipsMSAInstrInfo.td
@@ -46,6 +46,7 @@
 def MipsPCKEV : SDNode<"MipsISD::PCKEV", SDT_ILV>;
 def MipsPCKOD : SDNode<"MipsISD::PCKOD", SDT_ILV>;
 def MipsINSVE : SDNode<"MipsISD::INSVE", SDT_INSVE>;
+def MipsFMS   : SDNode<"MipsISD::FMS", SDTFPTernaryOp>;
 
 def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>;
 def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>;
@@ -369,9 +370,6 @@
                       (or node:$ws, (shl (v2i64 vsplati64_imm_eq_1),
                                          node:$wt))>;
 
-def fms : PatFrag<(ops node:$wd, node:$ws, node:$wt),
-                  (fsub node:$wd, (fmul node:$ws, node:$wt))>;
-
 def muladd : PatFrag<(ops node:$wd, node:$ws, node:$wt),
                      (add node:$wd, (mul node:$ws, node:$wt))>;
 
@@ -2091,8 +2089,8 @@
 class FMIN_A_D_DESC : MSA_3RF_DESC_BASE<"fmin_a.d", int_mips_fmin_a_d,
                                         MSA128DOpnd>;
 
-class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", fms, MSA128WOpnd>;
-class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", fms, MSA128DOpnd>;
+class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", MipsFMS, MSA128WOpnd>;
+class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", MipsFMS, MSA128DOpnd>;
 
 class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", fmul, MSA128WOpnd>;
 class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", fmul, MSA128DOpnd>;
@@ -3135,6 +3133,24 @@
 def FTRUNC_U_W : FTRUNC_U_W_ENC, FTRUNC_U_W_DESC;
 def FTRUNC_U_D : FTRUNC_U_D_ENC, FTRUNC_U_D_DESC;
 
+// Select a separate fmul that feeds an fsub/fadd as a single fmsub/fmadd, but
+// only when the AllowFPOpFusion predicate holds. HasMSA is listed explicitly
+// because these patterns produce MSA-only instructions and the Requires<>
+// list is the only predicate gating written on them.
+def : MipsPat<(fsub MSA128WOpnd:$wd, (fmul MSA128WOpnd:$ws, MSA128WOpnd:$wt)),
+              (FMSUB_W MSA128WOpnd:$wd, MSA128WOpnd:$ws, MSA128WOpnd:$wt)>,
+              Requires<[HasMSA, AllowFPOpFusion]>;
+def : MipsPat<(fsub MSA128DOpnd:$wd, (fmul MSA128DOpnd:$ws, MSA128DOpnd:$wt)),
+              (FMSUB_D MSA128DOpnd:$wd, MSA128DOpnd:$ws, MSA128DOpnd:$wt)>,
+              Requires<[HasMSA, AllowFPOpFusion]>;
+
+def : MipsPat<(fadd MSA128WOpnd:$wd, (fmul MSA128WOpnd:$ws, MSA128WOpnd:$wt)),
+              (FMADD_W MSA128WOpnd:$wd, MSA128WOpnd:$ws, MSA128WOpnd:$wt)>,
+              Requires<[HasMSA, AllowFPOpFusion]>;
+def : MipsPat<(fadd MSA128DOpnd:$wd, (fmul MSA128DOpnd:$ws, MSA128DOpnd:$wt)),
+              (FMADD_D MSA128DOpnd:$wd, MSA128DOpnd:$ws, MSA128DOpnd:$wt)>,
+              Requires<[HasMSA, AllowFPOpFusion]>;
+
 def HADD_S_H : HADD_S_H_ENC, HADD_S_H_DESC;
 def HADD_S_W : HADD_S_W_ENC, HADD_S_W_DESC;
 def HADD_S_D : HADD_S_D_ENC, HADD_S_D_DESC;
Index: lib/Target/Mips/MipsSEISelLowering.cpp
===================================================================
--- lib/Target/Mips/MipsSEISelLowering.cpp
+++ lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1858,10 +1858,8 @@
   case Intrinsic::mips_fmsub_w:
   case Intrinsic::mips_fmsub_d: {
     // TODO: If intrinsics have fast-math-flags, propagate them.
-    EVT ResTy = Op->getValueType(0);
-    return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1),
-                       DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy,
-                                   Op->getOperand(2), Op->getOperand(3)));
+    return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0),
+                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
   }
   case Intrinsic::mips_frint_w:
   case Intrinsic::mips_frint_d:
Index: test/CodeGen/Mips/fp-contract.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Mips/fp-contract.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-CONTRACT-OFF
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 -fp-contract=off < %s | FileCheck %s --check-prefixes=CHECK,CHECK-CONTRACT-OFF
+; RUN: llc -march=mips -mattr=+msa,+fp64 -fp-contract=fast < %s | FileCheck %s --check-prefixes=CHECK,CHECK-CONTRACT-FAST
+
+; Check that a separate MSA fmul followed by fadd/fsub is contracted into
+; fmadd.w/fmsub.w only when -fp-contract=fast is given.
+
+declare <4 x float> @llvm.mips.fmul.w(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.mips.fsub.w(<4 x float>, <4 x float>)
+
+; CHECK-LABEL: try_fuse_A:
+define dso_local void @try_fuse_A(<4 x float>* noalias nocapture sret %agg.result, <4 x float>* nocapture readonly dereferenceable(16) %acc, <4 x float>* nocapture readonly dereferenceable(16) %a, <4 x float>* nocapture readonly dereferenceable(16) %b) local_unnamed_addr {
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16, !tbaa !2
+  %1 = load <4 x float>, <4 x float>* %b, align 16, !tbaa !2
+  %2 = tail call <4 x float> @llvm.mips.fmul.w(<4 x float> %0, <4 x float> %1)
+  %3 = load <4 x float>, <4 x float>* %acc, align 16, !tbaa !2
+  %4 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %3, <4 x float> %2)
+  store <4 x float> %4, <4 x float>* %agg.result, align 16, !tbaa !2
+  ret void
+  ; CHECK-CONTRACT-OFF: fmul.w
+  ; CHECK-CONTRACT-OFF: fadd.w
+  ; CHECK-CONTRACT-FAST: fmadd.w
+}
+
+; CHECK-LABEL: try_fuse_B:
+define dso_local void @try_fuse_B(<4 x float>* noalias nocapture sret %agg.result, <4 x float>* nocapture readonly dereferenceable(16) %acc, <4 x float>* nocapture readonly dereferenceable(16) %a, <4 x float>* nocapture readonly dereferenceable(16) %b) local_unnamed_addr {
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16, !tbaa !2
+  %1 = load <4 x float>, <4 x float>* %b, align 16, !tbaa !2
+  %2 = tail call <4 x float> @llvm.mips.fmul.w(<4 x float> %0, <4 x float> %1)
+  %3 = load <4 x float>, <4 x float>* %acc, align 16, !tbaa !2
+  %4 = tail call <4 x float> @llvm.mips.fsub.w(<4 x float> %3, <4 x float> %2)
+  store <4 x float> %4, <4 x float>* %agg.result, align 16, !tbaa !2
+  ret void
+  ; CHECK-CONTRACT-OFF: fmul.w
+  ; CHECK-CONTRACT-OFF: fsub.w
+  ; CHECK-CONTRACT-FAST: fmsub.w
+}
+
+!2 = !{!3, !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}