Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9627,8 +9627,9 @@
     return SDValue();
 
   SDNodeFlags Flags = N->getFlags();
+  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
-                              Options.UnsafeFPMath || HasFMAD);
+                              CanFuse || HasFMAD);
   // If the addition is not contractable, do not combine.
   if (!AllowFusionGlobally && !isContractable(N))
     return SDValue();
@@ -9700,9 +9701,7 @@
   // More folding opportunities when target permits.
   if (Aggressive) {
     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
-    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
-    // are currently only supported on binary nodes.
-    if (Options.UnsafeFPMath &&
+    if (CanFuse &&
         N0.getOpcode() == PreferredFusedOpcode &&
         N0.getOperand(2).getOpcode() == ISD::FMUL &&
         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
@@ -9715,9 +9714,7 @@
     }
 
     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
-    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
-    // are currently only supported on binary nodes.
-    if (Options.UnsafeFPMath &&
+    if (CanFuse &&
         N1->getOpcode() == PreferredFusedOpcode &&
         N1.getOperand(2).getOpcode() == ISD::FMUL &&
         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
@@ -9841,8 +9838,9 @@
     return SDValue();
 
   const SDNodeFlags Flags = N->getFlags();
+  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
-                              Options.UnsafeFPMath || HasFMAD);
+                              CanFuse || HasFMAD);
 
   // If the subtraction is not contractable, do not combine.
   if (!AllowFusionGlobally && !isContractable(N))
@@ -9873,11 +9871,12 @@
 
   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
   // Note: Commutes FSUB operands.
-  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
+  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
     return DAG.getNode(PreferredFusedOpcode, SL, VT,
                        DAG.getNode(ISD::FNEG, SL, VT,
                                    N1.getOperand(0)),
                        N1.getOperand(1), N0, Flags);
+  }
 
   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
@@ -9973,9 +9972,7 @@
   if (Aggressive) {
     // fold (fsub (fma x, y, (fmul u, v)), z)
     //   -> (fma x, y (fma u, v, (fneg z)))
-    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
-    // are currently only supported on binary nodes.
-    if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
+    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
         N0.getOperand(2)->hasOneUse()) {
       return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -9989,9 +9986,7 @@
 
     // fold (fsub x, (fma y, z, (fmul u, v)))
     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
-    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
-    // are currently only supported on binary nodes.
-    if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
+    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
         isContractableFMUL(N1.getOperand(2))) {
       SDValue N20 = N1.getOperand(2).getOperand(0);
       SDValue N21 = N1.getOperand(2).getOperand(1);
@@ -10611,6 +10606,10 @@
 
   // FMA nodes have flags that propagate to the created nodes.
   const SDNodeFlags Flags = N->getFlags();
+  // The folds gated on this flag rewrite or drop arithmetic rather than just
+  // fusing it, so 'contract' alone must not enable them; require the global
+  // unsafe-math option or this node's own 'reassoc' flag.
+  bool CanReassociate = Options.UnsafeFPMath || Flags.hasAllowReassociation();
 
   // Constant fold FMA.
   if (isa<ConstantFPSDNode>(N0) &&
@@ -10619,7 +10618,7 @@
     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
   }
 
-  if (Options.UnsafeFPMath) {
+  if (CanReassociate) {
     if (N0CFP && N0CFP->isZero())
       return N2;
     if (N1CFP && N1CFP->isZero())
@@ -10636,7 +10635,7 @@
      !isConstantFPBuildVectorOrConstantFP(N1))
     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
 
-  if (Options.UnsafeFPMath) {
+  if (CanReassociate) {
     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
         isConstantFPBuildVectorOrConstantFP(N1) &&
@@ -10682,7 +10681,7 @@
     }
   }
 
-  if (Options.UnsafeFPMath) {
+  if (CanReassociate) {
     // (fma x, c, x) -> (fmul x, (c+1))
     if (N1CFP && N0 == N2) {
       return DAG.getNode(ISD::FMUL, DL, VT, N0,
Index: test/CodeGen/AArch64/fma-aggressive.ll
===================================================================
--- test/CodeGen/AArch64/fma-aggressive.ll
+++ test/CodeGen/AArch64/fma-aggressive.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=thunderx2t99 < %s | FileCheck %s
+
+; Check that 'fast' flags on the fadd/fsub nodes are enough to form fused
+; multiply-add instructions, including the nested (aggressive) forms.
+
+; (x * y + u * v) + z
+define float @test1(float %u , float %v , float %x, float %y, float %z) {
+; CHECK-LABEL: test1:
+; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %mul.1 = fmul fast float %u, %v
+  %mul.2 = fmul fast float %x, %y
+  %fma = fadd fast float %mul.2, %mul.1
+  %res = fadd fast float %fma, %z
+  ret float %res
+}
+
+; x + (u * v + y * z)
+define float @test2(float %u , float %v , float %x, float %y, float %z) {
+; CHECK-LABEL: test2:
+; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %mul.1 = fmul fast float %y, %z
+  %mul.2 = fmul fast float %u, %v
+  %fma = fadd fast float %mul.2, %mul.1
+  %res = fadd fast float %x, %fma
+  ret float %res
+}
+
+; x * y - z
+define float @test3(float %u , float %v , float %x, float %y, float %z) {
+; CHECK-LABEL: test3:
+; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %mul.1 = fmul fast float %x, %y
+  %res = fsub fast float %mul.1, %z
+  ret float %res
+}
+
+; -(x * y) - z
+define float @test4(float %u , float %v , float %x, float %y, float %z) {
+; CHECK-LABEL: test4:
+; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %mul.1 = fmul fast float %x, %y
+  %neg = fsub fast float -0.0, %mul.1
+  %res = fsub fast float %neg, %z
+  ret float %res
+}
Index: test/CodeGen/AArch64/neon-fma-FMF.ll
===================================================================
--- test/CodeGen/AArch64/neon-fma-FMF.ll
+++ test/CodeGen/AArch64/neon-fma-FMF.ll
@@ -1,13 +1,23 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
-define <2 x float> @fma(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; CHECK-LABEL: fma:
+define <2 x float> @fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: fma_1:
 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 	%tmp1 = fmul contract <2 x float> %A, %B;
 	%tmp2 = fadd contract <2 x float> %C, %tmp1;
 	ret <2 x float> %tmp2
 }
 
+; The fmul carries no fast-math flags; 'contract' on the fadd alone now permits
+; the fold, which previously required a global option such as unsafe-fp-math.
+define <2 x float> @fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: fma_2:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp1 = fmul <2 x float> %A, %B;
+	%tmp2 = fadd contract <2 x float> %C, %tmp1;
+	ret <2 x float> %tmp2
+}
+
 define <2 x float> @no_fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
 ; CHECK-LABEL: no_fma_1:
 ; CHECK: fmul
@@ -17,19 +27,20 @@
 	ret <2 x float> %tmp2
 }
 
-define <2 x float> @no_fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; CHECK-LABEL: no_fma_2:
-; CHECK: fmul
-; CHECK: fadd
-	%tmp1 = fmul <2 x float> %A, %B;
-	%tmp2 = fadd contract <2 x float> %C, %tmp1;
+define <2 x float> @fma_sub_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: fma_sub_1:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp1 = fmul contract <2 x float> %A, %B;
+	%tmp2 = fsub contract <2 x float> %C, %tmp1;
 	ret <2 x float> %tmp2
 }
 
-define <2 x float> @fma_sub(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; CHECK-LABEL: fma_sub:
+; The fmul carries no fast-math flags; 'contract' on the fsub alone now permits
+; the fold, which previously required a global option such as unsafe-fp-math.
+define <2 x float> @fma_sub_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: fma_sub_2:
 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp1 = fmul contract <2 x float> %A, %B;
+	%tmp1 = fmul <2 x float> %A, %B;
 	%tmp2 = fsub contract <2 x float> %C, %tmp1;
 	ret <2 x float> %tmp2
 }
@@ -42,12 +53,3 @@
 	%tmp2 = fsub <2 x float> %C, %tmp1;
 	ret <2 x float> %tmp2
 }
-
-define <2 x float> @no_fma_sub_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; CHECK-LABEL: no_fma_sub_2:
-; CHECK: fmul
-; CHECK: fsub
-	%tmp1 = fmul <2 x float> %A, %B;
-	%tmp2 = fsub contract <2 x float> %C, %tmp1;
-	ret <2 x float> %tmp2
-}
Index: test/CodeGen/PowerPC/fma-aggr-FMF.ll
===================================================================
--- test/CodeGen/PowerPC/fma-aggr-FMF.ll
+++ test/CodeGen/PowerPC/fma-aggr-FMF.ll
@@ -22,10 +22,10 @@
 define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {
 ; CHECK-LABEL: no_fma_with_fewer_uses:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xsmulsp 0, 3, 4
-; CHECK-NEXT:    xsmulsp 13, 1, 2
-; CHECK-NEXT:    xsmaddasp 0, 1, 2
-; CHECK-NEXT:    xsdivsp 1, 13, 0
+; CHECK-NEXT:    xsmulsp 0, 1, 2
+; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    xsmaddasp 1, 3, 4
+; CHECK-NEXT:    xsdivsp 1, 0, 1
 ; CHECK-NEXT:    blr
   %mul1 = fmul contract float %f1, %f2
   %mul2 = fmul float %f3, %f4
Index: test/CodeGen/PowerPC/fmf-propagation.ll
===================================================================
--- test/CodeGen/PowerPC/fmf-propagation.ll
+++ test/CodeGen/PowerPC/fmf-propagation.ll
@@ -15,15 +15,14 @@
 ; X * Y + Z --> fma(X, Y, Z)
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
-; FMFDEBUG:         fmul {{t[0-9]+}}, {{t[0-9]+}}
-; FMFDEBUG:         fadd contract {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'
 
 define float @fmul_fadd_contract1(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_contract1:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    xsmulsp 0, 1, 2
-; FMF-NEXT:    xsaddsp 1, 0, 3
+; FMF-NEXT:    xsmaddasp 3, 1, 2
+; FMF-NEXT:    fmr 1, 3
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fadd_contract1:
@@ -62,15 +61,14 @@
 ; Reassociation implies that FMA contraction is allowed.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
-; FMFDEBUG:         fmul {{t[0-9]+}}, {{t[0-9]+}}
-; FMFDEBUG:         fadd reassoc {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
 
 define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_reassoc1:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    xsmulsp 0, 1, 2
-; FMF-NEXT:    xsaddsp 1, 0, 3
+; FMF-NEXT:    xsmaddasp 3, 1, 2
+; FMF-NEXT:    fmr 1, 3
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fadd_reassoc1:
@@ -156,7 +154,7 @@
 ; This is the minimum FMF needed for this transform - the FMA allows reassociation.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
-; FMFDEBUG:         fma reassoc {{t[0-9]+}}
+; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
@@ -169,12 +167,7 @@
 ; FMF-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
 ; FMF-NEXT:    addi 3, 3, .LCPI6_0@toc@l
 ; FMF-NEXT:    lfsx 0, 0, 3
-; FMF-NEXT:    addis 3, 2, .LCPI6_1@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI6_1@toc@l
-; FMF-NEXT:    lfsx 2, 0, 3
-; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:    xsmaddasp 0, 1, 2
-; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_reassoc1:
@@ -193,7 +186,6 @@
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
 ; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
-; FMFDEBUG:         fma reassoc {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
@@ -206,12 +198,7 @@
 ; FMF-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
 ; FMF-NEXT:    addi 3, 3, .LCPI7_0@toc@l
 ; FMF-NEXT:    lfsx 0, 0, 3
-; FMF-NEXT:    addis 3, 2, .LCPI7_1@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI7_1@toc@l
-; FMF-NEXT:    lfsx 2, 0, 3
-; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:    xsmaddasp 0, 1, 2
-; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_reassoc2:
@@ -229,7 +216,7 @@
 ; The FMA is now fully 'fast'. This implies that reassociation is allowed.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
-; FMFDEBUG:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
+; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
@@ -242,12 +229,7 @@
 ; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
 ; FMF-NEXT:    addi 3, 3, .LCPI8_0@toc@l
 ; FMF-NEXT:    lfsx 0, 0, 3
-; FMF-NEXT:    addis 3, 2, .LCPI8_1@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI8_1@toc@l
-; FMF-NEXT:    lfsx 2, 0, 3
-; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:    xsmaddasp 0, 1, 2
-; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_fast1:
@@ -266,7 +248,6 @@
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
 ; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
-; FMFDEBUG:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
@@ -279,12 +260,7 @@
 ; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
 ; FMF-NEXT:    addi 3, 3, .LCPI9_0@toc@l
 ; FMF-NEXT:    lfsx 0, 0, 3
-; FMF-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI9_1@toc@l
-; FMF-NEXT:    lfsx 2, 0, 3
-; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:    xsmaddasp 0, 1, 2
-; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_fast2:
Index: test/CodeGen/X86/fmf-flags_fma.ll
===================================================================
--- test/CodeGen/X86/fmf-flags_fma.ll
+++ test/CodeGen/X86/fmf-flags_fma.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mattr=+fma -mtriple=x86_64-apple-macosx10.8.0 | FileCheck %s -check-prefix=X64
+
+; Check that 'fast' fmul/fadd pairs are fused into a single FMA instruction,
+; including when the product is extended from half to float before the add.
+
+; c + (a * b)
+define float @fast_fmuladd_rep1(float %a , float %b , float %c) {
+; X64-LABEL: fast_fmuladd_rep1:
+; X64:       # %bb.0:
+; X64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
+  %mul.1 = fmul fast float %a, %b
+  %res = fadd fast float %c, %mul.1
+  ret float %res
+}
+
+; fpext(a * b) + c
+define float @fast_fmuladd_rep2(half %a , half %b , float %c) {
+; X64-LABEL: fast_fmuladd_rep2:
+; X64:       # %bb.0:
+; X64:         vfmadd213ss {{[0-9]+}}(%rsp), %xmm1, %xmm0
+  %mul.1 = fmul fast half %a, %b
+  %ext = fpext half %mul.1 to float
+  %res = fadd fast float %ext, %c
+  ret float %res
+}
+
+; c + fpext(a * b)
+define float @fast_fmuladd_rep3(half %a , half %b , float %c) {
+; X64-LABEL: fast_fmuladd_rep3:
+; X64:       # %bb.0:
+; X64:         vfmadd213ss {{[0-9]+}}(%rsp), %xmm1, %xmm0
+  %mul.1 = fmul fast half %a, %b
+  %ext = fpext half %mul.1 to float
+  %res = fadd fast float %c, %ext
+  ret float %res
+}