Index: include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- include/llvm/CodeGen/SelectionDAG.h
+++ include/llvm/CodeGen/SelectionDAG.h
@@ -889,7 +889,8 @@
   SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                   SDValue N2, const SDNodeFlags Flags = SDNodeFlags());
   SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
-                  SDValue N2, SDValue N3);
+                  SDValue N2, SDValue N3,
+                  const SDNodeFlags Flags = SDNodeFlags());
   SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                   SDValue N2, SDValue N3, SDValue N4);
   SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9578,8 +9578,10 @@
   if (!HasFMAD && !HasFMA)
     return SDValue();
 
+  SDNodeFlags Flags = N->getFlags();
+  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
-                              Options.UnsafeFPMath || HasFMAD);
+                              CanFuse || HasFMAD);
   // If the addition is not contractable, do not combine.
   if (!AllowFusionGlobally && !isContractable(N))
     return SDValue();
@@ -9609,14 +9611,14 @@
   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
     return DAG.getNode(PreferredFusedOpcode, SL, VT,
-                       N0.getOperand(0), N0.getOperand(1), N1);
+                       N0.getOperand(0), N0.getOperand(1), N1, Flags);
   }
 
   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
   // Note: Commutes FADD operands.
   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
     return DAG.getNode(PreferredFusedOpcode, SL, VT,
-                       N1.getOperand(0), N1.getOperand(1), N0);
+                       N1.getOperand(0), N1.getOperand(1), N0, Flags);
   }
 
   // Look through FP_EXTEND nodes to do more combining.
@@ -9630,7 +9632,7 @@
                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                      N00.getOperand(0)),
                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                     N00.getOperand(1)), N1);
+                                     N00.getOperand(1)), N1, Flags);
     }
   }
 
@@ -9644,16 +9646,14 @@
                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                      N10.getOperand(0)),
                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
-                                     N10.getOperand(1)), N0);
+                                     N10.getOperand(1)), N0, Flags);
     }
   }
 
   // More folding opportunities when target permits.
   if (Aggressive) {
     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
-    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
-    // are currently only supported on binary nodes.
-    if (Options.UnsafeFPMath &&
+    if (CanFuse &&
         N0.getOpcode() == PreferredFusedOpcode &&
         N0.getOperand(2).getOpcode() == ISD::FMUL &&
         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
@@ -9662,13 +9662,11 @@
                          DAG.getNode(PreferredFusedOpcode, SL, VT,
                                      N0.getOperand(2).getOperand(0),
                                      N0.getOperand(2).getOperand(1),
-                                     N1));
+                                     N1, Flags), Flags);
     }
 
     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
-    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
-    // are currently only supported on binary nodes.
-    if (Options.UnsafeFPMath &&
+    if (CanFuse &&
         N1->getOpcode() == PreferredFusedOpcode &&
         N1.getOperand(2).getOpcode() == ISD::FMUL &&
         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
@@ -9677,19 +9675,19 @@
                          DAG.getNode(PreferredFusedOpcode, SL, VT,
                                      N1.getOperand(2).getOperand(0),
                                      N1.getOperand(2).getOperand(1),
-                                     N0));
+                                     N0, Flags), Flags);
     }
 
-
     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
     auto FoldFAddFMAFPExtFMul = [&] (
-      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
+      SDNodeFlags Flags) {
       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                          DAG.getNode(PreferredFusedOpcode, SL, VT,
                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
-                                     Z));
+                                     Z, Flags), Flags);
     };
     if (N0.getOpcode() == PreferredFusedOpcode) {
       SDValue N02 = N0.getOperand(2);
@@ -9699,7 +9697,7 @@
             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                       N020.getOperand(0), N020.getOperand(1),
-                                      N1);
+                                      N1, Flags);
         }
       }
     }
@@ -9710,14 +9708,15 @@
     // operation into two double-precision operations, which might not be
     // interesting for all targets, especially GPUs.
     auto FoldFAddFPExtFMAFMul = [&] (
-      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
+      SDNodeFlags Flags) {
       return DAG.getNode(PreferredFusedOpcode, SL, VT,
                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                          DAG.getNode(PreferredFusedOpcode, SL, VT,
                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
-                                     Z));
+                                     Z, Flags), Flags);
     };
     if (N0.getOpcode() == ISD::FP_EXTEND) {
       SDValue N00 = N0.getOperand(0);
@@ -9727,7 +9726,7 @@
             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                       N002.getOperand(0), N002.getOperand(1),
-                                      N1);
+                                      N1, Flags);
         }
       }
     }
@@ -9742,7 +9741,7 @@
             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                       N120.getOperand(0), N120.getOperand(1),
-                                      N0);
+                                      N0, Flags);
         }
       }
     }
@@ -9760,7 +9759,7 @@
             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                       N102.getOperand(0), N102.getOperand(1),
-                                      N0);
+                                      N0, Flags);
         }
       }
     }
@@ -9789,8 +9788,11 @@
   if (!HasFMAD && !HasFMA)
     return SDValue();
 
+  const SDNodeFlags Flags = N->getFlags();
+  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
-                              Options.UnsafeFPMath || HasFMAD);
+                              CanFuse || HasFMAD);
+
   // If the subtraction is not contractable, do not combine.
   if (!AllowFusionGlobally && !isContractable(N))
     return SDValue();
@@ -9815,16 +9817,17 @@
   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
     return DAG.getNode(PreferredFusedOpcode, SL, VT,
                        N0.getOperand(0), N0.getOperand(1),
-                       DAG.getNode(ISD::FNEG, SL, VT, N1));
+                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);    
   }
 
   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
   // Note: Commutes FSUB operands.
-  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
+  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
     return DAG.getNode(PreferredFusedOpcode, SL, VT,
                        DAG.getNode(ISD::FNEG, SL, VT,
                                    N1.getOperand(0)),
-                       N1.getOperand(1), N0);
+                       N1.getOperand(1), N0, Flags);
+  }
 
   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
@@ -9833,7 +9836,7 @@
     SDValue N01 = N0.getOperand(0).getOperand(1);
     return DAG.getNode(PreferredFusedOpcode, SL, VT,
                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
-                       DAG.getNode(ISD::FNEG, SL, VT, N1));
+                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
   }
 
   // Look through FP_EXTEND nodes to do more combining.
@@ -9849,7 +9852,7 @@
                                      N00.getOperand(0)),
                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                      N00.getOperand(1)),
-                         DAG.getNode(ISD::FNEG, SL, VT, N1));
+                         DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
     }
   }
 
@@ -9866,7 +9869,7 @@
                                                  N10.getOperand(0))),
                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                      N10.getOperand(1)),
-                         N0);
+                         N0, Flags);          
     }
   }
 
@@ -9888,7 +9891,7 @@
                                                    N000.getOperand(0)),
                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                    N000.getOperand(1)),
-                                       N1));
+                                       N1, Flags));
       }
     }
   }
@@ -9911,7 +9914,7 @@
                                                    N000.getOperand(0)),
                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                    N000.getOperand(1)),
-                                       N1));
+                                       N1, Flags));
       }
     }
   }
@@ -9920,9 +9923,7 @@
   if (Aggressive) {
     // fold (fsub (fma x, y, (fmul u, v)), z)
     //   -> (fma x, y (fma u, v, (fneg z)))
-    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
-    // are currently only supported on binary nodes.
-    if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
+    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
         N0.getOperand(2)->hasOneUse()) {
       return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -9931,14 +9932,12 @@
                                      N0.getOperand(2).getOperand(0),
                                      N0.getOperand(2).getOperand(1),
                                      DAG.getNode(ISD::FNEG, SL, VT,
-                                                 N1)));
+                                                 N1), Flags), Flags);          
     }
 
     // fold (fsub x, (fma y, z, (fmul u, v)))
     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
-    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
-    // are currently only supported on binary nodes.
-    if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
+    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
         isContractableFMUL(N1.getOperand(2))) {
       SDValue N20 = N1.getOperand(2).getOperand(0);
       SDValue N21 = N1.getOperand(2).getOperand(1);
@@ -9948,11 +9947,9 @@
                          N1.getOperand(1),
                          DAG.getNode(PreferredFusedOpcode, SL, VT,
                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
-
-                                     N21, N0));
+                                     N21, N0, Flags), Flags);      
     }
 
-
     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
     if (N0.getOpcode() == PreferredFusedOpcode) {
@@ -9969,7 +9966,7 @@
                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                      N020.getOperand(1)),
                                          DAG.getNode(ISD::FNEG, SL, VT,
-                                                     N1)));
+                                                     N1), Flags), Flags);              
         }
       }
     }
@@ -9997,7 +9994,7 @@
                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                      N002.getOperand(1)),
                                          DAG.getNode(ISD::FNEG, SL, VT,
-                                                     N1)));
+                                                     N1), Flags), Flags);              
         }
       }
     }
@@ -10020,7 +10017,7 @@
                                                                VT, N1200)),
                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                    N1201),
-                                       N0));
+                                       N0, Flags), Flags);        
       }
     }
 
@@ -10051,7 +10048,7 @@
                                                                VT, N1020)),
                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                    N1021),
-                                       N0));
+                                       N0, Flags), Flags);        
       }
     }
   }
@@ -10067,6 +10064,7 @@
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
   SDLoc SL(N);
+  const SDNodeFlags Flags = N->getFlags();
 
   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
 
@@ -10098,52 +10096,54 @@
 
   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
-  auto FuseFADD = [&](SDValue X, SDValue Y) {
+  auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
       if (XC1 && XC1->isExactlyValue(+1.0))
-        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+                           Y, Flags);
       if (XC1 && XC1->isExactlyValue(-1.0))
         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
-                           DAG.getNode(ISD::FNEG, SL, VT, Y));
+                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);      
     }
     return SDValue();
   };
 
-  if (SDValue FMA = FuseFADD(N0, N1))
+  if (SDValue FMA = FuseFADD(N0, N1, Flags))
     return FMA;
-  if (SDValue FMA = FuseFADD(N1, N0))
+  if (SDValue FMA = FuseFADD(N1, N0, Flags))
     return FMA;
 
   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
-  auto FuseFSUB = [&](SDValue X, SDValue Y) {
+  auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
       if (XC0 && XC0->isExactlyValue(+1.0))
         return DAG.getNode(PreferredFusedOpcode, SL, VT,
                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
-                           Y);
+                           Y, Flags);
       if (XC0 && XC0->isExactlyValue(-1.0))
         return DAG.getNode(PreferredFusedOpcode, SL, VT,
                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
-                           DAG.getNode(ISD::FNEG, SL, VT, Y));
+                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);      
 
       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
       if (XC1 && XC1->isExactlyValue(+1.0))
         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
-                           DAG.getNode(ISD::FNEG, SL, VT, Y));
+                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
       if (XC1 && XC1->isExactlyValue(-1.0))
-        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+                           Y, Flags);
     }
     return SDValue();
   };
 
-  if (SDValue FMA = FuseFSUB(N0, N1))
+  if (SDValue FMA = FuseFSUB(N0, N1, Flags))
     return FMA;
-  if (SDValue FMA = FuseFSUB(N1, N0))
+  if (SDValue FMA = FuseFSUB(N1, N0, Flags))
     return FMA;
 
   return SDValue();
@@ -10554,6 +10554,10 @@
   SDLoc DL(N);
   const TargetOptions &Options = DAG.getTarget().Options;
 
+  // FMA nodes have flags that propagate to the created nodes.
+  const SDNodeFlags Flags = N->getFlags();
+  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
+
   // Constant fold FMA.
   if (isa<ConstantFPSDNode>(N0) &&
       isa<ConstantFPSDNode>(N1) &&
@@ -10561,7 +10565,7 @@
     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
   }
 
-  if (Options.UnsafeFPMath) {
+  if (UnsafeFPMath) {
     if (N0CFP && N0CFP->isZero())
       return N2;
     if (N1CFP && N1CFP->isZero())
@@ -10578,12 +10582,7 @@
      !isConstantFPBuildVectorOrConstantFP(N1))
     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
 
-  // TODO: FMA nodes should have flags that propagate to the created nodes.
-  // For now, create a Flags object for use with reassociation math transforms.
-  SDNodeFlags Flags;
-  Flags.setAllowReassociation(true);
-
-  if (Options.UnsafeFPMath) {
+  if (UnsafeFPMath) {
     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
         isConstantFPBuildVectorOrConstantFP(N1) &&
@@ -10629,7 +10628,7 @@
     }
   }
 
-  if (Options.UnsafeFPMath) {
+  if (UnsafeFPMath) {
     // (fma x, c, x) -> (fmul x, (c+1))
     if (N1CFP && N0 == N2) {
       return DAG.getNode(ISD::FMUL, DL, VT, N0,
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4859,7 +4859,8 @@
 }
 
 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
-                              SDValue N1, SDValue N2, SDValue N3) {
+                              SDValue N1, SDValue N2, SDValue N3,
+                              const SDNodeFlags Flags) {
   // Perform various simplifications.
   switch (Opcode) {
   case ISD::FMA: {
@@ -4953,10 +4954,13 @@
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops);
     void *IP = nullptr;
-    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+      E->intersectFlagsWith(Flags);
       return SDValue(E, 0);
+    }
 
     N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+    N->setFlags(Flags);
     createOperands(N, Ops);
     CSEMap.InsertNode(N, IP);
   } else {
Index: test/CodeGen/AArch64/fma-aggressive.ll
===================================================================
--- test/CodeGen/AArch64/fma-aggressive.ll
+++ test/CodeGen/AArch64/fma-aggressive.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=thunderx2t99 < %s | FileCheck %s
+
+define float @test1(float %u , float %v , float %x, float %y, float %z) {
+; CHECK-LABEL: test1
+; CHECK-EVEN: fmadd {{s[0-9]*[02468]}}, {{s[0-9]*}}, {{s[0-9]*}}, {{s[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{s[0-9]*[02468]}}, {{s[0-9]*}}, {{s[0-9]*}}, {{s[0-9]*[02468]}}
+  %mul.1 = fmul fast float %u, %v
+  %mul.2 = fmul fast float %x, %y
+  %fma = fadd fast float %mul.2, %mul.1
+  %res = fadd fast float %fma, %z
+  ret float %res
+}
+
+define float @test2(float %u , float %v , float %x, float %y, float %z) {
+; CHECK-LABEL: test2
+; CHECK-EVEN: fmadd {{s[0-9]*[02468]}}, {{s[0-9]*}}, {{s[0-9]*}}, {{s[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{s[0-9]*[02468]}}, {{s[0-9]*}}, {{s[0-9]*}}, {{s[0-9]*[02468]}}
+  %mul.1 = fmul fast float %y, %z
+  %mul.2 = fmul fast float %u, %v
+  %fma = fadd fast float %mul.2, %mul.1
+  %res = fadd fast float %x, %fma
+  ret float %res
+}
+
+define float @test3(float %u , float %v , float %x, float %y, float %z) {
+; CHECK-LABEL: test3
+; CHECK-EVEN: fnmsub {{s[0-9]*[02468]}}, {{s[0-9]*}}, {{s[0-9]*}}, {{s[0-9]*[02468]}}
+  %mul.1 = fmul fast float %x, %y
+  %res = fsub fast float %mul.1, %z
+  ret float %res
+}
+
+define float @test4(float %u , float %v , float %x, float %y, float %z) {
+; CHECK-LABEL: test4
+; CHECK-EVEN: fnmadd {{s[0-9]*[02468]}}, {{s[0-9]*}}, {{s[0-9]*}}, {{s[0-9]*[02468]}}
+  %mul.1 = fmul fast float %x, %y
+  %neg = fsub fast float -0.0, %mul.1
+  %res = fsub fast float %neg, %z
+  ret float %res
+}
Index: test/CodeGen/AArch64/neon-fma-FMF.ll
===================================================================
--- test/CodeGen/AArch64/neon-fma-FMF.ll
+++ test/CodeGen/AArch64/neon-fma-FMF.ll
@@ -1,13 +1,23 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
-define <2 x float> @fma(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; CHECK-LABEL: fma:
+define <2 x float> @fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: fma_1:
 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 	%tmp1 = fmul contract <2 x float> %A, %B;
 	%tmp2 = fadd contract <2 x float> %C, %tmp1;
 	ret <2 x float> %tmp2
 }
 
+; This case will fold as it was only available through unsafe before, now available from
+; the contract on the fadd
+define <2 x float> @fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: fma_2:
+; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp1 = fmul <2 x float> %A, %B;
+	%tmp2 = fadd contract <2 x float> %C, %tmp1;
+	ret <2 x float> %tmp2
+}
+
 define <2 x float> @no_fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
 ; CHECK-LABEL: no_fma_1:
 ; CHECK: fmul
@@ -17,19 +27,20 @@
 	ret <2 x float> %tmp2
 }
 
-define <2 x float> @no_fma_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; CHECK-LABEL: no_fma_2:
-; CHECK: fmul
-; CHECK: fadd
-	%tmp1 = fmul <2 x float> %A, %B;
-	%tmp2 = fadd contract <2 x float> %C, %tmp1;
+define <2 x float> @fma_sub_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: fma_sub_1:
+; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+	%tmp1 = fmul contract <2 x float> %A, %B;
+	%tmp2 = fsub contract <2 x float> %C, %tmp1;
 	ret <2 x float> %tmp2
 }
 
-define <2 x float> @fma_sub(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; CHECK-LABEL: fma_sub:
+; This case will fold as it was only available through unsafe before, now available from
+; the contract on the fsub
+define <2 x float> @fma_sub_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
+; CHECK-LABEL: fma_sub_2:
 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-	%tmp1 = fmul contract <2 x float> %A, %B;
+	%tmp1 = fmul <2 x float> %A, %B;
 	%tmp2 = fsub contract <2 x float> %C, %tmp1;
 	ret <2 x float> %tmp2
 }
@@ -42,12 +53,3 @@
 	%tmp2 = fsub <2 x float> %C, %tmp1;
 	ret <2 x float> %tmp2
 }
-
-define <2 x float> @no_fma_sub_2(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
-; CHECK-LABEL: no_fma_sub_2:
-; CHECK: fmul
-; CHECK: fsub
-	%tmp1 = fmul <2 x float> %A, %B;
-	%tmp2 = fsub contract <2 x float> %C, %tmp1;
-	ret <2 x float> %tmp2
-}
Index: test/CodeGen/PowerPC/fma-aggr-FMF.ll
===================================================================
--- test/CodeGen/PowerPC/fma-aggr-FMF.ll
+++ test/CodeGen/PowerPC/fma-aggr-FMF.ll
@@ -22,10 +22,10 @@
 define float @no_fma_with_fewer_uses(float %f1, float %f2, float %f3, float %f4) {
 ; CHECK-LABEL: no_fma_with_fewer_uses:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xsmulsp 0, 3, 4
-; CHECK-NEXT:    xsmulsp 13, 1, 2
-; CHECK-NEXT:    xsmaddasp 0, 1, 2
-; CHECK-NEXT:    xsdivsp 1, 13, 0
+; CHECK-NEXT:    xsmulsp 0, 1, 2
+; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    xsmaddasp 1, 3, 4
+; CHECK-NEXT:    xsdivsp 1, 0, 1
 ; CHECK-NEXT:    blr
   %mul1 = fmul contract float %f1, %f2
   %mul2 = fmul float %f3, %f4
Index: test/CodeGen/PowerPC/fmf-propagation.ll
===================================================================
--- test/CodeGen/PowerPC/fmf-propagation.ll
+++ test/CodeGen/PowerPC/fmf-propagation.ll
@@ -15,15 +15,14 @@
 ; X * Y + Z --> fma(X, Y, Z)
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
-; FMFDEBUG:         fmul {{t[0-9]+}}, {{t[0-9]+}}
-; FMFDEBUG:         fadd contract {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'
 
 define float @fmul_fadd_contract1(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_contract1:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    xsmulsp 0, 1, 2
-; FMF-NEXT:    xsaddsp 1, 0, 3
+; FMF-NEXT:    xsmaddasp 3, 1, 2
+; FMF-NEXT:    fmr 1, 3
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fadd_contract1:
@@ -39,7 +38,7 @@
 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
-; FMFDEBUG:         fma {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'
 
 define float @fmul_fadd_contract2(float %x, float %y, float %z) {
@@ -62,15 +61,14 @@
 ; Reassociation implies that FMA contraction is allowed.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
-; FMFDEBUG:         fmul {{t[0-9]+}}, {{t[0-9]+}}
-; FMFDEBUG:         fadd reassoc {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
 
 define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_reassoc1:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    xsmulsp 0, 1, 2
-; FMF-NEXT:    xsaddsp 1, 0, 3
+; FMF-NEXT:    xsmaddasp 3, 1, 2
+; FMF-NEXT:    fmr 1, 3
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fadd_reassoc1:
@@ -86,7 +84,7 @@
 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
-; FMFDEBUG:         fma {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
 
 define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
@@ -109,7 +107,7 @@
 ; The fadd is now fully 'fast'. This implies that contraction is allowed.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
-; FMFDEBUG:         fma {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'
 
 define float @fmul_fadd_fast1(float %x, float %y, float %z) {
@@ -132,7 +130,7 @@
 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
-; FMFDEBUG:         fma {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'
 
 define float @fmul_fadd_fast2(float %x, float %y, float %z) {
@@ -156,7 +154,7 @@
 ; This is the minimum FMF needed for this transform - the FMA allows reassociation.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
-; FMFDEBUG:         fma reassoc {{t[0-9]+}}
+; FMFDEBUG:         fmul reassoc {{t[0-9]+}}, 
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
@@ -169,12 +167,7 @@
 ; FMF-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
 ; FMF-NEXT:    addi 3, 3, .LCPI6_0@toc@l
 ; FMF-NEXT:    lfsx 0, 0, 3
-; FMF-NEXT:    addis 3, 2, .LCPI6_1@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI6_1@toc@l
-; FMF-NEXT:    lfsx 2, 0, 3
-; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:    xsmaddasp 0, 1, 2
-; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_reassoc1:
@@ -192,7 +185,7 @@
 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
-; FMFDEBUG:         fma reassoc {{t[0-9]+}}
+; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
@@ -205,12 +198,7 @@
 ; FMF-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
 ; FMF-NEXT:    addi 3, 3, .LCPI7_0@toc@l
 ; FMF-NEXT:    lfsx 0, 0, 3
-; FMF-NEXT:    addis 3, 2, .LCPI7_1@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI7_1@toc@l
-; FMF-NEXT:    lfsx 2, 0, 3
-; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:    xsmaddasp 0, 1, 2
-; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_reassoc2:
@@ -228,11 +216,11 @@
 ; The FMA is now fully 'fast'. This implies that reassociation is allowed.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
-; FMFDEBUG:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
+; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
-; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
 
 define float @fmul_fma_fast1(float %x) {
@@ -241,12 +229,7 @@
 ; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
 ; FMF-NEXT:    addi 3, 3, .LCPI8_0@toc@l
 ; FMF-NEXT:    lfsx 0, 0, 3
-; FMF-NEXT:    addis 3, 2, .LCPI8_1@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI8_1@toc@l
-; FMF-NEXT:    lfsx 2, 0, 3
-; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:    xsmaddasp 0, 1, 2
-; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_fast1:
@@ -264,11 +247,11 @@
 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
-; FMFDEBUG:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
+; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
-; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
 
 define float @fmul_fma_fast2(float %x) {
@@ -277,12 +260,7 @@
 ; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
 ; FMF-NEXT:    addi 3, 3, .LCPI9_0@toc@l
 ; FMF-NEXT:    lfsx 0, 0, 3
-; FMF-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
-; FMF-NEXT:    addi 3, 3, .LCPI9_1@toc@l
-; FMF-NEXT:    lfsx 2, 0, 3
-; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:    xsmaddasp 0, 1, 2
-; FMF-NEXT:    fmr 1, 0
+; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_fast2:
Index: test/CodeGen/X86/fmf-flags_fma.ll
===================================================================
--- test/CodeGen/X86/fmf-flags_fma.ll
+++ test/CodeGen/X86/fmf-flags_fma.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mattr=+avx2,+fma -mtriple=x86_64-apple-macosx10.8.0 | FileCheck %s -check-prefix=X64
+
+declare float @llvm.fma.f32(float %a, float %b, float %c);
+
+define float @fast_fmuladd_rep1(float %a , float %b , float %c) {
+; X64-LABEL: fast_fmuladd_rep1:
+; X64:       # %bb.0:
+; X64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
+  %mul.1 = fmul fast float %a, %b
+  %res = fadd fast float %mul.1, %c
+  ret float %res
+}
+
+define float @fast_fmuladd_rep2(float %a , float %b , float %c) {
+; X64-LABEL: fast_fmuladd_rep2:
+; X64:       # %bb.0:
+; X64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
+  %mul.1 = fmul fast float %a, %b
+  %res = fadd fast float %c, %mul.1
+  ret float %res
+}
+
+define float @fast_fmuladd_rep3(half %a , half %b , float %c) {
+; X64-LABEL: fast_fmuladd_rep3:
+; X64:       # %bb.0:
+; X64:         vfmadd213ss {{[0-9]+}}(%rsp), %xmm1, %xmm0 
+  %mul.1 = fmul fast half %a, %b
+  %ext = fpext half %mul.1 to float
+  %res = fadd fast float %ext, %c
+  ret float %res
+}
+
+define float @fast_fmuladd_rep4(half %a , half %b , float %c) {
+; X64-LABEL: fast_fmuladd_rep4:
+; X64:       # %bb.0:
+; X64:         vfmadd213ss {{[0-9]+}}(%rsp), %xmm1, %xmm0 
+  %mul.1 = fmul fast half %a, %b
+  %ext = fpext half %mul.1 to float
+  %res = fadd fast float %c, %ext
+  ret float %res
+}
Index: test/CodeGen/X86/sqrt-fastmath-mir.ll
===================================================================
--- test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -9,12 +9,12 @@
 ; CHECK:     %1:fr32 = VRSQRTSSr killed %2, %0
 ; CHECK:     %3:fr32 = reassoc VMULSSrr %0, %1
 ; CHECK:     %4:fr32 = VMOVSSrm
-; CHECK:     %5:fr32 = VFMADD213SSr %1, killed %3, %4
+; CHECK:     %5:fr32 = reassoc VFMADD213SSr %1, killed %3, %4
 ; CHECK:     %6:fr32 = VMOVSSrm
 ; CHECK:     %7:fr32 = reassoc VMULSSrr %1, %6
 ; CHECK:     %8:fr32 = reassoc VMULSSrr killed %7, killed %5
 ; CHECK:     %9:fr32 = reassoc VMULSSrr %0, %8
-; CHECK:     %10:fr32 = VFMADD213SSr %8, %9, %4
+; CHECK:     %10:fr32 = reassoc VFMADD213SSr %8, %9, %4
 ; CHECK:     %11:fr32 = reassoc VMULSSrr %9, %6
 ; CHECK:     %12:fr32 = reassoc VMULSSrr killed %11, killed %10
 ; CHECK:     %14:fr32 = FsFLD0SS
@@ -33,12 +33,12 @@
 ; CHECK:     %1:fr32 = VRSQRTSSr killed %2, %0
 ; CHECK:     %3:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %0, %1
 ; CHECK:     %4:fr32 = VMOVSSrm
-; CHECK:     %5:fr32 = VFMADD213SSr %1, killed %3, %4
+; CHECK:     %5:fr32 = nnan ninf nsz arcp contract afn reassoc VFMADD213SSr %1, killed %3, %4
 ; CHECK:     %6:fr32 = VMOVSSrm
 ; CHECK:     %7:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %1, %6
 ; CHECK:     %8:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr killed %7, killed %5
 ; CHECK:     %9:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %0, %8
-; CHECK:     %10:fr32 = VFMADD213SSr %8, killed %9, %4
+; CHECK:     %10:fr32 = nnan ninf nsz arcp contract afn reassoc VFMADD213SSr %8, killed %9, %4
 ; CHECK:     %11:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %8, %6
 ; CHECK:     %12:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr killed %11, killed %10
 ; CHECK:     $xmm0 = COPY %12