Index: include/llvm/CodeGen/MachineCombinerPattern.h
===================================================================
--- include/llvm/CodeGen/MachineCombinerPattern.h
+++ include/llvm/CodeGen/MachineCombinerPattern.h
@@ -48,6 +48,8 @@
   FMULADDD_OP2,
   FMULSUBD_OP1,
   FMULSUBD_OP2,
+  FNMULSUBS_OP1,
+  FNMULSUBD_OP1,
   FMLAv1i32_indexed_OP1,
   FMLAv1i32_indexed_OP2,
   FMLAv1i64_indexed_OP1,
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3166,6 +3166,13 @@
   return canCombine(MBB, MO, MulOpc);
 }
 
+// Forwards to canCombine(): is \p MO defined by the floating-point
+// multiply-negate opcode \p MulOpc (FNMULSrr/FNMULDrr) and can be combined?
+static bool canCombineWithFNMUL(MachineBasicBlock &MBB, MachineOperand &MO,
+                                unsigned MulOpc) {
+  return canCombine(MBB, MO, MulOpc);
+}
+
 // TODO: There are many more machine instruction opcodes to match:
 //       1. Other data types (integer, vectors)
 //       2. Other math / logic operations (xor, or)
@@ -3427,6 +3434,10 @@
       Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
       Found = true;
     }
+    if (canCombineWithFNMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
+      Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
+      Found = true;
+    }
     break;
   case AArch64::FSUBDrr:
     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
@@ -3441,6 +3452,10 @@
       Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
       Found = true;
     }
+    if (canCombineWithFNMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
+      Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
+      Found = true;
+    }
     break;
   case AArch64::FSUBv2f32:
     if (canCombineWithFMUL(MBB, Root.getOperand(2),
@@ -3495,6 +3510,8 @@
   case MachineCombinerPattern::FMULADDD_OP2:
   case MachineCombinerPattern::FMULSUBD_OP1:
   case MachineCombinerPattern::FMULSUBD_OP2:
+  case MachineCombinerPattern::FNMULSUBS_OP1:
+  case MachineCombinerPattern::FNMULSUBD_OP1:
   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
@@ -3996,6 +4013,24 @@
     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
     break;
   }
+
+  case MachineCombinerPattern::FNMULSUBS_OP1:
+  case MachineCombinerPattern::FNMULSUBD_OP1: {
+    // FNMUL I=A,B,0
+    // FSUB R,I,C
+    // ==> FNMADD R,A,B,C // = -A*B - C
+    // --- Create(FNMADD);
+    if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
+      Opc = AArch64::FNMADDSrrr;
+      RC = &AArch64::FPR32RegClass;
+    } else {
+      Opc = AArch64::FNMADDDrrr;
+      RC = &AArch64::FPR64RegClass;
+    }
+    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+    break;
+  }
+
   case MachineCombinerPattern::FMULSUBS_OP2:
   case MachineCombinerPattern::FMULSUBD_OP2: {
     // FMUL I=A,B,0
Index: test/CodeGen/AArch64/arm64-fml-combines.ll
===================================================================
--- test/CodeGen/AArch64/arm64-fml-combines.ll
+++ test/CodeGen/AArch64/arm64-fml-combines.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios  -mcpu=cyclone -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -mcpu=cyclone -enable-unsafe-fp-math | FileCheck %s
+
 define void @foo_2d(double* %src) {
 entry:
   %arrayidx1 = getelementptr inbounds double, double* %src, i64 5
@@ -126,3 +127,23 @@
 for.end:                                          ; preds = %for.body
   ret void
 }
+
+; CHECK-LABEL: test1:
+; CHECK: fnmadd s0, s0, s1, s2
+define float @test1(float %a, float %b, float %c) {
+entry:
+  %0 = fmul contract float %a, %b ; a * b
+  %mul = fsub contract float -0.000000e+00, %0 ; -0.0 - x negates x: -(a * b)
+  %sub1 = fsub contract float %mul, %c ; -(a * b) - c, the fnmadd form
+  ret float %sub1
+}
+
+; CHECK-LABEL: test2:
+; CHECK: fnmadd d0, d0, d1, d2
+define double @test2(double %a, double %b, double %c) {
+entry:
+  %0 = fmul contract double %a, %b ; a * b
+  %mul = fsub contract double -0.000000e+00, %0 ; -0.0 - x negates x: -(a * b)
+  %sub1 = fsub contract double %mul, %c ; -(a * b) - c, the fnmadd form
+  ret double %sub1
+}