Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1555,6 +1555,36 @@
       return replaceInstUsesWith(*II, V);
     break;
   }
+  case Intrinsic::fma:
+  case Intrinsic::fmuladd: {
+    Value *LHS = nullptr;
+    Value *RHS = nullptr;
+
+    Value *Src0 = II->getArgOperand(0);
+    Value *Src1 = II->getArgOperand(1);
+
+    // fma fneg(x), fneg(y), z -> fma x, y, z
+    if (match(Src0, m_FNeg(m_Value(LHS))) && match(Src1, m_FNeg(m_Value(RHS)))) {
+      CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+                                            { LHS, RHS, II->getArgOperand(2) });
+      NewCall->takeName(II);
+      NewCall->copyFastMathFlags(II);
+      return replaceInstUsesWith(*II, NewCall);
+    }
+
+    // fma fabs(x), fabs(x), z -> fma x, x, z
+    if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
+        match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) &&
+        LHS == RHS) {
+      CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+                                             { LHS, LHS, II->getArgOperand(2) });
+      NewCall->takeName(II);
+      NewCall->copyFastMathFlags(II);
+      return replaceInstUsesWith(*II, NewCall);
+    }
+
+    break;
+  }
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
     // Turn PPC lvx -> load if the pointer is known aligned.
Index: test/CodeGen/AMDGPU/fma.ll
===================================================================
--- test/CodeGen/AMDGPU/fma.ll
+++ test/CodeGen/AMDGPU/fma.ll
@@ -1,6 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
+declare float @llvm.fabs.f32() nounwind readnone
 declare float @llvm.fma.f32(float, float, float) nounwind readnone
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
@@ -22,6 +23,18 @@
   ret void
 }
 
+define void @fma_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
+                               float addrspace(1)* %in2, float addrspace(1)* %in3) {
+  %r0 = load float, float addrspace(1)* %in1
+  %r1 = load float, float addrspace(1)* %in2
+  %r2 = load float, float addrspace(1)* %in3
+  %r0.fneg = fsub float -0.0, %r0
+  %r1.fneg = fsub float -0.0, %r1
+  %r3 = tail call float @llvm.fma.f32(float %r0.fneg, float %r1.fneg, float %r2)
+  store float %r3, float addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}fma_v2f32:
 ; SI: v_fma_f32
 ; SI: v_fma_f32
Index: test/Transforms/InstCombine/fma.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/fma.ll
@@ -0,0 +1,132 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.fma.f32(float, float, float) #1
+declare float @llvm.fmuladd.f32(float, float, float) #1
+declare float @llvm.fabs.f32(float) #1
+
+@external = external global i32
+
+; CHECK-LABEL: @fma_fneg_x_fneg_y(
+; CHECK: %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
+define float @fma_fneg_x_fneg_y(float %x, float %y, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %y.fneg = fsub float -0.0, %y
+  %fma = call float @llvm.fma.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fneg_x_fneg_y_fast(
+; CHECK: %fma = call fast float @llvm.fma.f32(float %x, float %y, float %z)
+define float @fma_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %y.fneg = fsub float -0.0, %y
+  %fma = call fast float @llvm.fma.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fneg_const_fneg_y(
+; CHECK: %fma = call float @llvm.fma.f32(float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %y, float %z)
+define float @fma_fneg_const_fneg_y(float %y, float %z) {
+  %y.fneg = fsub float -0.0, %y
+  %fma = call float @llvm.fma.f32(float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %y.fneg, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fneg_x_fneg_const(
+; CHECK: %fma = call float @llvm.fma.f32(float %x, float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %z)
+define float @fma_fneg_x_fneg_const(float %x, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %fma = call float @llvm.fma.f32(float %x.fneg, float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fabs_x_fabs_y(
+; CHECK: %x.fabs = call float @llvm.fabs.f32(float %x)
+; CHECK: %y.fabs = call float @llvm.fabs.f32(float %y)
+; CHECK: %fma = call float @llvm.fma.f32(float %x.fabs, float %y.fabs, float %z)
+define float @fma_fabs_x_fabs_y(float %x, float %y, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %y.fabs = call float @llvm.fabs.f32(float %y)
+  %fma = call float @llvm.fma.f32(float %x.fabs, float %y.fabs, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fabs_x_fabs_x(
+; CHECK: %fma = call float @llvm.fma.f32(float %x, float %x, float %z)
+define float @fma_fabs_x_fabs_x(float %x, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %fma = call float @llvm.fma.f32(float %x.fabs, float %x.fabs, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fabs_x_fabs_x_fast(
+; CHECK: %fma = call fast float @llvm.fma.f32(float %x, float %x, float %z)
+define float @fma_fabs_x_fabs_x_fast(float %x, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %fma = call fast float @llvm.fma.f32(float %x.fabs, float %x.fabs, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fmuladd_fneg_x_fneg_y(
+; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
+define float @fmuladd_fneg_x_fneg_y(float %x, float %y, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %y.fneg = fsub float -0.0, %y
+  %fmuladd = call float @llvm.fmuladd.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fneg_x_fneg_y_fast(
+; CHECK: %fmuladd = call fast float @llvm.fmuladd.f32(float %x, float %y, float %z)
+define float @fmuladd_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %y.fneg = fsub float -0.0, %y
+  %fmuladd = call fast float @llvm.fmuladd.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fneg_const_fneg_y(
+; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %y, float %z)
+define float @fmuladd_fneg_const_fneg_y(float %y, float %z) {
+  %y.fneg = fsub float -0.0, %y
+  %fmuladd = call float @llvm.fmuladd.f32(float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %y.fneg, float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fneg_x_fneg_const(
+; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float %x, float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %z)
+define float @fmuladd_fneg_x_fneg_const(float %x, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %fmuladd = call float @llvm.fmuladd.f32(float %x.fneg, float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fabs_x_fabs_y(
+; CHECK: %x.fabs = call float @llvm.fabs.f32(float %x)
+; CHECK: %y.fabs = call float @llvm.fabs.f32(float %y)
+; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float %x.fabs, float %y.fabs, float %z)
+define float @fmuladd_fabs_x_fabs_y(float %x, float %y, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %y.fabs = call float @llvm.fabs.f32(float %y)
+  %fmuladd = call float @llvm.fmuladd.f32(float %x.fabs, float %y.fabs, float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fabs_x_fabs_x(
+; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float %z)
+define float @fmuladd_fabs_x_fabs_x(float %x, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %fmuladd = call float @llvm.fmuladd.f32(float %x.fabs, float %x.fabs, float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fabs_x_fabs_x_fast(
+; CHECK: %fmuladd = call fast float @llvm.fmuladd.f32(float %x, float %x, float %z)
+define float @fmuladd_fabs_x_fabs_x_fast(float %x, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %fmuladd = call fast float @llvm.fmuladd.f32(float %x.fabs, float %x.fabs, float %z)
+  ret float %fmuladd
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }