Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9579,6 +9579,13 @@
   if (N1CFP && N1CFP->isExactlyValue(+2.0))
     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
 
+  // fold (fmul X, -2.0) -> (fneg (fadd X, X))
+  if (N1CFP && N1CFP->isExactlyValue(-2.0))
+    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) {
+      SDValue Add = DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
+      return DAG.getNode(ISD::FNEG, DL, VT, Add);
+    }
+
   // fold (fmul X, -1.0) -> (fneg X)
   if (N1CFP && N1CFP->isExactlyValue(-1.0))
     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
Index: test/CodeGen/AArch64/fmul-combines.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/fmul-combines.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs | FileCheck %s
+
+; CHECK-LABEL: test1:
+; CHECK: fadd s0, s0, s0
+; CHECK: fneg s0, s0
+define float @test1(float %x) {
+  %y = fmul float %x, -2.0 ; f32 multiply by -2.0; expected output above is an fadd of x with itself, then an fneg
+  ret float %y
+}
+
+; CHECK-LABEL: test2:
+; CHECK: fadd d0, d0, d0
+; CHECK: fneg d0, d0
+define double @test2(double %x) {
+  %y = fmul double %x, -2.0 ; f64 variant of test1: expected output above is an fadd of x with itself, then an fneg
+  ret double %y
+}
+
+; a * b - 2.0 * d: the multiply by +2.0 is expected to become an fadd of d with itself, which then feeds the fsub.
+; CHECK-LABEL: test3:
+; CHECK: fmul d0, d0, d1
+; CHECK: fadd d1, d2, d2
+; CHECK: fsub d0, d0, d1
+define double @test3(double %a, double %b, double %d) {
+entry:
+  %mul = fmul double %a, %b
+  %mul1 = fmul double %d, 2.000000e+00 ; positive 2.0 here; the negation comes from the fsub below
+  %sub = fsub double %mul, %mul1
+  ret double %sub
+}
Index: test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
===================================================================
--- test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -84,8 +84,8 @@
 }
 
 ; GCN-LABEL: {{^}}fmul_x2_xn2_f32:
-; GCN: v_mul_f32_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], -4.0
-; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
+; GCN: v_add_f32_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], [[X]]
+; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[TMP0]], [[TMP0]]
 ; GCN: buffer_store_dword [[RESULT]]
 define amdgpu_kernel void @fmul_x2_xn2_f32(float addrspace(1)* %out, float %x, float %y) #0 {
   %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
@@ -200,8 +200,8 @@
 }
 
 ; GCN-LABEL: {{^}}fmul_x2_xn2_f16:
-; GCN: v_mul_f16_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], -4.0
-; GCN: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
+; GCN: v_add_f16_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], [[X]]
+; GCN: v_mul_f16_e64 [[RESULT:v[0-9]+]], -[[TMP0]], [[TMP0]]
 ; GCN: buffer_store_short [[RESULT]]
 define amdgpu_kernel void @fmul_x2_xn2_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
   %x = bitcast i16 %x.arg to half
Index: test/CodeGen/AMDGPU/fmuladd.f32.ll
===================================================================
--- test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -191,8 +191,8 @@
 
 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
 
-; GCN-DENORM-SLOWFMA: v_mul_f32_e32 [[TMP:v[0-9]+]], -2.0, [[R1]]
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
+; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -251,8 +251,8 @@
 
 ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[R1]], 2.0, [[R2]]
 
-; GCN-DENORM-SLOWFMA: v_mul_f32_e32 [[TMP:v[0-9]+]], -2.0, [[R1]]
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
+; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
+; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
Index: test/CodeGen/X86/fmul-combines.ll
===================================================================
--- test/CodeGen/X86/fmul-combines.ll
+++ test/CodeGen/X86/fmul-combines.ll
@@ -17,6 +17,15 @@
   ret <4 x float> %y
 }
 
+; CHECK-LABEL: fmulneg2_v4f32:
+; CHECK: addps %xmm0, %xmm0
+; CHECK: xorps
+; CHECK-NEXT: retq
+define <4 x float> @fmulneg2_v4f32(<4 x float> %x) {
+  %y = fmul <4 x float> %x, <float -2.0, float -2.0, float -2.0, float -2.0> ; splat of -2.0; expected output above is addps of x with itself, then an xorps (presumably the sign-bit flip for the negate)
+  ret <4 x float> %y
+}
+
 ; CHECK-LABEL: constant_fold_fmul_v4f32:
 ; CHECK: movaps
 ; CHECK-NEXT: ret