Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32041,6 +32041,32 @@
   return SDValue();
 }
 
+/// Fold ADDSUB(FMUL(A, B), C) into FMADDSUB(A, B, C). Requires an FMA-capable
+/// subtarget, a single-use FMUL, and FP fusion enabled via target options.
+static SDValue combineAddsub(SDNode *N, SelectionDAG &DAG,
+                             const X86Subtarget &Subtarget) {
+  assert(N->getOpcode() == X86ISD::ADDSUB && "Expected X86ISD::ADDSUB opcode");
+  if (!Subtarget.hasAnyFMA())
+    return SDValue();
+
+  // Only a multiply with no other users can be absorbed into the fused node.
+  SDValue Mul = N->getOperand(0);
+  if (Mul->getOpcode() != ISD::FMUL || !Mul->hasOneUse())
+    return SDValue();
+
+  // Keep these conditions in sync with the similar checks in
+  // DAGCombiner::visitFADDForFMACombine.
+  const TargetOptions &Options = DAG.getTarget().Options;
+  if (Options.AllowFPOpFusion != FPOpFusion::Fast && !Options.UnsafeFPMath)
+    return SDValue();
+
+  SDValue MulLHS = Mul->getOperand(0);
+  SDValue MulRHS = Mul->getOperand(1);
+  SDValue Addend = N->getOperand(1);
+  return DAG.getNode(X86ISD::FMADDSUB, SDLoc(N), N->getValueType(0), MulLHS,
+                     MulRHS, Addend);
+}
+
 /// Do target-specific dag combines on floating point negations.
 static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
                            const X86Subtarget &Subtarget) {
@@ -33383,6 +33409,7 @@
   case ISD::UINT_TO_FP:     return combineUIntToFP(N, DAG, Subtarget);
   case ISD::FADD:
   case ISD::FSUB:           return combineFaddFsub(N, DAG, Subtarget);
+  case X86ISD::ADDSUB:      return combineAddsub(N, DAG, Subtarget);
   case ISD::FNEG:           return combineFneg(N, DAG, Subtarget);
   case ISD::TRUNCATE:       return combineTruncate(N, DAG, Subtarget);
   case X86ISD::FAND:        return combineFAnd(N, DAG, Subtarget);
Index: llvm/test/CodeGen/X86/fmaddsub-combine.ll
===================================================================
--- llvm/test/CodeGen/X86/fmaddsub-combine.ll
+++ llvm/test/CodeGen/X86/fmaddsub-combine.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=CHECK -check-prefix=CHECK-FMA3 %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=CHECK -check-prefix=CHECK-FMA4 %s
+
+; This test checks that an FMUL feeding the ADDSUB pattern (fsub/fadd blended by a shufflevector) is fused into FMADDSUB.
+
+define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B) #0 {
+; CHECK-LABEL: mul_addsub_pd128:
+; CHECK:       # BB#0: # %entry
+; CHECK-FMA3-NEXT: vfmaddsub213pd  %xmm0, %xmm1, %xmm0
+; CHECK-FMA4-NEXT: vfmaddsubpd     %xmm0, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %Mul = fmul <2 x double> %A, %B
+  %Diff = fsub fast <2 x double> %Mul, %A
+  %Sum = fadd fast <2 x double> %Mul, %A
+  %Res = shufflevector <2 x double> %Diff, <2 x double> %Sum, <2 x i32> <i32 0, i32 3> ; lane 0 from %Diff, lane 1 from %Sum
+  ret <2 x double> %Res
+}
+
+define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B) #0 {
+; CHECK-LABEL: mul_addsub_ps128:
+; CHECK:       # BB#0: # %entry
+; CHECK-FMA3-NEXT: vfmaddsub213ps  %xmm0, %xmm1, %xmm0
+; CHECK-FMA4-NEXT: vfmaddsubps     %xmm0, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %Mul = fmul <4 x float> %A, %B
+  %Diff = fsub fast <4 x float> %Mul, %A
+  %Sum = fadd fast <4 x float> %Mul, %A
+  %Res = shufflevector <4 x float> %Diff, <4 x float> %Sum, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; even lanes from %Diff, odd lanes from %Sum
+  ret <4 x float> %Res
+}
+
+define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B) #0 {
+; CHECK-LABEL: mul_addsub_pd256:
+; CHECK:       # BB#0: # %entry
+; CHECK-FMA3-NEXT: vfmaddsub213pd  %ymm0, %ymm1, %ymm0
+; CHECK-FMA4-NEXT: vfmaddsubpd     %ymm0, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+entry:
+  %Mul = fmul <4 x double> %A, %B
+  %Diff = fsub fast <4 x double> %Mul, %A
+  %Sum = fadd fast <4 x double> %Mul, %A
+  %Res = shufflevector <4 x double> %Diff, <4 x double> %Sum, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; even lanes from %Diff, odd lanes from %Sum
+  ret <4 x double> %Res
+}
+
+define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B) #0 {
+; CHECK-LABEL: mul_addsub_ps256:
+; CHECK:       # BB#0: # %entry
+; CHECK-FMA3-NEXT: vfmaddsub213ps  %ymm0, %ymm1, %ymm0
+; CHECK-FMA4-NEXT: vfmaddsubps     %ymm0, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+entry:
+  %Mul = fmul <8 x float> %A, %B
+  %Diff = fsub fast <8 x float> %Mul, %A
+  %Sum = fadd fast <8 x float> %Mul, %A
+  %Res = shufflevector <8 x float> %Diff, <8 x float> %Sum, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> ; even lanes from %Diff, odd lanes from %Sum
+  ret <8 x float> %Res
+}
+
+attributes #0 = { nounwind "unsafe-fp-math"="true" }
+