Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32041,6 +32041,32 @@
   return SDValue();
 }
 
+/// Fold an X86ISD::ADDSUB whose first operand is a single-use FMUL into one
+/// X86ISD::FMADDSUB node. Returns an empty SDValue if the fold is not done.
+static SDValue combineAddsub(SDNode *N, SelectionDAG &DAG,
+                             const X86Subtarget &Subtarget) {
+  assert(N->getOpcode() == X86ISD::ADDSUB && "Expected X86ISD::ADDSUB opcode");
+
+  // The multiply can only be absorbed when this node is its sole use.
+  SDValue Mul = N->getOperand(0);
+  if (Mul->getOpcode() != ISD::FMUL || !Mul->hasOneUse())
+    return SDValue();
+  if (!Subtarget.hasAnyFMA())
+    return SDValue();
+
+  // Fusing needs fast FP-op fusion or unsafe FP math. Keep this condition in
+  // sync with the similar one in DAGCombiner::visitFADDForFMACombine.
+  const TargetOptions &Options = DAG.getTarget().Options;
+  if (Options.AllowFPOpFusion != FPOpFusion::Fast && !Options.UnsafeFPMath)
+    return SDValue();
+
+  SDValue MulLHS = Mul->getOperand(0);
+  SDValue MulRHS = Mul->getOperand(1);
+  SDValue Addend = N->getOperand(1);
+  return DAG.getNode(X86ISD::FMADDSUB, SDLoc(N), N->getValueType(0), MulLHS,
+                     MulRHS, Addend);
+}
+
 /// Do target-specific dag combines on floating point negations.
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -33383,6 +33409,7 @@ case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget); case ISD::FADD: case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); + case X86ISD::ADDSUB: return combineAddsub(N, DAG, Subtarget); case ISD::FNEG: return combineFneg(N, DAG, Subtarget); case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget); case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget); Index: llvm/test/CodeGen/X86/fmaddsub-combine.ll =================================================================== --- llvm/test/CodeGen/X86/fmaddsub-combine.ll +++ llvm/test/CodeGen/X86/fmaddsub-combine.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=CHECK -check-prefix=CHECK-FMA3 %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=CHECK -check-prefix=CHECK-FMA4 %s + +; This test checks the fusing of MUL + ADDSUB to FMADDSUB. 
+
+define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B) #0 {
+; CHECK-LABEL: mul_addsub_pd128:
+; CHECK: # BB#0: # %entry
+; CHECK-FMA3-NEXT: vfmaddsub213pd %xmm0, %xmm1, %xmm0
+; CHECK-FMA4-NEXT: vfmaddsubpd %xmm0, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+  %AB = fmul <2 x double> %A, %B
+  %Sub = fsub fast <2 x double> %AB, %A
+  %Add = fadd fast <2 x double> %AB, %A
+  %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add, <2 x i32> <i32 0, i32 3> ; even lanes from %Sub, odd lanes from %Add
+  ret <2 x double> %Addsub
+}
+
+define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B) #0 {
+; CHECK-LABEL: mul_addsub_ps128:
+; CHECK: # BB#0: # %entry
+; CHECK-FMA3-NEXT: vfmaddsub213ps %xmm0, %xmm1, %xmm0
+; CHECK-FMA4-NEXT: vfmaddsubps %xmm0, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+  %AB = fmul <4 x float> %A, %B
+  %Sub = fsub fast <4 x float> %AB, %A
+  %Add = fadd fast <4 x float> %AB, %A
+  %Addsub = shufflevector <4 x float> %Sub, <4 x float> %Add, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; even lanes from %Sub, odd lanes from %Add
+  ret <4 x float> %Addsub
+}
+
+define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B) #0 {
+; CHECK-LABEL: mul_addsub_pd256:
+; CHECK: # BB#0: # %entry
+; CHECK-FMA3-NEXT: vfmaddsub213pd %ymm0, %ymm1, %ymm0
+; CHECK-FMA4-NEXT: vfmaddsubpd %ymm0, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %AB = fmul <4 x double> %A, %B
+  %Sub = fsub fast <4 x double> %AB, %A
+  %Add = fadd fast <4 x double> %AB, %A
+  %Addsub = shufflevector <4 x double> %Sub, <4 x double> %Add, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; even lanes from %Sub, odd lanes from %Add
+  ret <4 x double> %Addsub
+}
+
+define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B) #0 {
+; CHECK-LABEL: mul_addsub_ps256:
+; CHECK: # BB#0: # %entry
+; CHECK-FMA3-NEXT: vfmaddsub213ps %ymm0, %ymm1, %ymm0
+; CHECK-FMA4-NEXT: vfmaddsubps %ymm0, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+  %AB = fmul <8 x float> %A, %B
+  %Sub = fsub fast <8 x float> %AB, %A
+  %Add = fadd fast <8 x float> %AB, %A
+  %Addsub = shufflevector <8 x float> %Sub, <8 x float> %Add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> ; even lanes from %Sub, odd lanes from %Add
+  ret <8 x float> %Addsub
+}
+
+attributes #0 = { nounwind "unsafe-fp-math"="true" }
+