Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32041,6 +32041,28 @@
   return SDValue();
 }
 
+/// Do target-specific dag combines of FMUL and ADDSUB nodes into FMADDSUB.
+static SDValue combineAddsub(SDNode *N, SelectionDAG &DAG,
+                             const X86Subtarget &Subtarget) {
+  SDValue Op1 = N->getOperand(0);
+  if (N->getOpcode() != X86ISD::ADDSUB || Op1->getOpcode() != ISD::FMUL ||
+      !Op1->hasOneUse() || !Subtarget.hasFMA())
+    return SDValue();
+
+  const TargetOptions &Options = DAG.getTarget().Options;
+  bool AllowFusion =
+      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+  if (!AllowFusion)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDValue Op3 = N->getOperand(1);
+  SDValue Op2 = Op1->getOperand(1);
+  Op1 = Op1->getOperand(0);
+
+  return DAG.getNode(X86ISD::FMADDSUB, SDLoc(N), VT, Op1, Op2, Op3);
+}
+
 /// Do target-specific dag combines on floating point negations.
 static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
                            const X86Subtarget &Subtarget) {
@@ -33383,6 +33405,7 @@
   case ISD::UINT_TO_FP:     return combineUIntToFP(N, DAG, Subtarget);
   case ISD::FADD:
   case ISD::FSUB:           return combineFaddFsub(N, DAG, Subtarget);
+  case X86ISD::ADDSUB:      return combineAddsub(N, DAG, Subtarget);
   case ISD::FNEG:           return combineFneg(N, DAG, Subtarget);
   case ISD::TRUNCATE:       return combineTruncate(N, DAG, Subtarget);
   case X86ISD::FAND:        return combineFAnd(N, DAG, Subtarget);
Index: llvm/test/CodeGen/X86/fmaddsub-combine.ll
===================================================================
--- llvm/test/CodeGen/X86/fmaddsub-combine.ll
+++ llvm/test/CodeGen/X86/fmaddsub-combine.ll
@@ -0,0 +1,34 @@
+
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+; This test checks the fusing of MUL + ADDSUB into FMADDSUB.
+
+define void @cmul() #0 {
+; CHECK-LABEL: cmul:
+; CHECK: vfmaddsub231pd
+; CHECK: retq
+entry:
+  %Ar = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @a, i64 0, i32 0), align 8
+  %Ai = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @a, i64 0, i32 1), align 8
+  %Bir = load <2 x double>, <2 x double>* bitcast ({ double, double }* @b to <2 x double>*), align 8
+  %Aur = insertelement <2 x double> undef, double %Ar, i32 0
+  %Arr = insertelement <2 x double> %Aur, double %Ar, i32 1
+  %ArBi.ArBr = fmul fast <2 x double> %Bir, %Arr
+  %Bri = shufflevector <2 x double> %Bir, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+  %Aui = insertelement <2 x double> undef, double %Ai, i32 0
+  %Aii = insertelement <2 x double> %Aui, double %Ai, i32 1
+  %AiBr.AiBi = fmul fast <2 x double> %Bri, %Aii
+  %Sub = fsub fast <2 x double> %ArBi.ArBr, %AiBr.AiBi
+  %Add = fadd fast <2 x double> %ArBi.ArBr, %AiBr.AiBi
+  %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add, <2 x i32> <i32 0, i32 3>
+  store <2 x double> %Addsub, <2 x double>* bitcast ({ double, double }* @dst to <2 x double>*), align 8
+  ret void
+}
+
+@a = common global { double, double } zeroinitializer, align 8
+@b = common global { double, double } zeroinitializer, align 8
+@dst = common global { double, double } zeroinitializer, align 8
+
+attributes #0 = { nounwind "target-features"="+fma" "unsafe-fp-math"="true" }
+
+
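
Note on the transform (a reviewer aid, not part of the patch): X86ISD::FMADDSUB computes a[i]*b[i] - c[i] in even lanes and a[i]*b[i] + c[i] in odd lanes, which is exactly an FMUL feeding an X86ISD::ADDSUB fused into one instruction. A minimal C++ sketch of those lane semantics, using hypothetical names:

    #include <cstddef>

    // Reference model of the vfmaddsubNNNpd family: per lane I,
    //   even I: Out[I] = A[I] * B[I] - C[I]   (subtract)
    //   odd  I: Out[I] = A[I] * B[I] + C[I]   (add)
    static void FMAddSubRef(const double *A, const double *B, const double *C,
                            double *Out, size_t NumLanes) {
      for (size_t I = 0; I != NumLanes; ++I)
        Out[I] = (I % 2 == 0) ? A[I] * B[I] - C[I]  // even lane: subtract
                              : A[I] * B[I] + C[I]; // odd lane: add
    }

This also motivates the guards in combineAddsub: the hasOneUse() check on the FMUL ensures the multiply actually disappears after fusion, and the AllowFPOpFusion/UnsafeFPMath gate is required because the fused form rounds once where the separate FMUL and ADDSUB round twice.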