Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -1773,6 +1773,7 @@
   setTargetDAGCombine(ISD::ADD);
   setTargetDAGCombine(ISD::FADD);
   setTargetDAGCombine(ISD::FSUB);
+  setTargetDAGCombine(ISD::FNEG);
   setTargetDAGCombine(ISD::FMA);
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::LOAD);
@@ -26148,6 +26149,33 @@
   return SDValue();
 }
 
+/// Fold a floating point negation into the X86 FMA node that it negates.
+static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,
+                                  const X86Subtarget *Subtarget) {
+  SDValue Src = N->getOperand(0);
+
+  // Only fold when this negation is the sole use of its operand.
+  if (!Src.hasOneUse())
+    return SDValue();
+
+  // (fneg (fma x, y, z)) == (fma x, -y, -z), so choose the FMA opcode that
+  // negates both the product and the addend of the original node.
+  // NOTE(review): the folded form may differ in the sign of an exact zero
+  // result -- confirm whether a no-signed-zeros guard is needed here.
+  unsigned NegatedOpc;
+  switch (Src.getOpcode()) {
+  case X86ISD::FMADD:  NegatedOpc = X86ISD::FNMSUB; break;
+  case X86ISD::FMSUB:  NegatedOpc = X86ISD::FNMADD; break;
+  case X86ISD::FNMADD: NegatedOpc = X86ISD::FMSUB;  break;
+  case X86ISD::FNMSUB: NegatedOpc = X86ISD::FMADD;  break;
+  default:
+    return SDValue();
+  }
+
+  return DAG.getNode(NegatedOpc, SDLoc(N), N->getValueType(0),
+                     Src.getOperand(0), Src.getOperand(1), Src.getOperand(2));
+}
+
 /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
 static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
                                  const X86Subtarget *Subtarget) {
@@ -27042,6 +27070,7 @@
   case ISD::UINT_TO_FP:     return PerformUINT_TO_FPCombine(N, DAG, Subtarget);
   case ISD::FADD:           return PerformFADDCombine(N, DAG, Subtarget);
   case ISD::FSUB:           return PerformFSUBCombine(N, DAG, Subtarget);
+  case ISD::FNEG:           return PerformFNEGCombine(N, DAG, Subtarget);
   case ISD::TRUNCATE:       return PerformTRUNCATECombine(N, DAG, Subtarget);
   case X86ISD::FXOR:
   case X86ISD::FOR:         return PerformFORCombine(N, DAG, Subtarget);
Index: llvm/trunk/test/CodeGen/X86/fma_patterns.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/fma_patterns.ll
+++ llvm/trunk/test/CodeGen/X86/fma_patterns.ll
@@ -568,6 +568,74 @@
   ret <4 x double> %r
 }
 
+; (fneg (fma x, y, z)) -> (fma x, -y, -z)
+
+define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x float> %a0, %a1
+  %add = fadd <4 x float> %mul, %a2 ; a0 * a1 + a2
+  %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add ; -(a0 * a1 + a2)
+  ret <4 x float> %neg
+}
+
+define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x double> %a0, %a1
+  %sub = fsub <4 x double> %mul, %a2 ; a0 * a1 - a2
+  %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub ; -(a0 * a1 - a2)
+  ret <4 x double> %neg
+}
+
+define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x float> %a0, %a1
+  %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul ; -(a0 * a1)
+  %add = fadd <4 x float> %neg0, %a2 ; -(a0 * a1) + a2
+  %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add ; -(-(a0 * a1) + a2)
+  ret <4 x float> %neg1
+}
+
+define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x double> %a0, %a1
+  %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul ; -(a0 * a1)
+  %sub = fsub <4 x double> %neg0, %a2 ; -(a0 * a1) - a2
+  %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub ; -(-(a0 * a1) - a2)
+  ret <4 x double> %neg1
+}
+
 ; (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
 
 define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {