Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -876,6 +876,27 @@ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, ForCodeSize, Depth + 1); + case ISD::FMA: + case ISD::FMAD: { + if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + char V2 = isNegatibleForFree(Op.getOperand(2), LegalOperations, TLI, + Options, ForCodeSize, Depth + 1); + if (!V2) + return 0; + + // One of Op0/Op1 must be cheaply negatible, then select the cheapest. + char V0 = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, + Options, ForCodeSize, Depth + 1); + char V01 = std::max(V0, V1); + return V01 ? std::max(V01, V2) : 0; + } + case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FSIN: @@ -917,7 +938,8 @@ return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); } case ISD::FADD: - assert(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()); + assert((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, @@ -964,6 +986,35 @@ LegalOperations, ForCodeSize, Depth + 1), Flags); + case ISD::FMA: + case ISD::FMAD: { + assert((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + SDValue Neg2 = GetNegatedExpression(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + char V0 = isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), &Options, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), LegalOperations, + DAG.getTargetLoweringInfo(), &Options, + ForCodeSize, Depth + 1); + if (V0 >= V1) { + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + SDValue Neg0 = GetNegatedExpression( + Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0, + Op.getOperand(1), Neg2, Flags); + } + + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + SDValue Neg1 = GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), Neg1, Neg2, Flags); + } + case ISD::FP_EXTEND: case ISD::FSIN: return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Index: llvm/trunk/test/CodeGen/X86/fma-fneg-combine-2.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fma-fneg-combine-2.ll +++ llvm/trunk/test/CodeGen/X86/fma-fneg-combine-2.ll @@ -5,14 +5,14 @@ define float @test_fneg_fma_subx_y_negz_f32(float %w, float %x, float %y, float %z) { ; FMA3-LABEL: test_fneg_fma_subx_y_negz_f32: ; FMA3: # %bb.0: # %entry -; FMA3-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; FMA3-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm2 * xmm0) + xmm3 +; FMA3-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; FMA3-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm3 ; FMA3-NEXT: retq ; ; FMA4-LABEL: test_fneg_fma_subx_y_negz_f32: ; FMA4: # %bb.0: # %entry -; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; FMA4-NEXT: vfnmaddss %xmm3, %xmm2, %xmm0, %xmm0 +; FMA4-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: vfmaddss %xmm3, %xmm2, %xmm0, %xmm0 ; FMA4-NEXT: retq entry: %subx = fsub nsz float %w, %x @@ -25,14 +25,14 @@ define float @test_fneg_fma_x_suby_negz_f32(float %w, float %x, float %y, float %z) { ; FMA3-LABEL: test_fneg_fma_x_suby_negz_f32: ; FMA3: # %bb.0: # %entry -; FMA3-NEXT: vsubss %xmm2, %xmm0, %xmm0 -; FMA3-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3 +; FMA3-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; FMA3-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm3 ; FMA3-NEXT: retq ; ; FMA4-LABEL: test_fneg_fma_x_suby_negz_f32: ; FMA4: # %bb.0: # %entry -; FMA4-NEXT: vsubss %xmm2, %xmm0, %xmm0 -; FMA4-NEXT: vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; FMA4-NEXT: vfmaddss %xmm3, %xmm0, %xmm1, %xmm0 ; FMA4-NEXT: retq entry: %suby = fsub nsz float %w, %y @@ -45,16 +45,16 @@ define float @test_fneg_fma_subx_suby_negz_f32(float %w, float %x, float %y, float %z) { ; FMA3-LABEL: test_fneg_fma_subx_suby_negz_f32: ; FMA3: # %bb.0: # %entry -; FMA3-NEXT: vsubss %xmm1, %xmm0, %xmm1 +; FMA3-NEXT: vsubss %xmm0, %xmm1, %xmm1 ; FMA3-NEXT: vsubss %xmm2, %xmm0, %xmm0 -; FMA3-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3 +; FMA3-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm3 ; FMA3-NEXT: retq ; ; FMA4-LABEL: test_fneg_fma_subx_suby_negz_f32: ; FMA4: # %bb.0: # %entry -; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm1 +; FMA4-NEXT: vsubss %xmm0, %xmm1, %xmm1 ; FMA4-NEXT: vsubss %xmm2, %xmm0, %xmm0 -; FMA4-NEXT: vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0 +; FMA4-NEXT: vfmaddss %xmm3, %xmm0, %xmm1, %xmm0 ; FMA4-NEXT: retq entry: %subx = fsub nsz float %w, %x