// Review notes. This is free text placed ahead of the first "Index:" header;
// the patch text below it is unchanged.
//
// Files touched:
//   lib/CodeGen/SelectionDAG/DAGCombiner.cpp
//   test/CodeGen/X86/fp-fold.ll
//
// DAGCombiner.cpp
//   Hunk -673:  adds a local `Flags` initialised from Op->getFlags().
//   Hunk -710:  the ISD::FSUB case tests Flags.hasNoSignedZeros() instead of
//               calling Op.getNode()->getFlags().hasNoSignedZeros().
//   Hunks -10391 / -10407 reorder the fsub folds:
//     * (fsub A, 0) -> A is moved out of the `if (Options.UnsafeFPMath)`
//       block. It returns N0 when the zero constant is not negative, or when
//       Options.UnsafeFPMath or Flags.hasNoSignedZeros() holds.
//     * (fsub x, x) -> 0.0 is moved out of that block as well and is guarded
//       by Options.UnsafeFPMath || Flags.hasNoNaNs().
//     * (fsub 0, B) -> -B: the zero-constant test becomes the outer condition
//       and the no-signed-zeros test the inner one; the FIXME about
//       auto-upgrading the target/function-level option is removed.
//     * (fsub A, (fneg B)) -> (fadd A, B) is moved below those three folds.
//   NOTE(review): `Flags` in these hunks is declared outside the visible
//   context; presumably it holds N->getFlags(), which the removed line used.
//   Confirm against the full file.
//   NOTE(review): the (fsub A, 0) fold tests Options.UnsafeFPMath while the
//   (fsub 0, B) fold tests Options.NoSignedZerosFPMath. Confirm that the
//   difference is intended.
//
// fp-fold.ll
//   * fadd_negzero_nsz is moved ahead of fadd_zero_nsz; its body is the same.
//   * fsub_zero: the separate STRICT (addss) and UNSAFE check blocks become a
//     single ANY block that expects only retq.
//   * fsub_self is new: `fsub nnan float %x, %x`, expecting
//     `xorps %xmm0, %xmm0`.
//   * fsub_zero_nsz is renamed to fsub_zero_nsz_1.
//   * fsub_zero_nsz_2 is new: `fsub nsz float 0.0, %x`, expecting an xorps
//     with a (%rip) operand.
//
// NOTE(review): the hunk headers declare multi-line ranges (for example
// -673,6 +673,7), yet each hunk's content sits on one physical line in this
// copy. The line breaks appear to have been lost; restore them from the
// original patch before trying to apply it.
//
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -673,6 +673,7 @@ // Don't allow anything with multiple uses unless we know it is free. EVT VT = Op.getValueType(); + const SDNodeFlags Flags = Op->getFlags(); if (!Op.hasOneUse()) if (!(Op.getOpcode() == ISD::FP_EXTEND && TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) @@ -710,7 +711,7 @@ case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. if (!Options->NoSignedZerosFPMath && - !Op.getNode()->getFlags().hasNoSignedZeros()) + !Flags.hasNoSignedZeros()) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) @@ -10391,15 +10392,23 @@ if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) - return DAG.getNode(ISD::FADD, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations), Flags); + // (fsub A, 0) -> A + if (N1CFP && N1CFP->isZero()) { + if (!N1CFP->isNegative() || Options.UnsafeFPMath || + Flags.hasNoSignedZeros()) { + return N0; + } + } - // FIXME: Auto-upgrade the target/function-level option. 
- if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { - // (fsub 0, B) -> -B - if (N0CFP && N0CFP->isZero()) { + if (N0 == N1) { + // (fsub x, x) -> 0.0 + if (Options.UnsafeFPMath || Flags.hasNoNaNs()) + return DAG.getConstantFP(0.0f, DL, VT); + } + + // (fsub 0, B) -> -B + if (N0CFP && N0CFP->isZero()) { + if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) @@ -10407,16 +10416,13 @@ } } + // fold (fsub A, (fneg B)) -> (fadd A, B) + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) + return DAG.getNode(ISD::FADD, DL, VT, N0, + GetNegatedExpression(N1, DAG, LegalOperations), Flags); + // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { - // (fsub A, 0) -> A - if (N1CFP && N1CFP->isZero()) - return N0; - - // (fsub x, x) -> 0.0 - if (N0 == N1) - return DAG.getConstantFP(0.0f, DL, VT); - // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) if (N1.getOpcode() == ISD::FADD) { Index: test/CodeGen/X86/fp-fold.ll =================================================================== --- test/CodeGen/X86/fp-fold.ll +++ test/CodeGen/X86/fp-fold.ll @@ -29,6 +29,14 @@ ret float %r } +define float @fadd_negzero_nsz(float %x) { +; ANY-LABEL: fadd_negzero_nsz: +; ANY: # %bb.0: +; ANY-NEXT: retq + %r = fadd nsz float %x, -0.0 + ret float %r +} + define float @fadd_zero_nsz(float %x) { ; ANY-LABEL: fadd_zero_nsz: ; ANY: # %bb.0: @@ -37,24 +45,20 @@ ret float %r } -define float @fadd_negzero_nsz(float %x) { -; ANY-LABEL: fadd_negzero_nsz: +define float @fsub_zero(float %x) { +; ANY-LABEL: fsub_zero: ; ANY: # %bb.0: ; ANY-NEXT: retq - %r = fadd nsz float %x, -0.0 + %r = fsub float %x, 0.0 ret float %r } -define float @fsub_zero(float %x) { -; STRICT-LABEL: fsub_zero: -; STRICT: # %bb.0: -; STRICT-NEXT: addss {{.*}}(%rip), 
%xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fsub_zero: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq - %r = fsub float %x, 0.0 +define float @fsub_self(float %x) { +; ANY-LABEL: fsub_self: +; ANY: # %bb.0: +; ANY-NEXT: xorps %xmm0, %xmm0 +; ANY-NEXT: retq + %r = fsub nnan float %x, %x ret float %r } @@ -72,14 +76,23 @@ ret float %r } -define float @fsub_zero_nsz(float %x) { -; ANY-LABEL: fsub_zero_nsz: +define float @fsub_zero_nsz_1(float %x) { +; ANY-LABEL: fsub_zero_nsz_1: ; ANY: # %bb.0: ; ANY-NEXT: retq %r = fsub nsz float %x, 0.0 ret float %r } +define float @fsub_zero_nsz_2(float %x) { +; ANY-LABEL: fsub_zero_nsz_2: +; ANY: # %bb.0: +; ANY-NEXT: xorps {{.*}}(%rip), %xmm0 +; ANY-NEXT: retq + %r = fsub nsz float 0.0, %x + ret float %r +} + define float @fsub_negzero_nsz(float %x) { ; ANY-LABEL: fsub_negzero_nsz: ; ANY: # %bb.0: