Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -673,6 +673,7 @@ // Don't allow anything with multiple uses unless we know it is free. EVT VT = Op.getValueType(); + const SDNodeFlags Flags = Op->getFlags(); if (!Op.hasOneUse()) if (!(Op.getOpcode() == ISD::FP_EXTEND && TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) @@ -693,8 +694,8 @@ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT); } case ISD::FADD: - // FIXME: determine better conditions for this xform. - if (!Options->UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath && + !Flags.hasNoSignedZeros()) return 0; // After operation legalization, it might not be legal to create new FSUBs. if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) @@ -756,7 +757,7 @@ } case ISD::FADD: // FIXME: determine better conditions for this xform. - assert(Options.UnsafeFPMath); + assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, @@ -10253,19 +10254,31 @@ return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags); } - // FIXME: Auto-upgrade the target/function-level option. - if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { - // fold (fadd A, 0) -> A - if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) - if (N1C->isZero()) - return N0; + ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1); + if (N1C && N1C->isZero()) { + if (N1C->isNegative() || Options.UnsafeFPMath || + Flags.hasNoSignedZeros()) { + // fold (fadd A, 0) -> A + return N0; + } } // If 'unsafe math' is enabled, fold lots of things. 
- if (Options.UnsafeFPMath) { + bool AllowNewConst = (Level < AfterLegalizeDAG); + if (Options.UnsafeFPMath || Flags.hasNoNaNs()) { + // If allowed, fold (fadd (fneg x), x) -> 0.0 + if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) + return DAG.getConstantFP(0.0, DL, VT); + + // If allowed, fold (fadd x, (fneg x)) -> 0.0 + if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) + return DAG.getConstantFP(0.0, DL, VT); + } + + // If 'unsafe math' is enabled, fold lots of things. + if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { // No FP constant should be created after legalization as Instruction // Selection pass has a hard time dealing with FP constants. - bool AllowNewConst = (Level < AfterLegalizeDAG); // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && @@ -10275,14 +10288,6 @@ Flags), Flags); - // If allowed, fold (fadd (fneg x), x) -> 0.0 - if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) - return DAG.getConstantFP(0.0, DL, VT); - - // If allowed, fold (fadd x, (fneg x)) -> 0.0 - if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) - return DAG.getConstantFP(0.0, DL, VT); - // We can fold chains of FADD's of the same value into multiplications. // This transform is not safe in general because we are reducing the number // of rounding steps. 
Index: test/CodeGen/AMDGPU/fadd.ll =================================================================== --- test/CodeGen/AMDGPU/fadd.ll +++ test/CodeGen/AMDGPU/fadd.ll @@ -66,7 +66,7 @@ ; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32: ; SI-NOT: v_add_f32 define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 { - %add = fadd float %a, 0.0 + %add = fadd nsz float %a, 0.0 store float %add, float addrspace(1)* %out, align 4 ret void } Index: test/CodeGen/X86/fmf-flags.ll =================================================================== --- test/CodeGen/X86/fmf-flags.ll +++ test/CodeGen/X86/fmf-flags.ll @@ -38,18 +38,13 @@ define float @fast_fmuladd_opts(float %a , float %b , float %c) { ; X64-LABEL: fast_fmuladd_opts: ; X64: # %bb.0: -; X64-NEXT: movaps %xmm0, %xmm1 -; X64-NEXT: addss %xmm0, %xmm1 -; X64-NEXT: addss %xmm0, %xmm1 -; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: mulss {{.*}}(%rip), %xmm0 ; X64-NEXT: retq ; ; X86-LABEL: fast_fmuladd_opts: ; X86: # %bb.0: ; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: fld %st(0) -; X86-NEXT: fadd %st(1) -; X86-NEXT: faddp %st(1) +; X86-NEXT: fmuls {{.*}} ; X86-NEXT: retl %res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a) ret float %res @@ -62,9 +57,9 @@ define double @not_so_fast_mul_add(double %x) { ; X64-LABEL: not_so_fast_mul_add: ; X64: # %bb.0: -; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: movsd {{.*}}(%rip), %xmm1 ; X64-NEXT: mulsd %xmm0, %xmm1 -; X64-NEXT: addsd %xmm1, %xmm0 +; X64-NEXT: mulsd {{.*}}(%rip), %xmm0 ; X64-NEXT: movsd %xmm1, {{.*}}(%rip) ; X64-NEXT: retq ; @@ -73,7 +68,9 @@ ; X86-NEXT: fldl {{[0-9]+}}(%esp) ; X86-NEXT: fld %st(0) ; X86-NEXT: fmull {{\.LCPI.*}} -; X86-NEXT: fadd %st(0), %st(1) +; X86-NEXT: fxch %st(1) +; X86-NEXT: fmull {{\.LCPI.*}} +; X86-NEXT: fxch %st(1) ; X86-NEXT: fstpl mul1 ; X86-NEXT: retl %m = fmul double %x, 4.2 Index: test/CodeGen/X86/fp-fold.ll =================================================================== --- 
test/CodeGen/X86/fp-fold.ll +++ test/CodeGen/X86/fp-fold.ll @@ -17,15 +17,35 @@ } define float @fadd_negzero(float %x) { -; STRICT-LABEL: fadd_negzero: +; ANY-LABEL: fadd_negzero: +; ANY: # %bb.0: +; ANY-NEXT: retq + %r = fadd float %x, -0.0 + ret float %r +} + +define float @fadd_produce_zero(float %x) { +; STRICT-LABEL: fadd_produce_zero: ; STRICT: # %bb.0: -; STRICT-NEXT: addss {{.*}}(%rip), %xmm0 +; STRICT-NEXT: subss %xmm0, %xmm0 ; STRICT-NEXT: retq ; -; UNSAFE-LABEL: fadd_negzero: +; UNSAFE-LABEL: fadd_produce_zero: ; UNSAFE: # %bb.0: +; UNSAFE-NEXT: xorps %xmm0, %xmm0 ; UNSAFE-NEXT: retq - %r = fadd float %x, -0.0 + %neg = fsub nsz float 0.0, %x + %r = fadd nnan float %neg, %x + ret float %r +} + +define float @fadd_reassociate(float %x) { +; ANY-LABEL: fadd_reassociate: +; ANY: # %bb.0: +; ANY-NEXT: addss {{.*}}(%rip), %xmm0 +; ANY-NEXT: retq + %sum = fadd reassoc float %x, 8.0 + %r = fadd reassoc float %sum, 12.0 ret float %r } @@ -46,14 +66,9 @@ } define float @fsub_zero(float %x) { -; STRICT-LABEL: fsub_zero: -; STRICT: # %bb.0: -; STRICT-NEXT: addss {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fsub_zero: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fsub_zero: +; ANY: # %bb.0: +; ANY-NEXT: retq %r = fsub float %x, 0.0 ret float %r }