Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6773,12 +6773,13 @@ // NaN. // - // FIXME: Instead of testing for UnsafeFPMath, this should be checking for - // no signed zeros as well as no nans. + SDValue Cmp = N0.getOperand(2); + const SDNodeFlags Flags = Cmp->getFlags(); const TargetOptions &Options = DAG.getTarget().Options; - if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && + bool UnsafeFPMath = Options.UnsafeFPMath || Flags.hasNoSignedZeros(); + if (UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { - ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get(); if (SDValue FMinMax = combineMinNumMaxNum( DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) @@ -10845,17 +10846,15 @@ } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (!DAG.getTarget().Options.UnsafeFPMath) + SDNodeFlags Flags = N->getFlags(); + if (!DAG.getTarget().Options.UnsafeFPMath && !Flags.hasAllowReciprocal()) return SDValue(); SDValue N0 = N->getOperand(0); if (TLI.isFsqrtCheap(N0, DAG)) return SDValue(); - // TODO: FSQRT nodes should have flags that propagate to the created nodes. - // For now, create a Flags object for use with reassociation math transforms. - SDNodeFlags Flags; - Flags.setAllowReassociation(true); + // FSQRT nodes have flags that propagate to the created nodes. 
return buildSqrtEstimate(N0, Flags); } Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4049,7 +4049,8 @@ break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) + if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + OpOpcode == ISD::FSUB) // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), Operand.getOperand(0), Operand.getNode()->getFlags()); Index: test/CodeGen/PowerPC/fmf-propagation.ll =================================================================== --- test/CodeGen/PowerPC/fmf-propagation.ll +++ test/CodeGen/PowerPC/fmf-propagation.ll @@ -304,7 +304,7 @@ ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:' -; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} +; GLOBALDEBUG: fmul afn {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:' define float @sqrt_afn(float %x) { @@ -340,18 +340,18 @@ ; The call is now fully 'fast'. This implies that approximation is allowed. 
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:' -; FMFDEBUG: fsqrt nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} +; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:' -; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} +; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:' define float @sqrt_fast(float %x) { ; FMF-LABEL: sqrt_fast: -; FMF: # %bb.0: -; FMF-NEXT: xssqrtsp 1, 1 -; FMF-NEXT: blr +; FMF: # %bb.1: +; FMF-NEXT: xsrsqrtesp 2, 1 +; FMF: blr ; ; GLOBAL-LABEL: sqrt_fast: ; GLOBAL: # %bb.0: Index: test/CodeGen/X86/fmf-flags.ll =================================================================== --- test/CodeGen/X86/fmf-flags.ll +++ test/CodeGen/X86/fmf-flags.ll @@ -7,9 +7,18 @@ define float @fast_recip_sqrt(float %x) { ; X64-LABEL: fast_recip_sqrt: ; X64: # %bb.0: -; X64-NEXT: sqrtss %xmm0, %xmm1 -; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: divss %xmm1, %xmm0 +; X64-NEXT: rsqrtss %xmm0, %xmm1 +; X64-NEXT: xorps %xmm2, %xmm2 +; X64-NEXT: cmpeqss %xmm0, %xmm2 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: movss {{.*}}(%rip), %xmm3 +; X64-NEXT: mulss %xmm0, %xmm3 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: addss {{.*}}(%rip), %xmm0 +; X64-NEXT: mulss %xmm3, %xmm0 +; X64-NEXT: andnps %xmm0, %xmm2 +; X64-NEXT: movss {{.*}}(%rip), %xmm0 +; X64-NEXT: divss %xmm2, %xmm0 ; X64-NEXT: retq ; ; X86-LABEL: fast_recip_sqrt: Index: test/CodeGen/X86/sqrt-fastmath-mir.ll =================================================================== --- test/CodeGen/X86/sqrt-fastmath-mir.ll +++ test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -7,16 +7,16 @@ ; CHECK: body: ; CHECK: %0:fr32 = COPY $xmm0 ; CHECK: %1:fr32 = VRSQRTSSr killed %2, %0 -; CHECK: %3:fr32 = reassoc VMULSSrr %0, %1 +; CHECK: %3:fr32 = VMULSSrr %0, %1 ; 
CHECK: %4:fr32 = VMOVSSrm ; CHECK: %5:fr32 = VFMADD213SSr %1, killed %3, %4 ; CHECK: %6:fr32 = VMOVSSrm -; CHECK: %7:fr32 = reassoc VMULSSrr %1, %6 -; CHECK: %8:fr32 = reassoc VMULSSrr killed %7, killed %5 -; CHECK: %9:fr32 = reassoc VMULSSrr %0, %8 +; CHECK: %7:fr32 = VMULSSrr %1, %6 +; CHECK: %8:fr32 = VMULSSrr killed %7, killed %5 +; CHECK: %9:fr32 = VMULSSrr %0, %8 ; CHECK: %10:fr32 = VFMADD213SSr %8, %9, %4 -; CHECK: %11:fr32 = reassoc VMULSSrr %9, %6 -; CHECK: %12:fr32 = reassoc VMULSSrr killed %11, killed %10 +; CHECK: %11:fr32 = VMULSSrr %9, %6 +; CHECK: %12:fr32 = VMULSSrr killed %11, killed %10 ; CHECK: %14:fr32 = FsFLD0SS ; CHECK: %15:fr32 = VCMPSSrr %0, killed %14, 0 ; CHECK: %17:vr128 = VANDNPSrr killed %16, killed %13