Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10893,17 +10893,16 @@ } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (!DAG.getTarget().Options.UnsafeFPMath) + SDNodeFlags Flags = N->getFlags(); + if (!DAG.getTarget().Options.UnsafeFPMath && + !Flags.hasApproximateFuncs()) return SDValue(); SDValue N0 = N->getOperand(0); if (TLI.isFsqrtCheap(N0, DAG)) return SDValue(); - // TODO: FSQRT nodes should have flags that propagate to the created nodes. - // For now, create a Flags object for use with reassociation math transforms. - SDNodeFlags Flags; - Flags.setAllowReassociation(true); + // Propagate this node's fast-math flags to the nodes built for the estimate. return buildSqrtEstimate(N0, Flags); } Index: test/CodeGen/PowerPC/fmf-propagation.ll =================================================================== --- test/CodeGen/PowerPC/fmf-propagation.ll +++ test/CodeGen/PowerPC/fmf-propagation.ll @@ -300,18 +300,34 @@ ; Reduced precision for sqrt is allowed - should use estimate and NR iterations. 
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:' -; FMFDEBUG: fsqrt afn {{t[0-9]+}} +; FMFDEBUG: fmul afn {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:' -; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} +; GLOBALDEBUG: fmul afn {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:' define float @sqrt_afn(float %x) { ; FMF-LABEL: sqrt_afn: -; FMF: # %bb.0: -; FMF-NEXT: xssqrtsp 1, 1 -; FMF-NEXT: blr +; FMF: # %bb.0: +; FMF-NEXT: xxlxor 0, 0, 0 +; FMF-NEXT: fcmpu 0, 1, 0 +; FMF-NEXT: beq 0, .LBB10_2 +; FMF-NEXT: # %bb.1: +; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; FMF-NEXT: xsrsqrtesp 3, 1 +; FMF-NEXT: addi 3, 3, .LCPI10_0@toc@l +; FMF-NEXT: lfsx 0, 0, 3 +; FMF-NEXT: xsmulsp 2, 1, 0 +; FMF-NEXT: xsmulsp 4, 3, 3 +; FMF-NEXT: xssubsp 2, 2, 1 +; FMF-NEXT: xsmulsp 2, 2, 4 +; FMF-NEXT: xssubsp 0, 0, 2 +; FMF-NEXT: xsmulsp 0, 3, 0 +; FMF-NEXT: xsmulsp 0, 0, 1 +; FMF-NEXT: .LBB10_2: +; FMF-NEXT: fmr 1, 0 +; FMF-NEXT: blr ; ; GLOBAL-LABEL: sqrt_afn: ; GLOBAL: # %bb.0: @@ -340,18 +356,34 @@ ; The call is now fully 'fast'. This implies that approximation is allowed. 
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:' -; FMFDEBUG: fsqrt nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} +; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:' -; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} +; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:' define float @sqrt_fast(float %x) { ; FMF-LABEL: sqrt_fast: -; FMF: # %bb.0: -; FMF-NEXT: xssqrtsp 1, 1 -; FMF-NEXT: blr +; FMF: # %bb.0: +; FMF-NEXT: xxlxor 0, 0, 0 +; FMF-NEXT: fcmpu 0, 1, 0 +; FMF-NEXT: beq 0, .LBB11_2 +; FMF-NEXT: # %bb.1: +; FMF-NEXT: xsrsqrtesp 2, 1 +; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha +; FMF-NEXT: fneg 0, 1 +; FMF-NEXT: fmr 4, 1 +; FMF-NEXT: addi 3, 3, .LCPI11_0@toc@l +; FMF-NEXT: lfsx 3, 0, 3 +; FMF-NEXT: xsmaddasp 4, 0, 3 +; FMF-NEXT: xsmulsp 0, 2, 2 +; FMF-NEXT: xsmaddasp 3, 4, 0 +; FMF-NEXT: xsmulsp 0, 2, 3 +; FMF-NEXT: xsmulsp 0, 0, 1 +; FMF-NEXT: .LBB11_2: +; FMF-NEXT: fmr 1, 0 +; FMF-NEXT: blr ; ; GLOBAL-LABEL: sqrt_fast: ; GLOBAL: # %bb.0: Index: test/CodeGen/X86/fmf-flags.ll =================================================================== --- test/CodeGen/X86/fmf-flags.ll +++ test/CodeGen/X86/fmf-flags.ll @@ -7,9 +7,18 @@ define float @fast_recip_sqrt(float %x) { ; X64-LABEL: fast_recip_sqrt: ; X64: # %bb.0: -; X64-NEXT: sqrtss %xmm0, %xmm1 -; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: divss %xmm1, %xmm0 +; X64-NEXT: rsqrtss %xmm0, %xmm1 +; X64-NEXT: xorps %xmm2, %xmm2 +; X64-NEXT: cmpeqss %xmm0, %xmm2 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: movss {{.*}}(%rip), %xmm3 +; X64-NEXT: mulss %xmm0, %xmm3 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: addss {{.*}}(%rip), %xmm0 +; X64-NEXT: mulss %xmm3, %xmm0 +; X64-NEXT: andnps %xmm0, %xmm2 +; X64-NEXT: movss {{.*}}(%rip), %xmm0 +; X64-NEXT: 
divss %xmm2, %xmm0 ; X64-NEXT: retq ; ; X86-LABEL: fast_recip_sqrt: Index: test/CodeGen/X86/sqrt-fastmath-mir.ll =================================================================== --- test/CodeGen/X86/sqrt-fastmath-mir.ll +++ test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -7,16 +7,16 @@ ; CHECK: body: ; CHECK: %0:fr32 = COPY $xmm0 ; CHECK: %1:fr32 = VRSQRTSSr killed %2, %0 -; CHECK: %3:fr32 = reassoc VMULSSrr %0, %1 +; CHECK: %3:fr32 = VMULSSrr %0, %1 ; CHECK: %4:fr32 = VMOVSSrm ; CHECK: %5:fr32 = VFMADD213SSr %1, killed %3, %4 ; CHECK: %6:fr32 = VMOVSSrm -; CHECK: %7:fr32 = reassoc VMULSSrr %1, %6 -; CHECK: %8:fr32 = reassoc VMULSSrr killed %7, killed %5 -; CHECK: %9:fr32 = reassoc VMULSSrr %0, %8 +; CHECK: %7:fr32 = VMULSSrr %1, %6 +; CHECK: %8:fr32 = VMULSSrr killed %7, killed %5 +; CHECK: %9:fr32 = VMULSSrr %0, %8 ; CHECK: %10:fr32 = VFMADD213SSr %8, %9, %4 -; CHECK: %11:fr32 = reassoc VMULSSrr %9, %6 -; CHECK: %12:fr32 = reassoc VMULSSrr killed %11, killed %10 +; CHECK: %11:fr32 = VMULSSrr %9, %6 +; CHECK: %12:fr32 = VMULSSrr killed %11, killed %10 ; CHECK: %14:fr32 = FsFLD0SS ; CHECK: %15:fr32 = VCMPSSrr %0, killed %14, 0 ; CHECK: %17:vr128 = VANDNPSrr killed %16, killed %13