Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -678,6 +678,8 @@ // Don't recurse exponentially. if (Depth > 6) return 0; + bool UnsafeFPMath = Options->UnsafeFPMath || Op->isFast(); + switch (Op.getOpcode()) { default: return false; case ISD::ConstantFP: { @@ -691,7 +693,7 @@ } case ISD::FADD: // FIXME: determine better conditions for this xform. - if (!Options->UnsafeFPMath) return 0; + if (!UnsafeFPMath) return 0; // After operation legalization, it might not be legal to create new FSUBs. if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) @@ -715,7 +717,7 @@ case ISD::FMUL: case ISD::FDIV: - if (Options->HonorSignDependentRoundingFPMath()) return 0; + if (Options->HonorSignDependentRoundingFPMathOption && !UnsafeFPMath) return 0; // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, @@ -753,7 +755,7 @@ } case ISD::FADD: // FIXME: determine better conditions for this xform. - assert(Options.UnsafeFPMath); + assert(Options.UnsafeFPMath || Op->isFast()); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, @@ -6766,10 +6768,12 @@ // FIXME: Instead of testing for UnsafeFPMath, this should be checking for // no signed zeros as well as no nans. 
+ SDValue Cmp = N0.getOperand(2); const TargetOptions &Options = DAG.getTarget().Options; - if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && + bool UnsafeFPMath = Options.UnsafeFPMath || Cmp->isFast(); + if (UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { - ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get(); if (SDValue FMinMax = combineMinNumMaxNum( DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) @@ -10226,7 +10230,7 @@ } // If 'unsafe math' is enabled, fold lots of things. - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || Flags.isFast()) { // No FP constant should be created after legalization as Instruction // Selection pass has a hard time dealing with FP constants. bool AllowNewConst = (Level < AfterLegalizeDAG); @@ -10361,7 +10365,7 @@ GetNegatedExpression(N1, DAG, LegalOperations), Flags); // FIXME: Auto-upgrade the target/function-level option. - if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { + if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { // (fsub 0, B) -> -B if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) @@ -10372,7 +10376,7 @@ } // If 'unsafe math' is enabled, fold lots of things. - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || Flags.isFast()) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) return N0; @@ -10437,7 +10441,7 @@ if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || Flags.isFast()) { // fold (fmul A, 0) -> 0 if (N1CFP && N1CFP->isZero()) return N1; @@ -10670,7 +10674,7 @@ // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { - bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; + bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath || N->isFast(); const SDNodeFlags Flags = N->getFlags(); if (!UnsafeMath && !Flags.hasAllowReciprocal()) return SDValue(); @@ -10748,7 +10752,7 @@ if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || N->isFast()) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP) { // Compute the reciprocal 1.0 / c2. @@ -10857,17 +10861,15 @@ } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (!DAG.getTarget().Options.UnsafeFPMath) + SDNodeFlags Flags = N->getFlags(); + if (!DAG.getTarget().Options.UnsafeFPMath && !Flags.isFast()) return SDValue(); SDValue N0 = N->getOperand(0); if (TLI.isFsqrtCheap(N0, DAG)) return SDValue(); - // TODO: FSQRT nodes should have flags that propagate to the created nodes. - // For now, create a Flags object for use with reassociation math transforms. - SDNodeFlags Flags; - Flags.setAllowReassociation(true); + // FSQRT nodes have flags that propagate to the created nodes. return buildSqrtEstimate(N0, Flags); } @@ -11162,10 +11164,11 @@ // single-step fp_round we want to fold to. // In other words, double rounding isn't the same as rounding. // Also, this is a value preserving truncation iff both fp_round's are. 
- if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { + if (DAG.getTarget().Options.UnsafeFPMath || N->isFast() || N0IsTrunc) { SDLoc DL(N); return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), - DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL), + N->getFlags()); } } Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3317,7 +3317,7 @@ break; case ISD::FP_TO_FP16: DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); - if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { + if (!TLI.useSoftFloat() && (TM.Options.UnsafeFPMath || Node->isFast())) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); if ((SVT == MVT::f64 || SVT == MVT::f80) && Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4044,7 +4044,7 @@ break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) + if ((getTarget().Options.UnsafeFPMath || Operand.getNode()->isFast()) && OpOpcode == ISD::FSUB) // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? 
return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), Operand.getOperand(0), Operand.getNode()->getFlags()); @@ -4437,7 +4437,7 @@ case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (getTarget().Options.UnsafeFPMath) { + if (getTarget().Options.UnsafeFPMath || Flags.isFast()) { if (Opcode == ISD::FADD) { // x+0 --> x if (N2CFP && N2CFP->getValueAPF().isZero()) @@ -4813,7 +4813,7 @@ case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (getTarget().Options.UnsafeFPMath) + if (getTarget().Options.UnsafeFPMath || Flags.isFast()) return N2; break; case ISD::MUL: Index: test/CodeGen/PowerPC/fmf-propagation.ll =================================================================== --- test/CodeGen/PowerPC/fmf-propagation.ll +++ test/CodeGen/PowerPC/fmf-propagation.ll @@ -304,7 +304,7 @@ ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:' -; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} +; GLOBALDEBUG: fmul {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:' define float @sqrt_afn(float %x) { @@ -344,7 +344,7 @@ ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:' -; GLOBALDEBUG: fmul reassoc {{t[0-9]+}} +; GLOBALDEBUG: fmul {{t[0-9]+}} ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:' define float @sqrt_fast(float %x) { Index: test/CodeGen/X86/fmf-flags.ll =================================================================== --- test/CodeGen/X86/fmf-flags.ll +++ test/CodeGen/X86/fmf-flags.ll @@ -7,9 +7,12 @@ define float @fast_recip_sqrt(float %x) { ; X64-LABEL: fast_recip_sqrt: ; X64: # %bb.0: -; X64-NEXT: sqrtss %xmm0, %xmm1 -; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: divss %xmm1, %xmm0 +; X64-NEXT: rsqrtss %xmm0, %xmm1 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: addss {{.*}}(%rip), %xmm0 +; X64-NEXT: mulss {{.*}}(%rip), %xmm1 
+; X64-NEXT: mulss %xmm1, %xmm0 ; X64-NEXT: retq ; ; X86-LABEL: fast_recip_sqrt: @@ -53,9 +56,9 @@ define double @not_so_fast_mul_add(double %x) { ; X64-LABEL: not_so_fast_mul_add: ; X64: # %bb.0: -; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X64-NEXT: movsd {{.*}}(%rip), %xmm1 ; X64-NEXT: mulsd %xmm0, %xmm1 -; X64-NEXT: addsd %xmm1, %xmm0 +; X64-NEXT: mulsd {{.*}}(%rip), %xmm0 ; X64-NEXT: movsd %xmm1, {{.*}}(%rip) ; X64-NEXT: retq ; @@ -64,7 +67,9 @@ ; X86-NEXT: fldl {{[0-9]+}}(%esp) ; X86-NEXT: fld %st(0) ; X86-NEXT: fmull {{\.LCPI.*}} -; X86-NEXT: fadd %st(0), %st(1) +; X86-NEXT: fxch %st(1) +; X86-NEXT: fmull {{\.LCPI.*}} +; X86-NEXT: fxch %st(1) ; X86-NEXT: fstpl mul1 ; X86-NEXT: retl %m = fmul double %x, 4.2 @@ -80,10 +85,14 @@ define float @not_so_fast_recip_sqrt(float %x) { ; X64-LABEL: not_so_fast_recip_sqrt: ; X64: # %bb.0: -; X64-NEXT: sqrtss %xmm0, %xmm1 -; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: divss %xmm1, %xmm0 -; X64-NEXT: movss %xmm1, {{.*}}(%rip) +; X64-NEXT: rsqrtss %xmm0, %xmm1 +; X64-NEXT: sqrtss %xmm0, %xmm2 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: addss {{.*}}(%rip), %xmm0 +; X64-NEXT: mulss {{.*}}(%rip), %xmm1 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: movss %xmm2, {{.*}}(%rip) ; X64-NEXT: retq ; ; X86-LABEL: not_so_fast_recip_sqrt: Index: test/CodeGen/X86/sqrt-fastmath-mir.ll =================================================================== --- test/CodeGen/X86/sqrt-fastmath-mir.ll +++ test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -7,16 +7,16 @@ ; CHECK: body: ; CHECK: %0:fr32 = COPY $xmm0 ; CHECK: %1:fr32 = VRSQRTSSr killed %2, %0 -; CHECK: %3:fr32 = reassoc VMULSSrr %0, %1 +; CHECK: %3:fr32 = VMULSSrr %0, %1 ; CHECK: %4:fr32 = VMOVSSrm ; CHECK: %5:fr32 = VFMADD213SSr %1, killed %3, %4 ; CHECK: %6:fr32 = VMOVSSrm -; CHECK: %7:fr32 = reassoc VMULSSrr %1, %6 -; CHECK: %8:fr32 = reassoc VMULSSrr killed %7, killed %5 -; CHECK: %9:fr32 = reassoc VMULSSrr %0, %8 +; CHECK: %7:fr32 
= VMULSSrr %1, %6 +; CHECK: %8:fr32 = VMULSSrr killed %7, killed %5 +; CHECK: %9:fr32 = VMULSSrr %0, %8 ; CHECK: %10:fr32 = VFMADD213SSr %8, %9, %4 -; CHECK: %11:fr32 = reassoc VMULSSrr %9, %6 -; CHECK: %12:fr32 = reassoc VMULSSrr killed %11, killed %10 +; CHECK: %11:fr32 = VMULSSrr %9, %6 +; CHECK: %12:fr32 = VMULSSrr killed %11, killed %10 ; CHECK: %14:fr32 = FsFLD0SS ; CHECK: %15:fr32 = VCMPSSrr %0, killed %14, 0 ; CHECK: %17:vr128 = VANDNPSrr killed %16, killed %13