Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -678,6 +678,8 @@
   // Don't recurse exponentially.
   if (Depth > 6) return 0;
 
+  bool UnsafeFPMath = Options->UnsafeFPMath || Op->isFast();
+
   switch (Op.getOpcode()) {
   default: return false;
   case ISD::ConstantFP: {
@@ -691,7 +693,7 @@
   }
   case ISD::FADD:
     // FIXME: determine better conditions for this xform.
-    if (!Options->UnsafeFPMath) return 0;
+    if (!UnsafeFPMath) return 0;
 
     // After operation legalization, it might not be legal to create new FSUBs.
     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
@@ -715,7 +717,7 @@
 
   case ISD::FMUL:
   case ISD::FDIV:
-    if (Options->HonorSignDependentRoundingFPMath()) return 0;
+    if (Options->HonorSignDependentRoundingFPMathOption && !UnsafeFPMath) return 0;
 
     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
@@ -753,7 +755,7 @@
   }
   case ISD::FADD:
     // FIXME: determine better conditions for this xform.
-    assert(Options.UnsafeFPMath);
+    assert(Options.UnsafeFPMath || Op->isFast());
 
     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
@@ -6766,10 +6768,12 @@
 
     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
     // no signed zeros as well as no nans.
+    SDValue Cmp = N0.getOperand(2);
     const TargetOptions &Options = DAG.getTarget().Options;
-    if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
+    bool UnsafeFPMath = Options.UnsafeFPMath || N0->isFast();
+    if (UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
-      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+      ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
 
       if (SDValue FMinMax = combineMinNumMaxNum(
               DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
@@ -10226,7 +10230,7 @@
   }
 
   // If 'unsafe math' is enabled, fold lots of things.
-  if (Options.UnsafeFPMath) {
+  if (Options.UnsafeFPMath || Flags.isFast()) {
     // No FP constant should be created after legalization as Instruction
     // Selection pass has a hard time dealing with FP constants.
     bool AllowNewConst = (Level < AfterLegalizeDAG);
@@ -10361,7 +10365,7 @@
                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
 
   // FIXME: Auto-upgrade the target/function-level option.
-  if (Options.NoSignedZerosFPMath  || N->getFlags().hasNoSignedZeros()) {
+  if (Options.NoSignedZerosFPMath  || Flags.hasNoSignedZeros()) {
     // (fsub 0, B) -> -B
     if (N0CFP && N0CFP->isZero()) {
       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
@@ -10372,7 +10376,7 @@
   }
 
   // If 'unsafe math' is enabled, fold lots of things.
-  if (Options.UnsafeFPMath) {
+  if (Options.UnsafeFPMath || Flags.isFast()) {
     // (fsub A, 0) -> A
     if (N1CFP && N1CFP->isZero())
       return N0;
@@ -10437,7 +10441,7 @@
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (Options.UnsafeFPMath) {
+  if (Options.UnsafeFPMath || Flags.isFast()) {
     // fold (fmul A, 0) -> 0
     if (N1CFP && N1CFP->isZero())
       return N1;
@@ -10670,7 +10674,7 @@
 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
-  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
+  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath || N->isFast();
   const SDNodeFlags Flags = N->getFlags();
   if (!UnsafeMath && !Flags.hasAllowReciprocal())
     return SDValue();
@@ -10748,7 +10752,7 @@
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (Options.UnsafeFPMath) {
+  if (Options.UnsafeFPMath || N->isFast()) {
     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
     if (N1CFP) {
       // Compute the reciprocal 1.0 / c2.
@@ -10857,17 +10861,15 @@
 }
 
 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
-  if (!DAG.getTarget().Options.UnsafeFPMath)
+  SDNodeFlags Flags = N->getFlags();
+  if (!DAG.getTarget().Options.UnsafeFPMath && !Flags.isFast())
     return SDValue();
 
   SDValue N0 = N->getOperand(0);
   if (TLI.isFsqrtCheap(N0, DAG))
     return SDValue();
 
-  // TODO: FSQRT nodes should have flags that propagate to the created nodes.
-  // For now, create a Flags object for use with reassociation math transforms.
-  SDNodeFlags Flags;
-  Flags.setAllowReassociation(true);
+  // FSQRT nodes have flags that propagate to the created nodes.
   return buildSqrtEstimate(N0, Flags);
 }
 
@@ -11162,10 +11164,11 @@
     // single-step fp_round we want to fold to.
     // In other words, double rounding isn't the same as rounding.
     // Also, this is a value preserving truncation iff both fp_round's are.
-    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
+    if (DAG.getTarget().Options.UnsafeFPMath || N->isFast() || N0IsTrunc) {
       SDLoc DL(N);
       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
-                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
+                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL),
+                         N->getFlags());
     }
   }
 
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3317,7 +3317,7 @@
     break;
   case ISD::FP_TO_FP16:
     DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
-    if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
+    if (!TLI.useSoftFloat() && (TM.Options.UnsafeFPMath || Node->isFast())) {
       SDValue Op = Node->getOperand(0);
       MVT SVT = Op.getSimpleValueType();
       if ((SVT == MVT::f64 || SVT == MVT::f80) &&
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4044,7 +4044,7 @@
     break;
   case ISD::FNEG:
     // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
-    if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
+    if ((getTarget().Options.UnsafeFPMath || Operand.getNode()->isFast()) && OpOpcode == ISD::FSUB)
       // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
       return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
                      Operand.getOperand(0), Operand.getNode()->getFlags());
@@ -4437,7 +4437,7 @@
   case ISD::FMUL:
   case ISD::FDIV:
   case ISD::FREM:
-    if (getTarget().Options.UnsafeFPMath) {
+    if (getTarget().Options.UnsafeFPMath || Flags.isFast()) {
       if (Opcode == ISD::FADD) {
         // x+0 --> x
         if (N2CFP && N2CFP->getValueAPF().isZero())
@@ -4813,7 +4813,7 @@
     case ISD::FMUL:
     case ISD::FDIV:
     case ISD::FREM:
-      if (getTarget().Options.UnsafeFPMath)
+      if (getTarget().Options.UnsafeFPMath || Flags.isFast())
         return N2;
       break;
     case ISD::MUL:
Index: test/CodeGen/PowerPC/fmf-propagation.ll
===================================================================
--- test/CodeGen/PowerPC/fmf-propagation.ll
+++ test/CodeGen/PowerPC/fmf-propagation.ll
@@ -304,7 +304,7 @@
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
-; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG:         fmul {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn:'
 
 define float @sqrt_afn(float %x) {
@@ -344,7 +344,7 @@
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
-; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG:         fmul {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast:'
 
 define float @sqrt_fast(float %x) {
Index: test/CodeGen/X86/fmf-flags.ll
===================================================================
--- test/CodeGen/X86/fmf-flags.ll
+++ test/CodeGen/X86/fmf-flags.ll
@@ -7,9 +7,12 @@
 define float @fast_recip_sqrt(float %x) {
 ; X64-LABEL: fast_recip_sqrt:
 ; X64:       # %bb.0:
-; X64-NEXT:    sqrtss %xmm0, %xmm1
-; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    divss %xmm1, %xmm0
+; X64-NEXT:    rsqrtss %xmm0, %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    addss {{.*}}(%rip), %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: fast_recip_sqrt:
@@ -53,9 +56,9 @@
 define double @not_so_fast_mul_add(double %x) {
 ; X64-LABEL: not_so_fast_mul_add:
 ; X64:       # %bb.0:
-; X64-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT:    movsd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    mulsd %xmm0, %xmm1
-; X64-NEXT:    addsd %xmm1, %xmm0
+; X64-NEXT:    mulsd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    movsd %xmm1, {{.*}}(%rip)
 ; X64-NEXT:    retq
 ;
@@ -64,7 +67,9 @@
 ; X86-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-NEXT:    fld %st(0)
 ; X86-NEXT:    fmull {{\.LCPI.*}}
-; X86-NEXT:    fadd %st(0), %st(1)
+; X86-NEXT:    fxch %st(1)
+; X86-NEXT:    fmull {{\.LCPI.*}}
+; X86-NEXT:    fxch %st(1)
 ; X86-NEXT:    fstpl mul1
 ; X86-NEXT:    retl
   %m = fmul double %x, 4.2
@@ -80,10 +85,14 @@
 define float @not_so_fast_recip_sqrt(float %x) {
 ; X64-LABEL: not_so_fast_recip_sqrt:
 ; X64:       # %bb.0:
-; X64-NEXT:    sqrtss %xmm0, %xmm1
-; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    divss %xmm1, %xmm0
-; X64-NEXT:    movss %xmm1, {{.*}}(%rip)
+; X64-NEXT:    rsqrtss %xmm0, %xmm1
+; X64-NEXT:    sqrtss %xmm0, %xmm2
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    addss {{.*}}(%rip), %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    movss %xmm2, {{.*}}(%rip)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: not_so_fast_recip_sqrt:
Index: test/CodeGen/X86/sqrt-fastmath-mir.ll
===================================================================
--- test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -7,16 +7,16 @@
 ; CHECK: body:
 ; CHECK:     %0:fr32 = COPY $xmm0
 ; CHECK:     %1:fr32 = VRSQRTSSr killed %2, %0
-; CHECK:     %3:fr32 = reassoc VMULSSrr %0, %1
+; CHECK:     %3:fr32 = VMULSSrr %0, %1
 ; CHECK:     %4:fr32 = VMOVSSrm
 ; CHECK:     %5:fr32 = VFMADD213SSr %1, killed %3, %4
 ; CHECK:     %6:fr32 = VMOVSSrm
-; CHECK:     %7:fr32 = reassoc VMULSSrr %1, %6
-; CHECK:     %8:fr32 = reassoc VMULSSrr killed %7, killed %5
-; CHECK:     %9:fr32 = reassoc VMULSSrr %0, %8
+; CHECK:     %7:fr32 = VMULSSrr %1, %6
+; CHECK:     %8:fr32 = VMULSSrr killed %7, killed %5
+; CHECK:     %9:fr32 = VMULSSrr %0, %8
 ; CHECK:     %10:fr32 = VFMADD213SSr %8, %9, %4
-; CHECK:     %11:fr32 = reassoc VMULSSrr %9, %6
-; CHECK:     %12:fr32 = reassoc VMULSSrr killed %11, killed %10
+; CHECK:     %11:fr32 = VMULSSrr %9, %6
+; CHECK:     %12:fr32 = VMULSSrr killed %11, killed %10
 ; CHECK:     %14:fr32 = FsFLD0SS
 ; CHECK:     %15:fr32 = VCMPSSrr %0, killed %14, 0
 ; CHECK:     %17:vr128 = VANDNPSrr killed %16, killed %13