Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -673,6 +673,7 @@
   // Don't allow anything with multiple uses unless we know it is free.
   EVT VT = Op.getValueType();
+  const SDNodeFlags Flags = Op->getFlags();
   if (!Op.hasOneUse())
     if (!(Op.getOpcode() == ISD::FP_EXTEND &&
           TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
       return 0;
@@ -717,7 +718,6 @@
     return 1;
 
   case ISD::FMUL:
-  case ISD::FDIV:
    if (Options->HonorSignDependentRoundingFPMath()) return 0;
 
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
@@ -727,6 +727,18 @@
     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                               Depth + 1);
+
+  case ISD::FDIV:
+    if (Options->HonorSignDependentRoundingFPMath() && !Flags.hasNoNaNs())
+      return 0;
+
+    // fold (fneg (fdiv X, Y)) -> (fdiv (fneg X), Y) or (fdiv X, (fneg Y))
+    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+                                    Options, Depth + 1))
+      return V;
+
+    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+                              Depth + 1);
 
   case ISD::FP_EXTEND:
   case ISD::FP_ROUND:
   case ISD::FSIN:
@@ -782,7 +794,7 @@
   case ISD::FMUL:
   case ISD::FDIV:
-    assert(!Options.HonorSignDependentRoundingFPMath());
+    assert(!Options.HonorSignDependentRoundingFPMath() || Flags.hasNoNaNs());
 
     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                            DAG.getTargetLoweringInfo(), &Options, Depth + 1))
@@ -10789,7 +10801,7 @@
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (Options.UnsafeFPMath) {
+  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
     if (N1CFP) {
       // Compute the reciprocal 1.0 / c2.
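Note (editorial, not part of the patch): the new ISD::FDIV case mirrors the
existing FMUL handling, but when sign-dependent rounding is honored it only
fires if the node carries nnan. A minimal IR sketch of the pattern this
enables; the function name is hypothetical:

; (fneg (fdiv X, Y)) can now be folded to (fdiv (fneg X), Y) or
; (fdiv X, (fneg Y)) when the division is marked nnan.
define float @fneg_fdiv_nnan(float %x, float %y) {
  %div = fdiv nnan float %x, %y
  %neg = fsub float -0.000000e+00, %div   ; IR-level fneg
  ret float %neg
}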
Index: test/CodeGen/AMDGPU/fdiv.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fdiv.f16.ll
+++ test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -218,7 +218,7 @@
 }
 
 ; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f16:
-; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dcccccd, v{{[0-9]+}}
+; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dccc000, v{{[0-9]+}}
 ; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x2e66, v{{[0-9]+}}
 ; GFX8_9: buffer_store_short [[MUL]]
 
@@ -230,7 +230,7 @@
 }
 
 ; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f16:
-; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdcccccd, v{{[0-9]+}}
+; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdccc000, v{{[0-9]+}}
 ; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0xae66, v{{[0-9]+}}
 ; GFX8_9: buffer_store_short [[MUL]]
 
Index: test/CodeGen/X86/fmf-flags.ll
===================================================================
--- test/CodeGen/X86/fmf-flags.ll
+++ test/CodeGen/X86/fmf-flags.ll
@@ -8,17 +8,11 @@
 ; X64-LABEL: fast_recip_sqrt:
 ; X64:       # %bb.0:
 ; X64-NEXT:    rsqrtss %xmm0, %xmm1
-; X64-NEXT:    xorps %xmm2, %xmm2
-; X64-NEXT:    cmpeqss %xmm0, %xmm2
 ; X64-NEXT:    mulss %xmm1, %xmm0
-; X64-NEXT:    movss {{.*}}(%rip), %xmm3
-; X64-NEXT:    mulss %xmm0, %xmm3
 ; X64-NEXT:    mulss %xmm1, %xmm0
 ; X64-NEXT:    addss {{.*}}(%rip), %xmm0
-; X64-NEXT:    mulss %xmm3, %xmm0
-; X64-NEXT:    andnps %xmm0, %xmm2
-; X64-NEXT:    movss {{.*}}(%rip), %xmm0
-; X64-NEXT:    divss %xmm2, %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: fast_recip_sqrt:
@@ -89,10 +83,14 @@
 define float @not_so_fast_recip_sqrt(float %x) {
 ; X64-LABEL: not_so_fast_recip_sqrt:
 ; X64:       # %bb.0:
-; X64-NEXT:    sqrtss %xmm0, %xmm1
-; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    divss %xmm1, %xmm0
-; X64-NEXT:    movss %xmm1, {{.*}}(%rip)
+; X64-NEXT:    rsqrtss %xmm0, %xmm1
+; X64-NEXT:    sqrtss %xmm0, %xmm2
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    addss {{.*}}(%rip), %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    movss %xmm2, sqrt1(%rip)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: not_so_fast_recip_sqrt:
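Note (editorial, not part of the patch): the AMDGPU constant changes appear to
fall out of the visitFDIV change. With arcp on the fdiv, the fold now fires at
f16 before SI promotes the operation to f32, so the reciprocal is formed at
half precision (0x2e66, ~0.1 in f16) and then extended to f32, giving
0x3dccc000 rather than the f32 reciprocal 0x3dcccccd. A sketch of the kind of
input that now folds without -enable-unsafe-fp-math; the function name is
hypothetical:

; With arcp, (fdiv %x, 10.0) may be rewritten as a multiply by the
; (inexact) reciprocal of 10.0, even though global unsafe-fp-math is off.
define float @recip_by_constant(float %x) {
  %div = fdiv arcp float %x, 1.000000e+01
  ret float %div
}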