Index: include/llvm/IR/Operator.h
===================================================================
--- include/llvm/IR/Operator.h
+++ include/llvm/IR/Operator.h
@@ -202,7 +202,6 @@
     setNoNaNs();
     setNoInfs();
     setNoSignedZeros();
-    setAllowReciprocal();
   }
 
   void operator&=(const FastMathFlags &OtherFlags) {
@@ -227,7 +226,6 @@
       setHasNoNaNs(true);
       setHasNoInfs(true);
       setHasNoSignedZeros(true);
-      setHasAllowReciprocal(true);
     }
   }
   void setHasNoNaNs(bool B) {
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8887,9 +8887,8 @@
 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
-  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
   const SDNodeFlags *Flags = N->getFlags();
-  if (!UnsafeMath && !Flags->hasAllowReciprocal())
+  if (!Flags->hasAllowReciprocal())
     return SDValue();
 
   // Skip if current node is a reciprocal.
@@ -8912,7 +8911,7 @@
     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
-      // This division is eligible for optimization only if global unsafe math
-      // is enabled or if this division allows reciprocal formation.
-      if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
+      // This division is eligible for optimization only if it allows
+      // reciprocal formation.
+      if (U->getFlags()->hasAllowReciprocal())
         Users.insert(U);
     }
   }
Index: test/CodeGen/AArch64/fdiv-combine.ll
===================================================================
--- test/CodeGen/AArch64/fdiv-combine.ll
+++ test/CodeGen/AArch64/fdiv-combine.ll
@@ -11,9 +11,9 @@
 ; CHECK: fmul
 ; CHECK: fmul
 ; CHECK: fmul
-  %div = fdiv float %a, %D
-  %div1 = fdiv float %b, %D
-  %div2 = fdiv float %c, %D
+  %div = fdiv arcp float %a, %D
+  %div1 = fdiv arcp float %b, %D
+  %div2 = fdiv arcp float %c, %D
   tail call void @foo_3f(float %div, float %div1, float %div2)
   ret void
 }
@@ -25,9 +25,9 @@
 ; CHECK: fmul
 ; CHECK: fmul
 ; CHECK: fmul
-  %div = fdiv double %a, %D
-  %div1 = fdiv double %b, %D
-  %div2 = fdiv double %c, %D
+  %div = fdiv arcp double %a, %D
+  %div1 = fdiv arcp double %b, %D
+  %div2 = fdiv arcp double %c, %D
   tail call void @foo_3d(double %div, double %div1, double %div2)
   ret void
 }
@@ -39,9 +39,9 @@
 ; CHECK: fmul
 ; CHECK: fmul
 ; CHECK: fmul
-  %div = fdiv <4 x float> %a, %D
-  %div1 = fdiv <4 x float> %b, %D
-  %div2 = fdiv <4 x float> %c, %D
+  %div = fdiv arcp <4 x float> %a, %D
+  %div1 = fdiv arcp <4 x float> %b, %D
+  %div2 = fdiv arcp <4 x float> %c, %D
   tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
   ret void
 }
@@ -53,9 +53,9 @@
 ; CHECK: fmul
 ; CHECK: fmul
 ; CHECK: fmul
-  %div = fdiv <2 x double> %a, %D
-  %div1 = fdiv <2 x double> %b, %D
-  %div2 = fdiv <2 x double> %c, %D
+  %div = fdiv arcp <2 x double> %a, %D
+  %div1 = fdiv arcp <2 x double> %b, %D
+  %div2 = fdiv arcp <2 x double> %c, %D
   tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
   ret void
 }
Index: test/CodeGen/AMDGPU/fdiv.ll
===================================================================
--- test/CodeGen/AMDGPU/fdiv.ll
+++ test/CodeGen/AMDGPU/fdiv.ll
@@ -62,7 +62,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 define void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
 entry:
-  %fdiv = fdiv fast float %a, %b
+  %fdiv = fdiv fast arcp float %a, %b
   store float %fdiv, float addrspace(1)* %out
   ret void
 }
@@ -77,7 +77,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
 entry:
-  %fdiv = fdiv fast float %a, %b
+  %fdiv = fdiv fast arcp float %a, %b
   store float %fdiv, float addrspace(1)* %out
   ret void
 }
Index: test/CodeGen/PowerPC/fdiv-combine.ll
===================================================================
--- test/CodeGen/PowerPC/fdiv-combine.ll
+++ test/CodeGen/PowerPC/fdiv-combine.ll
@@ -14,9 +14,9 @@
 ; CHECK: fmul
 ; CHECK: fmul
 ; CHECK: fmul
-  %div = fdiv double %a, %D
-  %div1 = fdiv double %b, %D
-  %div2 = fdiv double %c, %D
+  %div = fdiv arcp double %a, %D
+  %div1 = fdiv arcp double %b, %D
+  %div2 = fdiv arcp double %c, %D
   tail call void @foo_3d(double %div, double %div1, double %div2)
   ret void
 }
Index: test/CodeGen/X86/fdiv-combine.ll
===================================================================
--- test/CodeGen/X86/fdiv-combine.ll
+++ test/CodeGen/X86/fdiv-combine.ll
@@ -89,8 +89,8 @@
 ; CHECK-NEXT: mulsd %xmm2, %xmm0
 ; CHECK-NEXT: addsd %xmm2, %xmm0
 ; CHECK-NEXT: retq
-  %div1 = fdiv fast double 1.0, %y
-  %div2 = fdiv fast double %x, %y
+  %div1 = fdiv arcp double 1.0, %y
+  %div2 = fdiv arcp double %x, %y
   %ret = fadd fast double %div2, %div1
   ret double %ret
 }
Index: test/LTO/X86/Inputs/fast-with-recip.ll
===================================================================
--- /dev/null
+++ test/LTO/X86/Inputs/fast-with-recip.ll
@@ -0,0 +1,9 @@
+define void @fastWithRecip(float %a, float %b, float %c) {
+entry:
+  %div = fdiv fast arcp float %a, %c
+  %div1 = fdiv fast arcp float %b, %c
+  tail call void @useWithRecip(float %div, float %div1)
+  ret void
+}
+
+declare void @useWithRecip(float, float)
Index: test/LTO/X86/Inputs/fast-without-recip.ll
===================================================================
--- /dev/null
+++ test/LTO/X86/Inputs/fast-without-recip.ll
@@ -0,0 +1,9 @@
+define void @fastWithoutRecip(float %a, float %b, float %c) {
+entry:
+  %div = fdiv fast float %a, %c
+  %div1 = fdiv fast float %b, %c
+  tail call void @useWithoutRecip(float %div, float %div1)
+  ret void
+}
+
+declare void @useWithoutRecip(float, float)
Index: test/LTO/X86/fast-recip.ll
===================================================================
--- /dev/null
+++ test/LTO/X86/fast-recip.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-link -o %t.bc %s %p/Inputs/fast-without-recip.ll %p/Inputs/fast-with-recip.ll
+; RUN: opt -inline -instcombine -o %t2.bc %t.bc
+; RUN: llc -disable-tail-calls %t2.bc -o - | FileCheck %s
+
+; Inlining will be done on fastWithRecip() (built with fast-math and the
+; reciprocal transformation enabled) and fastWithoutRecip() (built with
+; fast-math but with the reciprocal transformation disabled). Both contain
+; two divisions with the same denominator, so both are candidates for the
+; reciprocal transformation. We verify that the enabled version emits only
+; one division (the reciprocal) followed by two multiplications, and that
+; the disabled version emits both divisions and no multiplications.
+
+define void @foo(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5) #0 {
+entry:
+; CHECK: fooEnter
+; CHECK: div
+; CHECK-NOT: div
+; CHECK: mul
+; CHECK: mul
+; CHECK: useWithRecip
+; CHECK: div
+; CHECK: div
+; CHECK-NOT: mul
+; CHECK: useWithoutRecip
+; CHECK: fooExit
+  tail call void @fooEnter()
+  tail call void @fastWithRecip(float %a0, float %a1, float %a2)
+  tail call void @fastWithoutRecip(float %a3, float %a4, float %a5)
+  tail call void @fooExit()
+  ret void
+}
+
+declare void @fooEnter()
+declare void @fastWithRecip(float, float, float)
+declare void @fastWithoutRecip(float, float, float)
+declare void @fooExit()
+
+attributes #0 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "unsafe-fp-math"="true" }
Index: test/Transforms/InstCombine/fast-math.ll
===================================================================
--- test/Transforms/InstCombine/fast-math.ll
+++ test/Transforms/InstCombine/fast-math.ll
@@ -347,7 +347,7 @@
 ; X/C1 / C2 => X * (1/(C2*C1))
 define float @fdiv1(float %x) {
   %div = fdiv float %x, 0x3FF3333340000000
-  %div1 = fdiv fast float %div, 0x4002666660000000
+  %div1 = fdiv fast arcp float %div, 0x4002666660000000
   ret float %div1
 ; 0x3FF3333340000000 = 1.2f
 ; 0x4002666660000000 = 2.3f
Index: test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
+++ test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
@@ -254,9 +254,9 @@
   %load3 = load float, float* %idx3, align 4
   %load4 = load float, float* %idx4, align 4
 
-  %op1 = fadd fast float %load1, 1.0
-  %op2 = fadd fast float %load2, 1.0
-  %op3 = fadd fast float %load3, 1.0
+  %op1 = fadd fast arcp float %load1, 1.0
+  %op2 = fadd fast arcp float %load2, 1.0
+  %op3 = fadd fast arcp float %load3, 1.0
   %op4 = fadd arcp float %load4, 1.0
 
   store float %op1, float* %idx1, align 4
Index: unittests/IR/IRBuilderTest.cpp
===================================================================
--- unittests/IR/IRBuilderTest.cpp
+++ unittests/IR/IRBuilderTest.cpp
@@ -163,6 +163,8 @@
   FAdd = cast<FPMathOperator>(F);
   EXPECT_TRUE(FAdd->hasNoNaNs());
 
+  FMF.setAllowReciprocal();
+  Builder.setFastMathFlags(FMF);
   F = Builder.CreateFDiv(F, F);
   EXPECT_TRUE(Builder.getFastMathFlags().any());
   EXPECT_TRUE(Builder.getFastMathFlags().UnsafeAlgebra);
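For reference, here is a minimal sketch (not part of the patch) of the repeated-divisor transformation that combineRepeatedFPDivisors enables when the arcp flag is present. It is written as LLVM IR for readability even though the combine itself runs on the SelectionDAG, and the names @repeated_divisors, @use_3f, %q0..%q2 and %recip are illustrative only.

; Before: three divisions share the denominator %d and each carries arcp.
define void @repeated_divisors(float %a, float %b, float %c, float %d) {
entry:
  %q0 = fdiv arcp float %a, %d
  %q1 = fdiv arcp float %b, %d
  %q2 = fdiv arcp float %c, %d
  tail call void @use_3f(float %q0, float %q1, float %q2)
  ret void
}

declare void @use_3f(float, float, float)

; After the combine, codegen conceptually emits one reciprocal and three
; multiplications instead of three divisions:
;   %recip = fdiv arcp float 1.000000e+00, %d
;   %q0    = fmul arcp float %a, %recip
;   %q1    = fmul arcp float %b, %recip
;   %q2    = fmul arcp float %c, %recip

With this patch the combine fires only when each division carries arcp; the global UnsafeFPMath target option alone no longer enables it.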