Index: llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -613,12 +614,29 @@
       }
     }
 
+    // Sink division: (X / Y) * Z --> (X * Z) / Y
+    Value *FDiv;
     Value *Z;
-    if (match(&I, m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))),
-                           m_Value(Z)))) {
-      // Sink division: (X / Y) * Z --> (X * Z) / Y
-      Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I);
-      return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
+    if (match(&I,
+              m_c_FMul(m_CombineAnd(m_Value(FDiv),
+                                    m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))),
+                       m_Value(Z)))) {
+      // Avoid sinking a loop-invariant fdiv into a loop with the fmul.
+      // This is a hack to avoid putting an expensive math op into a loop that
+      // it doesn't need to be in. Ideally, we would be able to rely on other
+      // passes to undo that code movement if profitable, but it might require
+      // altering the pass pipeline and negatively impacting compile-time.
+      auto ShouldSink = [&]() {
+        if (LI) {
+          if (Loop *L = LI->getLoopFor(I.getParent()))
+            return !L->isLoopInvariant(FDiv);
+        }
+        return true;
+      };
+      if (ShouldSink()) {
+        Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I);
+        return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
+      }
     }
 
     // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
Index: llvm/test/Transforms/InstCombine/fmul.ll
===================================================================
--- llvm/test/Transforms/InstCombine/fmul.ll
+++ llvm/test/Transforms/InstCombine/fmul.ll
@@ -1051,6 +1051,13 @@
   ret float %mul
 }
 
+; The loop-invariant fdiv is sunk into the loop because this
+; test file does not require LoopInfo. This is the expected
+; behavior in early invocations of InstCombine. Later, when
+; LoopInfo is available, LICM or other passes should move the
+; fdiv back outside of the loop, and we don't want to invert that
+; in InstCombine.
+
 define void @fmul_loop_invariant_fdiv(float* %a, float %x) {
 ; CHECK-LABEL: @fmul_loop_invariant_fdiv(
 ; CHECK-NEXT:  entry:
Index: llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
+++ llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
@@ -17,19 +17,19 @@
 define void @vdiv(ptr %a, float %b) #0 {
 ; CHECK-LABEL: @vdiv(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = fdiv fast float 1.000000e+00, [[B:%.*]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP0:%.*]] = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !tbaa [[TBAA3:![0-9]+]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], [[TMP0]]
-; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[TMP1]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[TMP5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ;
Index: llvm/test/Transforms/PhaseOrdering/lto-licm.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/lto-licm.ll
+++ llvm/test/Transforms/PhaseOrdering/lto-licm.ll
@@ -4,6 +4,7 @@
 define void @hoist_fdiv(ptr %a, float %b) {
 ; CHECK-LABEL: @hoist_fdiv(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = fdiv fast float 1.000000e+00, [[B:%.*]]
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
@@ -12,9 +13,9 @@
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_0]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast float [[TMP0]], [[B:%.*]]
-; CHECK-NEXT:    store float [[TMP1]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    store float [[TMP2]], ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0]], 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end: