Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -4470,11 +4470,29 @@ // case currently. CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); + if (!Cmp) + return false; + + Value *CmpOp0 = Cmp->getOperand(0); + Value *CmpOp1 = Cmp->getOperand(1); + + // Emit "cmov on compare with an expensive operand" as a branch to avoid stalls + // on executing expensive instructions like division. + auto IsExpensiveCostInst = [&](Value *V) -> bool { + auto *I = dyn_cast<Instruction>(V); + if (I && I->getOpcode() == Instruction::FDiv) + return true; + + return false; + }; + + if (IsExpensiveCostInst(CmpOp0) || IsExpensiveCostInst(CmpOp1)) + return true; // If a branch is predictable, an out-of-order CPU can avoid blocking on its // comparison condition. If the compare has more than one use, there's // probably another cmov or setcc around, so it's not worth emitting a branch. - if (!Cmp || !Cmp->hasOneUse()) + if (!Cmp->hasOneUse()) return false; // If either operand of the select is expensive and only needed on one side Index: test/CodeGen/X86/machine-combiner.ll =================================================================== --- test/CodeGen/X86/machine-combiner.ll +++ test/CodeGen/X86/machine-combiner.ll @@ -363,18 +363,18 @@ define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) { ; SSE-LABEL: reassociate_mins_single: ; SSE: # BB#0: -; SSE-NEXT: divss %xmm1, %xmm0 +; SSE-NEXT: mulss %xmm1, %xmm0 ; SSE-NEXT: minss %xmm3, %xmm2 ; SSE-NEXT: minss %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_mins_single: ; AVX: # BB#0: -; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vminss %xmm3, %xmm2, %xmm1 ; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq - %t0 = fdiv float %x0, %x1 + %t0 = fmul float %x0, %x1 %cmp1 = fcmp olt float %x2, %t0 %sel1 = select i1 %cmp1, float %x2, float %t0 %cmp2 =
fcmp olt float %x3, %sel1 @@ -387,18 +387,18 @@ define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) { ; SSE-LABEL: reassociate_maxs_single: ; SSE: # BB#0: -; SSE-NEXT: divss %xmm1, %xmm0 +; SSE-NEXT: mulss %xmm1, %xmm0 ; SSE-NEXT: maxss %xmm3, %xmm2 ; SSE-NEXT: maxss %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_maxs_single: ; AVX: # BB#0: -; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmaxss %xmm3, %xmm2, %xmm1 ; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq - %t0 = fdiv float %x0, %x1 + %t0 = fmul float %x0, %x1 %cmp1 = fcmp ogt float %x2, %t0 %sel1 = select i1 %cmp1, float %x2, float %t0 %cmp2 = fcmp ogt float %x3, %sel1 @@ -411,18 +411,18 @@ define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) { ; SSE-LABEL: reassociate_mins_double: ; SSE: # BB#0: -; SSE-NEXT: divsd %xmm1, %xmm0 +; SSE-NEXT: mulsd %xmm1, %xmm0 ; SSE-NEXT: minsd %xmm3, %xmm2 ; SSE-NEXT: minsd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_mins_double: ; AVX: # BB#0: -; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vminsd %xmm3, %xmm2, %xmm1 ; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq - %t0 = fdiv double %x0, %x1 + %t0 = fmul double %x0, %x1 %cmp1 = fcmp olt double %x2, %t0 %sel1 = select i1 %cmp1, double %x2, double %t0 %cmp2 = fcmp olt double %x3, %sel1 @@ -435,18 +435,18 @@ define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) { ; SSE-LABEL: reassociate_maxs_double: ; SSE: # BB#0: -; SSE-NEXT: divsd %xmm1, %xmm0 +; SSE-NEXT: mulsd %xmm1, %xmm0 ; SSE-NEXT: maxsd %xmm3, %xmm2 ; SSE-NEXT: maxsd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_maxs_double: ; AVX: # BB#0: -; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmaxsd %xmm3, %xmm2, %xmm1 ; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq - %t0 = fdiv double %x0, 
%x1 + %t0 = fmul double %x0, %x1 %cmp1 = fcmp ogt double %x2, %t0 %sel1 = select i1 %cmp1, double %x2, double %t0 %cmp2 = fcmp ogt double %x3, %sel1 Index: test/Transforms/CodeGenPrepare/X86/select.ll =================================================================== --- test/Transforms/CodeGenPrepare/X86/select.ll +++ test/Transforms/CodeGenPrepare/X86/select.ll @@ -134,3 +134,18 @@ ; CHECK: %sel = select i1 %cmp, i32 %div1, i32 %div2 } +; Nothing to sink here, but this gets converted to a branch to +; avoid stalling an out-of-order CPU on a predictable branch, +; because the cmp's operand is an expensive instruction like division. + +define float @fdiv_do_transform(float %a, float %b) { +entry: + %div = fdiv float %a, %b + %cmp = fcmp ogt float %div, %b + %sel = select i1 %cmp, float %div, float 8.0 + ret float %sel + +; CHECK-LABEL: @fdiv_do_transform( +; CHECK: br i1 %cmp, label %select.end, label %select.false +} +