Index: lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyCFG.cpp
+++ lib/Transforms/Utils/SimplifyCFG.cpp
@@ -259,7 +259,8 @@
 static bool DominatesMergePoint(Value *V, BasicBlock *BB,
                                 SmallPtrSetImpl<Instruction*> *AggressiveInsts,
                                 unsigned &CostRemaining,
-                                const TargetTransformInfo &TTI) {
+                                const TargetTransformInfo &TTI,
+                                unsigned Depth = 0) {
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) {
     // Non-instructions all dominate instructions, but not all constantexprs
@@ -297,15 +298,23 @@
 
   unsigned Cost = ComputeSpeculationCost(I, TTI);
 
-  if (Cost > CostRemaining)
+  // Allow exactly one instruction to be speculated regardless of its cost
+  // (as long as it is safe to do so): the check below only rejects an
+  // over-budget instruction if something was already speculated or if it is
+  // an operand (Depth > 0). This flattens the CFG even for a division or other
+  // expensive operation; CodeGenPrepare is expected to undo the speculation
+  // if it has not enabled further IR optimizations.
+  if (Cost > CostRemaining && (AggressiveInsts->size() || Depth))
     return false;
 
-  CostRemaining -= Cost;
+  // Clamp at zero so the exempted over-budget instruction cannot wrap it.
+  CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost;
 
   // Okay, we can only really hoist these out if their operands do
   // not take us over the cost threshold.
   for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
-    if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI))
+    if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI,
+                             Depth + 1))
       return false;
   // Okay, it's safe to do this!  Remember this instruction.
   AggressiveInsts->insert(I);
Index: test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
===================================================================
--- test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
+++ test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
@@ -7,9 +7,7 @@
 ; ALL-LABEL: @test1(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
 ; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i64 %A, 0
@@ -28,9 +26,7 @@
 ; ALL-LABEL: @test2(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
 ; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i32 %A, 0
@@ -50,9 +46,7 @@
 ; ALL-LABEL: @test3(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
 ; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i16 %A, 0
@@ -72,9 +66,7 @@
 ; ALL-LABEL: @test1b(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
 ; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i64 %A, 0
@@ -94,9 +86,7 @@
 ; ALL-LABEL: @test2b(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
 ; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i32 %A, 0
@@ -116,9 +106,7 @@
 ; ALL-LABEL: @test3b(
 ; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
 ; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]]
 ; ALL: ret
 entry:
   %tobool = icmp eq i16 %A, 0
Index: test/Transforms/SimplifyCFG/speculate-math.ll
===================================================================
--- test/Transforms/SimplifyCFG/speculate-math.ll
+++ test/Transforms/SimplifyCFG/speculate-math.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -simplifycfg -phi-node-folding-threshold=2 < %s | FileCheck %s
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
 
 declare float @llvm.sqrt.f32(float) nounwind readonly
 declare float @llvm.fma.f32(float, float, float) nounwind readonly
@@ -7,19 +7,8 @@
 declare float @llvm.minnum.f32(float, float) nounwind readonly
 declare float @llvm.maxnum.f32(float, float) nounwind readonly
 
-; FIXME: This is intended to be a temporary test. As discussed in 
-; D12882, we actually do want to speculate even expensive operations
-; in SimplifyCFG because it can expose more optimizations for other
-; passes. Therefore, we either need to adjust SimplifyCFG's 
-; calculations that use the TTI cost model or use a different cost
-; model for deciding which ops should be speculated in SimplifyCFG. 
-; We should also be using the TTI cost model later - for example in
-; CodeGenPrepare - to potentially undo this speculation.
-
-; Do not speculate fdiv by default because it is generally expensive. 
-
 ; CHECK-LABEL: @fdiv_test(
-; CHECK-NOT: select
+; CHECK: select i1 %cmp, double %div, double 0.0
 define double @fdiv_test(double %a, double %b) {
 entry:
   %cmp = fcmp ogt double %a, 0.0