diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -151,9 +151,10 @@
              "speculatively executed instructions"));
 
 static cl::opt<unsigned>
-MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
-                  cl::desc("Max size of a block which is still considered "
-                           "small enough to thread through"));
+    MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
+                      cl::init(11),
+                      cl::desc("Max size of a block which is still considered "
+                               "small enough to thread through"));
 
 // Two is chosen to allow one negation and a logical combine.
 static cl::opt<unsigned>
@@ -2430,19 +2431,32 @@
 static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
   int Size = 0;
 
-  for (Instruction &I : BB->instructionsWithoutDebug()) {
-    if (Size > MaxSmallBlockSize)
-      return false; // Don't clone large BB's.
+  SmallPtrSet<const Value *, 32> EphValues;
+  auto IsEphemeral = [&](const Value *V) {
+    if (isa<AssumeInst>(V))
+      return true;
+    return isSafeToSpeculativelyExecute(V) &&
+           all_of(V->users(),
+                  [&](const User *U) { return EphValues.count(U); });
+  };
 
+  // Walk the loop in reverse so that we can identify ephemeral values properly
+  // (values only feeding assumes).
+  for (Instruction &I : reverse(BB->instructionsWithoutDebug())) {
     // Can't fold blocks that contain noduplicate or convergent calls.
     if (CallInst *CI = dyn_cast<CallInst>(&I))
       if (CI->cannotDuplicate() || CI->isConvergent())
         return false;
 
+    // Ignore ephemeral values which are deleted during codegen.
+    if (IsEphemeral(&I))
+      EphValues.insert(&I);
     // We will delete Phis while threading, so Phis should not be accounted in
-    // block's size
-    if (!isa<PHINode>(I))
-      ++Size;
+    // block's size.
+    else if (!isa<PHINode>(I)) {
+      if (++Size > MaxSmallBlockSize)
+        return false; // Don't clone large BB's.
+    }
 
     // We can only support instructions that do not define values that are
     // live outside of the current basic block.
diff --git a/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll b/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll
--- a/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll
+++ b/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll
@@ -1,10 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
-; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
+; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=7 -S < %s | FileCheck %s
+; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=7 -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64-p5:32:32-A5"
 
 declare void @llvm.assume(i1)
+declare i1 @llvm.type.test(i8*, metadata) nounwind readnone
 
 define void @test_01(i1 %c, i64* align 1 %ptr) local_unnamed_addr #0 {
 ; CHECK-LABEL: @test_01(
@@ -165,3 +166,61 @@
   store volatile i64 3, i64* %ptr, align 8
   ret void
 }
+
+; Try the max block size for PRE again but with the bitcast/type test/assume
+; sequence used for whole program devirt.
+define void @test_04(i1 %c, i64* align 1 %ptr, [3 x i8*]* %vtable) local_unnamed_addr #0 { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]] +; CHECK: false1: +; CHECK-NEXT: store volatile i64 1, i64* [[PTR:%.*]], align 4 +; CHECK-NEXT: [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8* +; CHECK-NEXT: [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo") +; CHECK-NEXT: tail call void @llvm.assume(i1 [[P]]) +; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 3, i64* [[PTR]], align 8 +; CHECK-NEXT: ret void +; CHECK: true2.critedge: +; CHECK-NEXT: [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8* +; CHECK-NEXT: [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo") +; CHECK-NEXT: tail call void @llvm.assume(i1 [[P]]) +; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 +; CHECK-NEXT: store volatile i64 2, i64* [[PTR]], align 8 +; CHECK-NEXT: ret void +; + br i1 %c, label %true1, label %false1 + +true1: ; preds = %false1, %0 + %vtablei8 = bitcast [3 x i8*]* %vtable to i8* + %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"foo") + tail call void @llvm.assume(i1 %p) + store volatile i64 0, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + store volatile i64 -1, i64* %ptr, align 8 + br i1 %c, label %true2, label %false2 + +false1: ; preds = %0 + store volatile i64 1, i64* %ptr, align 4 + br label %true1 + +true2: ; preds = %true1 + store volatile i64 2, i64* %ptr, align 8 + ret void + +false2: ; preds = %true1 + store volatile i64 3, i64* %ptr, align 8 + ret void +}
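
Illustrative aside, not part of the patch: the reverse walk in BlockIsSimpleEnoughToThreadThrough skips "ephemeral" values, that is, assumes plus speculatable instructions whose only transitive users are assumes, so a bitcast/llvm.type.test/llvm.assume chain no longer counts toward the threading size limit. The standalone C++ sketch below models that counting on a toy instruction list; ToyInst and countNonEphemeral are hypothetical names invented for this example, while the real code operates on llvm::Instruction via isSafeToSpeculativelyExecute() and a SmallPtrSet.

#include <cstdio>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

// Toy stand-in for llvm::Instruction, just enough to model the counting.
struct ToyInst {
  std::string Name;
  bool IsAssume = false;     // stands in for isa<AssumeInst>
  bool Speculatable = false; // stands in for isSafeToSpeculativelyExecute()
  std::vector<const ToyInst *> Users;

  explicit ToyInst(std::string N) : Name(std::move(N)) {}
};

// Count the instructions that would actually be cloned. Walking in reverse
// classifies every user before its operands, so a chain that only feeds an
// assume (bitcast -> llvm.type.test -> llvm.assume) is skipped entirely.
static int countNonEphemeral(const std::vector<const ToyInst *> &Block) {
  std::unordered_set<const ToyInst *> EphValues;
  int Size = 0;
  for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
    const ToyInst *I = *It;
    bool OnlyEphemeralUsers = true;
    for (const ToyInst *U : I->Users)
      OnlyEphemeralUsers &= EphValues.count(U) != 0;
    if (I->IsAssume || (I->Speculatable && OnlyEphemeralUsers))
      EphValues.insert(I); // ignored, like the patch's EphValues set
    else
      ++Size;              // would count toward MaxSmallBlockSize
  }
  return Size;
}

int main() {
  // Mirror the test_04 pattern: a bitcast/type.test/assume chain plus two
  // ordinary stores.
  ToyInst Cast{"bitcast"}, Test{"type.test"}, Assume{"assume"};
  ToyInst Store1{"store1"}, Store2{"store2"};
  Assume.IsAssume = true;
  Cast.Speculatable = Test.Speculatable = true;
  Cast.Users = {&Test};   // the bitcast only feeds the type test
  Test.Users = {&Assume}; // the type test only feeds the assume
  const std::vector<const ToyInst *> BB = {&Cast, &Test, &Assume, &Store1,
                                           &Store2};
  std::printf("counted size = %d\n", countNonEphemeral(BB)); // prints 2
  return 0;
}

Run as-is, this prints "counted size = 2" for the five-instruction toy block: the bitcast/type.test/assume chain is classified as ephemeral, which mirrors why test_04 above can still be threaded at the reduced -simplifycfg-max-small-block-size=7.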