Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -4132,6 +4132,12 @@ if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) return true; + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) { + Constant *Res = ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, DL); + return Res->isOneValue(); + } + switch (Pred) { default: return false; @@ -4207,7 +4213,15 @@ DT) && isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, DT); + + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_UGE: + return isTruePredicate(CmpInst::ICMP_UGE, BLHS, ALHS, DL, Depth, AC, CxtI, + DT) && + isTruePredicate(CmpInst::ICMP_UGE, ARHS, BRHS, DL, Depth, AC, CxtI, + DT); } + } bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL, Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -95,6 +95,16 @@ cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions")); +static cl::opt<unsigned> SpeculativeFlattenBias( + "speculative-flatten-bias", cl::Hidden, cl::init(100), + cl::desc("Control how biased a branch needs to be to be considered rarely" + " failing for speculative flattening (default = 100)")); + +static cl::opt<unsigned> SpeculativeFlattenThreshold( + "speculative-flatten-threshold", cl::Hidden, cl::init(10), + cl::desc("Control how much speculation happens due to speculative" + " flattening (default = 10)")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); @@ -2665,6 +2675,130 @@ return Changed; } +/// If we have a series of tests leading to a 
frequently executed fallthrough +/// path and we can test all the conditions at once, we can execute a single +/// test on the fast path and figure out which condition failed on the slow +/// path. This transformation considers a pair of branches at a time since +/// recursively considering branches pairwise will cause an entire chain to +/// collapse. This transformation is code size neutral, but makes it +/// dynamically more expensive to fail either check. As such, we only want to +/// do this if both checks are expected to essentially never fail. +/// The key motivating examples are cases like: +/// br (5 > Length), in_bounds, fail1 +/// in_bounds: +/// br (6 > Length), in_bounds2, fail2 +/// in_bounds2: +/// ... +/// +/// We can rewrite this as: +/// br (6 > Length), in_bounds2, dispatch +/// in_bounds2: +/// ... +/// dispatch: +/// br (5 > Length), fail2, fail1 +/// +/// TODO: we could consider duplicating some (non-speculatable) instructions +/// from BI->getParent() down both paths +static bool SpeculativelyFlattenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + const DataLayout &DL) { + auto *PredBB = PBI->getParent(); + auto *BB = BI->getParent(); + + /// Is the failing path of this branch taken rarely if at all? + auto isRarelyUntaken = [](BranchInst *BI) { + uint64_t ProbTrue; + uint64_t ProbFalse; + if (!ExtractBranchMetadata(BI, ProbTrue, ProbFalse)) + return false; + return ProbTrue > ProbFalse * SpeculativeFlattenBias; + }; + + if (PBI->getSuccessor(0) != BB || + !isRarelyUntaken(PBI) || !isRarelyUntaken(BI) || + !isImpliedCondition(BI->getCondition(), PBI->getCondition(), DL)) + return false; + + // TODO: The following code performs a similar, but slightly distinct + // transformation to that done by SpeculativelyExecuteBB. We should consider + // combining them at some point. + + // Can we speculate everything in the given block (except for the terminator + // instruction) at the instruction boundary before 'At'? 
+ unsigned SpeculationCost = 0; + for (Instruction &I : *BB) { + if (isa<TerminatorInst>(I)) break; + if (!isSafeToSpeculativelyExecute(&I, PBI)) + return false; + SpeculationCost++; + // Only flatten relatively small BBs to avoid making the bad case terrible + if (SpeculationCost > SpeculativeFlattenThreshold || isa<CallInst>(I)) + return false; + } + + // We need to check whether any of the instructions we may have examined when + // deciding the BI implies PBI have flags which might be control dependent on + // PBI. This can happen if (for instance), PBI ensures no overflow in the + // computation leading to BI's condition. We can't use the overflow bits to + // prove the original condition checking for overflow. This only considers + // the instructions within BB which actually contribute to BI's condition. + SmallVector<Instruction *, 16> Worklist; + SmallSet<Instruction *, 16> Visited; + Value *BICond = BI->getCondition(); + if (auto *CondI = dyn_cast<Instruction>(BICond)) + if (CondI->getParent() == BB) + Worklist.push_back(CondI); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + assert(I->getParent() == BB && "broken invariant"); + + if (!Visited.insert(I).second) + // already visited + continue; + + if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) + if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) + // TODO: can we prove the flags are valid before PBI? + return false; + + for (Value *Op : I->operands()) + if (auto *OpI = dyn_cast<Instruction>(Op)) + if (OpI->getParent() == BB) + Worklist.push_back(OpI); + } + + + DEBUG(dbgs() << "Outlining slow path comparison: " + << *PBI->getCondition() << " implied by " + << *BI->getCondition() << "\n"); + // See the example in the function comment. 
+ Value *WhichCond = PBI->getCondition(); + auto *Success = BI->getSuccessor(0); + auto *FailPBI = PBI->getSuccessor(1); + auto *FailBI = BI->getSuccessor(1); + // Have PBI branch directly to the fast path using BI's condition, branch + // to this BI's block for the slow path dispatch + PBI->setSuccessor(0, Success); + PBI->setSuccessor(1, BB); + PBI->setCondition(BI->getCondition()); + // Rewrite BI to distinguish between the two failing cases + BI->setSuccessor(0, FailBI); + BI->setSuccessor(1, FailPBI); + BI->setCondition(WhichCond); + // Move all of the instructions from BI->getParent other than + // the terminator into the fastpath. This requires speculating them past + // the original PBI branch, but we checked for that legality above. + // TODO: This doesn't handle dependent loads. We could duplicate those + // down both paths, but that involves further code growth. We need to + // figure out a good cost model here. + PredBB->getInstList().splice(BasicBlock::iterator(PBI), BB->getInstList(), + BB->begin(), std::prev(BB->end())); + + // To be conservatively correct, drop all metadata on the rewritten + // branches. TODO: update metadata + PBI->dropUnknownNonDebugMetadata(); + BI->dropUnknownNonDebugMetadata(); + return true; +} /// If we have a conditional branch as a predecessor of another block, /// this function tries to simplify it. We know /// that PBI and BI are both conditional branches, and BI is in one of the @@ -2729,6 +2863,8 @@ isImpliedCondition(PBI->getCondition(), BI->getCondition(), DL) && PBI->getSuccessor(0) != PBI->getSuccessor(1) && BB->getSinglePredecessor()) { + DEBUG(dbgs() << "Branch (" << *BI << ") implied by predecessor (" + << *PBI << ")\n"); // Turn this into a branch on constant. auto *OldCond = BI->getCondition(); BI->setCondition(ConstantInt::getTrue(BB->getContext())); @@ -2736,6 +2872,10 @@ return true; // Nuke the branch on constant. 
} + // If BI could imply PBI, try to remove one check from the fast-path + if (SpeculativelyFlattenCondBranchToCondBranch(PBI, BI, DL)) + return true; + // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. Index: test/Transforms/InstSimplify/implies.ll =================================================================== --- test/Transforms/InstSimplify/implies.ll +++ test/Transforms/InstSimplify/implies.ll @@ -215,3 +215,50 @@ %res = icmp sge i1 %var30, %var29 ret i1 %res } + +define i1 @test16(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test16 +; CHECK: ret i1 true + %var29 = icmp ugt i32 %length.i, 5 + %var30 = icmp ugt i32 %length.i, 6 + %res = icmp ule i1 %var30, %var29 + ret i1 %res +} + +define i1 @test17(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test17 +; CHECK: ret i1 true + %var29 = icmp ugt i32 %length.i, 5 + %var30 = icmp ugt i32 %length.i, 5 + %res = icmp ule i1 %var30, %var29 + ret i1 %res +} + +; negative test (smaller range) +define i1 @test18(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test18 +; CHECK: ret i1 %res + %var29 = icmp ugt i32 %length.i, 5 + %var30 = icmp ugt i32 %length.i, 4 + %res = icmp ule i1 %var30, %var29 + ret i1 %res +} + +; negative test +define i1 @test19(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test19 +; CHECK: ret i1 %res + %var29 = icmp ugt i32 5, %length.i + %var30 = icmp ugt i32 6, %length.i + %res = icmp ule i1 %var30, %var29 + ret i1 %res +} + +define i1 @test20(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test20 +; CHECK: ret i1 true + %var29 = icmp ugt i32 5, %length.i + %var30 = icmp ugt i32 4, %length.i + %res = icmp ule i1 %var30, %var29 + ret i1 %res +} Index: test/Transforms/SimplifyCFG/fast-fallthrough.ll =================================================================== --- test/Transforms/SimplifyCFG/fast-fallthrough.ll +++ test/Transforms/SimplifyCFG/fast-fallthrough.ll @@ -0,0 +1,113 @@ +; RUN: 
opt -S %s -simplifycfg | FileCheck %s + +define void @test(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test + %iplus1 = add nsw i32 %i, 1 + %var29 = icmp ugt i32 %length.i, 5 + %var30 = icmp ugt i32 %length.i, 6 +; CHECK: br i1 %var30, label %in_bounds, label %next + br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0} + +next: +; CHECK-LABEL: next: +; CHECK: br i1 %var29, label %out_of_bounds2, label %out_of_bounds + br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0} + +in_bounds: + ret void + +out_of_bounds: + call void @foo(i64 0) + unreachable + +out_of_bounds2: + call void @foo(i64 1) + unreachable +} + +define void @test2(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test2 + %iplus1 = add nsw i32 %i, 1 + %var29 = icmp slt i32 %i, %length.i + %var30 = icmp slt i32 %iplus1, %length.i +; CHECK: br i1 %var30, label %in_bounds, label %next + br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0} + +next: +; CHECK-LABEL: next: +; CHECK: br i1 %var29, label %out_of_bounds2, label %out_of_bounds + br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0} + +in_bounds: + ret void + +out_of_bounds: + call void @foo(i64 0) + unreachable + +out_of_bounds2: + call void @foo(i64 1) + unreachable +} + +; This is a negative test. The nsw on the add might be control +; dependent on the first check, so we can't hoist it above. 
+define void @test3(i32 %length.i, i32 %i) { +; CHECK-LABEL: @test3 + %var29 = icmp slt i32 %i, %length.i +; CHECK: br i1 %var29, label %next, label %out_of_bounds + br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0} + +next: +; CHECK-LABEL: next: +; CHECK: br i1 %var30, label %in_bounds, label %out_of_bounds2 + %iplus1 = add nsw i32 %i, 1 + %var30 = icmp slt i32 %iplus1, %length.i + br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0} + +in_bounds: + ret void + +out_of_bounds: + call void @foo(i64 0) + unreachable + +out_of_bounds2: + call void @foo(i64 1) + unreachable +} + +; As written, this one can't trigger today. It would require us to duplicate +; the %val1 load down two paths and that's not implemented yet. +define i64 @test4(i32 %length.i, i32 %i, i64* %base) { +; CHECK-LABEL: @test4 + %var29 = icmp slt i32 %i, %length.i +; CHECK: br i1 %var29, label %next, label %out_of_bounds + br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0} + +next: +; CHECK-LABEL: next: + %addr1 = getelementptr i64, i64* %base, i32 %i + %val1 = load i64, i64* %addr1 + %iplus1 = add nsw i32 %i, 1 + %var30 = icmp slt i32 %iplus1, %length.i +; CHECK: br i1 %var30, label %in_bounds, label %out_of_bounds2 + br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0} + +in_bounds: + %addr2 = getelementptr i64, i64* %base, i32 %iplus1 + %val2 = load i64, i64* %addr2 + %res = sub i64 %val1, %val2 + ret i64 %res + +out_of_bounds: + call void @foo(i64 0) + unreachable + +out_of_bounds2: + call void @foo(i64 %val1) + unreachable +} + +declare void @foo(i64) +