Index: lib/Transforms/Scalar/LoopSimplifyCFG.cpp
===================================================================
--- lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -30,38 +30,134 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "loop-simplifycfg"
 
+static void UpdateDomTreeInfo(BasicBlock *BB, DominatorTree &DT) {
+    // When BB is about to be removed, move all of the BB's children to be children of
+    // its IDom. This allows us to remove the domtree entry for BB.
+    auto *BBDom = DT.getNode(BB);
+    if (!BBDom)
+      return;
+    SmallVector<DomTreeNode *, 8> ChildNodes;
+    ChildNodes.insert(ChildNodes.begin(), BBDom->begin(), BBDom->end());
+    for (auto &CN : ChildNodes)
+      DT.changeImmediateDominator(CN, BBDom->getIDom());
+    DT.eraseNode(BB);
+}
+
+// Fold a constant conditional branch into an unconditional
+// branch. Return true if we succeed in folding the branch.
+static bool TryToFoldConstConditionalBranch(Loop &L, BasicBlock *BB,
+                                            LoopInfo &LI, DominatorTree &DT) {
+  TerminatorInst *T = BB->getTerminator();
+  IRBuilder<> Builder(T);
+  auto *BI = dyn_cast<BranchInst>(T);
+  if (!BI || BI->isUnconditional())
+    return false;
+  ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+  if (!Cond)
+    return false;
+  BasicBlock *Dest1 = BI->getSuccessor(0);
+  BasicBlock *Dest2 = BI->getSuccessor(1);
+
+  BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+  BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1;
+  // Only fold conditions where taken successor is within the loop.  This is to
+  // avoid the possibility of converting a loop to a non-loop, where the only
+  // exiting block always jumps to the exit block out of the loop.
+  if (LI.getLoopFor(Destination) != &L)
+    return false;
+
+  // Let the basic block know that we are letting go of it.  Based on this,
+  // it will adjust its PHI nodes.
+  OldDest->removePredecessor(BB);
+
+  // Replace the conditional branch with an unconditional one.
+  Builder.CreateBr(Destination);
+  BI->eraseFromParent();
+
+  // Consider the case:
+  // block: 
+  // br true, label Dest, label OldDest
+  // OldDest:    ; predecessors: block
+  // When the branch is made an unconditional one:
+  // block:
+  // br label Dest
+  // OldDest:    ; no predecessors!
+  // NB: OldDest predecessor list is not updated through `removePredecessor`
+  // above! It still has the `block` as it's predecessor. However, changing the
+  // branch to an unconditional one, makes this OldDest (and its successors)
+  // unreachable in LoopInfo, where unreachable is defined as having no predecessors.
+  // So, we need to update the relevant data structures.
+  // NB: This will never delete any loops completely. It will only delete
+  // blocks within loops.
+  if (pred_begin(OldDest) == pred_end(OldDest)) {
+    SmallPtrSet<BasicBlock *, 8> WorkList;
+    WorkList.insert(OldDest);
+    // Delete OldDest and dead successor blocks.
+    while (!WorkList.empty()) {
+      auto *BB = *WorkList.begin();
+      WorkList.erase(BB);
+      SmallVector<BasicBlock *, 2> Successors(succ_begin(BB), succ_end(BB));
+      UpdateDomTreeInfo(BB, DT);
+      // Update LoopInfo to remove the block from all loops where it is
+      // present.
+      LI.removeBlock(BB);
+      // Delete the block as well, so that these unreachable blocks do not
+      // affect the loop's LCSSA form.
+      DeleteDeadBlock(BB);
+      for (auto &SuccBB : Successors)
+        if (pred_begin(SuccBB) == pred_end(SuccBB)) {
+          WorkList.insert(SuccBB);
+        }
+    }
+  }
+  return true;
+}
+
 static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) {
-  bool Changed = false;
+  bool Changed = false, ChangedConstantFolding = false;;
   // Copy blocks into a temporary array to avoid iterator invalidation issues
   // as we remove them.
   SmallVector<WeakVH, 16> Blocks(L.blocks());
 
   for (auto &Block : Blocks) {
-    // Attempt to merge blocks in the trivial case. Don't modify blocks which
-    // belong to other loops.
-    BasicBlock *Succ = cast_or_null<BasicBlock>(Block);
-    if (!Succ)
+    BasicBlock *BB = cast_or_null<BasicBlock>(Block);
+    if (!BB)
       continue;
 
-    BasicBlock *Pred = Succ->getSinglePredecessor();
-    if (!Pred || !Pred->getSingleSuccessor() || LI.getLoopFor(Pred) != &L)
-      continue;
+    if (TryToFoldConstConditionalBranch(L, BB, LI, DT))
+      ChangedConstantFolding = true;
 
+    auto *Succ = BB->getSingleSuccessor();
+    if (!Succ || !Succ->getSinglePredecessor() || LI.getLoopFor(Succ) != &L)
+      continue;
     // Pred is going to disappear, so we need to update the loop info.
-    if (L.getHeader() == Pred)
+    if (L.getHeader() == BB)
       L.moveToHeader(Succ);
-    LI.removeBlock(Pred);
-    MergeBasicBlockIntoOnlyPred(Succ, &DT);
+    LI.removeBlock(BB);
+    // BB's dom node may already be erased if we passed through the constant
+    // folding path. If not, update it here.
+    UpdateDomTreeInfo(BB, DT);
+    MergeBasicBlockIntoOnlyPred(Succ);
     Changed = true;
   }
 
-  return Changed;
+  auto *F = L.getHeader()->getParent();
+  // When we change constant branches to unconditional, we effectively make some
+  // blocks dead (i.e. physically remove their predecessors). When the DT is
+  // recomputed for verification, the dom nodes are not created for these dead
+  // blocks. However, for the in-place DT fix in UpdateDomTreeInfo, the dead
+  // blocks are marked as dominated by the entry block.  TODO: Fix the dominator
+  // tree in place.
+  if (ChangedConstantFolding)
+    DT.recalculate(*F);
+  return Changed|ChangedConstantFolding;
 }
 
 PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
Index: test/Transforms/LoopSimplifyCFG/merge-constant-conditional-branch.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopSimplifyCFG/merge-constant-conditional-branch.ll
@@ -0,0 +1,232 @@
+; RUN: opt -S -loop-simplifycfg < %s | FileCheck %s
+; RUN: opt -S -loop-simplifycfg -verify-dom-info < %s
+; RUN: opt -S -loop-simplifycfg -licm < %s
+
+
+; dont modify self loops, which can convert a loop to a non-loop.
+define i32 @test1(i32 %arg) {
+; CHECK-LABEL: test1
+; CHECK: br i1 false, label %loop, label %exit
+entry: 
+  br label %loop
+
+loop:
+  %x = phi i32 [ %arg, %entry ], [ %x.next, %loop ]
+  %x.next = add i32 %x, 1
+  br i1 false, label %loop, label %exit
+
+exit:
+  ret i32 %x.next
+}
+
+
+
+; fold constant conditional branch
+; loop has more than one backedge
+define i32 @test2(i32 %arg, i32 %n) {
+; CHECK-LABEL: test2
+; CHECK-NOT: br i1 true
+; CHECK-LABEL: latch:
+; CHECK-NEXT:  %x = phi i32 [ %arg, %entry ], [ %x.next, %header.backedge ]
+; CHECK-NEXT:  %x2.next = add i32 %x, 2
+; CHECK-NEXT:  %cond = icmp eq i32 %x, %n
+; CHECK-NEXT:  %x.next = add i32 %x, 1
+; CHECK-NEXT:  br i1 %cond, label %header.backedge, label %exit
+entry: 
+  br label %header
+
+header:
+  %x = phi i32 [ %arg, %entry ], [ %x.next, %latch ], [%x2.next, %header]
+  %x2.next = add i32 %x, 2
+  br i1 true, label %latch, label %header
+
+latch:
+  %cond = icmp eq i32 %x, %n
+  %x.next = add i32 %x, 1
+  br i1 %cond, label %header, label %exit
+
+exit:
+  ret i32 %x
+}
+
+; fold both conditional branches
+define i32 @test3(i32 %x, i32 %y, i32 %n) {
+; CHECK-LABEL: @test3
+; CHECK-NOT: br i1 false
+; CHECK-NOT: br i1 true
+; CHECK-LABEL: done:
+; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %done ]
+; CHECK: %cond = icmp slt i32 %i.next, %n
+; CHECK-NEXT: br i1 %cond, label %done, label %exit
+entry:
+  br label %header
+
+header:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %done ]
+  %x.next = phi i32 [ %x, %entry ], [ %x.i, %done ]
+  %y.next = phi i32 [ %y, %entry ], [ %y.i, %done ]
+  br i1 false, label %div-by-zero, label %non-zero
+
+non-zero:
+  br i1 true, label %done, label %special_case
+
+special_case:
+  %div = sdiv i32 %x.next, %y.next
+  br label %done
+
+done:
+  %result = phi i32 [ %div, %special_case ], [ 1234, %non-zero ]
+  %i.next = add i32 %i, %result
+  %x.i = add i32 %x.next, %i
+  %y.i = add i32 %y.next, %i
+  %cond = icmp slt i32 %i.next, %n
+  br i1 %cond, label %header, label %exit
+
+exit:
+  ret i32 %x.i
+
+div-by-zero:
+  ret i32 1234
+}
+
+; We fold both conditions, and delete the div-by-zero block.
+; second_loop block is still present in IR, will be deleted with a
+; function/module pass. We do not delete/remove loops during loop-simplifycfg
+define i32 @test4(i32 %x, i32 %y, i32 %n) {
+; CHECK-LABEL: test4
+; CHECK-LABEL: done:
+; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %done ]
+; CHECK-NEXT: %x.next = phi i32 [ %x, %entry ], [ %x.i, %done ]
+; CHECK: %cond = icmp slt i32 %i.next, %n
+; CHECK-NEXT: br i1 %cond, label %done, label %exit.loopexit1
+; CHECK-NOT: non-zero
+; CHECK-NOT: special_case
+; CHECK-NOT: div-by-zero
+; CHECK-LABEL: second_loop:
+entry:
+  br label %header
+
+header:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %done ]
+  %x.next = phi i32 [ %x, %entry ], [ %x.i, %done ]
+  %y.next = phi i32 [ %y, %entry ], [ %y.i, %done ]
+  br i1 false, label %div-by-zero, label %non-zero
+
+non-zero:
+  br i1 true, label %done, label %special_case
+
+special_case:
+  %div = sdiv i32 %x.next, %y.next
+  br label %done
+
+done:
+  %result = phi i32 [ %div, %special_case ], [ 1234, %non-zero ]
+  %i.next = add i32 %i, %result
+  %x.i = add i32 %x.next, %i
+  %y.i = add i32 %y.next, %i
+  %cond = icmp slt i32 %i.next, %n
+  br i1 %cond, label %header, label %exit
+
+exit:
+  ret i32 %x.next
+
+div-by-zero:
+  %nsquare = mul i32 %n, %n
+  br label %second_loop
+
+second_loop:
+  %x2 = phi i32 [ %x.next, %div-by-zero], [%x2.next, %second_loop]
+  %x2.next = add i32 %x2, 1
+  %cond2 = icmp slt i32 %x2.next, %nsquare
+  br i1 %cond2, label %second_loop, label %exit
+}
+
+; when the not-taken target contains more than one predecessor
+define i32 @test5(i32 %x, i32 %y, i32 %n) {
+; CHECK-LABEL: test5
+; CHECK-NOT: br i1 false
+; CHECK-NOT: br i1 true
+; CHECK-NOT: special_case
+; CHECK-LABEL: done:
+; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %done ]
+; CHECK:  %cond = icmp slt i32 %i.next, %n
+; CHECK: br i1 %cond, label %done, label %exit
+; CHECK-LABEL: exit:
+; CHECK-NEXT: %x.next.lcssa = phi i32 [ %x.next, %done ]
+entry:
+  br label %header
+
+header:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %done ]
+  %x.next = phi i32 [ %x, %entry ], [ %x.i, %done ]
+  %y.next = phi i32 [ %y, %entry ], [ %y.i, %done ]
+  br i1 false, label %special_case, label %non-zero
+
+non-zero:
+  br i1 true, label %done, label %special_case
+
+special_case:
+  %div = sdiv i32 %x.next, %y.next
+  br label %done
+
+done:
+  %result = phi i32 [ %div, %special_case ], [ 1234, %non-zero ]
+  %i.next = add i32 %i, %result
+  %x.i = add i32 %x.next, %i
+  %y.i = add i32 %y.next, %i
+  %cond = icmp slt i32 %i.next, %n
+  br i1 %cond, label %header, label %exit
+
+exit:
+  ret i32 %x.next
+}
+
+; delete multiple successor blocks because they became dead.
+define i32 @test6(i32 %x, i32 %y, i32 %n) {
+; CHECK-LABEL: test6
+; CHECK-NOT: br i1 false
+; CHECK-NOT: br i1 true
+; CHECK-NOT: special_case
+; CHECK-NOT: interm:
+; CHECK-LABEL: interm2:
+; CHECK-NEXT: br label %interm2
+; CHECK-LABEL: done:
+; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %done ]
+; CHECK:  %cond = icmp slt i32 %i.next, %n
+; CHECK: br i1 %cond, label %done, label %exit
+; CHECK-LABEL: exit:
+; CHECK-NEXT: %x.next.lcssa2 = phi i32 [ %x.next, %done ]
+entry:
+  br label %header
+
+header:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %done ]
+  %x.next = phi i32 [ %x, %entry ], [ %x.i, %done ]
+  %y.next = phi i32 [ %y, %entry ], [ %y.i, %done ]
+  br i1 false, label %special_case, label %non-zero
+
+non-zero:
+  br i1 true, label %done, label %special_case
+
+special_case:
+  %div = sdiv i32 %x.next, %y.next
+  br label %interm1
+
+interm1:
+  br label %interm2
+
+; dont delete loop interm2, even though it's unreachable.
+interm2:
+  br label %interm2
+
+done:
+  %result = phi i32 [ 1234, %non-zero ]
+  %i.next = add i32 %i, %result
+  %x.i = add i32 %x.next, %i
+  %y.i = add i32 %y.next, %i
+  %cond = icmp slt i32 %i.next, %n
+  br i1 %cond, label %header, label %exit
+
+exit:
+  ret i32 %x.next
+}