Index: lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- lib/Transforms/Scalar/IndVarSimplify.cpp
+++ lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -146,7 +146,8 @@
   bool rewriteFirstIterationLoopExitValues(Loop *L);
   bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
 
-  bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
+                                 const SCEV *BackedgeTakenCount,
                                  PHINode *IndVar, SCEVExpander &Rewriter);
 
   bool sinkUnusedInvariants(Loop *L);
@@ -1992,17 +1993,6 @@
   if (BackedgeTakenCount->isZero())
     return false;
 
-  // Loops with multiple exits are not currently suported by lftr
-  if (!L->getExitingBlock())
-    return false;
-
-  // Can't rewrite non-branch yet.
-  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
-    return false;
-
-  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
-    return false;
-
   return true;
 }
 
@@ -2048,15 +2038,14 @@
 /// Given a loop with one backedge and one exit, return the ICmpInst
 /// controlling the sole loop exit.  There is no guarantee that the exiting
 /// block is also the latch.
-static ICmpInst *getLoopTest(Loop *L) {
-  assert(L->getExitingBlock() && "expected loop exit");
+static ICmpInst *getLoopTest(Loop *L, BasicBlock *ExitingBB) {
 
   BasicBlock *LatchBlock = L->getLoopLatch();
   // Don't bother with LFTR if the loop is not properly simplified.
   if (!LatchBlock)
     return nullptr;
 
-  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+  BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
   assert(BI && "expected exit branch");
 
   return dyn_cast<ICmpInst>(BI->getCondition());
@@ -2064,9 +2053,9 @@
 
 /// linearFunctionTestReplace policy. Return true unless we can show that the
 /// current exit test is already sufficiently canonical.
-static bool needsLFTR(Loop *L) {
+static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
   // Do LFTR to simplify the exit condition to an ICMP.
-  ICmpInst *Cond = getLoopTest(L);
+  ICmpInst *Cond = getLoopTest(L, ExitingBB);
   if (!Cond)
     return true;
 
@@ -2188,12 +2177,11 @@
 /// BECount may be an i8* pointer type. The pointer difference is already
 /// valid count without scaling the address stride, so it remains a pointer
 /// expression as far as SCEV is concerned.
-static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
-                                ScalarEvolution *SE) {
+static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
+                                const SCEV *BECount, ScalarEvolution *SE) {
   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
 
-  Value *Cond =
-    cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
+  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
 
   // Loop over all of the PHI nodes, looking for a simple counter.
   PHINode *BestPhi = nullptr;
@@ -2226,13 +2214,15 @@
       // We explicitly allow unknown phis as long as they are already used by
       // the loop test. In this case we assume that performing LFTR could not
       // increase the number of undef users.
-      if (ICmpInst *Cond = getLoopTest(L)) {
-        if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) &&
-            Phi != getLoopPhiForCounter(Cond->getOperand(1), L)) {
-          continue;
-        }
-      }
+      // TODO: Generlize this to allow *any* loop exit which is known to
+      // execute on each iteration
+      if (L->getExitingBlock())
+        if (ICmpInst *Cond = getLoopTest(L, ExitingBB))
+          if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) &&
+              Phi != getLoopPhiForCounter(Cond->getOperand(1), L))
+            continue;
     }
+
     const SCEV *Init = AR->getStart();
 
     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
@@ -2261,7 +2251,8 @@
 /// Insert an IR expression which computes the value held by the IV IndVar
 /// (which must be an loop counter w/unit stride) after the backedge of loop L
 /// is taken IVCount times.  
-static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
+static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
+                           const SCEV *IVCount, Loop *L,
                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
   assert(isLoopCounter(IndVar, L, SE));
   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
@@ -2284,7 +2275,7 @@
     // Expand the code for the iteration count.
     assert(SE->isLoopInvariant(IVOffset, L) &&
            "Computed iteration count is not loop invariant!");
-    BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
     Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
 
     Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
@@ -2328,7 +2319,7 @@
       IVLimit = SE->getAddExpr(IVInit, IVCount);
     }
     // Expand the code for the iteration count.
-    BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
     IRBuilder<> Builder(BI);
     assert(SE->isLoopInvariant(IVLimit, L) &&
            "Computed iteration count is not loop invariant!");
@@ -2347,7 +2338,8 @@
 /// determine a loop-invariant trip count of the loop, which is actually a much
 /// broader range than just linear tests.
 bool IndVarSimplify::
-linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
+                          const SCEV *BackedgeTakenCount,
                           PHINode *IndVar, SCEVExpander &Rewriter) {
   assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
 
@@ -2360,7 +2352,7 @@
   // If the exiting block is the same as the backedge block, we prefer to
   // compare against the post-incremented value, otherwise we must compare
   // against the preincremented value.
-  if (L->getExitingBlock() == L->getLoopLatch()) {
+  if (ExitingBB == L->getLoopLatch()) {
     // Add one to the "backedge-taken" count to get the trip count.
     // This addition may overflow, which is valid as long as the comparison is
     // truncated to BackedgeTakenCount->getType().
@@ -2369,16 +2361,16 @@
     // The BackedgeTaken expression contains the number of times that the
     // backedge branches to the loop header.  This is one less than the
     // number of times the loop executes, so use the incremented indvar.
-    CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+    CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB);
   }
 
-  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
+  Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, IVCount, L, Rewriter, SE);
   assert(ExitCnt->getType()->isPointerTy() ==
              IndVar->getType()->isPointerTy() &&
          "genLoopLimit missed a cast");
 
   // Insert a new icmp_ne or icmp_eq instruction before the branch.
-  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
   ICmpInst::Predicate P;
   if (L->contains(BI->getSuccessor(0)))
     P = ICmpInst::ICMP_NE;
@@ -2567,6 +2559,19 @@
   return MadeAnyChanges;
 }
 
+/// If we have a unique latch block which is also an exiting block from the
+/// loop, return it.  Otherwise, return null.
+static BasicBlock* getLoopExitingLatch(Loop *L) {
+  if (BasicBlock *Latch = L->getLoopLatch()) {
+    SmallVector<BasicBlock*, 16> ExitingBlocks;
+    L->getExitingBlocks(ExitingBlocks);
+    for (BasicBlock *ExitingBB : ExitingBlocks)
+      if (ExitingBB == Latch)
+        return Latch;
+  }
+  return nullptr;
+}
+
 //===----------------------------------------------------------------------===//
 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
 //===----------------------------------------------------------------------===//
@@ -2624,22 +2629,55 @@
   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
 
   // If we have a trip count expression, rewrite the loop's exit condition
-  // using it.  We can currently only handle loops with a single exit.
-  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
-      needsLFTR(L)) {
-    PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE);
-    if (IndVar) {
+  // using it.  
+  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter)) {
+    // For the moment, we only allow when either a) we have a single exit
+    // loop, or b) we're considering the latch exit of a multiple exit loop.
+    // TODO: Expand this to all loop exit blocks.
+    SmallVector<BasicBlock*, 16> ExitingBlocks;
+    if (auto *ExitingBB = L->getExitingBlock())
+      ExitingBlocks.push_back(ExitingBB);
+    // NOTE TO REVIEWERS: When actually landing, I'm going to commit without
+    // this next if block (to form an NFC patch), and then land this line and
+    // the helper function separately.  I want any regressions caused to be
+    // clearly separated between the NFC refactoring and the increase in scope.
+    if (auto *ExitingBB = getLoopExitingLatch(L))
+      ExitingBlocks.push_back(ExitingBB);
+    for (BasicBlock *ExitingBB : ExitingBlocks) {
+      // Can't rewrite non-branch yet.
+      if (!isa<BranchInst>(ExitingBB->getTerminator()))
+        continue;
+      
+      if (!needsLFTR(L, ExitingBB))
+        continue;
+      
+      // Note: Expanding backedge taken count implies ability to get exit
+      // counts for each exit.
+      const SCEV *BETakenCount = SE->getExitCount(L, ExitingBB);
+      assert(BETakenCount && !isa<SCEVCouldNotCompute>(BETakenCount));
+      
+      PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE);
+      if (!IndVar)
+        continue;
+      
+      // Avoid high cost expansions.  Note: This heuristic is questionable in
+      // that our definition of "high cost" is not exactly principled.
+      if (Rewriter.isHighCostExpansion(BETakenCount, L))
+        continue;
+      
       // Check preconditions for proper SCEVExpander operation. SCEV does not
-      // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
-      // pass that uses the SCEVExpander must do it. This does not work well for
-      // loop passes because SCEVExpander makes assumptions about all loops,
-      // while LoopPassManager only forces the current loop to be simplified.
+      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
+      // any pass that uses the SCEVExpander must do it. This does not work
+      // well for loop passes because SCEVExpander makes assumptions about
+      // all loops, while LoopPassManager only forces the current loop to be
+      // simplified. 
       //
       // FIXME: SCEV expansion has no way to bail out, so the caller must
       // explicitly check any assumptions made by SCEV. Brittle.
-      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BETakenCount);
       if (!AR || AR->getLoop()->getLoopPreheader())
-        Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+        Changed |= linearFunctionTestReplace(L, ExitingBB,
+                                             BETakenCount, IndVar,
                                              Rewriter);
     }
   }
Index: lib/Transforms/Scalar/LoopPredication.cpp
===================================================================
--- lib/Transforms/Scalar/LoopPredication.cpp
+++ lib/Transforms/Scalar/LoopPredication.cpp
@@ -230,7 +230,8 @@
     cl::init(true));
 
 namespace {
-class LoopPredication {
+  class LoopPredication {
+  public:
   /// Represents an induction variable check:
   ///   icmp Pred, <induction variable>, <loop invariant limit>
   struct LoopICmp {
@@ -246,6 +247,7 @@
              << ", Limit = " << *Limit << "\n";
     }
   };
+  private:
 
   AliasAnalysis *AA;
   ScalarEvolution *SE;
@@ -614,6 +616,21 @@
   return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
 }
 
+static void normalizePredicate(ScalarEvolution *SE,
+                               LoopPredication::LoopICmp& RC) {
+  // LFTR canonicalizes checks to the ICMP_NE form instead of an ULT/SLT form.
+  // Normalize back to the ULT/SLT form for ease of handling.
+  if (RC.Pred == ICmpInst::ICMP_NE &&
+      RC.IV->getStepRecurrence(*SE)->isOne() &&
+      SE->isKnownPredicate(ICmpInst::ICMP_ULT, RC.IV->getStart(), RC.Limit))
+    RC.Pred = ICmpInst::ICMP_ULT;
+  if (RC.Pred == ICmpInst::ICMP_NE &&
+      RC.IV->getStepRecurrence(*SE)->isOne() &&
+      SE->isKnownPredicate(ICmpInst::ICMP_SLE, RC.IV->getStart(), RC.Limit))
+    RC.Pred = ICmpInst::ICMP_SLT;
+}
+
+
 /// If ICI can be widened to a loop invariant condition emits the loop
 /// invariant condition in the loop preheader and return it, otherwise
 /// returns None.
@@ -852,6 +869,7 @@
     }
   };
 
+  normalizePredicate(SE, *Result);
   if (IsUnsupportedPredicate(Step, Result->Pred)) {
     LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
                       << ")!\n");