Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -548,8 +548,9 @@
   /// pair of exact and max expressions that are eventually summarized in
   /// ExitNotTakenInfo and BackedgeTakenInfo.
   struct ExitLimit {
-    const SCEV *ExactNotTaken;
-    const SCEV *MaxNotTaken;
+    const SCEV *ExactNotTaken; //< The exit is not taken exactly this many times
+    const SCEV *MaxNotTaken; //< The exit is not taken at most this many times
+    bool MaxOrZero; //< Not taken either exactly MaxNotTaken or zero times
 
     /// A set of predicate guards for this ExitLimit. The result is only valid
     /// if all of the predicates in \c Predicates evaluate to 'true' at
@@ -561,12 +562,13 @@
       Predicates.insert(P);
     }
 
-    /*implicit*/ ExitLimit(const SCEV *E) : ExactNotTaken(E), MaxNotTaken(E) {}
+    /*implicit*/ ExitLimit(const SCEV *E)
+        : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {}
 
     ExitLimit(
-        const SCEV *E, const SCEV *M,
+        const SCEV *E, const SCEV *M, bool MaxOrZero,
         ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
-        : ExactNotTaken(E), MaxNotTaken(M) {
+        : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
       assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
               !isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
              "Exact is not allowed to be less precise than Max");
@@ -575,11 +577,12 @@
           addPredicate(P);
     }
 
-    ExitLimit(const SCEV *E, const SCEV *M,
+    ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero,
               const SmallPtrSetImpl<const SCEVPredicate *> &PredSet)
-        : ExitLimit(E, M, {&PredSet}) {}
+        : ExitLimit(E, M, MaxOrZero, {&PredSet}) {}
 
-    ExitLimit(const SCEV *E, const SCEV *M) : ExitLimit(E, M, None) {}
+    ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero)
+        : ExitLimit(E, M, MaxOrZero, None) {}
 
     /// Test whether this ExitLimit contains any computed information, or
     /// whether it's all SCEVCouldNotCompute values.
@@ -644,6 +647,9 @@
     /// ExitNotTaken has an element for every exiting block in the loop.
     PointerIntPair<const SCEV *, 1> MaxAndComplete;
 
+    /// True iff the backedge is taken either exactly Max or zero times.
+    bool MaxOrZero;
+
     /// \name Helper projection functions on \c MaxAndComplete.
     /// @{
     bool isComplete() const { return MaxAndComplete.getInt(); }
@@ -658,11 +664,13 @@
     BackedgeTakenInfo(BackedgeTakenInfo &&Other) {
       ExitNotTaken = std::move(Other.ExitNotTaken);
       MaxAndComplete = std::move(Other.MaxAndComplete);
+      MaxOrZero = std::move(Other.MaxOrZero);
     }
 
     BackedgeTakenInfo &operator=(BackedgeTakenInfo &&Other) {
       ExitNotTaken = std::move(Other.ExitNotTaken);
       MaxAndComplete = std::move(Other.MaxAndComplete);
+      MaxOrZero = std::move(Other.MaxOrZero);
       return *this;
     }
 
@@ -670,7 +678,7 @@
 
     /// Initialize BackedgeTakenInfo from a list of exact exit counts.
     BackedgeTakenInfo(SmallVectorImpl<EdgeExitInfo> &&ExitCounts, bool Complete,
-                      const SCEV *MaxCount);
+                      const SCEV *MaxCount, bool MaxOrZero);
 
     /// Test whether this BackedgeTakenInfo contains any computed information,
     /// or whether it's all SCEVCouldNotCompute values.
@@ -709,6 +717,10 @@
     /// Get the max backedge taken count for the loop.
     const SCEV *getMax(ScalarEvolution *SE) const;
 
+    /// Return true if the number of times this backedge is taken is either the
+    /// value returned by getMax or zero.
+    bool isMaxOrZero(ScalarEvolution *SE) const;
+
     /// Return true if any backedge taken count expressions refer to the given
     /// subexpression.
     bool hasOperand(const SCEV *S, ScalarEvolution *SE) const;
@@ -1380,6 +1392,10 @@
   /// that is known never to be less than the actual backedge taken count.
   const SCEV *getMaxBackedgeTakenCount(const Loop *L);
 
+  /// Return true if the backedge taken count is either the value returned by
+  /// getMaxBackedgeTakenCount or zero.
+  bool isBackedgeTakenCountMaxOrZero(const Loop *L);
+
   /// Return true if the specified loop has an analyzable loop-invariant
   /// backedge-taken count.
   bool hasLoopInvariantBackedgeTakenCount(const Loop *L);
Index: include/llvm/Transforms/Utils/UnrollLoop.h
===================================================================
--- include/llvm/Transforms/Utils/UnrollLoop.h
+++ include/llvm/Transforms/Utils/UnrollLoop.h
@@ -32,8 +32,9 @@
 
 bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
                 bool AllowRuntime, bool AllowExpensiveTripCount,
-                bool UseUpperBound, unsigned TripMultiple, LoopInfo *LI,
-                ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+                bool PreserveCondBr, bool PreserveOnlyFirst,
+                unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
+                DominatorTree *DT, AssumptionCache *AC,
                 OptimizationRemarkEmitter *ORE, bool PreserveLCSSA);
 
 bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -5417,6 +5417,10 @@
   return getBackedgeTakenInfo(L).getMax(this);
 }
 
+bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
+  return getBackedgeTakenInfo(L).isMaxOrZero(this);
+}
+
 /// Push PHI nodes in the header of the given loop onto the given Worklist.
 static void
 PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
@@ -5649,6 +5653,13 @@
   return getMax();
 }
 
+bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const {
+  auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
+    return !ENT.hasAlwaysTruePredicate();
+  };
+  return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue);
+}
+
 bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
                                                     ScalarEvolution *SE) const {
   if (getMax() && getMax() != SE->getCouldNotCompute() &&
@@ -5668,8 +5679,8 @@
 ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
     SmallVectorImpl<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
         &&ExitCounts,
-    bool Complete, const SCEV *MaxCount)
-    : MaxAndComplete(MaxCount, Complete) {
+    bool Complete, const SCEV *MaxCount, bool MaxOrZero)
+    : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
   typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
   ExitNotTaken.reserve(ExitCounts.size());
   std::transform(
@@ -5707,6 +5718,7 @@
   BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
   const SCEV *MustExitMaxBECount = nullptr;
   const SCEV *MayExitMaxBECount = nullptr;
+  bool MustExitMaxOrZero = false;
 
   // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
   // and compute maxBECount.
@@ -5739,9 +5751,10 @@
     // computable EL.MaxNotTaken.
     if (EL.MaxNotTaken != getCouldNotCompute() && Latch &&
         DT.dominates(ExitBB, Latch)) {
-      if (!MustExitMaxBECount)
+      if (!MustExitMaxBECount) {
         MustExitMaxBECount = EL.MaxNotTaken;
-      else {
+        MustExitMaxOrZero = EL.MaxOrZero;
+      } else {
         MustExitMaxBECount =
             getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken);
       }
@@ -5756,8 +5769,11 @@
   }
   const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
     (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
+  // The loop backedge will be taken the maximum or zero times if there's
+  // a single exit that must be taken the maximum or zero times.
+  bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
   return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
-                           MaxBECount);
+                           MaxBECount, MaxOrZero);
 }
 
 ScalarEvolution::ExitLimit
@@ -5894,7 +5910,8 @@
           !isa<SCEVCouldNotCompute>(BECount))
         MaxBECount = BECount;
 
-      return ExitLimit(BECount, MaxBECount, {&EL0.Predicates, &EL1.Predicates});
+      return ExitLimit(BECount, MaxBECount, false,
+                       {&EL0.Predicates, &EL1.Predicates});
     }
     if (BO->getOpcode() == Instruction::Or) {
       // Recurse on the operands of the or.
@@ -5933,7 +5950,8 @@
           BECount = EL0.ExactNotTaken;
       }
 
-      return ExitLimit(BECount, MaxBECount, {&EL0.Predicates, &EL1.Predicates});
+      return ExitLimit(BECount, MaxBECount, false,
+                       {&EL0.Predicates, &EL1.Predicates});
     }
   }
 
@@ -6318,7 +6336,7 @@
     unsigned BitWidth = getTypeSizeInBits(RHS->getType());
     const SCEV *UpperBound =
         getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
-    return ExitLimit(getCouldNotCompute(), UpperBound);
+    return ExitLimit(getCouldNotCompute(), UpperBound, false);
   }
 
   return getCouldNotCompute();
@@ -7114,7 +7132,8 @@
         // should not accept a root of 2.
         const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
         if (Val->isZero())
-          return ExitLimit(R1, R1, Predicates); // We found a quadratic root!
+          // We found a quadratic root!
+          return ExitLimit(R1, R1, false, Predicates);
       }
     }
     return getCouldNotCompute();
@@ -7171,7 +7190,7 @@
     else
       MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
                                          : -CR.getUnsignedMin());
-    return ExitLimit(Distance, MaxBECount, Predicates);
+    return ExitLimit(Distance, MaxBECount, false, Predicates);
   }
 
   // As a special case, handle the instance where Step is a positive power of
@@ -7226,7 +7245,7 @@
 
       const SCEV *Limit =
           getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
-      return ExitLimit(Limit, Limit, Predicates);
+      return ExitLimit(Limit, Limit, false, Predicates);
     }
   }
 
@@ -7239,14 +7258,14 @@
       loopHasNoAbnormalExits(AddRec->getLoop())) {
     const SCEV *Exact =
         getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
-    return ExitLimit(Exact, Exact, Predicates);
+    return ExitLimit(Exact, Exact, false, Predicates);
   }
 
   // Then, try to solve the above equation provided that Start is constant.
   if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) {
     const SCEV *E = SolveLinEquationWithOverflow(
         StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this);
-    return ExitLimit(E, E, Predicates);
+    return ExitLimit(E, E, false, Predicates);
   }
   return getCouldNotCompute();
 }
@@ -8688,14 +8707,16 @@
   }
 
   const SCEV *MaxBECount;
+  bool MaxOrZero = false;
   if (isa<SCEVConstant>(BECount))
     MaxBECount = BECount;
-  else if (isa<SCEVConstant>(BECountIfBackedgeTaken))
+  else if (isa<SCEVConstant>(BECountIfBackedgeTaken)) {
     // If we know exactly how many times the backedge will be taken if it's
     // taken at least once, then the backedge count will either be that or
     // zero.
     MaxBECount = BECountIfBackedgeTaken;
-  else {
+    MaxOrZero = true;
+  } else {
     // Calculate the maximum backedge count based on the range of values
     // permitted by Start, End, and Stride.
     APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
@@ -8732,7 +8753,7 @@
   if (isa<SCEVCouldNotCompute>(MaxBECount))
     MaxBECount = BECount;
 
-  return ExitLimit(BECount, MaxBECount, Predicates);
+  return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
 }
 
 ScalarEvolution::ExitLimit
@@ -8809,7 +8830,7 @@
   if (isa<SCEVCouldNotCompute>(MaxBECount))
     MaxBECount = BECount;
 
-  return ExitLimit(BECount, MaxBECount, Predicates);
+  return ExitLimit(BECount, MaxBECount, false, Predicates);
 }
 
 const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
@@ -9591,6 +9612,8 @@
 
   if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
     OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+    if (SE->isBackedgeTakenCountMaxOrZero(L))
+      OS << ", actual taken count either this or zero.";
   } else {
     OS << "Unpredictable max backedge-taken count. ";
   }
Index: lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1000,14 +1000,22 @@
   if (Convergent)
     UP.AllowRemainder = false;
 
-  // Try to find the trip count upper bound if it is allowed and we cannot find
-  // exact trip count.
-  if (UP.UpperBound) {
-    if (!TripCount) {
-      MaxTripCount = SE->getSmallConstantMaxTripCount(L);
-      // Only unroll with small upper bound.
-      if (MaxTripCount > UnrollMaxUpperBound)
-        MaxTripCount = 0;
+  // Try to find the trip count upper bound if we cannot find the exact trip
+  // count.
+  bool MaxOrZero = false;
+  if (!TripCount) {
+    MaxTripCount = SE->getSmallConstantMaxTripCount(L);
+    MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
+    // We can unroll by the upper bound amount if it's generally allowed or if
+    // we know that the loop is executed either the upper bound or zero times.
+    // (MaxOrZero unrolling keeps only the first loop test, so the number of
+    // loop tests remains the same compared to the non-unrolled version, whereas
+    // the generic upper bound unrolling keeps all but the last loop test so the
+    // number of loop tests goes up which may end up being worse on targets with
+    // constriained branch predictor resources so is controlled by an option.)
+    // In addition we only unroll small upper bounds.
+    if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
+      MaxTripCount = 0;
     }
   }
 
@@ -1025,8 +1033,8 @@
 
   // Unroll the loop.
   if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
-                  UP.AllowExpensiveTripCount, UseUpperBound, TripMultiple, LI,
-                  SE, &DT, &AC, &ORE, PreserveLCSSA))
+                  UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
+                  TripMultiple, LI, SE, &DT, &AC, &ORE, PreserveLCSSA))
     return false;
 
   // If loop has an unroll count pragma or unrolled by explicitly set count
Index: lib/Transforms/Utils/LoopUnroll.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnroll.cpp
+++ lib/Transforms/Utils/LoopUnroll.cpp
@@ -189,7 +189,8 @@
 ///
 /// PreserveCondBr indicates whether the conditional branch of the LatchBlock
 /// needs to be preserved.  It is needed when we use trip count upper bound to
-/// fully unroll the loop.
+/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
+/// conditional branch needs to be preserved.
 ///
 /// Similarly, TripMultiple divides the number of times that the LatchBlock may
 /// execute without exiting the loop.
@@ -207,10 +208,10 @@
 /// DominatorTree if they are non-null.
 bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
                       bool AllowRuntime, bool AllowExpensiveTripCount,
-                      bool PreserveCondBr, unsigned TripMultiple, LoopInfo *LI,
-                      ScalarEvolution *SE, DominatorTree *DT,
-                      AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
-                      bool PreserveLCSSA) {
+                      bool PreserveCondBr, bool PreserveOnlyFirst,
+                      unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
+                      DominatorTree *DT, AssumptionCache *AC,
+                      OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) {
   BasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
     DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
@@ -550,7 +551,7 @@
       assert(NeedConditional &&
              "NeedCondition cannot be modified by both complete "
              "unrolling and runtime unrolling");
-      NeedConditional = (PreserveCondBr && j);
+      NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0));
     } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
       // If we know the trip count or a multiple of it, we can safely use an
       // unconditional branch for some iterations.
Index: test/Analysis/ScalarEvolution/trip-count13.ll
===================================================================
--- test/Analysis/ScalarEvolution/trip-count13.ll
+++ test/Analysis/ScalarEvolution/trip-count13.ll
@@ -14,7 +14,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @u_0
 ; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) + ((100 + %rhs) umax %rhs))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
 
 leave:
   ret void
@@ -34,7 +34,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @u_1
 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + ((-100 + %start) umax %start))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
 
 leave:
   ret void
@@ -54,7 +54,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @s_0
 ; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) + ((100 + %rhs) smax %rhs))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
 
 leave:
   ret void
@@ -74,7 +74,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @s_1
 ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + ((-100 + %start) smax %start))
-; CHECK-NEXT: Loop %loop: max backedge-taken count is -100
+; CHECK-NEXT: Loop %loop: max backedge-taken count is -100, actual taken count either this or zero.
 
 leave:
   ret void
Index: test/Analysis/ScalarEvolution/trip-count14.ll
===================================================================
--- test/Analysis/ScalarEvolution/trip-count14.ll
+++ test/Analysis/ScalarEvolution/trip-count14.ll
@@ -15,7 +15,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @s32_max1
 ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) smax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1, actual taken count either this or zero.
 
 do.end:
   ret void
@@ -36,7 +36,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @s32_max2
 ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) smax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2, actual taken count either this or zero.
 
 do.end:
   ret void
@@ -57,7 +57,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @s32_maxx
 ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) smax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1{{$}}
 
 do.end:
   ret void
@@ -82,7 +82,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit
 ; CHECK-NEXT: Loop %do.body: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
 
 do.end:
   ret void
@@ -103,7 +103,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @u32_max1
 ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((1 + %n) umax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 1, actual taken count either this or zero.
 
 do.end:
   ret void
@@ -124,7 +124,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @u32_max2
 ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((2 + %n) umax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2, actual taken count either this or zero.
 
 do.end:
   ret void
@@ -145,7 +145,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @u32_maxx
 ; CHECK-NEXT: Loop %do.body: backedge-taken count is ((-1 * %n) + ((%n + %x) umax %n))
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is -1{{$}}
 
 do.end:
   ret void
@@ -170,7 +170,7 @@
 
 ; CHECK-LABEL: Determining loop execution counts for: @u32_max2_unpredictable_exit
 ; CHECK-NEXT: Loop %do.body: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2
+; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
 
 do.end:
   ret void
Index: test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
@@ -0,0 +1,207 @@
+; RUN: opt -S -loop-unroll < %s | FileCheck %s
+
+; Unroll twice, with first loop exit kept
+; CHECK-LABEL: @s32_max1
+; CHECK: do.body:
+; CHECK:  store
+; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK:  ret void
+; CHECK: do.body.1:
+; CHECK:  store
+; CHECK:  br label %do.end
+define void @s32_max1(i32 %n, i32* %p) {
+entry:
+  %add = add i32 %n, 1
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp slt i32 %i.0, %add
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
+
+do.end:
+  ret void
+}
+
+; Unroll thrice, with first loop exit kept
+; CHECK-LABEL: @s32_max2
+; CHECK: do.body:
+; CHECK:  store
+; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK:  ret void
+; CHECK: do.body.1:
+; CHECK:  store
+; CHECK:  store
+; CHECK:  br label %do.end
+define void @s32_max2(i32 %n, i32* %p) {
+entry:
+  %add = add i32 %n, 2
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp slt i32 %i.0, %add
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+  ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @s32_maxx
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @s32_maxx(i32 %n, i32 %x, i32* %p) {
+entry:
+  %add = add i32 %x, %n
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp slt i32 %i.0, %add
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
+
+do.end:
+  ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @s32_max2_unpredictable_exit
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @s32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
+entry:
+  %add = add i32 %n, 2
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ]
+  %cmp = icmp eq i32 %i.0, %x
+  br i1 %cmp, label %do.end, label %if.end ; unpredictable
+
+if.end:
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp1 = icmp slt i32 %i.0, %add
+  br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+  ret void
+}
+
+; Unroll twice, with first loop exit kept
+; CHECK-LABEL: @u32_max1
+; CHECK: do.body:
+; CHECK:  store
+; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK:  ret void
+; CHECK: do.body.1:
+; CHECK:  store
+; CHECK:  br label %do.end
+define void @u32_max1(i32 %n, i32* %p) {
+entry:
+  %add = add i32 %n, 1
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp ult i32 %i.0, %add
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
+
+do.end:
+  ret void
+}
+
+; Unroll thrice, with first loop exit kept
+; CHECK-LABEL: @u32_max2
+; CHECK: do.body:
+; CHECK:  store
+; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
+; CHECK: do.end:
+; CHECK:  ret void
+; CHECK: do.body.1:
+; CHECK:  store
+; CHECK:  store
+; CHECK:  br label %do.end
+define void @u32_max2(i32 %n, i32* %p) {
+entry:
+  %add = add i32 %n, 2
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp ult i32 %i.0, %add
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+  ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @u32_maxx
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @u32_maxx(i32 %n, i32 %x, i32* %p) {
+entry:
+  %add = add i32 %x, %n
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp = icmp ult i32 %i.0, %add
+  br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
+
+do.end:
+  ret void
+}
+
+; Should not be unrolled
+; CHECK-LABEL: @u32_max2_unpredictable_exit
+; CHECK: do.body:
+; CHECK: do.end:
+; CHECK-NOT: do.body.1:
+define void @u32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
+entry:
+  %add = add i32 %n, 2
+  br label %do.body
+
+do.body:
+  %i.0 = phi i32 [ %n, %entry ], [ %inc, %if.end ]
+  %cmp = icmp eq i32 %i.0, %x
+  br i1 %cmp, label %do.end, label %if.end ; unpredictable
+
+if.end:
+  %arrayidx = getelementptr i32, i32* %p, i32 %i.0
+  store i32 %i.0, i32* %arrayidx, align 4
+  %inc = add i32 %i.0, 1
+  %cmp1 = icmp ult i32 %i.0, %add
+  br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
+
+do.end:
+  ret void
+}