Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -520,9 +520,14 @@
       const SCEV *Exact;
       const SCEV *Max;
 
+      /// A predicate union guard for this ExitLimit. The result is only
+      /// valid if this predicate evaluates to 'true' at run-time.
+      SCEVUnionPredicate Pred;
+
       /*implicit*/ ExitLimit(const SCEV *E) : Exact(E), Max(E) {}
 
-      ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) {
+      ExitLimit(const SCEV *E, const SCEV *M, SCEVUnionPredicate &P)
+          : Exact(E), Max(M), Pred(P) {
         assert((isa<SCEVCouldNotCompute>(Exact) ||
                 !isa<SCEVCouldNotCompute>(Max)) &&
                "Exact is not allowed to be less precise than Max");
@@ -534,6 +539,11 @@
         return !isa<SCEVCouldNotCompute>(Exact) ||
           !isa<SCEVCouldNotCompute>(Max);
       }
+
+      /// Test whether this ExitLimit contains all information.
+      bool hasFullInfo() const {
+        return !isa<SCEVCouldNotCompute>(Exact);
+      }
     };
 
     /// Information about the number of times a particular loop exit may be
@@ -542,6 +552,9 @@
       AssertingVH<BasicBlock> ExitingBlock;
       const SCEV *ExactNotTaken;
       PointerIntPair<ExitNotTakenInfo*, 1> NextExit;
+      /// The information is only valid if Pred evaluates to true,
+      /// otherwise it's all CouldNotCompute.
+      SCEVUnionPredicate Pred;
 
       ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {}
 
@@ -577,8 +590,9 @@
 
       /// Initialize BackedgeTakenInfo from a list of exact exit counts.
       BackedgeTakenInfo(
-        SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
-        bool Complete, const SCEV *MaxCount);
+        SmallVectorImpl<std::pair<BasicBlock *, const SCEV *>> &ExitCounts,
+        SmallVectorImpl<SCEVUnionPredicate *> &ExitPreds, bool Complete,
+        const SCEV *MaxCount);
 
       /// Test whether this BackedgeTakenInfo contains any computed information,
       /// or whether it's all SCEVCouldNotCompute values.
@@ -586,11 +600,21 @@
         return ExitNotTaken.ExitingBlock || !isa<SCEVCouldNotCompute>(Max);
       }
 
+      /// Test whether this BackedgeTakenInfo contains complete information.
+      bool hasFullInfo() const {
+        return ExitNotTaken.isCompleteList();
+      }
+
       /// Return an expression indicating the exact backedge-taken count of the
-      /// loop if it is known, or SCEVCouldNotCompute otherwise. This is the
-      /// number of times the loop header can be guaranteed to execute, minus
-      /// one.
-      const SCEV *getExact(ScalarEvolution *SE) const;
+      /// loop if it is known and always correct (independent of any
+      /// assumptions that should be checked at run-time), or
+      /// SCEVCouldNotCompute otherwise. This is the number of times the loop
+      /// header can be guaranteed to execute, minus one. If the SCEV predicate
+      /// associated with the answer can be different from AlwaysTrue, we must
+      /// add the Predicates argument. The SCEV predicate associated with the
+      /// answer will be added to Predicates.
+      const SCEV *getExact(ScalarEvolution *SE,
+                           SCEVUnionPredicate *Predicates = nullptr) const;
 
       /// Return the number of times this loop exit may fall through to the back
       /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via
@@ -612,6 +636,9 @@
     /// Cache the backedge-taken count of the loops for this function as they
     /// are computed.
     DenseMap<const Loop*, BackedgeTakenInfo> BackedgeTakenCounts;
+    /// Cache the predicated backedge-taken count of the loops for this
+    /// function as they are computed.
+    DenseMap<const Loop*, BackedgeTakenInfo> PredicatedBackedgeTakenCounts;
 
     /// This map contains entries for all of the PHI instructions that we
     /// attempt to compute constant evolutions for.  This allows us to avoid
@@ -713,33 +740,49 @@
     void forgetSymbolicName(Instruction *I, const SCEV *SymName);
 
     /// Return the BackedgeTakenInfo for the given loop, lazily computing new
-    /// values if the loop hasn't been analyzed yet.
+    /// values if the loop hasn't been analyzed yet. The returned result is
+    /// guaranteed not to be predicated.
     const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L);
 
+    /// Similar to getBackedgeTakenInfo, but will add predicates as required
+    /// with the purpose of returning complete information (the exact backedge
+    /// taken count will be known).
+    const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L);
+
     /// Compute the number of times the specified loop will iterate.
-    BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L);
+    /// If IsGuarded is set, we will create new SCEV predicates as necessary in
+    /// order to return an exact answer.
+    BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L, bool IsGuarded);
 
     /// Compute the number of times the backedge of the specified loop will
-    /// execute if it exits via the specified block.
-    ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock);
+    /// execute if it exits via the specified block. If IsGuarded is set,
+    /// this call will try to use a minimal set of SCEV predicates in order to
+    /// return an exact answer.
+    ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
+                               bool IsGuarded);
 
     /// Compute the number of times the backedge of the specified loop will
     /// execute if its exit condition were a conditional branch of ExitCond,
-    /// TBB, and FBB.
+    /// TBB, and FBB. If IsGuarded is set, this call try to use a minimal set
+    /// of SCEV predicates in order to return an exact answer.
     ExitLimit computeExitLimitFromCond(const Loop *L,
                                        Value *ExitCond,
                                        BasicBlock *TBB,
                                        BasicBlock *FBB,
-                                       bool IsSubExpr);
+                                       bool IsSubExpr,
+                                       bool Guarded);
 
     /// Compute the number of times the backedge of the specified loop will
     /// execute if its exit condition were a conditional branch of the ICmpInst
-    /// ExitCond, TBB, and FBB.
+    /// ExitCond, TBB, and FBB. If CreatePredicates is set, this call try to
+    /// use a minimal set of SCEV predicates in order to return an exact
+    /// answer.
     ExitLimit computeExitLimitFromICmp(const Loop *L,
                                        ICmpInst *ExitCond,
                                        BasicBlock *TBB,
                                        BasicBlock *FBB,
-                                       bool IsSubExpr);
+                                       bool IsSubExpr,
+                                       bool CreatePredicates);
 
     /// Compute the number of times the backedge of the specified loop will
     /// execute if its exit condition were a switch with a single exiting case
@@ -777,7 +820,10 @@
 
     /// Return the number of times an exit condition comparing the specified
     /// value to zero will execute.  If not computable, return CouldNotCompute.
-    ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr);
+    /// If CreatePredicates is set, this call try to use a minimal set of SCEV
+    /// predicates in order to return an exact answer.
+    ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr,
+                           bool CreatePredicates = false);
 
     /// Return the number of times an exit condition checking the specified
     /// value for nonzero will execute.  If not computable, return
@@ -787,10 +833,15 @@
     /// Return the number of times an exit condition containing the specified
     /// less-than comparison will execute.  If not computable, return
     /// CouldNotCompute. isSigned specifies whether the less-than is signed.
-    ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
-                               const Loop *L, bool isSigned, bool IsSubExpr);
+    /// If CreatePredicates is set, this call try to use a minimal set of SCEV
+    /// predicates in order to return an exact answer.
+    ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L,
+                               bool isSigned, bool IsSubExpr,
+                               bool CreatePredicates = false);
+
     ExitLimit HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
-                                  const Loop *L, bool isSigned, bool IsSubExpr);
+                                  const Loop *L, bool isSigned, bool IsSubExpr,
+                                  bool CreatePredicates = false);
 
     /// Return a predecessor of BB (which may not be an immediate predecessor)
     /// which has exactly one successor from which BB is reachable, or null if
@@ -1168,6 +1219,13 @@
     ///
     const SCEV *getBackedgeTakenCount(const Loop *L);
 
+    /// Similar to getBackedgeTakenCount, except it will add a set of
+    /// SCEV predicates to Predicates that are required to be true in order for
+    /// the answer to be correct. Predicates can be checked with run-time
+    /// checks and can be used to perform loop versioning.
+    const SCEV *getPredicatedBackedgeTakenCount(const Loop *L,
+                                             SCEVUnionPredicate &Predicates);
+
     /// Similar to getBackedgeTakenCount, except return the least SCEV value
     /// that is known never to be less than the actual backedge taken count.
     const SCEV *getMaxBackedgeTakenCount(const Loop *L);
@@ -1489,6 +1547,8 @@
     /// by ScalarEvolution is guaranteed to be preserved, even when adding new
     /// predicates.
     const SCEV *getSCEV(Value *V);
+    /// Get the (predicated) backedge count for the analyzed loop.
+    const SCEV *getBackedgeTakenCount();
     /// \brief Adds a new predicate.
     void addPredicate(const SCEVPredicate &Pred);
     /// \brief Attempts to produce an AddRecExpr for V by adding additional
@@ -1530,6 +1590,8 @@
     /// figure out if the predicate has changed from the last rewrite of the
     /// SCEV. If so, we need to perform a new rewrite.
     unsigned Generation;
+    /// The backedge taken count.
+    const SCEV *BackedgeCount;
   };
 }
 
Index: lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- lib/Analysis/LoopAccessAnalysis.cpp
+++ lib/Analysis/LoopAccessAnalysis.cpp
@@ -132,8 +132,7 @@
   const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
   assert(AR && "Invalid addrec expression");
-  ScalarEvolution *SE = PSE.getSE();
-  const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+  const SCEV *Ex = PSE.getBackedgeTakenCount();
 
   const SCEV *ScStart = AR->getStart();
   const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
@@ -1447,7 +1446,7 @@
   }
 
   // ScalarEvolution needs to be able to find the exit count.
-  const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+  const SCEV *ExitCount = PSE.getBackedgeTakenCount();
   if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
     emitAnalysis(LoopAccessReport()
                  << "could not determine number of loop iterations");
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -5124,6 +5124,12 @@
   return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
 }
 
+const SCEV *
+ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L,
+                                                 SCEVUnionPredicate &Preds) {
+  return getPredicatedBackedgeTakenInfo(L).getExact(this, &Preds);
+}
+
 /// getBackedgeTakenCount - If the specified loop has a predictable
 /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
 /// object. The backedge-taken count is the number of times the loop header
@@ -5159,6 +5165,23 @@
 }
 
 const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
+  auto &BTI = getBackedgeTakenInfo(L);
+  if (BTI.hasFullInfo())
+    return BTI;
+
+  std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+      PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
+
+  if (!Pair.second)
+    return Pair.first->second;
+
+  BackedgeTakenInfo Result = computeBackedgeTakenCount(L, true);
+
+  return PredicatedBackedgeTakenCounts.find(L)->second = Result;
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
 ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
   // Initially insert an invalid entry for this loop. If the insertion
   // succeeds, proceed to actually compute a backedge-taken count and
@@ -5173,7 +5196,7 @@
   // computeBackedgeTakenCount may allocate memory for its result. Inserting it
   // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
   // must be cleared in this scope.
-  BackedgeTakenInfo Result = computeBackedgeTakenCount(L);
+  BackedgeTakenInfo Result = computeBackedgeTakenCount(L, false);
 
   if (Result.getExact(this) != getCouldNotCompute()) {
     assert(isLoopInvariant(Result.getExact(this), L) &&
@@ -5238,12 +5261,17 @@
 /// compute a trip count, or if the loop is deleted.
 void ScalarEvolution::forgetLoop(const Loop *L) {
   // Drop any stored trip count value.
-  DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
-    BackedgeTakenCounts.find(L);
-  if (BTCPos != BackedgeTakenCounts.end()) {
-    BTCPos->second.clear();
-    BackedgeTakenCounts.erase(BTCPos);
-  }
+  auto RemoveLoopFromBackedgeMap =
+    [L] (DenseMap<const Loop*, BackedgeTakenInfo> &Map) {
+    auto BTCPos = Map.find(L);
+    if (BTCPos != Map.end()) {
+      BTCPos->second.clear();
+      Map.erase(BTCPos);
+    }
+  };
+
+  RemoveLoopFromBackedgeMap(BackedgeTakenCounts);
+  RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts);
 
   // Drop information about expressions based on loop-header PHIs.
   SmallVector<Instruction *, 16> Worklist;
@@ -5312,7 +5340,8 @@
 /// is the caller's responsibility to specify the relevant loop exit using
 /// getExact(ExitingBlock, SE).
 const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
+ScalarEvolution::BackedgeTakenInfo::getExact(
+    ScalarEvolution *SE, SCEVUnionPredicate *Preds) const {
   // If any exits were not computable, the loop is not computable.
   if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
 
@@ -5330,6 +5359,12 @@
       BECount = ENT->ExactNotTaken;
     else if (BECount != ENT->ExactNotTaken)
       return SE->getCouldNotCompute();
+
+    if (Preds)
+      Preds->add(&ENT->Pred);
+
+    assert((Preds || ENT->Pred.isAlwaysTrue()) &&
+      "Predicate should be always true!");
   }
   assert(BECount && "Invalid not taken count for loop exit");
   return BECount;
@@ -5339,10 +5374,10 @@
 const SCEV *
 ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
                                              ScalarEvolution *SE) const {
-  for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != nullptr; ENT = ENT->getNextExit()) {
+  for (const ExitNotTakenInfo *ENT = &ExitNotTaken; ENT != nullptr;
+       ENT = ENT->getNextExit()) {
 
-    if (ENT->ExitingBlock == ExitingBlock)
+    if (ENT->ExitingBlock == ExitingBlock && ENT->Pred.isAlwaysTrue())
       return ENT->ExactNotTaken;
   }
   return SE->getCouldNotCompute();
@@ -5351,6 +5386,12 @@
 /// getMax - Get the max backedge taken count for the loop.
 const SCEV *
 ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
+  for (const ExitNotTakenInfo *ENT = &ExitNotTaken; ENT != nullptr;
+       ENT = ENT->getNextExit()) {
+    if (!ENT->Pred.isAlwaysTrue())
+      return SE->getCouldNotCompute();
+  }
+
   return Max ? Max : SE->getCouldNotCompute();
 }
 
@@ -5376,8 +5417,10 @@
 /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
 /// computable exit into a persistent ExitNotTakenInfo array.
 ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
-  SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
-  bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
+    SmallVectorImpl<std::pair<BasicBlock *, const SCEV *>> &ExitCounts,
+    SmallVectorImpl<SCEVUnionPredicate *> &ExitPreds, bool Complete,
+    const SCEV *MaxCount)
+    : Max(MaxCount) {
 
   if (!Complete)
     ExitNotTaken.setIncomplete();
@@ -5387,16 +5430,22 @@
 
   ExitNotTaken.ExitingBlock = ExitCounts[0].first;
   ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
+
+  assert(ExitCounts.size() == ExitPreds.size() &&
+         "ExitCounts should have the same size as ExitPreds");
+  ExitNotTaken.Pred.add(ExitPreds[0]);
+
   if (NumExits == 1) return;
 
   // Handle the rare case of multiple computable exits.
-  ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
+  ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits - 1];
 
   ExitNotTakenInfo *PrevENT = &ExitNotTaken;
   for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
     PrevENT->setNextExit(ENT);
     ENT->ExitingBlock = ExitCounts[i].first;
     ENT->ExactNotTaken = ExitCounts[i].second;
+    ENT->Pred.add(ExitPreds[i]);
   }
 }
 
@@ -5410,11 +5459,12 @@
 /// computeBackedgeTakenCount - Compute the number of times the backedge
 /// of the specified loop will execute.
 ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::computeBackedgeTakenCount(const Loop *L) {
+ScalarEvolution::computeBackedgeTakenCount(const Loop *L, bool Guarded) {
   SmallVector<BasicBlock *, 8> ExitingBlocks;
   L->getExitingBlocks(ExitingBlocks);
 
   SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
+  SmallVector<SCEVUnionPredicate *, 4> ExitCountPreds;
   bool CouldComputeBECount = true;
   BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
   const SCEV *MustExitMaxBECount = nullptr;
@@ -5422,9 +5472,10 @@
 
   // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
   // and compute maxBECount.
+  // Do a union of all the predicates here.
   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
     BasicBlock *ExitBB = ExitingBlocks[i];
-    ExitLimit EL = computeExitLimit(L, ExitBB);
+    ExitLimit EL = computeExitLimit(L, ExitBB, Guarded);
 
     // 1. For each exit that can be computed, add an entry to ExitCounts.
     // CouldComputeBECount is true only if all exits can be computed.
@@ -5432,8 +5483,10 @@
       // We couldn't compute an exact value for this exit, so
       // we won't be able to compute an exact value for the loop.
       CouldComputeBECount = false;
-    else
+    else {
       ExitCounts.push_back({ExitBB, EL.Exact});
+      ExitCountPreds.push_back(&EL.Pred);
+    }
 
     // 2. Derive the loop's MaxBECount from each exit's max number of
     // non-exiting iterations. Partition the loop exits into two kinds:
@@ -5463,11 +5516,13 @@
   }
   const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
     (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
-  return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
+  return BackedgeTakenInfo(ExitCounts, ExitCountPreds, CouldComputeBECount,
+                           MaxBECount);
 }
 
 ScalarEvolution::ExitLimit
-ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
+ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
+                                  bool Guarded) {
 
   // Okay, we've chosen an exiting block.  See what condition causes us to exit
   // at this block and remember the exit block and whether all other targets
@@ -5534,7 +5589,7 @@
     // Proceed to the next level to examine the exit condition expression.
     return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
                                     BI->getSuccessor(1),
-                                    /*ControlsExit=*/IsOnlyExit);
+                                    /*ControlsExit=*/IsOnlyExit, Guarded);
   }
 
   if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
@@ -5557,16 +5612,19 @@
                                           Value *ExitCond,
                                           BasicBlock *TBB,
                                           BasicBlock *FBB,
-                                          bool ControlsExit) {
+                                          bool ControlsExit,
+                                          bool Guarded) {
   // Check if the controlling expression for this loop is an And or Or.
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
     if (BO->getOpcode() == Instruction::And) {
       // Recurse on the operands of the and.
       bool EitherMayExit = L->contains(TBB);
       ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
-                                               ControlsExit && !EitherMayExit);
+                                               ControlsExit && !EitherMayExit,
+                                               Guarded);
       ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
-                                               ControlsExit && !EitherMayExit);
+                                               ControlsExit && !EitherMayExit,
+                                               Guarded);
       const SCEV *BECount = getCouldNotCompute();
       const SCEV *MaxBECount = getCouldNotCompute();
       if (EitherMayExit) {
@@ -5593,6 +5651,9 @@
           BECount = EL0.Exact;
       }
 
+      SCEVUnionPredicate NP;
+      NP.add(&EL0.Pred);
+      NP.add(&EL1.Pred);
       // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
       // to be more aggressive when computing BECount than when computing
       // MaxBECount.  In these cases it is possible for EL0.Exact and EL1.Exact
@@ -5601,15 +5662,17 @@
           !isa<SCEVCouldNotCompute>(BECount))
         MaxBECount = BECount;
 
-      return ExitLimit(BECount, MaxBECount);
+      return ExitLimit(BECount, MaxBECount, NP);
     }
     if (BO->getOpcode() == Instruction::Or) {
       // Recurse on the operands of the or.
       bool EitherMayExit = L->contains(FBB);
       ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
-                                               ControlsExit && !EitherMayExit);
+                                               ControlsExit && !EitherMayExit,
+                                               Guarded);
       ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
-                                               ControlsExit && !EitherMayExit);
+                                               ControlsExit && !EitherMayExit,
+                                               Guarded);
       const SCEV *BECount = getCouldNotCompute();
       const SCEV *MaxBECount = getCouldNotCompute();
       if (EitherMayExit) {
@@ -5636,14 +5699,25 @@
           BECount = EL0.Exact;
       }
 
-      return ExitLimit(BECount, MaxBECount);
+      SCEVUnionPredicate NP;
+      NP.add(&EL0.Pred);
+      NP.add(&EL1.Pred);
+      return ExitLimit(BECount, MaxBECount, NP);
     }
   }
 
   // With an icmp, it may be feasible to compute an exact backedge-taken count.
   // Proceed to the next level to examine the icmp.
-  if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
-    return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
+  if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) {
+    ExitLimit EL = computeExitLimitFromICmp(L, ExitCondICmp, TBB,
+                                            FBB, ControlsExit, false);
+    if (EL.hasFullInfo() || !Guarded)
+      return EL;
+
+    // Try again, but use SCEV predicates this time.
+    return computeExitLimitFromICmp(L, ExitCondICmp, TBB,
+                                    FBB, ControlsExit, true);
+  }
 
   // Check for a constant condition. These are normally stripped out by
   // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -5667,7 +5741,8 @@
                                           ICmpInst *ExitCond,
                                           BasicBlock *TBB,
                                           BasicBlock *FBB,
-                                          bool ControlsExit) {
+                                          bool ControlsExit,
+                                          bool CreatePredicates) {
 
   // If the condition was exit on true, convert the condition to exit on false
   ICmpInst::Predicate Cond;
@@ -5724,7 +5799,8 @@
   switch (Cond) {
   case ICmpInst::ICMP_NE: {                     // while (X != Y)
     // Convert to: while (X-Y != 0)
-    ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
+    ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
+                                CreatePredicates);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
@@ -5737,14 +5813,17 @@
   case ICmpInst::ICMP_SLT:
   case ICmpInst::ICMP_ULT: {                    // while (X < Y)
     bool IsSigned = Cond == ICmpInst::ICMP_SLT;
-    ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit);
+    ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit,
+                                    CreatePredicates);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_SGT:
   case ICmpInst::ICMP_UGT: {                    // while (X > Y)
     bool IsSigned = Cond == ICmpInst::ICMP_SGT;
-    ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit);
+    ExitLimit EL =
+        HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit,
+                            CreatePredicates);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
@@ -6006,7 +6085,8 @@
     unsigned BitWidth = getTypeSizeInBits(RHS->getType());
     const SCEV *UpperBound =
         getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
-    return ExitLimit(getCouldNotCompute(), UpperBound);
+    SCEVUnionPredicate P;
+    return ExitLimit(getCouldNotCompute(), UpperBound, P);
   }
 
   return getCouldNotCompute();
@@ -6783,7 +6863,9 @@
 /// effectively V != 0.  We know and take advantage of the fact that this
 /// expression only being used in a comparison by zero context.
 ScalarEvolution::ExitLimit
-ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
+                              bool CreatePredicates) {
+  SCEVUnionPredicate P;
   // If the value is a constant
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
     // If the value is already zero, the branch will execute zero times.
@@ -6792,6 +6874,14 @@
   }
 
   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V);
+  if (!AddRec && CreatePredicates) {
+    // Try to make this a chrec using runtime tests.
+    const SCEV *Expr = convertSCEVToAddRecWithPredicates(V, L, P);
+    AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+    if (!AddRec)
+      return getCouldNotCompute();
+  }
+
   if (!AddRec || AddRec->getLoop() != L)
     return getCouldNotCompute();
 
@@ -6816,7 +6906,7 @@
         // should not accept a root of 2.
         const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
         if (Val->isZero())
-          return R1;  // We found a quadratic root!
+          return ExitLimit(R1, R1, P); // We found a quadratic root!
       }
     }
     return getCouldNotCompute();
@@ -6873,7 +6963,7 @@
     else
       MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
                                          : -CR.getUnsignedMin());
-    return ExitLimit(Distance, MaxBECount);
+    return ExitLimit(Distance, MaxBECount, P);
   }
 
   // As a special case, handle the instance where Step is a positive power of
@@ -6926,7 +7016,9 @@
       auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth);
       auto *WideTy = Distance->getType();
 
-      return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
+      const SCEV *Limit =
+          getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
+      return ExitLimit(Limit, Limit, P);
     }
   }
 
@@ -6938,13 +7030,15 @@
   if (ControlsExit && AddRec->hasNoSelfWrap()) {
     const SCEV *Exact =
         getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
-    return ExitLimit(Exact, Exact);
+    return ExitLimit(Exact, Exact, P);
   }
 
   // Then, try to solve the above equation provided that Start is constant.
-  if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
-    return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(),
-                                        *this);
+  if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) {
+    const SCEV *E = SolveLinEquationWithOverflow(
+        StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this);
+    return ExitLimit(E, E, P);
+  }
   return getCouldNotCompute();
 }
 
@@ -8387,12 +8481,20 @@
 ScalarEvolution::ExitLimit
 ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                                   const Loop *L, bool IsSigned,
-                                  bool ControlsExit) {
+                                  bool ControlsExit, bool CreatePredicates) {
+  SCEVUnionPredicate P;
   // We handle only IV < Invariant
   if (!isLoopInvariant(RHS, L))
     return getCouldNotCompute();
 
   const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+  if (!IV && CreatePredicates) {
+    // Try to make this a chrec using runtime tests.
+    const SCEV *Expr = convertSCEVToAddRecWithPredicates(LHS, L, P);
+    IV = dyn_cast<SCEVAddRecExpr>(Expr);
+    if (!IV)
+      return getCouldNotCompute();
+  }
 
   // Avoid weird loops
   if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8461,18 +8563,27 @@
   if (isa<SCEVCouldNotCompute>(MaxBECount))
     MaxBECount = BECount;
 
-  return ExitLimit(BECount, MaxBECount);
+  return ExitLimit(BECount, MaxBECount, P);
 }
 
 ScalarEvolution::ExitLimit
 ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
                                      const Loop *L, bool IsSigned,
-                                     bool ControlsExit) {
+                                     bool ControlsExit,
+                                     bool CreatePredicates) {
+  SCEVUnionPredicate P;
   // We handle only IV > Invariant
   if (!isLoopInvariant(RHS, L))
     return getCouldNotCompute();
 
   const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+  if (!IV && CreatePredicates) {
+    // Try to make this a chrec using runtime tests.
+    const SCEV *Expr = convertSCEVToAddRecWithPredicates(LHS, L, P);
+    IV = dyn_cast<SCEVAddRecExpr>(Expr);
+    if (!IV)
+      return getCouldNotCompute();
+  }
 
   // Avoid weird loops
   if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8543,7 +8654,7 @@
   if (isa<SCEVCouldNotCompute>(MaxBECount))
     MaxBECount = BECount;
 
-  return ExitLimit(BECount, MaxBECount);
+  return ExitLimit(BECount, MaxBECount, P);
 }
 
 /// getNumIterationsInRange - Return the number of iterations of this loop that
@@ -9247,6 +9358,8 @@
       ValueExprMap(std::move(Arg.ValueExprMap)),
       WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
       BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
+      PredicatedBackedgeTakenCounts(
+          std::move(Arg.PredicatedBackedgeTakenCounts)),
       ConstantEvolutionLoopExitValue(
           std::move(Arg.ConstantEvolutionLoopExitValue)),
       ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
@@ -9279,6 +9392,8 @@
   // that a loop had multiple computable exits.
   for (auto &BTCI : BackedgeTakenCounts)
     BTCI.second.clear();
+  for (auto &BTCI : PredicatedBackedgeTakenCounts)
+    BTCI.second.clear();
 
   assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
   assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
@@ -9321,6 +9436,20 @@
     OS << "Unpredictable max backedge-taken count. ";
   }
 
+  OS << "\n"
+        "Loop ";
+  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+  OS << ": ";
+
+  SCEVUnionPredicate Pred;
+  auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred);
+  if (!isa<SCEVCouldNotCompute>(PBT)) {
+    OS << "Predicated backedge-taken count is " << *PBT << "\n";
+    OS << " Predicates:\n";
+    Pred.print(OS, 4);
+  } else {
+    OS << "Unpredictable predicated backedge-taken count. ";
+  }
   OS << "\n";
 }
 
@@ -9605,16 +9734,21 @@
   ExprValueMap.erase(S);
   HasRecMap.erase(S);
 
-  for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
-         BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
-    BackedgeTakenInfo &BEInfo = I->second;
-    if (BEInfo.hasOperand(S, this)) {
-      BEInfo.clear();
-      BackedgeTakenCounts.erase(I++);
+  auto RemoveSCEVFromBackedgeMap =
+    [S, this] (DenseMap<const Loop*, BackedgeTakenInfo> &Map) {
+    for (auto I = Map.begin(), E = Map.end(); I != E;) {
+      BackedgeTakenInfo &BEInfo = I->second;
+      if (BEInfo.hasOperand(S, this)) {
+        BEInfo.clear();
+        Map.erase(I++);
+      }
+      else
+        ++I;
     }
-    else
-      ++I;
-  }
+  };
+
+  RemoveSCEVFromBackedgeMap(BackedgeTakenCounts);
+  RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
 }
 
 typedef DenseMap<const Loop *, std::string> VerifyMap;
@@ -10019,7 +10153,7 @@
 
 PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
                                                      Loop &L)
-    : SE(SE), L(L), Generation(0) {}
+    : SE(SE), L(L), Generation(0), BackedgeCount(nullptr) {}
 
 const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
   const SCEV *Expr = SE.getSCEV(V);
@@ -10040,6 +10174,15 @@
   return NewSCEV;
 }
 
+const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() {
+  if (!BackedgeCount) {
+    SCEVUnionPredicate BackedgePred;
+    BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred);
+    addPredicate(BackedgePred);
+  }
+  return BackedgeCount;
+}
+
 void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
   if (Preds.implies(&Pred))
     return;
@@ -10101,10 +10244,10 @@
   return New;
 }
 
-PredicatedScalarEvolution::
-PredicatedScalarEvolution(const PredicatedScalarEvolution &Init) :
-  RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
-  Generation(Init.Generation) {
+PredicatedScalarEvolution::PredicatedScalarEvolution(
+    const PredicatedScalarEvolution &Init)
+    : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds),
+      Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {
   for (auto I = Init.FlagsMap.begin(), E = Init.FlagsMap.end(); I != E; ++I)
     FlagsMap.insert(*I);
 }
Index: lib/Analysis/ScalarEvolutionExpander.cpp
===================================================================
--- lib/Analysis/ScalarEvolutionExpander.cpp
+++ lib/Analysis/ScalarEvolutionExpander.cpp
@@ -2003,7 +2003,9 @@
   assert(AR->isAffine() && "Cannot generate RT check for "
                            "non-affine expression");
 
-  const SCEV *ExitCount = SE.getBackedgeTakenCount(AR->getLoop());
+  SCEVUnionPredicate Pred;
+  const SCEV *ExitCount = SE.getPredicatedBackedgeTakenCount(AR->getLoop(),
+                                                             Pred);
   const SCEV *Step = AR->getStepRecurrence(SE);
   const SCEV *Start = AR->getStart();
 
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2727,7 +2727,7 @@
   IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
   // Find the loop boundaries.
   ScalarEvolution *SE = PSE.getSE();
-  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop);
+  const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
   assert(BackedgeTakenCount != SE->getCouldNotCompute() &&
          "Invalid loop count");
 
@@ -4368,7 +4368,7 @@
   }
 
   // ScalarEvolution needs to be able to find the exit count.
-  const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+  const SCEV *ExitCount = PSE.getBackedgeTakenCount();
   if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
     emitAnalysis(VectorizationReport()
                  << "could not determine number of loop iterations");
Index: test/Analysis/ScalarEvolution/predicated-trip-count.ll
===================================================================
--- /dev/null
+++ test/Analysis/ScalarEvolution/predicated-trip-count.ll
@@ -0,0 +1,109 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@A = weak global [1000 x i32] zeroinitializer, align 32         
+
+; The resulting predicate is i16 {0,+,1} <nssw>, meanining
+; that the resulting backedge expression will be valid for:
+;   (1 + (-1 smax %M)) <= MAX_INT16
+;
+; At the limit condition for M (MAX_INT16 - 1) we have in the
+; last iteration:
+;    i0 <- MAX_INT16
+;    i0.ext <- MAX_INT16
+;
+; and therefore no wrapping happend for i0 or i0.ext
+; throughout the execution of the loop. The resulting predicated
+; backedge taken count is correct.
+
+; CHECK: Classifying expressions for: @test1
+; CHECK: %i.0.ext = sext i16 %i.0 to i32
+; CHECK-NEXT:  -->  (sext i16 {0,+,1}<%bb3> to i32)
+; CHECK:      Loop %bb3: Unpredictable backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count.
+; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (-1 smax %M))
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT:    {0,+,1}<%bb3> Added Flags: <nssw>
+define void @test1(i32 %N, i32 %M) {
+entry:
+        br label %bb3
+
+bb:             ; preds = %bb3
+        %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0          ; <i32*> [#uses=1]
+        store i32 123, i32* %tmp
+        %tmp2 = add i16 %i.0, 1         ; <i32> [#uses=1]
+        br label %bb3
+
+bb3:            ; preds = %bb, %entry
+        %i.0 = phi i16 [ 0, %entry ], [ %tmp2, %bb ]            ; <i32> [#uses=3]
+        %i.0.ext = sext i16 %i.0 to i32
+        %tmp3 = icmp sle i32 %i.0.ext, %M          ; <i1> [#uses=1]
+        br i1 %tmp3, label %bb, label %bb5
+
+bb5:            ; preds = %bb3
+        br label %return
+
+return:         ; preds = %bb5
+        ret void
+}
+
+; The predicated backedge taken count is:
+;    (2 + (zext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32)))
+;                                     smax (-1 + (-1 * %M)))
+;    )
+
+; -1 + (-1 * %M) <= (-2 + (-1 * (sext i16 %Start to i32))
+; The predicated backedge taken count is 0.
+; From the IR, this is correct since we will bail out at the
+; first iteration.
+
+
+; * -1 + (-1 * %M) > (-2 + (-1 * (sext i16 %Start to i32))
+; or: %M < 1 + (sext i16 %Start to i32)
+;
+; The predicated backedge taken count is 1 + (zext i16 %Start to i32) - %M
+;
+; If %M >= MIN_INT + 1, this predicated backedge taken count would be correct (even
+; without predicates). However, for %M < MIN_INT this would be an infinite loop.
+; In these cases, the {%Start,+,-1} <nusw> predicate would be false, as the
+; final value of the expression {%Start,+,-1} expression (%M - 1) would not be
+; representable as an i16.
+
+; There is also a limit case here where the value of %M is MIN_INT. In this case
+; we still have an infinite loop, since icmp sge %x, MIN_INT will always return
+; true.
+
+; CHECK: Classifying expressions for: @test2
+
+; CHECK:      %i.0.ext = sext i16 %i.0 to i32
+; CHECK-NEXT:    -->  (sext i16 {%Start,+,-1}<%bb3> to i32)
+; CHECK:       Loop %bb3: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %bb3: Unpredictable max backedge-taken count.
+; CHECK-NEXT:  Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M))))
+; CHECK-NEXT:  Predicates:
+; CHECK-NEXT:    {%Start,+,-1}<%bb3> Added Flags: <nssw>
+
+define void @test2(i32 %N, i32 %M, i16 %Start) {
+entry:
+        br label %bb3
+
+bb:             ; preds = %bb3
+        %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i16 %i.0          ; <i32*> [#uses=1]
+        store i32 123, i32* %tmp
+        %tmp2 = sub i16 %i.0, 1         ; <i32> [#uses=1]
+        br label %bb3
+
+bb3:            ; preds = %bb, %entry
+        %i.0 = phi i16 [ %Start, %entry ], [ %tmp2, %bb ]            ; <i32> [#uses=3]
+        %i.0.ext = sext i16 %i.0 to i32
+        %tmp3 = icmp sge i32 %i.0.ext, %M          ; <i1> [#uses=1]
+        br i1 %tmp3, label %bb, label %bb5
+
+bb5:            ; preds = %bb3
+        br label %return
+
+return:         ; preds = %bb5
+        ret void
+}
+
Index: test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
@@ -0,0 +1,166 @@
+; RUN: opt -mtriple=aarch64--linux-gnueabi -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s -S | FileCheck %s
+
+; The following tests contain loops for which SCEV cannot determine the backedge
+; taken count. This is because the backedge taken condition is produced by an
+; icmp with one of the sides being a loop varying non-AddRec expression.
+; However, there is a possibility to normalize this to an AddRec expression
+; using SCEV predicates. This allows us to compute a 'guarded' backedge count.
+; The Loop Vectorizer is able to version to loop in order to use this guarded
+; backedge count and vectorize more loops.
+
+
+; CHECK-LABEL: test_sge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sge(i32* noalias %A,
+                      i32* noalias %B,
+                      i32* noalias %C, i32 %N) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ]
+  %indvars.next = add i16 %indvars.iv, 1
+  %indvars.ext = zext i16 %indvars.iv to i32
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = mul i32 %1, %0
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+  store i32 %mul4, i32* %arrayidx7, align 4
+
+  %exitcond = icmp sge i32 %indvars.ext, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: test_uge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_uge(i32* noalias %A,
+                      i32* noalias %B,
+                      i32* noalias %C, i32 %N, i32 %Offset) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ]
+  %indvars.next = add i16 %indvars.iv, 1
+
+  %indvars.ext = sext i16 %indvars.iv to i32
+  %indvars.access = add i32 %Offset, %indvars.ext
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = add i32 %1, %0
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access
+  store i32 %mul4, i32* %arrayidx7, align 4
+
+  %exitcond = icmp uge i32 %indvars.ext, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: test_ule
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_ule(i32* noalias %A,
+                      i32* noalias %B,
+                      i32* noalias %C, i32 %N,
+                      i16 %M) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ]
+  %indvars.next = sub i16 %indvars.iv, 1
+  %indvars.ext = zext i16 %indvars.iv to i32
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = mul i32 %1, %0
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+  store i32 %mul4, i32* %arrayidx7, align 4
+
+  %exitcond = icmp ule i32 %indvars.ext, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: test_sle
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sle(i32* noalias %A,
+                   i32* noalias %B,
+                   i32* noalias %C, i32 %N,
+                   i16 %M) {
+entry:
+  %cmp13 = icmp eq i32 %N, 0
+  br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ]
+  %indvars.next = sub i16 %indvars.iv, 1
+  %indvars.ext = sext i16 %indvars.iv to i32
+
+  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+  %1 = load i32, i32* %arrayidx3, align 4
+
+  %mul4 = mul i32 %1, %0
+
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+  store i32 %mul4, i32* %arrayidx7, align 4
+
+  %exitcond = icmp sle i32 %indvars.ext, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
Index: test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -54,8 +54,9 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !16
   %0 = trunc i64 %indvars.iv to i32, !dbg !16
+  %ld = load i32, i32* %arrayidx, align 4
   store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18
-  %cmp3 = icmp sle i32 %0, %Length, !dbg !22
+  %cmp3 = icmp sle i32 %ld, %Length, !dbg !22
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12
   %1 = trunc i64 %indvars.iv.next to i32
   %cmp = icmp slt i32 %1, %Length, !dbg !12