Index: include/llvm/Analysis/LoopAccessAnalysis.h
===================================================================
--- include/llvm/Analysis/LoopAccessAnalysis.h
+++ include/llvm/Analysis/LoopAccessAnalysis.h
@@ -24,6 +24,7 @@
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 
 namespace llvm {
 
@@ -193,11 +194,10 @@
                const SmallVectorImpl<Instruction *> &Instrs) const;
   };
 
-  MemoryDepChecker(ScalarEvolution *Se, const Loop *L,
-                   SCEVUnionPredicate &Preds)
-      : SE(Se), InnermostLoop(L), AccessIdx(0),
+  MemoryDepChecker(SCEVPredicatedLayer &PredSE, const Loop *L)
+      : PredSE(PredSE), InnermostLoop(L), AccessIdx(0),
         ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true),
-        RecordDependences(true), Preds(Preds) {}
+        RecordDependences(true) {}
 
   /// \brief Register the location (instructions are given increasing numbers)
   /// of a write access.
@@ -266,7 +266,13 @@
                                                          bool isWrite) const;
 
 private:
-  ScalarEvolution *SE;
+  /// The SCEV predicated layer is used to add runtime SCEV checks, and applies
+  /// dynamic knowledge to simplify SCEV expressions and convert them to a more
+  /// usable form. We need this in case assumptions about SCEV expressions need
+  /// to be made in order to avoid unknown dependences. For example we might
+  /// assume a unit stride for a pointer in order to prove that a memory access
+  /// is strided and doesn't wrap.
+  SCEVPredicatedLayer &PredSE;
   const Loop *InnermostLoop;
 
   /// \brief Maps access locations (ptr, read/write) to program order.
@@ -317,15 +323,6 @@
   /// \brief Check whether the data dependence could prevent store-load
   /// forwarding.
   bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
-
-  /// The SCEV predicate containing all the SCEV-related assumptions.
-  /// The dependence checker needs this in order to convert SCEVs of pointers
-  /// to more accurate expressions in the context of existing assumptions.
-  /// We also need this in case assumptions about SCEV expressions need to
-  /// be made in order to avoid unknown dependences. For example we might
-  /// assume a unit stride for a pointer in order to prove that a memory access
-  /// is strided and doesn't wrap.
-  SCEVUnionPredicate &Preds;
 };
 
 /// \brief Holds information about the memory runtime legality checks to verify
@@ -373,7 +370,7 @@
   /// and change \p Preds.
   void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
               unsigned ASId, const ValueToValueMap &Strides,
-              SCEVUnionPredicate &Preds);
+              SCEVPredicatedLayer &PredSE);
 
   /// \brief No run-time memory checking is necessary.
   bool empty() const { return Pointers.empty(); }
@@ -591,14 +588,13 @@
     return StoreToLoopInvariantAddress;
   }
 
-  /// The SCEV predicate contains all the SCEV-related assumptions.
-  /// The is used to keep track of the minimal set of assumptions on SCEV
-  /// expressions that the analysis needs to make in order to return a
-  /// meaningful result. All SCEV expressions during the analysis should be
+  /// The SCEV predicated layer is used to add runtime SCEV checks, and applies
+  /// dynamic knowledge to simplify SCEV expressions and convert them to a more
+  /// usable form.  All SCEV expressions during the analysis should be
   /// re-written (and therefore simplified) according to Preds.
   /// A user of LoopAccessAnalysis will need to emit the runtime checks
   /// associated with this predicate.
-  SCEVUnionPredicate Preds;
+  SCEVPredicatedLayer PredSE;
 
 private:
   /// \brief Analyze the loop.  Substitute symbolic strides using Strides.
@@ -619,7 +615,6 @@
   MemoryDepChecker DepChecker;
 
   Loop *TheLoop;
-  ScalarEvolution *SE;
   const DataLayout &DL;
   const TargetLibraryInfo *TLI;
   AliasAnalysis *AA;
@@ -654,18 +649,17 @@
 /// If \p OrigPtr is not null, use it to look up the stride value instead of \p
 /// Ptr.  \p PtrToStride provides the mapping between the pointer value and its
 /// stride as collected by LoopVectorizationLegality::collectStridedAccess.
-const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+const SCEV *replaceSymbolicStrideSCEV(SCEVPredicatedLayer &PredSE,
                                       const ValueToValueMap &PtrToStride,
-                                      SCEVUnionPredicate &Preds, Value *Ptr,
-                                      Value *OrigPtr = nullptr);
+                                      Value *Ptr, Value *OrigPtr = nullptr);
 
 /// \brief Check the stride of the pointer and ensure that it does not wrap in
 /// the address space, assuming \p Preds is true.
 ///
 /// If necessary this method will version the stride of the pointer according
 /// to \p PtrToStride and therefore add a new predicate to \p Preds.
-int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
-                 const ValueToValueMap &StridesMap, SCEVUnionPredicate &Preds);
+int isStridedPtr(SCEVPredicatedLayer &PredSE, Value *Ptr, const Loop *Lp,
+                 const ValueToValueMap &StridesMap);
 
 /// \brief This analysis provides dependence information for the memory accesses
 /// of a loop.
Index: include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- include/llvm/Transforms/Utils/LoopUtils.h
+++ include/llvm/Transforms/Utils/LoopUtils.h
@@ -374,6 +374,43 @@
 
 /// \brief Returns the instructions that use values defined in the loop.
 SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);
+
+/// An iterface layer with SCEV used to manage how we see SCEV expressions for
+/// values in the context of existing predicates. We can add new predicates,
+/// but we cannot remove them.
+///
+/// This layer has multiple purposes:
+///   - provides a simple interface for SCEV versioning.
+///   - guarantees that the order of transformations applied on a SCEV
+///     expression for a single Value is consistent across two different
+///     getSCEV calls. This means that, for example, once we've obtained
+///     an AddRec expression for a certain value through expression rewriting,
+///     we will continue to get an AddRec expresion for that Value.
+///   - lowers the number of expression rewrites.
+class SCEVPredicatedLayer {
+public:
+  SCEVPredicatedLayer(ScalarEvolution &SE);
+  const SCEVUnionPredicate &getPredicate() const;
+  /// \brief Returns the SCEV expression of V, in the context of the current
+  /// SCEV predicate.
+  /// The order of transformations applied on the expression of V returned
+  /// by ScalarEvolution is guaranteed to be preserved, even when adding new
+  /// predicates.
+  const SCEV *getSCEV(Value *V);
+  /// \brief Adds a new predicate.
+  void addPredicate(const SCEVPredicate &Pred);
+
+  ScalarEvolution *getSE() const { return &SE; }
+
+private:
+  void updateGeneration();
+
+  typedef std::pair<unsigned, const SCEV *> RewriteEntry;
+  DenseMap<const SCEV *, RewriteEntry> RewriteMap;
+  ScalarEvolution &SE;
+  SCEVUnionPredicate Preds;
+  unsigned Generation;
+};
 }
 
 #endif
Index: lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- lib/Analysis/LoopAccessAnalysis.cpp
+++ lib/Analysis/LoopAccessAnalysis.cpp
@@ -87,11 +87,10 @@
   return V;
 }
 
-const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+const SCEV *llvm::replaceSymbolicStrideSCEV(SCEVPredicatedLayer &PredSE,
                                             const ValueToValueMap &PtrToStride,
-                                            SCEVUnionPredicate &Preds,
                                             Value *Ptr, Value *OrigPtr) {
-  const SCEV *OrigSCEV = SE->getSCEV(Ptr);
+  const SCEV *OrigSCEV = PredSE.getSCEV(Ptr);
 
   // If there is an entry in the map return the SCEV of the pointer with the
   // symbolic stride replaced by one.
@@ -108,16 +107,17 @@
     ValueToValueMap RewriteMap;
     RewriteMap[StrideVal] = One;
 
+    ScalarEvolution *SE = PredSE.getSE();
     const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
     const auto *CT =
         static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
 
-    Preds.add(SE->getEqualPredicate(U, CT));
+    PredSE.addPredicate(*SE->getEqualPredicate(U, CT));
+    auto *Expr = PredSE.getSCEV(Ptr);
 
-    const SCEV *ByOne = SE->rewriteUsingPredicate(OrigSCEV, Preds);
-    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
+    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr
                  << "\n");
-    return ByOne;
+    return Expr;
   }
 
   // Otherwise, just return the SCEV of the original pointer.
@@ -127,11 +127,12 @@
 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                     unsigned DepSetId, unsigned ASId,
                                     const ValueToValueMap &Strides,
-                                    SCEVUnionPredicate &Preds) {
+                                    SCEVPredicatedLayer &PredSE) {
   // Get the stride replaced scev.
-  const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr);
+  const SCEV *Sc = replaceSymbolicStrideSCEV(PredSE, Strides, Ptr);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
   assert(AR && "Invalid addrec expression");
+  ScalarEvolution *SE = PredSE.getSE();
   const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
 
   const SCEV *ScStart = AR->getStart();
@@ -423,9 +424,10 @@
   typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
 
   AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
-                 MemoryDepChecker::DepCandidates &DA, SCEVUnionPredicate &Preds)
+                 MemoryDepChecker::DepCandidates &DA,
+                 SCEVPredicatedLayer &PredSE)
       : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false),
-        Preds(Preds) {}
+        PredSE(PredSE) {}
 
   /// \brief Register a load  and whether it is only read from.
   void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
@@ -512,16 +514,16 @@
   bool IsRTCheckAnalysisNeeded;
 
   /// The SCEV predicate containing all the SCEV-related assumptions.
-  SCEVUnionPredicate &Preds;
+  SCEVPredicatedLayer &PredSE;
 };
 
 } // end anonymous namespace
 
 /// \brief Check whether a pointer can participate in a runtime bounds check.
-static bool hasComputableBounds(ScalarEvolution *SE,
+static bool hasComputableBounds(SCEVPredicatedLayer &PredSE,
                                 const ValueToValueMap &Strides, Value *Ptr,
-                                Loop *L, SCEVUnionPredicate &Preds) {
-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr);
+                                Loop *L) {
+  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PredSE, Strides, Ptr);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
   if (!AR)
     return false;
@@ -564,11 +566,11 @@
       else
         ++NumReadPtrChecks;
 
-      if (hasComputableBounds(SE, StridesMap, Ptr, TheLoop, Preds) &&
+      if (hasComputableBounds(PredSE, StridesMap, Ptr, TheLoop) &&
           // When we run after a failing dependency check we have to make sure
           // we don't have wrapping pointers.
           (!ShouldCheckStride ||
-           isStridedPtr(SE, Ptr, TheLoop, StridesMap, Preds) == 1)) {
+           isStridedPtr(PredSE, Ptr, TheLoop, StridesMap) == 1)) {
         // The id of the dependence set.
         unsigned DepId;
 
@@ -582,7 +584,7 @@
           // Each access has its own dependence set.
           DepId = RunningDepId++;
 
-        RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, Preds);
+        RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PredSE);
 
         DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
       } else {
@@ -817,9 +819,8 @@
 }
 
 /// \brief Check whether the access through \p Ptr has a constant stride.
-int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
-                       const ValueToValueMap &StridesMap,
-                       SCEVUnionPredicate &Preds) {
+int llvm::isStridedPtr(SCEVPredicatedLayer &PredSE, Value *Ptr, const Loop *Lp,
+                       const ValueToValueMap &StridesMap) {
   Type *Ty = Ptr->getType();
   assert(Ty->isPointerTy() && "Unexpected non-ptr");
 
@@ -831,7 +832,7 @@
     return 0;
   }
 
-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Preds, Ptr);
+  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PredSE, StridesMap, Ptr);
 
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
   if (!AR) {
@@ -854,7 +855,7 @@
   // to access the pointer value "0" which is undefined behavior in address
   // space 0, therefore we can also vectorize this case.
   bool IsInBoundsGEP = isInBoundsGep(Ptr);
-  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp);
+  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PredSE.getSE(), Lp);
   bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
   if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
     DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
@@ -863,7 +864,7 @@
   }
 
   // Check the step is constant.
-  const SCEV *Step = AR->getStepRecurrence(*SE);
+  const SCEV *Step = AR->getStepRecurrence(*PredSE.getSE());
 
   // Calculate the pointer stride and check if it is constant.
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
@@ -1046,11 +1047,11 @@
       BPtr->getType()->getPointerAddressSpace())
     return Dependence::Unknown;
 
-  const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, APtr);
-  const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, BPtr);
+  const SCEV *AScev = replaceSymbolicStrideSCEV(PredSE, Strides, APtr);
+  const SCEV *BScev = replaceSymbolicStrideSCEV(PredSE, Strides, BPtr);
 
-  int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides, Preds);
-  int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides, Preds);
+  int StrideAPtr = isStridedPtr(PredSE, APtr, InnermostLoop, Strides);
+  int StrideBPtr = isStridedPtr(PredSE, BPtr, InnermostLoop, Strides);
 
   const SCEV *Src = AScev;
   const SCEV *Sink = BScev;
@@ -1067,7 +1068,7 @@
     std::swap(StrideAPtr, StrideBPtr);
   }
 
-  const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
+  const SCEV *Dist = PredSE.getSE()->getMinusSCEV(Sink, Src);
 
   DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
         << "(Induction step: " << StrideAPtr <<  ")\n");
@@ -1343,8 +1344,8 @@
   }
 
   // ScalarEvolution needs to be able to find the exit count.
-  const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
-  if (ExitCount == SE->getCouldNotCompute()) {
+  const SCEV *ExitCount = PredSE.getSE()->getBackedgeTakenCount(TheLoop);
+  if (ExitCount == PredSE.getSE()->getCouldNotCompute()) {
     emitAnalysis(LoopAccessReport() <<
                  "could not determine number of loop iterations");
     DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
@@ -1447,7 +1448,7 @@
 
   MemoryDepChecker::DepCandidates DependentAccesses;
   AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
-                          AA, LI, DependentAccesses, Preds);
+                          AA, LI, DependentAccesses, PredSE);
 
   // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
   // multiple times on the same object. If the ptr is accessed twice, once
@@ -1499,7 +1500,7 @@
     // words may be written to the same address.
     bool IsReadOnlyPtr = false;
     if (Seen.insert(Ptr).second ||
-        !isStridedPtr(SE, Ptr, TheLoop, Strides, Preds)) {
+        !isStridedPtr(PredSE, Ptr, TheLoop, Strides)) {
       ++NumReads;
       IsReadOnlyPtr = true;
     }
@@ -1529,7 +1530,7 @@
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   bool CanDoRTIfNeeded =
-      Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides);
+      Accesses.canCheckPtrAtRT(PtrRtChecking, PredSE.getSE(), TheLoop, Strides);
   if (!CanDoRTIfNeeded) {
     emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
     DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
@@ -1556,6 +1557,7 @@
       PtrRtChecking.reset();
       PtrRtChecking.Need = true;
 
+      auto *SE = PredSE.getSE();
       CanDoRTIfNeeded =
           Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true);
 
@@ -1598,7 +1600,7 @@
 }
 
 bool LoopAccessInfo::isUniform(Value *V) const {
-  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+  return (PredSE.getSE()->isLoopInvariant(PredSE.getSE()->getSCEV(V), TheLoop));
 }
 
 // FIXME: this function is currently a duplicate of the one in
@@ -1679,7 +1681,7 @@
     Instruction *Loc,
     const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
     const {
-
+  auto *SE = PredSE.getSE();
   SCEVExpander Exp(*SE, DL, "induction");
   auto ExpandedChecks =
       expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking);
@@ -1749,7 +1751,7 @@
                                const TargetLibraryInfo *TLI, AliasAnalysis *AA,
                                DominatorTree *DT, LoopInfo *LI,
                                const ValueToValueMap &Strides)
-    : PtrRtChecking(SE), DepChecker(SE, L, Preds), TheLoop(L), SE(SE), DL(DL),
+    : PredSE(*SE), PtrRtChecking(SE), DepChecker(PredSE, L), TheLoop(L), DL(DL),
       TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
       MaxSafeDepDistBytes(-1U), CanVecMem(false),
       StoreToLoopInvariantAddress(false) {
@@ -1786,7 +1788,7 @@
                    << "found in loop.\n";
 
   OS.indent(Depth) << "SCEV assumptions:\n";
-  Preds.print(OS, Depth);
+  PredSE.getPredicate().print(OS, Depth);
 }
 
 const LoopAccessInfo &
Index: lib/Transforms/Scalar/LoopDistribute.cpp
===================================================================
--- lib/Transforms/Scalar/LoopDistribute.cpp
+++ lib/Transforms/Scalar/LoopDistribute.cpp
@@ -761,7 +761,7 @@
     }
 
     // Don't distribute the loop if we need too many SCEV run-time checks.
-    const SCEVUnionPredicate &Pred = LAI.Preds;
+    const SCEVUnionPredicate &Pred = LAI.PredSE.getPredicate();
     if (Pred.getComplexity() > DistributeSCEVCheckThreshold) {
       DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
       return false;
@@ -790,7 +790,7 @@
       DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
       LoopVersioning LVer(LAI, L, LI, DT, SE, false);
       LVer.setAliasChecks(std::move(Checks));
-      LVer.setSCEVChecks(LAI.Preds);
+      LVer.setSCEVChecks(LAI.PredSE.getPredicate());
       LVer.versionLoop(DefsUsedOutside);
     }
 
Index: lib/Transforms/Scalar/LoopLoadElimination.cpp
===================================================================
--- lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -459,17 +459,18 @@
       return false;
     }
 
-    if (LAI.Preds.getComplexity() > LoadElimSCEVCheckThreshold) {
+    if (LAI.PredSE.getPredicate().getComplexity() >
+        LoadElimSCEVCheckThreshold) {
       DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
       return false;
     }
 
     // Point of no-return, start the transformation.  First, version the loop if
     // necessary.
-    if (!Checks.empty() || !LAI.Preds.isAlwaysTrue()) {
+    if (!Checks.empty() || !LAI.PredSE.getPredicate().isAlwaysTrue()) {
       LoopVersioning LV(LAI, L, LI, DT, SE, false);
       LV.setAliasChecks(std::move(Checks));
-      LV.setSCEVChecks(LAI.Preds);
+      LV.setSCEVChecks(LAI.PredSE.getPredicate());
       LV.versionLoop();
     }
 
Index: lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- lib/Transforms/Utils/LoopUtils.cpp
+++ lib/Transforms/Utils/LoopUtils.cpp
@@ -727,3 +727,45 @@
 
   return UsedOutside;
 }
+
+SCEVPredicatedLayer::SCEVPredicatedLayer(ScalarEvolution &SE)
+    : SE(SE), Generation(0) {}
+
+const SCEV *SCEVPredicatedLayer::getSCEV(Value *V) {
+  const SCEV *Expr = SE.getSCEV(V);
+  auto II = RewriteMap.find(Expr);
+  if (II == RewriteMap.end()) {
+    const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, Preds);
+    RewriteMap[Expr] = {Generation, NewSCEV};
+    return NewSCEV;
+  }
+
+  if (Generation == II->second.first)
+    return II->second.second;
+
+  const SCEV *NewExpr = SE.rewriteUsingPredicate(II->second.second, Preds);
+  RewriteMap[Expr] = {Generation, NewExpr};
+  return NewExpr;
+}
+
+void SCEVPredicatedLayer::addPredicate(const SCEVPredicate &Pred) {
+  if (Preds.implies(&Pred))
+    return;
+  Preds.add(&Pred);
+  updateGeneration();
+}
+
+const SCEVUnionPredicate &SCEVPredicatedLayer::getPredicate() const {
+  return Preds;
+}
+
+void SCEVPredicatedLayer::updateGeneration() {
+  Generation++;
+  // The generation number wrapped. Recompute everything.
+  if (Generation == 0) {
+    for (auto &II : RewriteMap) {
+      const SCEV *Rewritten = II.second.second;
+      II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, Preds)};
+    }
+  }
+}
Index: lib/Transforms/Utils/LoopVersioning.cpp
===================================================================
--- lib/Transforms/Utils/LoopVersioning.cpp
+++ lib/Transforms/Utils/LoopVersioning.cpp
@@ -33,7 +33,7 @@
   assert(L->getLoopPreheader() && "No preheader");
   if (UseLAIChecks) {
     setAliasChecks(LAI.getRuntimePointerChecking()->getChecks());
-    setSCEVChecks(LAI.Preds);
+    setSCEVChecks(LAI.PredSE.getPredicate());
   }
 }
 
@@ -59,7 +59,7 @@
       LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks);
   assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
 
-  const SCEVUnionPredicate &Pred = LAI.Preds;
+  const SCEVUnionPredicate &Pred = LAI.PredSE.getPredicate();
   SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
                    "scev.check");
   SCEVRuntimeCheck =
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -310,15 +310,15 @@
 /// and reduction variables that were found to a given vectorization factor.
 class InnerLoopVectorizer {
 public:
-  InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
+  InnerLoopVectorizer(Loop *OrigLoop, SCEVPredicatedLayer &PredSE, LoopInfo *LI,
                       DominatorTree *DT, const TargetLibraryInfo *TLI,
                       const TargetTransformInfo *TTI, unsigned VecWidth,
-                      unsigned UnrollFactor, SCEVUnionPredicate &Preds)
-      : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
-        VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
+                      unsigned UnrollFactor)
+      : OrigLoop(OrigLoop), PredSE(PredSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
+        VF(VecWidth), UF(UnrollFactor), Builder(PredSE.getSE()->getContext()),
         Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
         TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr),
-        AddedSafetyChecks(false), Preds(Preds) {}
+        AddedSafetyChecks(false) {}
 
   // Perform the actual loop widening (vectorization).
   // MinimumBitWidths maps scalar integer values to the smallest bitwidth they
@@ -486,8 +486,10 @@
 
   /// The original loop.
   Loop *OrigLoop;
-  /// Scev analysis to use.
-  ScalarEvolution *SE;
+  /// The SCEV predicated layer is used to add runtime SCEV checks, and applies
+  /// dynamic knowledge to simplify SCEV expressions and convert them to a more
+  /// usable form.
+  SCEVPredicatedLayer &PredSE;
   /// Loop Info.
   LoopInfo *LI;
   /// Dominator Tree.
@@ -552,22 +554,15 @@
   // Record whether runtime check is added.
   bool AddedSafetyChecks;
 
-  /// The SCEV predicate containing all the SCEV-related assumptions.
-  /// The predicate is used to simplify existing expressions in the
-  /// context of existing SCEV assumptions. Since legality checking is
-  /// not done here, we don't need to use this predicate to record
-  /// further assumptions.
-  SCEVUnionPredicate &Preds;
 };
 
 class InnerLoopUnroller : public InnerLoopVectorizer {
 public:
-  InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
+  InnerLoopUnroller(Loop *OrigLoop, SCEVPredicatedLayer &PredSE, LoopInfo *LI,
                     DominatorTree *DT, const TargetLibraryInfo *TLI,
-                    const TargetTransformInfo *TTI, unsigned UnrollFactor,
-                    SCEVUnionPredicate &Preds)
-      : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor,
-                            Preds) {}
+                    const TargetTransformInfo *TTI, unsigned UnrollFactor)
+      : InnerLoopVectorizer(OrigLoop, PredSE, LI, DT, TLI, TTI, 1,
+                            UnrollFactor) {}
 
 private:
   void scalarizeInstruction(Instruction *Instr,
@@ -788,9 +783,8 @@
 /// between the member and the group in a map.
 class InterleavedAccessInfo {
 public:
-  InterleavedAccessInfo(ScalarEvolution *SE, Loop *L, DominatorTree *DT,
-                        SCEVUnionPredicate &Preds)
-      : SE(SE), TheLoop(L), DT(DT), Preds(Preds) {}
+  InterleavedAccessInfo(SCEVPredicatedLayer &PredSE, Loop *L, DominatorTree *DT)
+      : PredSE(PredSE), TheLoop(L), DT(DT) {}
 
   ~InterleavedAccessInfo() {
     SmallSet<InterleaveGroup *, 4> DelSet;
@@ -820,16 +814,14 @@
   }
 
 private:
-  ScalarEvolution *SE;
-  Loop *TheLoop;
-  DominatorTree *DT;
-
   /// The SCEV predicate containing all the SCEV-related assumptions.
   /// The predicate is used to simplify SCEV expressions in the
   /// context of existing SCEV assumptions. The interleaved access
   /// analysis can also add new predicates (for example by versioning
   /// strides of pointers).
-  SCEVUnionPredicate &Preds;
+  SCEVPredicatedLayer &PredSE;
+  Loop *TheLoop;
+  DominatorTree *DT;
 
   /// Holds the relationships between the members and the interleave group.
   DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
@@ -1188,18 +1180,17 @@
 /// induction variable and the different reduction variables.
 class LoopVectorizationLegality {
 public:
-  LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
-                            TargetLibraryInfo *TLI, AliasAnalysis *AA,
-                            Function *F, const TargetTransformInfo *TTI,
+  LoopVectorizationLegality(Loop *L, SCEVPredicatedLayer &PredSE,
+                            DominatorTree *DT, TargetLibraryInfo *TLI,
+                            AliasAnalysis *AA, Function *F,
+                            const TargetTransformInfo *TTI,
                             LoopAccessAnalysis *LAA,
                             LoopVectorizationRequirements *R,
-                            const LoopVectorizeHints *H,
-                            SCEVUnionPredicate &Preds)
-      : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
-        TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr),
-        InterleaveInfo(SE, L, DT, Preds), Induction(nullptr),
-        WidestIndTy(nullptr), HasFunNoNaNAttr(false), Requirements(R), Hints(H),
-        Preds(Preds) {}
+                            const LoopVectorizeHints *H)
+      : NumPredStores(0), TheLoop(L), PredSE(PredSE), TLI(TLI), TheFunction(F),
+        TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(PredSE, L, DT),
+        Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
+        Requirements(R), Hints(H) {}
 
   /// ReductionList contains the reduction descriptors for all
   /// of the reductions that were found in the loop.
@@ -1343,8 +1334,12 @@
 
   /// The loop that we evaluate.
   Loop *TheLoop;
-  /// Scev analysis.
-  ScalarEvolution *SE;
+  /// The SCEV predicated layer is used to add runtime SCEV checks, and applies
+  /// dynamic knowledge to simplify SCEV expressions and convert them to a more
+  /// context of existing SCEV assumptions. The analysis will also add a minimal
+  /// set of new predicates if this is required to enable vectorization and
+  /// unrolling.
+  SCEVPredicatedLayer &PredSE;
   /// Target Library Info.
   TargetLibraryInfo *TLI;
   /// Parent function
@@ -1399,13 +1394,6 @@
   /// While vectorizing these instructions we have to generate a
   /// call to the appropriate masked intrinsic
   SmallPtrSet<const Instruction *, 8> MaskedOp;
-
-  /// The SCEV predicate containing all the SCEV-related assumptions.
-  /// The predicate is used to simplify SCEV expressions in the
-  /// context of existing SCEV assumptions. The analysis will also
-  /// add a minimal set of new predicates if this is required to
-  /// enable vectorization/unrolling.
-  SCEVUnionPredicate &Preds;
 };
 
 /// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1423,8 +1411,7 @@
                              const TargetLibraryInfo *TLI, DemandedBits *DB,
                              AssumptionCache *AC, const Function *F,
                              const LoopVectorizeHints *Hints,
-                             SmallPtrSetImpl<const Value *> &ValuesToIgnore,
-                             SCEVUnionPredicate &Preds)
+                             SmallPtrSetImpl<const Value *> &ValuesToIgnore)
       : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
         TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {}
 
@@ -1754,12 +1741,12 @@
       }
     }
 
-    SCEVUnionPredicate Preds;
+    SCEVPredicatedLayer PredSE(*SE);
 
     // Check if it is legal to vectorize the loop.
     LoopVectorizationRequirements Requirements;
-    LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA,
-                                  &Requirements, &Hints, Preds);
+    LoopVectorizationLegality LVL(L, PredSE, DT, TLI, AA, F, TTI, LAA,
+                                  &Requirements, &Hints);
     if (!LVL.canVectorize()) {
       DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
       emitMissedWarning(F, L, Hints);
@@ -1777,8 +1764,8 @@
     }
 
     // Use the cost model.
-    LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, DB, AC, F, &Hints,
-                                  ValuesToIgnore, Preds);
+    LoopVectorizationCostModel CM(L, PredSE.getSE(), LI, &LVL, *TTI, TLI, DB,
+                                  AC, F, &Hints, ValuesToIgnore);
 
     // Check the function attributes to find out if this function should be
     // optimized for size.
@@ -1889,7 +1876,7 @@
       assert(IC > 1 && "interleave count should not be 1 or 0");
       // If we decided that it is not legal to vectorize the loop then
       // interleave it.
-      InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC, Preds);
+      InnerLoopUnroller Unroller(L, PredSE, LI, DT, TLI, TTI, IC);
       Unroller.vectorize(&LVL, CM.MinBWs);
 
       emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
@@ -1897,7 +1884,7 @@
                                  Twine(IC) + ")");
     } else {
       // If we decided that it is *legal* to vectorize the loop then do it.
-      InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC, Preds);
+      InnerLoopVectorizer LB(L, PredSE, LI, DT, TLI, TTI, VF.Width, IC);
       LB.vectorize(&LVL, CM.MinBWs);
       ++LoopsVectorized;
 
@@ -1998,6 +1985,7 @@
 
 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
   assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
+  auto *SE = PredSE.getSE();
   // Make sure that the pointer does not point to structs.
   if (Ptr->getType()->getPointerElementType()->isAggregateType())
     return 0;
@@ -2027,7 +2015,7 @@
 
     // Make sure that all of the index operands are loop invariant.
     for (unsigned i = 1; i < NumOperands; ++i)
-      if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+      if (!SE->isLoopInvariant(PredSE.getSCEV(Gep->getOperand(i)), TheLoop))
         return 0;
 
     InductionDescriptor II = Inductions[Phi];
@@ -2040,14 +2028,14 @@
   // operand.
   for (unsigned i = 0; i != NumOperands; ++i)
     if (i != InductionOperand &&
-        !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+        !SE->isLoopInvariant(PredSE.getSCEV(Gep->getOperand(i)), TheLoop))
       return 0;
 
   // We can emit wide load/stores only if the last non-zero index is the
   // induction variable.
   const SCEV *Last = nullptr;
   if (!Strides.count(Gep))
-    Last = SE->getSCEV(Gep->getOperand(InductionOperand));
+    Last = PredSE.getSCEV(Gep->getOperand(InductionOperand));
   else {
     // Because of the multiplication by a stride we can have a s/zext cast.
     // We are going to replace this stride by 1 so the cast is safe to ignore.
@@ -2058,7 +2046,7 @@
     //  %idxprom = zext i32 %mul to i64  << Safe cast.
     //  %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
     //
-    Last = replaceSymbolicStrideSCEV(SE, Strides, Preds,
+    Last = replaceSymbolicStrideSCEV(PredSE, Strides,
                                      Gep->getOperand(InductionOperand), Gep);
     if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
       Last =
@@ -2416,8 +2404,9 @@
     Ptr = Builder.Insert(Gep2);
   } else if (Gep) {
     setDebugLocFromInst(Builder, Gep);
-    assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()),
-                               OrigLoop) && "Base ptr must be invariant");
+    assert(PredSE.getSE()->isLoopInvariant(
+               PredSE.getSCEV(Gep->getPointerOperand()), OrigLoop) &&
+           "Base ptr must be invariant");
 
     // The last index does not have to be the induction. It can be
     // consecutive and be a function of the index. For example A[I+1];
@@ -2434,7 +2423,8 @@
       if (i == InductionOperand ||
           (GepOperandInst && OrigLoop->contains(GepOperandInst))) {
         assert((i == InductionOperand ||
-               SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) &&
+                PredSE.getSE()->isLoopInvariant(PredSE.getSCEV(GepOperandInst),
+                                                OrigLoop)) &&
                "Must be last index or loop invariant");
 
         VectorParts &GEPParts = getVectorValue(GepOperand);
@@ -2652,6 +2642,7 @@
 
   IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
   // Find the loop boundaries.
+  ScalarEvolution *SE = PredSE.getSE();
   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop);
   assert(BackedgeTakenCount != SE->getCouldNotCompute() && "Invalid loop count");
 
@@ -2758,8 +2749,10 @@
   // Generate the code to check that the SCEV assumptions that we made.
   // We want the new basic block to start at the first instruction in a
   // sequence of instructions that form a check.
-  SCEVExpander Exp(*SE, Bypass->getModule()->getDataLayout(), "scev.check");
-  Value *SCEVCheck = Exp.expandCodeForPredicate(&Preds, BB->getTerminator());
+  SCEVExpander Exp(*PredSE.getSE(), Bypass->getModule()->getDataLayout(),
+                   "scev.check");
+  Value *SCEVCheck =
+      Exp.expandCodeForPredicate(&PredSE.getPredicate(), BB->getTerminator());
 
   if (auto *C = dyn_cast<ConstantInt>(SCEVCheck))
     if (C->isZero())
@@ -3777,8 +3770,9 @@
       // Widen selects.
       // If the selector is loop invariant we can create a select
       // instruction with a scalar condition. Otherwise, use vector-select.
-      bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)),
-                                               OrigLoop);
+      auto *SE = PredSE.getSE();
+      bool InvariantCond =
+          SE->isLoopInvariant(PredSE.getSCEV(it->getOperand(0)), OrigLoop);
       setDebugLocFromInst(Builder, &*it);
 
       // The condition can be loop invariant  but still defined inside the
@@ -3958,7 +3952,7 @@
 
 void InnerLoopVectorizer::updateAnalysis() {
   // Forget the original basic block.
-  SE->forgetLoop(OrigLoop);
+  PredSE.getSE()->forgetLoop(OrigLoop);
 
   // Update the dominator tree information.
   assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
@@ -4110,8 +4104,8 @@
   }
 
   // ScalarEvolution needs to be able to find the exit count.
-  const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
-  if (ExitCount == SE->getCouldNotCompute()) {
+  const SCEV *ExitCount = PredSE.getSE()->getBackedgeTakenCount(TheLoop);
+  if (ExitCount == PredSE.getSE()->getCouldNotCompute()) {
     emitAnalysis(VectorizationReport() <<
                  "could not determine number of loop iterations");
     DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
@@ -4153,7 +4147,7 @@
   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
 
-  if (Preds.getComplexity() > SCEVThreshold) {
+  if (PredSE.getPredicate().getComplexity() > SCEVThreshold) {
     emitAnalysis(VectorizationReport()
                  << "Too many SCEV assumptions need to be made and checked "
                  << "at runtime");
@@ -4259,7 +4253,7 @@
         }
 
         InductionDescriptor ID;
-        if (InductionDescriptor::isInductionPHI(Phi, SE, ID)) {
+        if (InductionDescriptor::isInductionPHI(Phi, PredSE.getSE(), ID)) {
           Inductions[Phi] = ID;
           // Get the widest type.
           if (!WidestIndTy)
@@ -4328,7 +4322,8 @@
       // second argument is the same (i.e. loop invariant)
       if (CI &&
           hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
-        if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
+        auto *SE = PredSE.getSE();
+        if (!SE->isLoopInvariant(PredSE.getSCEV(CI->getOperand(1)), TheLoop)) {
           emitAnalysis(VectorizationReport(&*it)
                        << "intrinsic instruction cannot be vectorized");
           DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
@@ -4401,7 +4396,7 @@
   else
     return;
 
-  Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop);
+  Value *Stride = getStrideFromPointer(Ptr, PredSE.getSE(), TheLoop);
   if (!Stride)
     return;
 
@@ -4465,7 +4460,7 @@
   }
 
   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
-  Preds.add(&LAI->Preds);
+  PredSE.addPredicate(LAI->PredSE.getPredicate());
 
   return true;
 }
@@ -4580,7 +4575,7 @@
     StoreInst *SI = dyn_cast<StoreInst>(I);
 
     Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
-    int Stride = isStridedPtr(SE, Ptr, TheLoop, Strides, Preds);
+    int Stride = isStridedPtr(PredSE, Ptr, TheLoop, Strides);
 
     // The factor of the corresponding interleave group.
     unsigned Factor = std::abs(Stride);
@@ -4589,7 +4584,7 @@
     if (Factor < 2 || Factor > MaxInterleaveGroupFactor)
       continue;
 
-    const SCEV *Scev = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr);
+    const SCEV *Scev = replaceSymbolicStrideSCEV(PredSE, Strides, Ptr);
     PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
     unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType());
 
@@ -4676,8 +4671,8 @@
         continue;
 
       // Calculate the distance and prepare for the rule 3.
-      const SCEVConstant *DistToA =
-          dyn_cast<SCEVConstant>(SE->getMinusSCEV(DesB.Scev, DesA.Scev));
+      const SCEVConstant *DistToA = dyn_cast<SCEVConstant>(
+          PredSE.getSE()->getMinusSCEV(DesB.Scev, DesA.Scev));
       if (!DistToA)
         continue;