Index: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
@@ -23,6 +23,7 @@
 
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Function.h"
@@ -382,6 +383,22 @@
     /// This SCEV is used to represent unknown trip counts and things.
     std::unique_ptr<SCEVCouldNotCompute> CouldNotCompute;
 
+    /// HasRecMapType - The typedef for HasRecMap.
+    ///
+    typedef DenseMap<const SCEV *, bool> HasRecMapType;
+
+    /// HasRecMap -- This is a cache to record whether a SCEV contains
+    /// any scAddRecExpr.
+    HasRecMapType HasRecMap;
+
+    /// ExprValueMapType - The typedef for ExprValueMap.
+    ///
+    typedef DenseMap<const SCEV *, SetVector<Value *>> ExprValueMapType;
+
+    /// ExprValueMap -- This map records the original values from which
+    /// the SCEV expr is generated from.
+    ExprValueMapType ExprValueMap;
+
     /// The typedef for ValueExprMap.
     ///
     typedef DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *> >
@@ -821,6 +838,18 @@
     /// return true. For pointer types, this is the pointer-sized integer type.
     Type *getEffectiveSCEVType(Type *Ty) const;
 
+    /// containsAddRecurrence - Return true if the SCEV is a scAddRecExpr or
+    /// it contains scAddRecExpr. The result will be cached in HasRecMap.
+    ///
+    bool containsAddRecurrence(const SCEV *S);
+
+    /// getSCEVValues - Return the Value set from which the SCEV expr is
+    /// generated.
+    SetVector<Value *> *getSCEVValues(const SCEV *S);
+
+    /// eraseValueFromMap - Erase Value from ValueExprMap and ExprValueMap.
+    void eraseValueFromMap(Value *V);
+
     /// Return a SCEV expression for the full generality of the specified
     /// expression.
     const SCEV *getSCEV(Value *V);
Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolution.cpp
+++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp
@@ -115,6 +115,10 @@
 static cl::opt<bool>
 VerifySCEV("verify-scev",
            cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
+static cl::opt<bool>
+    VerifySCEVMap("verify-scev-maps",
+                  cl::desc("Verify no dangling value in ScalarEvolution's"
+                           "ExprValueMap (slow)"));
 
 //===----------------------------------------------------------------------===//
 //                           SCEV class definitions
@@ -3310,6 +3314,73 @@
   return !F.FindOne;
 }
 
+namespace {
+// Helper class working with SCEVTraversal to figure out if a SCEV contains
+// a sub SCEV of scAddRecExpr type.  FindInvalidSCEVUnknown::FoundOne is set
+// iff if such sub scAddRecExpr type SCEV is found.
+struct FindAddRecurrence {
+  bool FoundOne;
+  FindAddRecurrence() : FoundOne(false) {}
+
+  bool follow(const SCEV *S) {
+    switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+    case scAddRecExpr:
+      FoundOne = true;
+    case scConstant:
+    case scUnknown:
+    case scCouldNotCompute:
+      return false;
+    default:
+      return true;
+    }
+  }
+  bool isDone() const { return FoundOne; }
+};
+}
+
+bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
+  HasRecMapType::iterator I = HasRecMap.find_as(S);
+  if (I != HasRecMap.end())
+    return I->second;
+
+  FindAddRecurrence F;
+  SCEVTraversal<FindAddRecurrence> ST(F);
+  ST.visitAll(S);
+  HasRecMap.insert(std::make_pair(S, F.FoundOne));
+  return F.FoundOne;
+}
+
+/// getSCEVValues - Return the Value set from S.
+SetVector<Value *> *ScalarEvolution::getSCEVValues(const SCEV *S) {
+  ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
+  if (SI == ExprValueMap.end())
+    return nullptr;
+#ifndef NDEBUG
+  if (VerifySCEVMap) {
+    // Check there is no dangling Value in the set returned.
+    for (const auto &VE : SI->second)
+      assert(ValueExprMap.count(VE));
+  }
+#endif
+  return &SI->second;
+}
+
+/// eraseValueFromMap - Erase Value from ValueExprMap and ExprValueMap.
+/// If ValueExprMap.erase(V) is not used together with forgetMemoizedResults(S),
+/// eraseValueFromMap should be used instead to ensure whenever V->S is removed
+/// from ValueExprMap, V is also removed from the set of ExprValueMap[S].
+void ScalarEvolution::eraseValueFromMap(Value *V) {
+  ValueExprMapType::iterator I = ValueExprMap.find_as(V);
+  if (I != ValueExprMap.end()) {
+    const SCEV *S = I->second;
+    SetVector<Value *> *SV = getSCEVValues(S);
+    // Remove V from the set of ExprValueMap[S]
+    if (SV)
+      SV->remove(V);
+    ValueExprMap.erase(V);
+  }
+}
+
 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
 /// expression and create a new one.
 const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -3318,7 +3389,13 @@
   const SCEV *S = getExistingSCEV(V);
   if (S == nullptr) {
     S = createSCEV(V);
-    ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+    // During PHI resolution, it is possible to create two SCEVs for the same
+    // V, so it is needed to double check whether V->S is inserted into
+    // ValueExprMap before insert S->V into ExprValueMap.
+    std::pair<ValueExprMapType::iterator, bool> Pair =
+        ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+    if (Pair.second)
+      ExprValueMap[S].insert(V);
   }
   return S;
 }
@@ -3331,6 +3408,7 @@
     const SCEV *S = I->second;
     if (checkValidity(S))
       return S;
+    forgetMemoizedResults(S);
     ValueExprMap.erase(I);
   }
   return nullptr;
@@ -8967,7 +9045,7 @@
   assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
   if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
     SE->ConstantEvolutionLoopExitValue.erase(PN);
-  SE->ValueExprMap.erase(getValPtr());
+  SE->eraseValueFromMap(getValPtr());
   // this now dangles!
 }
 
@@ -8990,13 +9068,13 @@
       continue;
     if (PHINode *PN = dyn_cast<PHINode>(U))
       SE->ConstantEvolutionLoopExitValue.erase(PN);
-    SE->ValueExprMap.erase(U);
+    SE->eraseValueFromMap(U);
     Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
   }
   // Delete the Old value.
   if (PHINode *PN = dyn_cast<PHINode>(Old))
     SE->ConstantEvolutionLoopExitValue.erase(PN);
-  SE->ValueExprMap.erase(Old);
+  SE->eraseValueFromMap(Old);
   // this now dangles!
 }
 
@@ -9046,7 +9124,9 @@
   }
   FirstUnknown = nullptr;
 
+  ExprValueMap.clear();
   ValueExprMap.clear();
+  HasRecMap.clear();
 
   // Free any extra memory created for ExitNotTakenInfo in the unlikely event
   // that a loop had multiple computable exits.
@@ -9375,6 +9455,8 @@
   BlockDispositions.erase(S);
   UnsignedRanges.erase(S);
   SignedRanges.erase(S);
+  ExprValueMap.erase(S);
+  HasRecMap.erase(S);
 
   for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
          BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
Index: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
===================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
+++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1600,6 +1600,12 @@
   return V;
 }
 
+// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
+// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
+// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
+// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
+// the expansion will try to reuse Value from ExprValueMap, and only when it
+// fails, expand the SCEV literally.
 Value *SCEVExpander::expand(const SCEV *S) {
   // Compute an insertion point for this SCEV object. Hoist the instructions
   // as far out in the loop nest as possible.
@@ -1639,7 +1645,25 @@
   Builder.SetInsertPoint(InsertPt);
 
   // Expand the expression into instructions.
-  Value *V = visit(S);
+  SetVector<Value *> *Set = SE.getSCEVValues(S);
+  Value *V = nullptr;
+  // If the expansion is in LSRMode, and the SCEV contains any sub scAddRecExpr
+  // type SCEV, it will be expanded literally, to prevent LSR's transformed SCEV
+  // from being reverted.
+  if (!(LSRMode && SE.containsAddRecurrence(S))) {
+    if (Set) {
+      // Choose a Value from the set which dominates the insertPt.
+      for (auto const &Ent : *Set) {
+        if (Ent && isa<Instruction>(Ent) && S->getType() == Ent->getType() &&
+            SE.DT.dominates(cast<Instruction>(Ent), InsertPt)) {
+          V = Ent;
+          break;
+        }
+      }
+    }
+  }
+  if (!V)
+    V = visit(S);
 
   // Remember the expanded value for this SCEV at this location.
   //
Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2718,6 +2718,10 @@
   
   BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
                                           "min.iters.checked");
+  // Update dominator tree immediately if the generated block is a
+  // LoopBypassBlock because SCEV expansions to generate loop bypass
+  // checks may query it before the current function is finished.
+  DT->addNewBlock(NewBB, BB);
   if (L->getParentLoop())
     L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
   ReplaceInstWithInst(BB->getTerminator(),
@@ -2740,6 +2744,10 @@
   // adding one to the backedge-taken count will not overflow.
   BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(),
                                           "vector.ph");
+  // Update dominator tree immediately if the generated block is a
+  // LoopBypassBlock because SCEV expansions to generate loop bypass
+  // checks may query it before the current function is finished.
+  DT->addNewBlock(NewBB, BB);
   if (L->getParentLoop())
     L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
   ReplaceInstWithInst(BB->getTerminator(),
@@ -2765,6 +2773,10 @@
   // Create a new block containing the stride check.
   BB->setName("vector.scevcheck");
   auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
+  // Update dominator tree immediately if the generated block is a
+  // LoopBypassBlock because SCEV expansions to generate loop bypass
+  // checks may query it before the current function is finished.
+  DT->addNewBlock(NewBB, BB);
   if (L->getParentLoop())
     L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
   ReplaceInstWithInst(BB->getTerminator(),
@@ -2790,6 +2802,10 @@
   // Create a new block containing the memory check.
   BB->setName("vector.memcheck");
   auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
+  // Update dominator tree immediately if the generated block is a
+  // LoopBypassBlock because SCEV expansions to generate loop bypass
+  // checks may query it before the current function is finished.
+  DT->addNewBlock(NewBB, BB);
   if (L->getParentLoop())
     L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
   ReplaceInstWithInst(BB->getTerminator(),
@@ -3957,10 +3973,6 @@
   assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
          "Entry does not dominate exit.");
 
-  for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
-    DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
-  DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
-
   // We don't predicate stores by this point, so the vector body should be a
   // single loop.
   assert(LoopVectorBody.size() == 1 && "Expected single block loop!");
Index: llvm/trunk/test/Analysis/ScalarEvolution/scev-expander-existing-value.ll
===================================================================
--- llvm/trunk/test/Analysis/ScalarEvolution/scev-expander-existing-value.ll
+++ llvm/trunk/test/Analysis/ScalarEvolution/scev-expander-existing-value.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -verify-scev-maps -S |FileCheck %s
+
+; SCEV expansion uses existing value when the SCEV has no AddRec expr.
+; CHECK: select
+; CHECK-NOT: select
+
+@a = common global [1000 x i16] zeroinitializer, align 16
+
+define i32 @foo(i32 %x, i32 %y) {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  %cond = select i1 %cmp, i32 %x, i32 %y
+  %cmp1.10 = icmp sgt i32 %cond, 0
+  br i1 %cmp1.10, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %tmp = sext i32 %cond to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %total.011 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds [1000 x i16], [1000 x i16]* @a, i64 0, i64 %indvars.iv
+  %tmp1 = load i16, i16* %arrayidx, align 2
+  %conv = sext i16 %tmp1 to i32
+  %add = add nsw i32 %conv, %total.011
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp1 = icmp slt i64 %indvars.iv.next, %tmp
+  br i1 %cmp1, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %total.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ]
+  ret i32 %total.0.lcssa
+}
Index: llvm/trunk/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
+++ llvm/trunk/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -8,7 +8,6 @@
 ; -- The loop following the load should only use a single add-literation
 ;    instruction.
 ; CHECK: vldr
-; CHECK: adds r{{[0-9]+.*}}#1
 ; CHECK-NOT: adds
 ; CHECK: subsections_via_symbols
 
Index: llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll
===================================================================
--- llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll
+++ llvm/trunk/test/Transforms/IRCE/decrementing-loop.ll
@@ -28,7 +28,6 @@
   ret void
 
 ; CHECK: loop.preheader:
-; CHECK:   [[indvar_start:[^ ]+]] = add i32 %n, -1
 ; CHECK:   [[not_len:[^ ]+]] = sub i32 -1, %len
 ; CHECK:   [[not_n:[^ ]+]] = sub i32 -1, %n
 ; CHECK:   [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]]
Index: llvm/trunk/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
===================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
+++ llvm/trunk/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
@@ -11,7 +11,7 @@
   br i1 %cmp1, label %for.body, label %for.end
 
 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %idx.trunc
 
 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(2)*
@@ -43,7 +43,7 @@
   br i1 %cmp1, label %for.body, label %for.end
 
 ; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %idx.trunc
 
 ; CHECK: for.body:
 ; CHECK: phi i8 addrspace(3)*
Index: llvm/trunk/test/Transforms/IndVarSimplify/pr24783.ll
===================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/pr24783.ll
+++ llvm/trunk/test/Transforms/IndVarSimplify/pr24783.ll
@@ -6,9 +6,6 @@
 define void @f(i32* %end.s, i8** %loc, i32 %p) {
 ; CHECK-LABEL: @f(
 entry:
-; CHECK:  [[P_SEXT:%[0-9a-z]+]] = sext i32 %p to i64
-; CHECK:  [[END:%[0-9a-z]+]] = getelementptr i32, i32* %end.s, i64 [[P_SEXT]]
-
   %end = getelementptr inbounds i32, i32* %end.s, i32 %p
   %init = bitcast i32* %end.s to i8*
   br label %while.body.i
@@ -22,7 +19,7 @@
 
 loop.exit:
 ; CHECK: loop.exit:
-; CHECK: [[END_BCASTED:%[a-z0-9]+]] = bitcast i32* %scevgep to i8*
+; CHECK: [[END_BCASTED:%[a-z0-9]+]] = bitcast i32* %end to i8*
 ; CHECK: store i8* [[END_BCASTED]], i8** %loc
   %ptr.inc.lcssa = phi i8* [ %ptr.inc, %while.body.i ]
   store i8* %ptr.inc.lcssa, i8** %loc
Index: llvm/trunk/test/Transforms/IndVarSimplify/udiv.ll
===================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/udiv.ll
+++ llvm/trunk/test/Transforms/IndVarSimplify/udiv.ll
@@ -127,12 +127,12 @@
 
 declare i32 @printf(i8* nocapture, ...) nounwind
 
-; IndVars shouldn't be afraid to emit a udiv here, since there's a udiv in
-; the original code.
+; IndVars doesn't emit a udiv in for.body.preheader since SCEVExpander::expand will
+; find out there's already a udiv in the original code.
 
 ; CHECK-LABEL: @foo(
 ; CHECK: for.body.preheader:
-; CHECK-NEXT: udiv
+; CHECK-NOT: udiv
 
 define void @foo(double* %p, i64 %n) nounwind {
 entry:
Index: llvm/trunk/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
===================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
+++ llvm/trunk/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
@@ -32,15 +32,9 @@
 
 ; CHECK-LABEL: @test1(
 
-; First check that we move the sub into the preheader, it doesn't have to be
-; executed if %cmp4 == false
-; CHECK: for.body.preheader:
-; CHECK: sub i32 %data_len, %sample
-; CHECK: br label %for.body
-
-; Second, check that we turn the IV test into an eq.
+; check that we turn the IV test into an eq.
 ; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %0
+; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub
 ; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
 }
 
Index: llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
===================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -4,8 +4,9 @@
 ; LSR should properly handle the post-inc offset when folding the
 ; non-IV operand of an icmp into the IV.
 
-; CHECK:   [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK:   [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
+; CHECK:   [[r1:%[a-z0-9\.]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK:   [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1
+; CHECK: for.body.lr.ph:
 ; CHECK:   [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1
 ; CHECK:   br label %for.body
 ; CHECK: for.body: