Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -204,6 +204,11 @@ cl::desc("Max coefficients in AddRec during evolving"), cl::init(16)); +static cl::opt + MaxConstExprSize("scalar-evolution-max-const-expr-size", cl::Hidden, + cl::desc("Max number of nodes of const expression"), + cl::init(2048)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -4730,6 +4735,7 @@ if (Shifted != getCouldNotCompute() && Start != getCouldNotCompute()) { const SCEV *StartVal = getSCEV(StartValueV); + if (Start == StartVal) { // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and purge all of the @@ -6013,6 +6019,12 @@ } return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); + case Instruction::IntToPtr: + case Instruction::PtrToInt: + if (getTypeSizeInBits(U->getType()) != + getTypeSizeInBits(U->getOperand(0)->getType())) + break; + // Otherwise fall through case Instruction::BitCast: // BitCasts are no-op casts so we just eliminate the cast. 
if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) @@ -7372,6 +7384,18 @@ return IncomingVal; } +static size_t GetConstExprSize(Constant *C) { + ConstantExpr *CE = dyn_cast(C); + if (!CE) + return 1; + size_t S = 1; + for (User::const_op_iterator OI = CE->op_begin(); OI != CE->op_end(); ++OI) { + if (Constant *CV = dyn_cast(*OI)) + S += GetConstExprSize(CV); + } + return S; +} + /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is /// in the header of its containing loop, we know the loop executes a /// constant number of times, and the PHI node is just a recurrence @@ -7427,6 +7451,10 @@ EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); if (!NextPHI) return nullptr; // Couldn't evaluate! + + if (GetConstExprSize(NextPHI) > MaxConstExprSize) + return nullptr; + NextIterVals[PN] = NextPHI; bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; @@ -10425,6 +10453,7 @@ continue; if (PHINode *PN = dyn_cast(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->eraseValueFromMap(U); Worklist.insert(Worklist.end(), U->user_begin(), U->user_end()); } Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -857,8 +857,17 @@ /// the PHI. If so, it may be reused by expanded expressions. bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L) { + auto IsBitCast = [this](Instruction *I) { + if (isa(I)) + return true; + if (isa(I) || isa(I)) + return SE.getTypeSizeInBits(I->getType()) == + SE.getTypeSizeInBits(I->getOperand(0)->getType()); + return false; + }; + if (IncV->getNumOperands() == 0 || isa(IncV) || - (isa(IncV) && !isa(IncV))) + (isa(IncV) && !IsBitCast(IncV))) return false; // If any of the operands don't dominate the insert position, bail. 
// Addrec operands are always loop-invariant, so this can only happen @@ -910,6 +919,12 @@ return dyn_cast(IncV->getOperand(0)); return nullptr; } + case Instruction::IntToPtr: + case Instruction::PtrToInt: + if (SE.getTypeSizeInBits(IncV->getType()) != + SE.getTypeSizeInBits(IncV->getOperand(0)->getType())) + return nullptr; + // fall through: case Instruction::BitCast: return dyn_cast(IncV->getOperand(0)); case Instruction::GetElementPtr: @@ -1495,7 +1510,7 @@ // actually a pointer type. if (!isa(Base) && !isa(Base)) { Value *StartV = expand(Base); - assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); + StartV = InsertNoopCastOfTo(StartV, PTy); return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); } } @@ -1680,6 +1695,16 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt) { SetVector *Set = SE.getSCEVValues(S); + auto IsCompatibleTy = [this](Type *Type1, Type *Type2) { + if (Type1 == Type2) + return true; + + if ((!Type1->isPointerTy() && !Type1->isIntegerTy()) || + (!Type2->isPointerTy() && !Type2->isIntegerTy())) + return false; + + return SE.getTypeSizeInBits(Type1) == SE.getTypeSizeInBits(Type2); + }; // If the expansion is not in CanonicalMode, and the SCEV contains any // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. 
if (CanonicalMode || !SE.containsAddRecurrence(S)) { @@ -1693,7 +1718,7 @@ ConstantInt *Offset = VOPair.second; Instruction *EntInst = nullptr; if (V && isa(V) && (EntInst = cast(V)) && - S->getType() == V->getType() && + IsCompatibleTy(S->getType(), V->getType()) && EntInst->getFunction() == InsertPt->getFunction() && SE.DT.dominates(EntInst, InsertPt) && (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || Index: lib/Transforms/Scalar/AlignmentFromAssumptions.cpp =================================================================== --- lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -268,12 +268,14 @@ // Try to find the ptrtoint; subtract it and the rest is the offset. for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(), JE = AndLHSAddSCEV->op_end(); J != JE; ++J) - if (const SCEVUnknown *OpUnk = dyn_cast(*J)) - if (PtrToIntInst *PToI = dyn_cast(OpUnk->getValue())) { + if (const SCEVUnknown *OpUnk = dyn_cast(*J)) { + OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J); + if (PtrToIntInst *PToI = dyn_cast(OpUnk->getValue())) AAPtr = PToI->getPointerOperand(); - OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J); - break; - } + else + AAPtr = OpUnk->getValue(); + break; + } } if (!AAPtr) Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3535,6 +3535,41 @@ // value. for (BasicBlock *BB : LoopBypassBlocks) BCResumeVal->addIncoming(II.getStartValue(), BB); + + // FIXME: Strictly speaking, when an incoming operand is changed + // here, we should erase the cached SCEV expressions for the + // PHI and its related values. However as the new start value is + // merged from multiple predecessors, Scalar Evolution may + // no longer be able to rediscover the phi is an AddRecExprs. 
For + // instance, + // int *p = base; + // int i = 0; + // do { + // p1 = phi(base, p2); + // i1 = phi(0, i2); + // .. = *p2; + // i2 = i1 + 1; + // p2 = base + i2; + // .. + // while (...); + // + // Before the loop vectorization, scalar evolution can discover that + // i2 is {0, +, 1}, and p2 is derived from i2 which is {4 + base, +, 4}. + // It also discovers that p1 is {base, +, 4} which is shifted + // from p2. When the loop vectorizer creates the vector loop, the scalar + // loop's header phi's start value will become a merge of the + // original start and vector loop exit value. In this case, the start + // value of p1 becomes bc_resume_val1, and i1 has start of bc_resume_val2. + // Scalar evolution can no longer establish that p1 is a shifted value + // of {4 + 4*bc_resume_val2, +, 4}. + // + // Since the scalar evolution expander can sometimes erase cached SCEV vals + // for some of the phis in the scalar loop, we do a recomputation and cache + // it here before the phi is updated (doing on-demand recomputation after + // the update would produce inconsistent SCEVs with other PHIs with cached + // SCEVs from the original scalar loop). 
+ // + PSE.getSCEV(OrigPhi); OrigPhi->setIncomingValue(BlockIdx, BCResumeVal); } Index: test/Analysis/ScalarEvolution/timeout_constexpr.ll =================================================================== --- test/Analysis/ScalarEvolution/timeout_constexpr.ll +++ test/Analysis/ScalarEvolution/timeout_constexpr.ll @@ -0,0 +1,33 @@ +; CHECK the test won't timeout (will happen when -scalar-evolution-max-const-expr-size= is set to +; large value +; RUN: opt < %s -indvars +%struct.ST = type { %struct.ST* } + +@global = internal global [121 x i8] zeroinitializer, align 1 + +define void @func() { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %0 = phi %struct.ST* [ %2, %for.body ], [ bitcast ([121 x i8]* @global to %struct.ST*), %entry ] + %inc1 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %cmp = icmp slt i32 %inc1, 30 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %add.ptr1 = getelementptr inbounds %struct.ST, %struct.ST* %0, i32 1 + %1 = ptrtoint %struct.ST* %add.ptr1 to i32 + %rem = and i32 %1, 1 + %add = add i32 %rem, %1 + %2 = inttoptr i32 %add to %struct.ST* + %next = getelementptr inbounds %struct.ST, %struct.ST* %0, i32 0, i32 0 + store %struct.ST* %2, %struct.ST** %next, align 4 + %inc = add nsw i32 %inc1, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %next6 = getelementptr inbounds %struct.ST, %struct.ST* %0, i32 0, i32 0 + store %struct.ST* null, %struct.ST** %next6, align 4 + ret void +} Index: test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll =================================================================== --- test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll +++ test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll @@ -12,7 +12,7 @@ for.body: ; CHECK: for.body -; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}] +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}] ; CHECK: add x[[REG:[0-9]+]], ; CHECK: x[[REG]], #1, lsl #12 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 
Index: test/CodeGen/PowerPC/unal-altivec.ll =================================================================== --- test/CodeGen/PowerPC/unal-altivec.ll +++ test/CodeGen/PowerPC/unal-altivec.ll @@ -30,7 +30,7 @@ ; CHECK: @foo ; CHECK-DAG: li [[C0:[0-9]+]], 0 -; CHECK-DAG: li [[C15:[0-9]+]], 15 +; CHECK-DAG: li [[C15:[0-9]+]], 16 ; CHECK-DAG: lvx [[CNST:[0-9]+]], ; CHECK: .LBB0_1: ; CHECK-DAG: lvsl [[MASK1:[0-9]+]], [[B1:[0-9]+]], [[C0]] Index: test/Other/constant-fold-gep.ll =================================================================== --- test/Other/constant-fold-gep.ll +++ test/Other/constant-fold-gep.ll @@ -312,31 +312,31 @@ ; TO: } ; SCEV: Classifying expressions for: @fa ; SCEV: %t = bitcast i64 mul (i64 ptrtoint (double* getelementptr (double, double* null, i32 1) to i64), i64 2310) to i64 -; SCEV: --> (2310 * sizeof(double)) +; SCEV: --> 18480 U: [18480,18481) S: [18480,18481) ; SCEV: Classifying expressions for: @fb ; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }, { i1, double }* null, i64 0, i32 1) to i64) to i64 -; SCEV: --> alignof(double) +; SCEV: --> 8 U: [8,9) S: [8,9) ; SCEV: Classifying expressions for: @fc ; SCEV: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double, double* null, i32 1) to i64), i64 2) to i64 -; SCEV: --> (2 * sizeof(double)) +; SCEV: --> 16 U: [16,17) S: [16,17) ; SCEV: Classifying expressions for: @fd ; SCEV: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double, double* null, i32 1) to i64), i64 11) to i64 -; SCEV: --> (11 * sizeof(double)) +; SCEV: --> 88 U: [88,89) S: [88,89) ; SCEV: Classifying expressions for: @fe ; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr ({ double, float, double, double }, { double, float, double, double }* null, i64 0, i32 2) to i64) to i64 -; SCEV: --> offsetof({ double, float, double, double }, 2) +; SCEV: --> 16 U: [16,17) S: [16,17) ; SCEV: Classifying expressions for: @ff ; SCEV: %t = bitcast i64 1 to i64 ; SCEV: --> 1 ; 
SCEV: Classifying expressions for: @fg ; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }, { i1, double }* null, i64 0, i32 1) to i64) to i64 -; SCEV: --> alignof(double) +; SCEV: --> 8 U: [8,9) S: [8,9) ; SCEV: Classifying expressions for: @fh ; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr (i1*, i1** null, i32 1) to i64) to i64 -; SCEV: --> sizeof(i1*) +; SCEV: --> 8 U: [8,9) S: [8,9) ; SCEV: Classifying expressions for: @fi ; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr ({ i1, i1* }, { i1, i1* }* null, i64 0, i32 1) to i64) to i64 -; SCEV: --> alignof(i1*) +; SCEV: --> 8 U: [8,9) S: [8,9) define i64 @fa() nounwind { %t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}, {[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64 Index: test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll =================================================================== --- test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll +++ test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-reduce -S < %s | FileCheck %s +; RUN: opt -loop-reduce -dce -S < %s | FileCheck %s ; ; LTO of clang, which mistakenly uses no TargetLoweringInfo, causes a ; miscompile. ReuseOrCreateCast replace ptrtoint operand with undef. @@ -14,8 +14,9 @@ ; current LSR cost model. 
; CHECK-NOT: = ptrtoint i8* undef to i64 ; CHECK: .lr.ph -; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1 -; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}} +; CHECK: [[TMP:%[^ ]+]] = sub i64 -1, %tmp5 +; CHECK: getelementptr i8, i8* %tmp3, i64 [[TMP]] +; CHECK-NOT: getelementptr ; CHECK: ret void define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 { bb: Index: test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll =================================================================== --- test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll +++ test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll @@ -7,11 +7,10 @@ ; CHECK: [[r1:%[a-z0-9\.]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast ; CHECK: [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1 ; CHECK: for.body.lr.ph: -; CHECK: [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1 ; CHECK: br label %for.body ; CHECK: for.body: -; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r3]], %for.body.lr.ph ] -; CHECK: %lsr.iv.next = add i64 %lsr.iv2, -2 +; CHECK: %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 0, %for.body.lr.ph ] +; CHECK: %lsr.iv.next = add i64 %lsr.iv, ; CHECK: %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16* ; CHECK: %cmp27 = icmp eq i16* %lsr.iv.next3, null Index: test/Transforms/LoopVectorize/header_phi1.ll =================================================================== --- test/Transforms/LoopVectorize/header_phi1.ll +++ test/Transforms/LoopVectorize/header_phi1.ll @@ -0,0 +1,31 @@ +;RUN: opt < %s -loop-vectorize -S | FileCheck %s + +;CHECK-LABEL: @foo +;CHECK: vector.body: +;CHECK: store <4 x i32> +;CHECK: load <4 x i32> + +target triple = "x86_64-grtev4-linux-gnu" + +define void @foo() { +entry: + br label %loop + +loop: + %t1 = phi i32* [ %t3, %loop ], [ null, %entry ] + %t2 = phi i32* [ %t5, %loop ], [ undef, %entry ] + %t3 = getelementptr inbounds i32, i32* %t1, i64 1 + store i32 0, i32* %t1, align 4 + %t4 = load i32, i32* %t3, align 4 + %t5 = getelementptr 
inbounds i32, i32* %t2, i64 1 + %t6 = icmp ugt i32* undef, %t5 + br i1 %t6, label %loop, label %exit + +exit: + ret void + +} + +!llvm.ident = !{!0} + +!0 = !{!"clang version 6.0.0 (trunk 311306) (llvm/trunk 311373)"} Index: test/Transforms/LoopVectorize/header_phi2.ll =================================================================== --- test/Transforms/LoopVectorize/header_phi2.ll +++ test/Transforms/LoopVectorize/header_phi2.ll @@ -0,0 +1,68 @@ +;RUN: opt < %s -loop-vectorize -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +define void @foo() { +;CHECK-LABEL: @foo +;CHECK: vector.body: +;CHECK: store <4 x i32> +bb: + br label %bb56 + +bb56: ; preds = %bb100, %bb + %tmp = phi i32* [ undef, %bb ], [ %tmp98, %bb100 ] + br label %bb67 + +bb67: ; preds = %bb56 + br i1 undef, label %bb68, label %bb97 + +bb68: ; preds = %bb67 + br label %bb74 + +bb73: ; preds = %bb92 + unreachable + +bb74: ; preds = %bb68 + br label %bb75 + +bb75: ; preds = %bb74 + br i1 undef, label %bb76, label %bb77 + +bb76: ; preds = %bb75 + br label %bb97 + +bb77: ; preds = %bb75 + br label %bb83 + +bb83: ; preds = %bb92, %bb77 + %tmp84 = phi i32* [ %tmp93, %bb92 ], [ undef, %bb77 ] + br label %bb85 + +bb85: ; preds = %bb85, %bb83 + %tmp86 = phi i32* [ %tmp88, %bb85 ], [ %tmp84, %bb83 ] + %tmp87 = phi i32* [ %tmp90, %bb85 ], [ %tmp, %bb83 ] + %tmp88 = getelementptr inbounds i32, i32* %tmp86, i64 1 + store i32 undef, i32* %tmp86, align 4 + %tmp89 = load i32, i32* %tmp88, align 4 + %tmp90 = getelementptr inbounds i32, i32* %tmp87, i64 1 + store i32 %tmp89, i32* %tmp87, align 4 + %tmp91 = icmp ugt i32* undef, %tmp90 + br i1 %tmp91, label %bb85, label %bb92 + +bb92: ; preds = %bb85 + %tmp93 = getelementptr inbounds i32, i32* undef, i64 2 + %tmp94 = icmp sgt i32 undef, undef + br i1 %tmp94, label %bb83, label %bb73 + +bb97: ; preds = %bb76, %bb67 + %tmp98 = phi i32* [ undef, %bb76 ], [ %tmp, %bb67 ] + br label %bb100 + 
+bb100: ; preds = %bb97 + br i1 undef, label %bb101, label %bb56 + +bb101: ; preds = %bb100 + unreachable +} + Index: test/Transforms/LoopVectorize/header_phi3.ll =================================================================== --- test/Transforms/LoopVectorize/header_phi3.ll +++ test/Transforms/LoopVectorize/header_phi3.ll @@ -0,0 +1,73 @@ +;RUN: opt < %s -loop-vectorize -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +define void @foo() { +; CHECK-LABEL: @foo +; CHECK: vector.body: +bb: + br i1 undef, label %bb1, label %bb9 + +bb1: ; preds = %bb8, %bb + br label %bb2 + +bb2: ; preds = %bb2, %bb1 + %tmp = phi i16* [ undef, %bb1 ], [ %tmp6, %bb2 ] + %tmp3 = phi i64 [ 0, %bb1 ], [ %tmp5, %bb2 ] + %tmp4 = load i16, i16* %tmp, align 2 + %tmp5 = add nuw nsw i64 %tmp3, 1 + %tmp6 = getelementptr inbounds i16, i16* undef, i64 %tmp5 + %tmp7 = icmp eq i64 %tmp5, 65535 + br i1 %tmp7, label %bb8, label %bb2 + +bb8: ; preds = %bb2 + br i1 undef, label %bb1, label %bb9 + +bb9: ; preds = %bb8, %bb + br i1 undef, label %bb10, label %bb12 + +bb10: ; preds = %bb10, %bb9 + br i1 false, label %bb10, label %bb11 + +bb11: ; preds = %bb10 + br label %bb12 + +bb12: ; preds = %bb11, %bb9 + ret void +} + +define void @foo2(i16 *%t1) { +; CHECK-LABEL: @foo2 +; CHECK: vector.body: +bb: + br i1 undef, label %bb1, label %bb9 + +bb1: ; preds = %bb8, %bb + br label %bb2 + +bb2: ; preds = %bb2, %bb1 + %tmp = phi i16* [ %t1, %bb1 ], [ %tmp6, %bb2 ] + %tmp3 = phi i64 [ 0, %bb1 ], [ %tmp5, %bb2 ] + %tmp4 = load i16, i16* %tmp, align 2 + %tmp5 = add nuw nsw i64 %tmp3, 1 + %tmp6 = getelementptr inbounds i16, i16* %t1, i64 %tmp5 + %tmp7 = icmp eq i64 %tmp5, 65535 + br i1 %tmp7, label %bb8, label %bb2 + +bb8: ; preds = %bb2 + br i1 undef, label %bb1, label %bb9 + +bb9: ; preds = %bb8, %bb + br i1 undef, label %bb10, label %bb12 + +bb10: ; preds = %bb10, %bb9 + br i1 false, label %bb10, label %bb11 + +bb11: ; preds = %bb10 + 
br label %bb12 + +bb12: ; preds = %bb11, %bb9 + ret void +} + + Index: test/Transforms/LoopVectorize/intptr1.ll =================================================================== --- test/Transforms/LoopVectorize/intptr1.ll +++ test/Transforms/LoopVectorize/intptr1.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK-LABEL: @test +;CHECK: vector.body: +;CHECK: load <4 x float> +;CHECK: fmul <4 x float> +;CHECK: store <4 x float> + + +@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 + +define void @test(float* %a, float* readnone %a_end, i64 %b) unnamed_addr { +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.02 = phi i64 [ %add, %for.body ], [ %b, %for.body.preheader ] + %tmp = inttoptr i64 %b.addr.02 to float* + %tmp1 = load float, float* %tmp, align 4 + %mul.i = fmul float %tmp1, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %add = add nsw i64 %b.addr.02, 4 + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} Index: test/Transforms/LoopVectorize/intptr2.ll =================================================================== --- test/Transforms/LoopVectorize/intptr2.ll +++ test/Transforms/LoopVectorize/intptr2.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK-LABEL: @test +;CHECK: vector.body: +;CHECK: load <4 x float> +;CHECK: fmul <4 x float> 
+;CHECK: store <4 x float> + +define void @test(float* %a, float* readnone %a_end, i64 %b) { +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ] + %tmp = inttoptr i64 %b.addr.02 to float* + %tmp1 = load float, float* %tmp, align 4 + %mul.i = fmul float %tmp1, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %add = getelementptr inbounds float, float* %tmp, i64 1 + %add.int = ptrtoint float* %add to i64 + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} Index: test/Transforms/LoopVectorize/intptr3.ll =================================================================== --- test/Transforms/LoopVectorize/intptr3.ll +++ test/Transforms/LoopVectorize/intptr3.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK-LABEL: @test +;CHECK: vector.body: +;CHECK: load <4 x float> +;CHECK: fmul <4 x float> +;CHECK: store <4 x float> + +; Function Attrs: noinline norecurse nounwind uwtable +define void @test(float* %a, float* readnone %a_end, i64 %b) unnamed_addr { +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %b.float = inttoptr i64 %b to float* + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], 
[ %b.float, %for.body.preheader ] + %b.addr.i64 = phi i64 [ %b.addr.i64.inc, %for.body ], [ %b, %for.body.preheader ] + %l = load float, float* %b.addr.float, align 4 + %mul.i = fmul float %l, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %b.addr.float.2 = inttoptr i64 %b.addr.i64 to float* + %b.addr.float.inc = getelementptr inbounds float, float* %b.addr.float.2, i64 1 + %b.addr.i64.inc = ptrtoint float* %b.addr.float.inc to i64 + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + Index: test/Transforms/LoopVectorize/intptr4.ll =================================================================== --- test/Transforms/LoopVectorize/intptr4.ll +++ test/Transforms/LoopVectorize/intptr4.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK-LABEL: @test +;CHECK: vector.body: +;CHECK: load <8 x i8> +;CHECK: mul <8 x i8> +;CHECK: store <8 x i8> + + +; Function Attrs: noinline norecurse nounwind uwtable +define void @test(i8* %a, i8* readnone %a_end, i64 %b) unnamed_addr { +entry: + %cmp1 = icmp ult i8* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %b.i8 = inttoptr i64 %b to i8* + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %a.addr.03 = phi i8* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.i8 = phi i8* [ %b.addr.i8.inc, %for.body ], [ %b.i8, %for.body.preheader ] + %b.addr.i64 = phi i64 [ %b.addr.i64.inc, %for.body ], [ %b, %for.body.preheader ] + %l = load i8, i8* %b.addr.i8, align 4 + %mul.i = mul i8 %l, 4 + store i8 %mul.i, i8* %a.addr.03, align 4 + %b.addr.i8.2 = inttoptr i64 %b.addr.i64 to i8* + %b.addr.i8.inc = getelementptr inbounds i8, 
i8* %b.addr.i8.2, i64 1 + %b.addr.i64.inc = ptrtoint i8* %b.addr.i8.inc to i64 + %incdec.ptr = getelementptr inbounds i8, i8* %a.addr.03, i64 1 + %cmp = icmp ult i8* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + Index: test/Transforms/LoopVectorize/intptr5.ll =================================================================== --- test/Transforms/LoopVectorize/intptr5.ll +++ test/Transforms/LoopVectorize/intptr5.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +;CHECK-LABEL: @foo +;CHECK: vector.body: +;CHECK: load <16 x i8> +;CHECK: store <16 x i8> + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +define void @foo(i64 %tmp0) { +bb: + br label %bb3 + +bb3: ; preds = %bb20, %bb + %tmp = phi i64 [ %tmp0, %bb ], [ %tmp21, %bb20 ] + br i1 false, label %bb4, label %bb19 + +bb4: ; preds = %bb3 + %tmp5 = inttoptr i64 %tmp to i8* + br label %bb7 + +bb7: ; preds = %bb7, %bb4 + %tmp8 = phi i64 [ %tmp, %bb4 ], [ %tmp16, %bb7 ] + %tmp9 = phi i8* [ %tmp5, %bb4 ], [ %tmp15, %bb7 ] + %tmp10 = phi i8* [ undef, %bb4 ], [ %tmp13, %bb7 ] + %tmp11 = phi i32 [ 0, %bb4 ], [ %tmp17, %bb7 ] + %tmp12 = load i8, i8* %tmp9, align 1, !range !1, !noalias !2 + %tmp13 = getelementptr inbounds i8, i8* %tmp10, i64 1 + store i8 %tmp12, i8* %tmp10, align 1, !noalias !2 + %tmp14 = inttoptr i64 %tmp8 to i8* + %tmp15 = getelementptr inbounds i8, i8* %tmp14, i64 1 + %tmp16 = ptrtoint i8* %tmp15 to i64 + %tmp17 = add nuw nsw i32 %tmp11, 1 + %tmp18 = icmp eq i32 %tmp17, undef + br i1 %tmp18, label %bb19, label %bb7 + +bb19: + br i1 undef, label %bb20, label %bb22 + +bb20: ; preds = %bb19 + %tmp21 = ptrtoint i8* undef to i64 + br label %bb3 + +bb22: ; preds = %bb19 + ret void +} + +!llvm.ident = !{!0} + +!0 = !{!"clang version google3-trunk (trunk r311977)"} +!1 = !{i8 0, i8 2} +!2 = !{!3} +!3 = distinct !{!3, !4, 
!"_ZN15quality_ranklab4impl12UnionApplierINS_8internal13CondOpWrapperINS2_10UnionSumOpIbEEEEbE10MakeNStepsINS_15ForwardIteratorINS_10FullVectorIbEEEENS_25MaybeFullPropertyInserterIbPbSt20back_insert_iteratorISt6vectorIiSaIiEEEEEEET0_PT_SL_S6_i: argument 0"} +!4 = distinct !{!4, !"_ZN15quality_ranklab4impl12UnionApplierINS_8internal13CondOpWrapperINS2_10UnionSumOpIbEEEEbE10MakeNStepsINS_15ForwardIteratorINS_10FullVectorIbEEEENS_25MaybeFullPropertyInserterIbPbSt20back_insert_iteratorISt6vectorIiSaIiEEEEEEET0_PT_SL_S6_i"} Index: unittests/Analysis/ScalarEvolutionTest.cpp =================================================================== --- unittests/Analysis/ScalarEvolutionTest.cpp +++ unittests/Analysis/ScalarEvolutionTest.cpp @@ -335,9 +335,7 @@ // Expect the expansion code contains: // %0 = bitcast i32* %bitcast2 to i8* // %uglygep = getelementptr i8, i8* %0, i64 -1 - // %1 = bitcast i8* %uglygep to i32* - EXPECT_TRUE(isa(V)); - Instruction *Gep = cast(V)->getPrevNode(); + Instruction *Gep = cast(V); EXPECT_TRUE(isa(Gep)); EXPECT_TRUE(isa(Gep->getOperand(1))); EXPECT_EQ(cast(Gep->getOperand(1))->getSExtValue(), -1);