Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2507,33 +2507,52 @@
   auto StartValue = ID.getStartValue();
   assert(Index->getType() == Step->getType() &&
          "Index type does not match StepValue type");
+
+  // Note: the IR at this point is broken. We cannot use SE to create any new
+  // SCEV and then expand it, hoping that SCEV's simplification will give us
+  // more optimal code. Unfortunately, attempting to do so on invalid IR may
+  // lead to various SCEV crashes. So all we can do is use the builder and
+  // rely on InstCombine for future simplifications. Here we handle some
+  // trivial cases only.
+  auto CreateAdd = [&B](Value *X, Value *Y) {
+    assert(X->getType() == Y->getType() && "Types don't match!");
+    if (auto *CX = dyn_cast<ConstantInt>(X))
+      if (CX->isZero())
+        return Y;
+    if (auto *CY = dyn_cast<ConstantInt>(Y))
+      if (CY->isZero())
+        return X;
+    return B.CreateAdd(X, Y);
+  };
+
+  auto CreateMul = [&B](Value *X, Value *Y) {
+    assert(X->getType() == Y->getType() && "Types don't match!");
+    if (auto *CX = dyn_cast<ConstantInt>(X))
+      if (CX->isOne())
+        return Y;
+    if (auto *CY = dyn_cast<ConstantInt>(Y))
+      if (CY->isOne())
+        return X;
+    return B.CreateMul(X, Y);
+  };
+
   switch (ID.getKind()) {
   case InductionDescriptor::IK_IntInduction: {
     assert(Index->getType() == StartValue->getType() &&
            "Index type does not match StartValue type");
-
-    // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
-    // and calculate (Start + Index * Step) for all cases, without
-    // special handling for "isOne" and "isMinusOne".
-    // But in the real life the result code getting worse. We mix SCEV
-    // expressions and ADD/SUB operations and receive redundant
-    // intermediate values being calculated in different ways and
-    // Instcombine is unable to reduce them all.
-
-    if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isMinusOne())
-      return B.CreateSub(StartValue, Index);
-    if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne())
-      return B.CreateAdd(StartValue, Index);
-    const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
-                                   SE->getMulExpr(Step, SE->getSCEV(Index)));
-    return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
+    auto *Offset = CreateMul(
+        Index, Exp.expandCodeFor(Step, Index->getType(), &*B.GetInsertPoint()));
+    return CreateAdd(StartValue, Offset);
   }
   case InductionDescriptor::IK_PtrInduction: {
     assert(isa<SCEVConstant>(Step) &&
            "Expected constant step for pointer induction");
-    const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
-    Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
-    return B.CreateGEP(nullptr, StartValue, Index);
+    return B.CreateGEP(
+        nullptr, StartValue,
+        CreateMul(Index, Exp.expandCodeFor(Step, Index->getType(),
+                                           &*B.GetInsertPoint())));
   }
   case InductionDescriptor::IK_FpInduction: {
     assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
Index: test/Transforms/LoopVectorize/X86/constant-fold.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/constant-fold.ll
+++ test/Transforms/LoopVectorize/X86/constant-fold.ll
@@ -18,20 +18,19 @@
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i16 0, [[TMP0]]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[OFFSET_IDX]], i32 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> undef, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], <i16 0, i16 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = sext i16 [[TMP1]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i16*, i16** [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16** [[TMP4]] to <2 x i16*>*
-; CHECK-NEXT:    store <2 x i16*> <i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 0), i32 0, i32 0), i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 1), i32 0, i32 0)>, <2 x i16*>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i16*, i16** [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16** [[TMP3]] to <2 x i16*>*
+; CHECK-NEXT:    store <2 x i16*> <i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 0), i32 0, i32 0), i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 1), i32 0, i32 0)>, <2 x i16*>* [[TMP4]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 2
-; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 2
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
 ; CHECK:       middle.block:
 
 bb1:
Index: test/Transforms/LoopVectorize/induction.ll
===================================================================
--- test/Transforms/LoopVectorize/induction.ll
+++ test/Transforms/LoopVectorize/induction.ll
@@ -138,7 +138,7 @@
 ; CHECK-LABEL: @scalarize_induction_variable_02(
 ; CHECK: vector.body:
 ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: %offset.idx = shl i64 %index, 3
+; CHECK: %offset.idx = mul i64 %index, 8
 ; CHECK: %[[i0:.+]] = add i64 %offset.idx, 0
 ; CHECK: %[[i1:.+]] = add i64 %offset.idx, 8
 ; CHECK: getelementptr inbounds float, float* %a, i64 %[[i0]]
@@ -149,7 +149,7 @@
 ; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02(
 ; UNROLL-NO-IC: vector.body:
 ; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; UNROLL-NO-IC: %offset.idx = shl i64 %index, 3
+; UNROLL-NO-IC: %offset.idx = mul i64 %index, 8
 ; UNROLL-NO-IC: %[[i0:.+]] = add i64 %offset.idx, 0
 ; UNROLL-NO-IC: %[[i1:.+]] = add i64 %offset.idx, 8
 ; UNROLL-NO-IC: %[[i2:.+]] = add i64 %offset.idx, 16
Index: test/Transforms/LoopVectorize/iv_outside_user.ll
===================================================================
--- test/Transforms/LoopVectorize/iv_outside_user.ll
+++ test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -23,11 +23,10 @@
 ; CHECK-LABEL: @preinc
 ; CHECK-LABEL: middle.block:
 ; CHECK: %[[v3:.+]] = sub i32 %n.vec, 1
-; CHECK: %ind.escape = add i32 0, %[[v3]]
 ; CHECK-LABEL: scalar.ph:
 ; CHECK: %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %entry ]
 ; CHECK-LABEL: for.end:
-; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
+; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %[[v3]], %middle.block ]
 ; CHECK: ret i32 %[[RET]]
 define i32 @preinc(i32 %k) {
 entry:
@@ -135,16 +134,13 @@
 }
 
 ; CHECK-LABEL: @PR30742
+; CHECK: %[[T15:.+]] = add nsw i32 %tmp03, -7
 ; CHECK: vector.ph
 ; CHECK: %[[N_MOD_VF:.+]] = urem i32 %[[T5:.+]], 2
 ; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]]
 ; CHECK: middle.block
 ; CHECK: %[[CMP:.+]] = icmp eq i32 %[[T5]], %[[N_VEC]]
-; CHECK: %[[T15:.+]] = add i32 %tmp03, -7
-; CHECK: %[[T16:.+]] = shl i32 %[[N_MOD_VF]], 3
-; CHECK: %[[T17:.+]] = add i32 %[[T15]], %[[T16]]
-; CHECK: %[[T18:.+]] = shl i32 {{.*}}, 3
-; CHECK: %ind.escape = sub i32 %[[T17]], %[[T18]]
+; CHECK: %ind.escape = add i32 %[[T15]],
 ; CHECK: br i1 %[[CMP]], label %BB3, label %scalar.ph
 define void @PR30742() {
 BB0:
Index: test/Transforms/LoopVectorize/pr39160.ll
===================================================================
--- test/Transforms/LoopVectorize/pr39160.ll
+++ test/Transforms/LoopVectorize/pr39160.ll
@@ -0,0 +1,98 @@
+; RUN: opt -loop-vectorize -S < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure that we can compile the test without crashing.
+define void @barney() {
+
+; CHECK-LABEL: @barney(
+; CHECK: middle.block:
+
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb2, %bb
+  %tmp4 = icmp slt i32 undef, 0
+  br i1 %tmp4, label %bb2, label %bb5
+
+bb5:                                              ; preds = %bb2
+  br label %bb19
+
+bb18:                                             ; preds = %bb33
+  ret void
+
+bb19:                                             ; preds = %bb36, %bb5
+  %tmp21 = phi i64 [ undef, %bb36 ], [ 2, %bb5 ]
+  %tmp22 = phi i32 [ %tmp65, %bb36 ], [ undef, %bb5 ]
+  br label %bb50
+
+bb33:                                             ; preds = %bb62
+  br i1 undef, label %bb18, label %bb36
+
+bb36:                                             ; preds = %bb33
+  br label %bb19
+
+bb46:                                             ; preds = %bb50
+  br i1 undef, label %bb48, label %bb59
+
+bb48:                                             ; preds = %bb46
+  %tmp49 = add i32 %tmp52, 14
+  ret void
+
+bb50:                                             ; preds = %bb50, %bb19
+  %tmp52 = phi i32 [ %tmp55, %bb50 ], [ %tmp22, %bb19 ]
+  %tmp53 = phi i64 [ %tmp56, %bb50 ], [ 1, %bb19 ]
+  %tmp54 = add i32 %tmp52, 12
+  %tmp55 = add i32 %tmp52, 13
+  %tmp56 = add nuw nsw i64 %tmp53, 1
+  %tmp58 = icmp ult i64 %tmp53, undef
+  br i1 %tmp58, label %bb50, label %bb46
+
+bb59:                                             ; preds = %bb46
+  br label %bb62
+
+bb62:                                             ; preds = %bb68, %bb59
+  %tmp63 = phi i32 [ %tmp65, %bb68 ], [ %tmp55, %bb59 ]
+  %tmp64 = phi i64 [ %tmp66, %bb68 ], [ %tmp56, %bb59 ]
+  %tmp65 = add i32 %tmp63, 13
+  %tmp66 = add nuw nsw i64 %tmp64, 1
+  %tmp67 = icmp ult i64 %tmp66, %tmp21
+  br i1 %tmp67, label %bb68, label %bb33
+
+bb68:                                             ; preds = %bb62
+  br label %bb62
+}
+
+define i32 @foo(i32 addrspace(1)* %p) {
+
+; CHECK-LABEL: foo
+; CHECK: middle.block:
+
+entry:
+  br label %outer
+
+outer:                                            ; preds = %outer_latch, %entry
+  %iv = phi i64 [ 2, %entry ], [ %iv.next, %outer_latch ]
+  br label %inner
+
+inner:                                            ; preds = %inner, %outer
+  %0 = phi i32 [ %2, %inner ], [ 0, %outer ]
+  %a = phi i32 [ %3, %inner ], [ 1, %outer ]
+  %b = phi i32 [ %1, %inner ], [ 6, %outer ]
+  %1 = add i32 %b, 2
+  %2 = or i32 %0, %b
+  %3 = add nuw nsw i32 %a, 1
+  %4 = zext i32 %3 to i64
+  %5 = icmp ugt i64 %iv, %4
+  br i1 %5, label %inner, label %outer_latch
+
+outer_latch:                                      ; preds = %inner
+  store atomic i32 %2, i32 addrspace(1)* %p unordered, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %6 = icmp ugt i64 %iv, 63
+  br i1 %6, label %exit, label %outer
+
+exit:                                             ; preds = %outer_latch
+  ret i32 0
+}