Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2507,33 +2507,52 @@ auto StartValue = ID.getStartValue(); assert(Index->getType() == Step->getType() && "Index type does not match StepValue type"); + + // Note: the IR at this point is broken. We cannot use SE to create any new + // SCEV and then expand it, hoping that SCEV's simplification will give us + // a more optimal code. Unfortunately, attempt of doing so on invalid IR may + // lead to various SCEV crashes. So all we can do is to use builder and rely + // on InstCombine for future simplifications. Here we handle some trivial + // cases only. + auto CreateAdd = [&](Value *X, Value *Y) { + assert(X->getType() == Y->getType() && "Types don't match!"); + if (auto *CX = dyn_cast<ConstantInt>(X)) + if (CX->isZero()) + return Y; + if (auto *CY = dyn_cast<ConstantInt>(Y)) + if (CY->isZero()) + return X; + return B.CreateAdd(X, Y); + }; + + auto CreateMul = [&](Value *X, Value *Y) { + assert(X->getType() == Y->getType() && "Types don't match!"); + if (auto *CX = dyn_cast<ConstantInt>(X)) + if (CX->isOne()) + return Y; + if (auto *CY = dyn_cast<ConstantInt>(Y)) + if (CY->isOne()) + return X; + return B.CreateMul(X, Y); + }; + switch (ID.getKind()) { case InductionDescriptor::IK_IntInduction: { assert(Index->getType() == StartValue->getType() && "Index type does not match StartValue type"); - - // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution - // and calculate (Start + Index * Step) for all cases, without - // special handling for "isOne" and "isMinusOne". - // But in the real life the result code getting worse. We mix SCEV - // expressions and ADD/SUB operations and receive redundant - // intermediate values being calculated in different ways and - // Instcombine is unable to reduce them all. 
- if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isMinusOne()) return B.CreateSub(StartValue, Index); - if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne()) - return B.CreateAdd(StartValue, Index); - const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), - SE->getMulExpr(Step, SE->getSCEV(Index))); - return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); + auto *Offset = CreateMul( + Index, Exp.expandCodeFor(Step, Index->getType(), &*B.GetInsertPoint())); + return CreateAdd(StartValue, Offset); } case InductionDescriptor::IK_PtrInduction: { assert(isa<SCEVConstant>(Step) && "Expected constant step for pointer induction"); - const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); - Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); - return B.CreateGEP(nullptr, StartValue, Index); + return B.CreateGEP( + nullptr, StartValue, + CreateMul(Index, Exp.expandCodeFor(Step, Index->getType(), + &*B.GetInsertPoint()))); } case InductionDescriptor::IK_FpInduction: { assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); Index: test/Transforms/LoopVectorize/X86/constant-fold.ll =================================================================== --- test/Transforms/LoopVectorize/X86/constant-fold.ll +++ test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -18,20 +18,19 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 0, [[TMP0]] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> 
[[BROADCAST_SPLAT]], <i16 0, i16 1> -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16*, i16** [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16** [[TMP4]] to <2 x i16*>* -; CHECK-NEXT: store <2 x i16*> <i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 0), i32 0, i32 0), i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 1), i32 0, i32 0)>, <2 x i16*>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16*, i16** [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16** [[TMP3]] to <2 x i16*>* +; CHECK-NEXT: store <2 x i16*> <i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 0), i32 0, i32 0), i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 1), i32 0, i32 0)>, <2 x i16*>* [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 2 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 2 +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; CHECK: middle.block: bb1: Index: test/Transforms/LoopVectorize/X86/pr39160.ll =================================================================== --- /dev/null +++ 
test/Transforms/LoopVectorize/X86/pr39160.ll @@ -0,0 +1,116 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +; Make sure that we can compile the test without crash. + +define void @barney(i8 addrspace(1)** %arg) #0 { +; CHECK-LABEL: @barney( +; CHECK: middle.block: + +bb: + br label %bb1 + +bb1: ; preds = %bb5, %bb + br label %bb2 + +bb2: ; preds = %bb2, %bb1 + %tmp = phi i32 [ 0, %bb1 ], [ %tmp3, %bb2 ] + %tmp3 = add i32 %tmp, 1 + %tmp4 = icmp slt i32 %tmp3, 0 + br i1 %tmp4, label %bb2, label %bb5 + +bb5: ; preds = %bb2 + br i1 true, label %bb6, label %bb1 + +bb6: ; preds = %bb5 + %tmp7 = load i8 addrspace(1)*, i8 addrspace(1)** %arg, align 8 + %tmp8 = getelementptr inbounds i8, i8 addrspace(1)* %tmp7, i64 816 + %tmp9 = bitcast i8 addrspace(1)* %tmp8 to i64 addrspace(1)* + %tmp10 = load i64, i64 addrspace(1)* %tmp9, align 8 + %tmp11 = trunc i64 %tmp10 to i32 + %tmp12 = and i32 %tmp11, 31 + %tmp13 = lshr i32 1, %tmp12 + %tmp14 = getelementptr inbounds i8, i8 addrspace(1)* %tmp7, i64 800 + %tmp15 = bitcast i8 addrspace(1)* %tmp14 to i8 addrspace(1)* addrspace(1)* + %tmp16 = getelementptr inbounds i8, i8 addrspace(1)* %tmp7, i64 808 + %tmp17 = bitcast i8 addrspace(1)* %tmp16 to i8 addrspace(1)* addrspace(1)* + br label %bb19 + +bb18: ; preds = %bb42, %bb36, %bb33 + ret void + +bb19: ; preds = %bb42, %bb6 + %tmp20 = phi i64 [ %tmp45, %bb42 ], [ 0, %bb6 ] + %tmp21 = phi i64 [ %tmp43, %bb42 ], [ 2, %bb6 ] + %tmp22 = phi i32 [ %tmp65, %bb42 ], [ %tmp13, %bb6 ] + %tmp23 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %tmp15, align 8 + %tmp24 = getelementptr inbounds i8, i8 addrspace(1)* %tmp23, i64 12 + %tmp25 = bitcast i8 addrspace(1)* %tmp24 to i32 addrspace(1)* + %tmp26 = getelementptr inbounds i8, i8 addrspace(1)* %tmp23, i64 8 + %tmp27 = bitcast i8 addrspace(1)* %tmp26 to i32 addrspace(1)* + %tmp28 = load i32, i32 addrspace(1)* %tmp27, align 8 
+ %tmp29 = zext i32 %tmp28 to i64 + %tmp30 = icmp ult i64 %tmp20, %tmp29 + %tmp31 = select i1 %tmp30, i64 %tmp20, i64 %tmp29 + %tmp32 = icmp eq i64 %tmp31, 0 + br i1 %tmp32, label %bb59, label %bb50 + +bb33: ; preds = %bb62 + %tmp34 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %tmp17, align 8 + %tmp35 = icmp eq i8 addrspace(1)* %tmp34, null + br i1 %tmp35, label %bb18, label %bb36 + +bb36: ; preds = %bb33 + %tmp37 = getelementptr inbounds i8, i8 addrspace(1)* %tmp34, i64 8 + %tmp38 = bitcast i8 addrspace(1)* %tmp37 to i32 addrspace(1)* + %tmp39 = load i32, i32 addrspace(1)* %tmp38, align 8 + %tmp40 = zext i32 %tmp39 to i64 + %tmp41 = icmp ult i64 %tmp21, %tmp40 + br i1 %tmp41, label %bb42, label %bb18 + +bb42: ; preds = %bb36 + %tmp43 = add nuw nsw i64 %tmp21, 1 + %tmp44 = icmp ugt i64 %tmp21, 88 + %tmp45 = add nuw nsw i64 %tmp20, 1 + br i1 %tmp44, label %bb18, label %bb19 + +bb46: ; preds = %bb50 + %tmp47 = icmp eq i32 %tmp28, 0 + br i1 %tmp47, label %bb48, label %bb59 + +bb48: ; preds = %bb46 + %tmp49 = add i32 %tmp52, 14 + store i32 %tmp49, i32* undef, align 4 + ret void + +bb50: ; preds = %bb50, %bb19 + %tmp51 = phi i32 addrspace(1)* [ %tmp57, %bb50 ], [ %tmp25, %bb19 ] + %tmp52 = phi i32 [ %tmp55, %bb50 ], [ %tmp22, %bb19 ] + %tmp53 = phi i64 [ %tmp56, %bb50 ], [ 1, %bb19 ] + %tmp54 = add i32 %tmp52, 12 + store i32 %tmp54, i32 addrspace(1)* %tmp51, align 4 + %tmp55 = add i32 %tmp52, 13 + %tmp56 = add nuw nsw i64 %tmp53, 1 + %tmp57 = getelementptr inbounds i32, i32 addrspace(1)* %tmp25, i64 %tmp53 + %tmp58 = icmp ult i64 %tmp53, %tmp31 + br i1 %tmp58, label %bb50, label %bb46 + +bb59: ; preds = %bb46, %bb19 + %tmp60 = phi i32 [ %tmp22, %bb19 ], [ %tmp55, %bb46 ] + %tmp61 = phi i64 [ 1, %bb19 ], [ %tmp56, %bb46 ] + br label %bb62 + +bb62: ; preds = %bb68, %bb59 + %tmp63 = phi i32 [ %tmp65, %bb68 ], [ %tmp60, %bb59 ] + %tmp64 = phi i64 [ %tmp66, %bb68 ], [ %tmp61, %bb59 ] + %tmp65 = add i32 %tmp63, 13 + %tmp66 = add nuw nsw i64 %tmp64, 1 + %tmp67 = 
icmp ult i64 %tmp66, %tmp21 + br i1 %tmp67, label %bb68, label %bb33 + +bb68: ; preds = %bb62 + br label %bb62 +} + +attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" } Index: test/Transforms/LoopVectorize/induction.ll =================================================================== --- test/Transforms/LoopVectorize/induction.ll +++ test/Transforms/LoopVectorize/induction.ll @@ -138,7 +138,7 @@ ; CHECK-LABEL: @scalarize_induction_variable_02( ; CHECK: vector.body: ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; CHECK: %offset.idx = shl i64 %index, 3 +; CHECK: %offset.idx = mul i64 %index, 8 ; CHECK: %[[i0:.+]] = add i64 %offset.idx, 0 ; CHECK: %[[i1:.+]] = add i64 %offset.idx, 8 ; CHECK: getelementptr inbounds float, float* %a, i64 %[[i0]] @@ -149,7 +149,7 @@ ; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02( ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; UNROLL-NO-IC: %offset.idx = shl i64 %index, 3 +; UNROLL-NO-IC: %offset.idx = mul i64 %index, 8 ; UNROLL-NO-IC: %[[i0:.+]] = add i64 %offset.idx, 0 ; UNROLL-NO-IC: %[[i1:.+]] = add i64 %offset.idx, 8 ; UNROLL-NO-IC: %[[i2:.+]] = add i64 %offset.idx, 16 Index: test/Transforms/LoopVectorize/iv_outside_user.ll =================================================================== --- test/Transforms/LoopVectorize/iv_outside_user.ll +++ 
test/Transforms/LoopVectorize/iv_outside_user.ll @@ -23,11 +23,10 @@ ; CHECK-LABEL: @preinc ; CHECK-LABEL: middle.block: ; CHECK: %[[v3:.+]] = sub i32 %n.vec, 1 -; CHECK: %ind.escape = add i32 0, %[[v3]] ; CHECK-LABEL: scalar.ph: ; CHECK: %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %entry ] ; CHECK-LABEL: for.end: -; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ] +; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %[[v3]], %middle.block ] ; CHECK: ret i32 %[[RET]] define i32 @preinc(i32 %k) { entry: @@ -135,16 +134,13 @@ } ; CHECK-LABEL: @PR30742 +; CHECK: %[[T15:.+]] = add nsw i32 %tmp03, -7 ; CHECK: vector.ph ; CHECK: %[[N_MOD_VF:.+]] = urem i32 %[[T5:.+]], 2 ; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]] ; CHECK: middle.block ; CHECK: %[[CMP:.+]] = icmp eq i32 %[[T5]], %[[N_VEC]] -; CHECK: %[[T15:.+]] = add i32 %tmp03, -7 -; CHECK: %[[T16:.+]] = shl i32 %[[N_MOD_VF]], 3 -; CHECK: %[[T17:.+]] = add i32 %[[T15]], %[[T16]] -; CHECK: %[[T18:.+]] = shl i32 {{.*}}, 3 -; CHECK: %ind.escape = sub i32 %[[T17]], %[[T18]] +; CHECK: %ind.escape = add i32 %[[T15]], ; CHECK: br i1 %[[CMP]], label %BB3, label %scalar.ph define void @PR30742() { BB0: