diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3081,7 +3081,15 @@
   setDebugLocFromInst(Builder, OldInst);
 
   // Create i+1 and fill the PHINode.
-  Value *Next = Builder.CreateAdd(Induction, Step, "index.next");
+  //
+  // If the tail is not folded, we know that End - Start >= Step (either
+  // statically or through the minimum iteration checks). We also know that both
+  // Start % Step == 0 and End % Step == 0. We exit the vector loop if %IV +
+  // %Step == %End. Hence we must exit the loop before %IV + %Step unsigned
+  // overflows and we can mark the induction increment as NUW.
+  Value *Next =
+      Builder.CreateAdd(Induction, Step, "index.next",
+                        /*NUW=*/!Cost->foldTailByMasking(), /*NSW=*/false);
   Induction->addIncoming(Start, L->getLoopPreheader());
   Induction->addIncoming(Next, Latch);
   // Create the compare.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -152,7 +152,7 @@
 ; FVW2-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 6
 ; FVW2-NEXT: [[TMP39:%.*]] = bitcast float* [[TMP38]] to <2 x float>*
 ; FVW2-NEXT: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> [[TMP31]], <2 x float>* [[TMP39]], i32 4, <2 x i1> [[TMP11]])
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX6]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX6]], 8
 ; FVW2-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
 ; FVW2-NEXT: br i1 [[TMP40]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; FVW2: for.end:
@@ -466,7 +466,7 @@
 ; FVW2-NEXT: store float [[TMP46]], float* [[TMP45]], align 4
 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE30]]
 ; FVW2: pred.store.continue30:
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX6]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX6]], 8
 ; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]],
 ; FVW2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT: br i1 [[TMP47]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -782,7 +782,7 @@
 ; FVW2-NEXT: store float [[TMP46]], float* [[TMP45]], align 4
 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE29]]
 ; FVW2: pred.store.continue29:
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]],
 ; FVW2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT: br i1 [[TMP47]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -1084,7 +1084,7 @@
 ; FVW2-NEXT: store float [[TMP46]], float addrspace(1)* [[TMP45]], align 4
 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE30]]
 ; FVW2: pred.store.continue30:
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX6]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX6]], 8
 ; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]],
 ; FVW2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT: br i1 [[TMP47]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -1386,7 +1386,7 @@
 ; FVW2-NEXT: store float [[TMP46]], float* [[TMP45]], align 4
 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE30]]
 ; FVW2: pred.store.continue30:
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX6]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX6]], 8
 ; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]],
 ; FVW2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT: br i1 [[TMP47]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -1688,7 +1688,7 @@
 ; FVW2-NEXT: store float [[TMP46]], float addrspace(1)* [[TMP45]], align 4
 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE30]]
 ; FVW2: pred.store.continue30:
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX6]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX6]], 8
 ; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]],
 ; FVW2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT: br i1 [[TMP47]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -1900,7 +1900,7 @@
 ; AVX512-NEXT: [[WIDE_LOAD15_7:%.*]] = load <16 x float>, <16 x float>* [[TMP55]], align 4, !alias.scope !9
 ; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP52]], i64 1
 ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_7]], <16 x float*> [[TMP56]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7
-; AVX512-NEXT: [[INDEX_NEXT_7]] = add i64 [[INDEX]], 128
+; AVX512-NEXT: [[INDEX_NEXT_7]] = add nuw i64 [[INDEX]], 128
 ; AVX512-NEXT: [[PTR_IND_7]] = getelementptr float, float* [[POINTER_PHI]], i64 2048
 ; AVX512-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8
 ; AVX512-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
@@ -1924,7 +1924,7 @@
 ; AVX512-NEXT: [[WIDE_LOAD15_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP60]], align 4, !alias.scope !9
 ; AVX512-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP57]], i64 1
 ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_EPIL]], <16 x float*> [[TMP61]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7
-; AVX512-NEXT: [[INDEX_NEXT_EPIL]] = add i64 [[INDEX_EPIL]], 16
+; AVX512-NEXT: [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 16
 ; AVX512-NEXT: [[PTR_IND_EPIL]] = getelementptr float, float* [[POINTER_PHI_EPIL]], i64 256
 ; AVX512-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
 ; AVX512-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
@@ -2092,7 +2092,7 @@
 ; FVW2-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, <2 x float*> [[TMP51]], i64 1
 ; FVW2-NEXT: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> [[WIDE_LOAD17_3]], <2 x float*> [[TMP59]], i32 4, <2 x i1> ), !alias.scope !10, !noalias !12
 ; FVW2-NEXT: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> [[WIDE_LOAD18_3]], <2 x float*> [[TMP60]], i32 4, <2 x i1> ), !alias.scope !10, !noalias !12
-; FVW2-NEXT: [[INDEX_NEXT_3]] = add i64 [[INDEX]], 16
+; FVW2-NEXT: [[INDEX_NEXT_3]] = add nuw i64 [[INDEX]], 16
 ; FVW2-NEXT: [[PTR_IND_3]] = getelementptr float, float* [[POINTER_PHI]], i64 256
 ; FVW2-NEXT: [[NITER_NSUB_3]] = add i64 [[NITER]], -4
 ; FVW2-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0
@@ -2126,7 +2126,7 @@
 ; FVW2-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, <2 x float*> [[TMP62]], i64 1
 ; FVW2-NEXT: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> [[WIDE_LOAD17_EPIL]], <2 x float*> [[TMP70]], i32 4, <2 x i1> ), !alias.scope !10, !noalias !12
 ; FVW2-NEXT: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> [[WIDE_LOAD18_EPIL]], <2 x float*> [[TMP71]], i32 4, <2 x i1> ), !alias.scope !10, !noalias !12
-; FVW2-NEXT: [[INDEX_NEXT_EPIL]] = add i64 [[INDEX_EPIL]], 4
+; FVW2-NEXT: [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 4
 ; FVW2-NEXT: [[PTR_IND_EPIL]] = getelementptr float, float* [[POINTER_PHI_EPIL]], i64 64
 ; FVW2-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
 ; FVW2-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll
--- a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll
@@ -65,7 +65,7 @@
 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 0
 ; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP23]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]],
 ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
diff --git a/llvm/test/Transforms/LoopVectorize/ptr-induction.ll b/llvm/test/Transforms/LoopVectorize/ptr-induction.ll
--- a/llvm/test/Transforms/LoopVectorize/ptr-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/ptr-induction.ll
@@ -6,7 +6,7 @@
 
 ; CHECK: @f
 ; Expect that the pointer indvar has been converted into an integer indvar.
-; CHECK: %index.next = add i64 %index, 4
+; CHECK: %index.next = add nuw i64 %index, 4
 define i32 @f(i32* readonly %a, i32* readnone %b) #0 {
 entry:
   %cmp.6 = icmp ult i32* %a, %b
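The shape of IR the NUW reasoning above covers is, roughly, the following hand-written sketch (not taken from this patch's tests; VF=4 and the %n.vec name are illustrative assumptions). When the tail is not folded, the vectorizer only enters the vector loop with a trip count %n.vec that is a positive multiple of the step, so the increment hits %n.vec exactly and can never wrap:

; vector.ph guarantees %n.vec >= 4 and %n.vec % 4 == 0 (assumed names, for
; illustration only).
vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; ... widened loads/stores addressed by %index ...
  %index.next = add nuw i64 %index, 4          ; cannot wrap: exits at %n.vec
  %cmp = icmp eq i64 %index.next, %n.vec
  br i1 %cmp, label %middle.block, label %vector.body

When the tail is folded by masking, the final %index.next may step past the scalar trip count, so the increment is left without the nuw flag in that case (hence the /*NUW=*/!Cost->foldTailByMasking() argument in the patch).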