diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3081,7 +3081,15 @@
   setDebugLocFromInst(Builder, OldInst);
 
   // Create i+1 and fill the PHINode.
-  Value *Next = Builder.CreateAdd(Induction, Step, "index.next");
+  //
+  // If the tail is not folded, we know that End - Start >= Step (either
+  // statically or through the minimum iteration checks). We also know that both
+  // Start % Step == 0 and End % Step == 0. We exit the vector loop if %IV +
+  // %Step == %End. Hence we must exit the loop before %IV + %Step unsigned
+  // overflows and we can mark the induction increment as NUW.
+  Value *Next =
+      Builder.CreateAdd(Induction, Step, "index.next",
+                        /*NUW=*/!Cost->foldTailByMasking(), /*NSW=*/false);
   Induction->addIncoming(Start, L->getLoopPreheader());
   Induction->addIncoming(Next, Latch);
   // Create the compare.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -152,7 +152,7 @@
 ; FVW2-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 6
 ; FVW2-NEXT: [[TMP39:%.*]] = bitcast float* [[TMP38]] to <2 x float>*
 ; FVW2-NEXT: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> [[TMP31]], <2 x float>* [[TMP39]], i32 4, <2 x i1> [[TMP11]])
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX6]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX6]], 8
 ; FVW2-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
 ; FVW2-NEXT: br i1 [[TMP40]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; FVW2: for.end:
@@ -466,7 +466,7 @@
 ; FVW2-NEXT: store float [[TMP46]], float* [[TMP45]], align 4
 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE30]]
 ; FVW2: pred.store.continue30:
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX6]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX6]], 8
 ; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]],
 ; FVW2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT: br i1 [[TMP47]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -782,7 +782,7 @@
 ; FVW2-NEXT: store float [[TMP46]], float* [[TMP45]], align 4
 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE29]]
 ; FVW2: pred.store.continue29:
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]],
 ; FVW2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT: br i1 [[TMP47]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -1084,7 +1084,7 @@
 ; FVW2-NEXT: store float [[TMP46]], float addrspace(1)* [[TMP45]], align 4
 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE30]]
 ; FVW2: pred.store.continue30:
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX6]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX6]], 8
 ; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]],
 ; FVW2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT: br i1 [[TMP47]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -1386,7 +1386,7 @@
 ; FVW2-NEXT: store float [[TMP46]], float* [[TMP45]], align 4
 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE30]]
 ; FVW2: pred.store.continue30:
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX6]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX6]], 8
 ; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]],
 ; FVW2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT: br i1 [[TMP47]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -1688,7 +1688,7 @@
 ; FVW2-NEXT: store float [[TMP46]], float addrspace(1)* [[TMP45]], align 4
 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE30]]
 ; FVW2: pred.store.continue30:
-; FVW2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX6]], 8
+; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX6]], 8
 ; FVW2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]],
 ; FVW2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT: br i1 [[TMP47]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -1900,7 +1900,7 @@
 ; AVX512-NEXT: [[WIDE_LOAD15_7:%.*]] = load <16 x float>, <16 x float>* [[TMP55]], align 4, !alias.scope !9
 ; AVX512-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP52]], i64 1
 ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_7]], <16 x float*> [[TMP56]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7
-; AVX512-NEXT: [[INDEX_NEXT_7]] = add i64 [[INDEX]], 128
+; AVX512-NEXT: [[INDEX_NEXT_7]] = add nuw i64 [[INDEX]], 128
 ; AVX512-NEXT: [[PTR_IND_7]] = getelementptr float, float* [[POINTER_PHI]], i64 2048
 ; AVX512-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8
 ; AVX512-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
@@ -1924,7 +1924,7 @@
 ; AVX512-NEXT: [[WIDE_LOAD15_EPIL:%.*]] = load <16 x float>, <16 x float>* [[TMP60]], align 4, !alias.scope !9
 ; AVX512-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP57]], i64 1
 ; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_EPIL]], <16 x float*> [[TMP61]], i32 4, <16 x i1> ), !alias.scope !5, !noalias !7
-; AVX512-NEXT: [[INDEX_NEXT_EPIL]] = add i64 [[INDEX_EPIL]], 16
+; AVX512-NEXT: [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 16
 ; AVX512-NEXT: [[PTR_IND_EPIL]] = getelementptr float, float* [[POINTER_PHI_EPIL]], i64 256
 ; AVX512-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
 ; AVX512-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
@@ -2092,7 +2092,7 @@
 ; FVW2-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, <2 x float*> [[TMP51]], i64 1
 ; FVW2-NEXT: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> [[WIDE_LOAD17_3]], <2 x float*> [[TMP59]], i32 4, <2 x i1> ), !alias.scope !10, !noalias !12
 ; FVW2-NEXT: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> [[WIDE_LOAD18_3]], <2 x float*> [[TMP60]], i32 4, <2 x i1> ), !alias.scope !10, !noalias !12
-; FVW2-NEXT: [[INDEX_NEXT_3]] = add i64 [[INDEX]], 16
+; FVW2-NEXT: [[INDEX_NEXT_3]] = add nuw i64 [[INDEX]], 16
 ; FVW2-NEXT: [[PTR_IND_3]] = getelementptr float, float* [[POINTER_PHI]], i64 256
 ; FVW2-NEXT: [[NITER_NSUB_3]] = add i64 [[NITER]], -4
 ; FVW2-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0
@@ -2126,7 +2126,7 @@
 ; FVW2-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, <2 x float*> [[TMP62]], i64 1
 ; FVW2-NEXT: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> [[WIDE_LOAD17_EPIL]], <2 x float*> [[TMP70]], i32 4, <2 x i1> ), !alias.scope !10, !noalias !12
 ; FVW2-NEXT: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> [[WIDE_LOAD18_EPIL]], <2 x float*> [[TMP71]], i32 4, <2 x i1> ), !alias.scope !10, !noalias !12
-; FVW2-NEXT: [[INDEX_NEXT_EPIL]] = add i64 [[INDEX_EPIL]], 4
+; FVW2-NEXT: [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 4
 ; FVW2-NEXT: [[PTR_IND_EPIL]] = getelementptr float, float* [[POINTER_PHI_EPIL]], i64 64
 ; FVW2-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
 ; FVW2-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll
--- a/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr39417-optsize-scevchecks.ll
@@ -65,7 +65,7 @@
 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 0
 ; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP23]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]],
 ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
diff --git a/llvm/test/Transforms/LoopVectorize/ptr-induction.ll b/llvm/test/Transforms/LoopVectorize/ptr-induction.ll
--- a/llvm/test/Transforms/LoopVectorize/ptr-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/ptr-induction.ll
@@ -6,7 +6,7 @@
 
 ; CHECK: @f
 ; Expect that the pointer indvar has been converted into an integer indvar.
-; CHECK: %index.next = add i64 %index, 4
+; CHECK: %index.next = add nuw i64 %index, 4
 define i32 @f(i32* readonly %a, i32* readnone %b) #0 {
 entry:
   %cmp.6 = icmp ult i32* %a, %b
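The shape of IR the NUW reasoning above covers is, roughly, the following hand-written sketch (not taken from this patch's tests; VF=4 and the %n.vec name are illustrative assumptions). When the tail is not folded, the vectorizer only enters the vector loop with a trip count %n.vec that is a positive multiple of the step, so the increment hits %n.vec exactly and can never wrap:

; vector.ph guarantees %n.vec >= 4 and %n.vec % 4 == 0 (assumed names, for
; illustration only).
vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; ... widened loads/stores addressed by %index ...
  %index.next = add nuw i64 %index, 4          ; cannot wrap: exits at %n.vec
  %cmp = icmp eq i64 %index.next, %n.vec
  br i1 %cmp, label %middle.block, label %vector.body

When the tail is folded by masking, the final %index.next may step past the scalar trip count, so the increment is left without the nuw flag in that case (hence the /*NUW=*/!Cost->foldTailByMasking() argument in the patch).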