Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5128,8 +5128,14 @@
 
       Instruction *Update = cast<Instruction>(
           cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
-      ScalarPtrs.insert(Update);
-      return;
+
+      // If there is more than one user of Update (Ptr), we shouldn't assume it
+      // will be scalar after vectorisation as other users of the instruction
+      // may require widening. Otherwise, add it to ScalarPtrs.
+      if (Update->hasOneUse() && cast<Value>(*Update->user_begin()) == Ptr) {
+        ScalarPtrs.insert(Update);
+        return;
+      }
     }
     // We only care about bitcast and getelementptr instructions contained in
     // the loop.
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -116,6 +116,55 @@
   ret void
 }
 
+define void @pointer_induction([64 x i8]* noalias %buffer, i64 %N) {
+; CHECK-LABEL: @pointer_induction(
+; CHECK: entry:
+; CHECK:         [[BUFFER_START:%.*]] = getelementptr inbounds [64 x i8], [64 x i8]* [[BUFFER:%.*]], i64 0, i64 0
+; CHECK: vector.ph:
+; CHECK:         [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8*> poison, i8* [[BUFFER_START]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8*> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK: vector.body:
+; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX1]], i32 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[BUFFER_START]], <vscale x 2 x i64> [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, i8* [[BUFFER_START]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX1]], 1
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[BUFFER_START]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[NEXT_GEP3]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP12]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[NEXT_GEP]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq <vscale x 2 x i8*> [[TMP13]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+;
+entry:
+  %buffer.start = getelementptr inbounds [64 x i8], [64 x i8]* %buffer, i64 0, i64 0
+  %add.ptr.i = getelementptr inbounds [64 x i8], [64 x i8]* %buffer, i64 64, i64 64
+  br label %for.body
+
+for.body:
+  %first.sroa = phi i8* [ %incdec.ptr, %for.body ], [ %buffer.start, %entry ]
+  %index = phi i64 [ %index_nxt, %for.body ], [ 0, %entry ]
+  %index_nxt = add i64 %index, 1
+  %0 = load i8, i8* %first.sroa, align 1
+  %incdec.ptr = getelementptr inbounds i8, i8* %first.sroa, i64 1
+  %cmp.i.not = icmp eq i8* %incdec.ptr, %buffer.start
+  %cmp = icmp ult i64 %index, %N
+  br i1 %cmp, label %for.body, label %end, !llvm.loop !0
+
+end:
+  ret void
+}
 attributes #0 = {"target-features"="+sve"}