Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5128,7 +5128,13 @@
       Instruction *Update = cast<Instruction>(
           cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
-      ScalarPtrs.insert(Update);
+
+      if (llvm::all_of(Update->users(),
+                       [&](User *U) { return dyn_cast<PHINode>(U); }))
+        ScalarPtrs.insert(Update);
+      else
+        PossibleNonScalarPtrs.insert(Update);
+
       return;
     }
     // We only care about bitcast and getelementptr instructions contained in
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -116,6 +116,85 @@
   ret void
 }
 
+define i64 @pointer_induction([64 x i8]* noalias %buffer, i64 %N) {
+; CHECK-LABEL: @pointer_induction(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BUFFER_START:%.*]] = getelementptr inbounds [64 x i8], [64 x i8]* [[BUFFER:%.*]], i64 0, i64 0
+; CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds [64 x i8], [64 x i8]* [[BUFFER]], i64 64, i64 64
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 2
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, i8* [[BUFFER_START]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8*> poison, i8* [[BUFFER_START]], i32 0
+;
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8*> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX1]], i32 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[BUFFER_START]], <vscale x 2 x i64> [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, i8* [[BUFFER_START]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX1]], 1
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[BUFFER_START]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[NEXT_GEP3]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP12]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[NEXT_GEP]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq <vscale x 2 x i8*> [[TMP13]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[BUFFER_START]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[FIRST_SROA:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[INDEX_NXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP18:%.*]] = load i8, i8* [[FIRST_SROA]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[FIRST_SROA]], i64 1
+; CHECK-NEXT:    [[CMP_I_NOT:%.*]] = icmp eq i8* [[INCDEC_PTR]], [[BUFFER_START]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INDEX]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       end:
+; CHECK-NEXT:    ret i64 undef
+;
+entry:
+  %buffer.start = getelementptr inbounds [64 x i8], [64 x i8]* %buffer, i64 0, i64 0
+  %add.ptr.i = getelementptr inbounds [64 x i8], [64 x i8]* %buffer, i64 64, i64 64
+  br label %for.body
+
+for.body:
+  %first.sroa = phi i8* [ %incdec.ptr, %for.body ], [ %buffer.start, %entry ]
+  %index = phi i64 [ %index_nxt, %for.body ], [ 0, %entry ]
+  %index_nxt = add i64 %index, 1
+  %0 = load i8, i8* %first.sroa, align 1
+  %incdec.ptr = getelementptr inbounds i8, i8* %first.sroa, i64 1
+  %cmp.i.not = icmp eq i8* %incdec.ptr, %buffer.start
+  %cmp = icmp ult i64 %index, %N
+  br i1 %cmp, label %for.body, label %end, !llvm.loop !0
+
+end:
+  ret i64 undef
+}
 
 attributes #0 = {"target-features"="+sve"}