Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4565,7 +4565,8 @@
     auto *Src = cast<Instruction>(Dst->getOperand(0));
     if (llvm::all_of(Src->users(), [&](User *U) -> bool {
           auto *J = cast<Instruction>(U);
-          return !TheLoop->contains(J) || Worklist.count(J) ||
+          return (!TheLoop->contains(J) && !VF.isScalable()) ||
+                 Worklist.count(J) ||
                  ((isa<LoadInst>(J) || isa<StoreInst>(J)) &&
                   isScalarUse(J, Src));
         })) {
@@ -4599,8 +4600,8 @@
     // vectorization.
     auto ScalarInd = llvm::all_of(Ind->users(), [&](User *U) -> bool {
       auto *I = cast<Instruction>(U);
-      return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I) ||
-             IsDirectLoadStoreFromPtrIndvar(Ind, I);
+      return I == IndUpdate || (!TheLoop->contains(I) && !VF.isScalable()) ||
+             Worklist.count(I) || IsDirectLoadStoreFromPtrIndvar(Ind, I);
     });
     if (!ScalarInd)
       continue;
@@ -4610,7 +4611,8 @@
     auto ScalarIndUpdate =
         llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
           auto *I = cast<Instruction>(U);
-          return I == Ind || !TheLoop->contains(I) || Worklist.count(I) ||
+          return I == Ind || (!TheLoop->contains(I) && !VF.isScalable()) ||
+                 Worklist.count(I) ||
                  IsDirectLoadStoreFromPtrIndvar(IndUpdate, I);
         });
     if (!ScalarIndUpdate)
Index: llvm/test/Transforms/LoopVectorize/value-use-outside-scalable.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/value-use-outside-scalable.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-vectorize -S %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @test(i64* %ptr, double %d) #0 {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1000, [[TMP1]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = add <vscale x 2 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <vscale x 2 x i64> [[TMP5]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 1, [[TMP8]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i32 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[D:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 2 x double> poison, double [[D]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT4]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i64, i64* [[PTR:%.*]], <vscale x 2 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i64, i64* [[PTR]], <vscale x 2 x i64> [[STEP_ADD]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <vscale x 2 x i64*> [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i64* [[TMP12]] to double*
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <vscale x 2 x i64*> [[TMP11]], i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i64* [[TMP14]] to double*
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr double, double* [[TMP13]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = bitcast double* [[TMP16]] to <vscale x 2 x double>*
+; CHECK-NEXT:    store <vscale x 2 x double> [[BROADCAST_SPLAT]], <vscale x 2 x double>* [[TMP17]], align 8
+; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP19:%.*]] = mul i32 [[TMP18]], 2
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr double, double* [[TMP13]], i32 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = bitcast double* [[TMP20]] to <vscale x 2 x double>*
+; CHECK-NEXT:    store <vscale x 2 x double> [[BROADCAST_SPLAT5]], <vscale x 2 x double>* [[TMP21]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP1]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[STEP_ADD]], [[DOTSPLAT]]
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1000, [[N_VEC]]
+; CHECK-NEXT:    [[TMP23:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP24:%.*]] = mul i32 [[TMP23]], 2
+; CHECK-NEXT:    [[TMP25:%.*]] = sub i32 [[TMP24]], 1
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <vscale x 2 x i64*> [[TMP11]], i32 [[TMP25]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr i64, i64* [[PTR]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP28:%.*]] = bitcast i64* [[TMP27]] to double*
+; CHECK-NEXT:    store double [[D]], double* [[TMP28]], align 8
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       end:
+; CHECK-NEXT:    [[OUTSIDEUSE:%.*]] = phi i64* [ [[TMP27]], [[LOOP]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %loop ]
+  %index.next = add nsw i64 %index, 1
+  %0 = getelementptr i64, i64* %ptr, i64 %index
+  %1 = bitcast i64* %0 to double*
+  store double %d, double* %1, align 8
+  %exitcond = icmp eq i64 %index.next, 1000
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  %outsideuse = phi i64* [ %0, %loop ]
+  ret void
+}
+
+attributes #0 = { "target-features"="+v8.2a,+fp-armv8,+neon,+sve," }