diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7418,8 +7418,6 @@
       Decision = CM_GatherScatter;
       Cost = GatherScatterCost;
     } else {
-      assert(!VF.isScalable() &&
-             "We cannot yet scalarise for scalable vectors");
       Decision = CM_Scalarize;
       Cost = ScalarizationCost;
     }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=on -o - | FileCheck %s
+; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=preferred -force-target-instruction-cost=1 -o - | FileCheck %s
 
 define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) {
 ; CHECK-LABEL: @gather_nxv4i32_ind64
@@ -122,6 +122,37 @@
   ret void
 }
 
+
+define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: @gather_nxv4i32_ind64_stride2
+; CHECK: vector.body:
+; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
+; CHECK-DAG: %[[STEP:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-DAG: %[[IDXSPLATINS:.*]] = insertelement <vscale x 4 x i64> poison, i64 %[[IDX]], i32 0
+; CHECK-DAG: %[[IDXSPLAT:.*]] = shufflevector <vscale x 4 x i64> %[[IDXSPLATINS]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK: %[[ADD:.*]] = add <vscale x 4 x i64> %[[IDXSPLAT]], %[[STEP]]
+; CHECK: %[[MUL:.*]] = shl <vscale x 4 x i64> %[[ADD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> undef, i64 1, i32 0), <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer)
+; CHECK: %[[PTRS:.*]] = getelementptr inbounds float, float* %b, <vscale x 4 x i64> %[[MUL]]
+; CHECK: call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %[[PTRS]]
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %indvars.iv.stride2 = mul i64 %indvars.iv, 2
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2
+  %0 = load float, float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
+  store float %0, float* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+}
+
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}