diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4636,6 +4636,14 @@
   assert(VF.isVector() && Scalars.find(VF) == Scalars.end() &&
          "This function should not be visited twice for the same VF");
 
+  // This avoids any chance of creating a REPLICATE recipe during planning
+  // since that would result in generation of scalarized code during execution,
+  // which is not supported for SVE.
+  if (VF.isScalable()) {
+    Scalars[VF].insert(Uniforms[VF].begin(), Uniforms[VF].end());
+    return;
+  }
+
   SmallSetVector<Instruction *, 8> Worklist;
 
   // These sets are used to seed the analysis with pointers used by memory
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll
@@ -0,0 +1,34 @@
+; RUN: opt -mtriple=aarch64 -loop-vectorize -S < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; CHECK-NOT: Assertion {{.*}} "Can't scalarize a scalable vector"' failed
+
+; The test checks that scalarized code is not generated for SVE.
+; It creates a scenario where the gep instruction is used outside
+; the loop, preventing it (and consequently the loop induction
+; update variable) from being classified as 'uniform'.
+
+define void @test_no_scalarization(i64* %a) #0 {
+L.entry:
+  %idx = alloca i32, align 4
+  store i32 100, i32* %idx, align 4
+  %0 = load i32, i32* %idx                          ; start value of the induction variable
+  br label %L.LB19_336
+
+L.LB19_336:                                       ; preds = %L.LB19_336, %L.entry
+  %indvars.iv = phi i32 [ %indvars.iv.next, %L.LB19_336 ], [ %0, %L.entry ]
+  %indvars.iv.next = add nsw i32 %indvars.iv, -1    ; induction variable counts down
+  %1 = getelementptr i64, i64* %a, i32 %indvars.iv  ; also used by the store in %L.LB19_337
+  %2 = bitcast i64* %1 to double*
+  %3 = load double, double* %2, align 8
+  %4 = icmp sgt i32 %indvars.iv.next, 1
+  br i1 %4, label %L.LB19_336, label %L.LB19_337
+
+L.LB19_337:                                       ; preds = %L.LB19_336
+  store i64 1, i64* %1, align 8                     ; use of the gep outside the loop
+  ret void
+}
+
+attributes #0 = { nofree norecurse nosync nounwind "target-features"="+neon,+v8a,+sve" }
+