diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5819,6 +5819,12 @@ } } + // For scalable vectors, don't use tail folding as this is not yet supported. + // The code is likely to have ended up here if the trip count is low, in + // which case it makes sense not to use scalable vectors. + if (MaxFactors.ScalableVF.isVector()) + MaxFactors.ScalableVF = ElementCount::getScalable(0); + // If we don't know the precise trip count, or if the trip count that we // found modulo the vectorization factor is not zero, try to fold the tail // by masking. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -0,0 +1,23 @@ +; RUN: opt -S -loop-vectorize -scalable-vectorization=preferred -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s + +; CHECK-NOT: vector.body: + +target triple = "aarch64-unknown-linux-gnu" + +define void @tail_predication(i32 %init, i32* %ptr, i32 %val) #0 { +entry: + br label %while.body + +while.body: ; preds = %while.body, %entry + %index = phi i32 [ %index.dec, %while.body ], [ %init, %entry ] + %gep = getelementptr i32, i32* %ptr, i32 %index + store i32 %val, i32* %gep + %index.dec = add nsw i32 %index, -1 + %cmp10 = icmp sgt i32 %index, 0 + br i1 %cmp10, label %while.body, label %while.end.loopexit + +while.end.loopexit: ; preds = %while.body + ret void +} + +attributes #0 = { "target-features"="+sve" }