diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1196,7 +1196,7 @@
       return VLOperands::ScoreFail;
     // The distance is too large - still may be profitable to use masked
     // loads/gathers.
-    if (std::abs(*Dist) > NumLanes / 2)
+    if (std::abs(*Dist) > 1)
       return VLOperands::ScoreAltOpcodes;
     // This still will detect consecutive loads, but we might have "holes"
     // in some cases. It is ok for non-power-2 vectorization and may produce
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -slp-vectorizer -S -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+; This test is reduced from the TSVC evaluation of vectorizers:
+; https://github.com/llvm/llvm-test-suite/commits/main/MultiSource/Benchmarks/TSVC/LoopRerolling-flt/tsc.c
+
+define void @s116_modified(float* %a) {
+; CHECK-LABEL: @s116_modified(
+; CHECK: load <4 x float>, <4 x float>*
+; CHECK: fmul fast <4 x float>
+; CHECK: store <4 x float>
+  %gep0 = getelementptr inbounds float, float* %a, i64 0
+  %gep1 = getelementptr inbounds float, float* %a, i64 1
+  %gep2 = getelementptr inbounds float, float* %a, i64 2
+  %gep3 = getelementptr inbounds float, float* %a, i64 3
+  %gep4 = getelementptr inbounds float, float* %a, i64 4
+  %ld0 = load float, float* %gep0
+  %ld1 = load float, float* %gep1
+  %ld2 = load float, float* %gep2
+  %ld3 = load float, float* %gep3
+  %ld4 = load float, float* %gep4
+  %mul0 = fmul fast float %ld0, %ld1
+  %mul1 = fmul fast float %ld2, %ld1
+  %mul2 = fmul fast float %ld3, %ld2
+  %mul3 = fmul fast float %ld4, %ld3
+  store float %mul0, float* %gep0
+  store float %mul1, float* %gep1
+  store float %mul2, float* %gep2
+  store float %mul3, float* %gep3
+  ret void
+}
+
+