Patch summary (free-form preamble; patch tools skip text before the first
"diff --git" header):

  * LoopAccessAnalysis.cpp: the maximum safe dependence distance is now
    recorded as Distance / Stride instead of Distance, and MaxVF is derived
    as MaxSafeDepDistBytes / TypeByteSize without a second scaling by Stride.
  * depend_diff_types.ll, depend_diff_types_opaque_ptr.ll: the expected
    maximum dependence distance changes from 800 to 400 bytes.
  * max_safe_dep_dist_non_unit_stride.ll: new test with a loop stepping by 3
    that expects a maximum dependence distance of 8 bytes.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Line structure and hunk line counts were rebuilt from the hunk headers; the
intra-line spacing of the CHECK context lines in the two existing tests could
not be recovered, so apply with whitespace-insensitive matching (for example
"git apply --ignore-whitespace") or regenerate against the tree -- confirm.

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2001,14 +2001,16 @@
   // is 8, which is less than 2 and forbidden vectorization, But actually
   // both A and B could be vectorized by 2 iterations.
   MaxSafeDepDistBytes =
-      std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
+      std::min(static_cast<uint64_t>(Distance / Stride), MaxSafeDepDistBytes);
 
   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
   if (IsTrueDataDependence && EnableForwardingConflictDetection &&
       couldPreventStoreLoadForward(Distance, TypeByteSize))
     return Dependence::BackwardVectorizableButPreventsForwarding;
 
-  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
+  // MaxSafeDepDistBytes already accounts for stride, so no need to scale
+  // TypeByteSize by Stride.
+  uint64_t MaxVF = MaxSafeDepDistBytes / TypeByteSize;
   LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
                     << " with max VF = " << MaxVF << '\n');
   uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
--- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
@@ -9,7 +9,7 @@
 
 ; CHECK-LABEL: function 'backdep_type_size_equivalence':
 ; CHECK-NEXT: loop:
-; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 800 bytes
+; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 400 bytes
 ; CHECK-NEXT: Dependences:
 ; CHECK-NEXT: Forward:
 ; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
--- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
@@ -8,7 +8,7 @@
 
 ; CHECK-LABEL: function 'backdep_type_size_equivalence':
 ; CHECK-NEXT: loop:
-; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 800 bytes
+; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 400 bytes
 ; CHECK-NEXT: Dependences:
 ; CHECK-NEXT: Forward:
 ; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 ->
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/max_safe_dep_dist_non_unit_stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/max_safe_dep_dist_non_unit_stride.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/max_safe_dep_dist_non_unit_stride.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -disable-output -passes='print<access-info>' < %s 2>&1 | FileCheck %s
+
+; Generated from following C program:
+; void foo(int len, int *a) {
+;   for (int k = 0; k < len; k+=3) {
+;     a[k] = a[k + 4];
+;     a[k+2] = a[k+6];
+;   }
+; }
+define void @foo(i32 noundef signext %len, ptr nocapture noundef %a) {
+; CHECK-LABEL: Loop access info in function 'foo':
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 8 bytes
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: BackwardVectorizable:
+; CHECK-NEXT: store i32 %2, ptr %arrayidx2, align 4 ->
+; CHECK-NEXT: %4 = load i32, ptr %arrayidx5, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+  %cmp18 = icmp sgt i32 %len, 0
+  br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %0 = zext i32 %len to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %1 = add nuw nsw i64 %indvars.iv, 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %1
+  %2 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %2, ptr %arrayidx2, align 4
+  %3 = add nuw nsw i64 %indvars.iv, 6
+  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %3
+  %4 = load i32, ptr %arrayidx5, align 4
+  %5 = add nuw nsw i64 %indvars.iv, 2
+  %arrayidx8 = getelementptr inbounds i32, ptr %a, i64 %5
+  store i32 %4, ptr %arrayidx8, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
+  %cmp = icmp ult i64 %indvars.iv.next, %0
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}