diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -126,8 +126,8 @@ ForwardButPreventsForwarding, // Lexically backward. Backward, - // Backward, but the distance allows a vectorization factor of - // MaxSafeDepDistBytes. + // Backward, but the distance allows a vectorization factor dependent on + // MinDepDistBytes. BackwardVectorizable, // Same, but may prevent store-to-load forwarding. BackwardVectorizableButPreventsForwarding @@ -197,10 +197,6 @@ return MaxSafeVectorWidthInBits == UINT_MAX; } - /// The maximum number of bytes of a vector register we can vectorize - /// the accesses safely with. - uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; } - /// Return the number of elements that are safe to operate on /// simultaneously, multiplied by the size of the element in bits. uint64_t getMaxSafeVectorWidthInBits() const { @@ -274,8 +270,10 @@ /// The program order index to be used for the next instruction. unsigned AccessIdx = 0; - // We can access this many bytes in parallel safely. - uint64_t MaxSafeDepDistBytes = 0; + /// The smallest dependence distance in bytes in the loop. This may not be + /// the same as the maximum number of bytes that are safe to operate on + /// simultaneously. + uint64_t MinDepDistBytes = 0; /// Number of elements (from consecutive iterations) that are safe to /// operate on simultaneously, multiplied by the size of the element in bits. @@ -310,7 +308,7 @@ /// This function checks whether there is a plausible dependence (or the /// absence of such can't be proved) between the two accesses. 
If there is a /// plausible dependence but the dependence distance is bigger than one - /// element access it records this distance in \p MaxSafeDepDistBytes (if this + /// element access it records this distance in \p MinDepDistBytes (if this /// distance is smaller than any other distance encountered so far). /// Otherwise, this function returns true signaling a possible dependence. Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx, @@ -321,7 +319,7 @@ /// forwarding. /// /// \return false if we shouldn't vectorize at all or avoid larger - /// vectorization factors by limiting MaxSafeDepDistBytes. + /// vectorization factors by limiting MinDepDistBytes. bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize); /// Updates the current safety status with \p S. We can go from Safe to @@ -678,8 +676,6 @@ unsigned NumLoads = 0; unsigned NumStores = 0; - uint64_t MaxSafeDepDistBytes = -1; - /// Cache the result of analyzeLoop. bool CanVecMem = false; bool HasConvergentOp = false; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1676,7 +1676,7 @@ const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize; // Maximum vector factor. uint64_t MaxVFWithoutSLForwardIssues = std::min( - VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes); + VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes); // Compute the smallest VF at which the store and load would be misaligned. 
for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues; @@ -1696,10 +1696,10 @@ return true; } - if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes && + if (MaxVFWithoutSLForwardIssues < MinDepDistBytes && MaxVFWithoutSLForwardIssues != VectorizerParams::MaxVectorWidth * TypeByteSize) - MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues; + MinDepDistBytes = MaxVFWithoutSLForwardIssues; return false; } @@ -1897,6 +1897,9 @@ // Negative distances are not plausible dependencies. if (Val.isNegative()) { bool IsTrueDataDependence = (AIsWrite && !BIsWrite); + // There is no need to update MaxSafeVectorWidthInBits after call to + // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, + // since a forward dependency will allow vectorization using any width. if (IsTrueDataDependence && EnableForwardingConflictDetection && (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) || !HasSameSize)) { @@ -1967,8 +1970,9 @@ return Dependence::Backward; } - // Unsafe if the minimum distance needed is greater than max safe distance. - if (MinDistanceNeeded > MaxSafeDepDistBytes) { + // Unsafe if the minimum distance needed is greater than the smallest + // dependence distance. + if (MinDistanceNeeded > MinDepDistBytes) { LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least " << MinDistanceNeeded << " size in bytes\n"); return Dependence::Backward; @@ -1990,15 +1994,24 @@ // is 2. Then we analyze the accesses on array A, the minimum distance needed // is 8, which is less than 2 and forbidden vectorization, But actually // both A and B could be vectorized by 2 iterations. 
- MaxSafeDepDistBytes = - std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes); + MinDepDistBytes = + std::min(static_cast<uint64_t>(Distance), MinDepDistBytes); bool IsTrueDataDependence = (!AIsWrite && BIsWrite); + uint64_t MinDepDistBytesOld = MinDepDistBytes; if (IsTrueDataDependence && EnableForwardingConflictDetection && - couldPreventStoreLoadForward(Distance, TypeByteSize)) + couldPreventStoreLoadForward(Distance, TypeByteSize)) { + // Sanity check that we didn't update MinDepDistBytes when calling + // couldPreventStoreLoadForward + assert(MinDepDistBytes == MinDepDistBytesOld && + "An update to MinDepDistBytes requires an update to " + "MaxSafeVectorWidthInBits"); return Dependence::BackwardVectorizableButPreventsForwarding; + } - uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride); + // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits + // since there is a backwards dependency. + uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride); LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << " with max VF = " << MaxVF << '\n'); uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; @@ -2010,7 +2023,7 @@ MemAccessInfoList &CheckDeps, const DenseMap &Strides) { - MaxSafeDepDistBytes = -1; + MinDepDistBytes = -1; SmallPtrSet Visited; for (MemAccessInfo CurAccess : CheckDeps) { if (Visited.count(CurAccess)) @@ -2399,7 +2412,6 @@ LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); CanVecMem = DepChecker->areDepsSafe( DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides); - MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes(); if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) { LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); @@ -2764,9 +2776,10 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (CanVecMem) { OS.indent(Depth) << "Memory dependences are safe"; - if (MaxSafeDepDistBytes != -1ULL) - OS << " with a maximum dependence distance of " 
<< MaxSafeDepDistBytes - << " bytes"; + const MemoryDepChecker &DC = getDepChecker(); + if (!DC.isSafeForAnyVectorWidth()) + OS << " with a maximum safe vector width of " + << DC.getMaxSafeVectorWidthInBits() << " bits"; if (PtrRtChecking->Need) OS << " with run-time checks"; OS << "\n"; diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll @@ -9,7 +9,7 @@ ; CHECK-LABEL: function 'backdep_type_size_equivalence': ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 800 bytes +; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 3200 bits ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Forward: ; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 -> diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll @@ -8,7 +8,7 @@ ; CHECK-LABEL: function 'backdep_type_size_equivalence': ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 800 bytes +; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 3200 bits ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Forward: ; CHECK-NEXT: %ld.f32 = load float, ptr %gep.iv, align 8 -> diff --git a/llvm/test/Analysis/LoopAccessAnalysis/max_safe_dep_dist_non_unit_stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/max_safe_dep_dist_non_unit_stride.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/max_safe_dep_dist_non_unit_stride.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/max_safe_dep_dist_non_unit_stride.ll @@ -10,7 +10,7 @@ define void @foo(i64 %len, 
ptr %a) { ; CHECK-LABEL: Loop access info in function 'foo': ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 24 bytes +; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits ; CHECK-NEXT: Dependences: ; CHECK-NEXT: BackwardVectorizable: ; CHECK-NEXT: store i32 %0, ptr %arrayidx2, align 4 -> diff --git a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll @@ -4,7 +4,7 @@ ; for (i = 0; i < n; i++) ; A[i + 4] = A[i] * 2; -; CHECK: Memory dependences are safe with a maximum dependence distance of 8 bytes +; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0"