Index: include/llvm/Analysis/LoopAccessAnalysis.h
===================================================================
--- include/llvm/Analysis/LoopAccessAnalysis.h
+++ include/llvm/Analysis/LoopAccessAnalysis.h
@@ -125,9 +125,52 @@
   /// \brief Set of potential dependent memory accesses.
   typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
 
+  /// \brief Dependence between memory access instructions.
+  struct Dependence {
+    /// \brief The type of the dependence.
+    enum DepType {
+      // No dependence.
+      NoDep,
+      // We couldn't determine the direction or the distance.
+      Unknown,
+      // Lexically forward.
+      Forward,
+      // Forward, but if vectorized, is likely to prevent store-to-load
+      // forwarding.
+      ForwardButPreventsForwarding,
+      // Lexically backward.
+      Backward,
+      // Backward, but the distance allows a vectorization factor of
+      // MaxSafeDepDistBytes.
+      BackwardVectorizable,
+      // Same, but may prevent store-to-load forwarding.
+      BackwardVectorizableButPreventsForwarding
+    };
+
+    /// \brief Index of the source of the dependence in the InstMap vector.
+    unsigned Source;
+    /// \brief Index of the destination of the dependence in the InstMap vector.
+    unsigned Destination;
+    /// \brief The type of the dependence.
+    DepType Type;
+
+    Dependence(unsigned Source, unsigned Destination, DepType Type)
+        : Source(Source), Destination(Destination), Type(Type) {}
+
+    /// \brief Dependence types that don't prevent vectorization.
+    static bool isSafeForVectorization(DepType Type);
+
+    /// \brief Dependence types that can be queried from the analysis.
+    static bool isInterestingDependence(DepType Type);
+
+    /// \brief Dependence types that may be lexically backward (incl. Unknown).
+    bool isPossiblyBackward() const;
+  };
+
   MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L)
       : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
-        ShouldRetryWithRuntimeCheck(false) {}
+        ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true),
+        RecordInterestingDependences(true) {}
 
   /// \brief Register the location (instructions are given increasing numbers)
   /// of a write access.
@@ -153,6 +196,10 @@
   bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoSet &CheckDeps,
                    const ValueToValueMap &Strides);
 
+  /// \brief No memory dependence was encountered that would inhibit
+  /// vectorization.
+  bool isSafeForVectorization() const { return SafeForVectorization; }
+
   /// \brief The maximum number of bytes of a vector register we can vectorize
   /// the accesses safely with.
   unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
@@ -161,6 +208,19 @@
   /// vectorize the loop with a dynamic array access check.
   bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
 
+  /// \brief Returns the interesting dependences.  If null is returned we
+  /// exceeded the MaxInterestingDependence threshold and this information is
+  /// not available.
+  const SmallVectorImpl<Dependence> *getInterestingDependences() const {
+    return RecordInterestingDependences ? &InterestingDependences : nullptr;
+  }
+
+  /// \brief The vector of memory access instructions.  The indices are used as
+  /// instruction identifiers in the Dependence class.
+  const SmallVectorImpl<Instruction *> &getMemoryInstructions() const {
+    return InstMap;
+  }
+
 private:
   ScalarEvolution *SE;
   const DataLayout *DL;
@@ -182,6 +242,20 @@
   /// vectorize this loop with runtime checks.
   bool ShouldRetryWithRuntimeCheck;
 
+  /// \brief No memory dependence was encountered that would inhibit
+  /// vectorization.
+  bool SafeForVectorization;
+
+  /// \brief True if InterestingDependences reflects the dependences in the
+  /// loop.  If false we exceeded MaxInterestingDependence and
+  /// InterestingDependences is invalid.
+  bool RecordInterestingDependences;
+
+  /// \brief Interesting memory dependences collected during the analysis as
+  /// defined by isInterestingDependence.  Only valid if
+  /// RecordInterestingDependences is true.
+  SmallVector<Dependence, 8> InterestingDependences;
+
   /// \brief Check whether there is a plausible dependence between the two
   /// accesses.
   ///
@@ -194,9 +268,9 @@
   /// element access it records this distance in \p MaxSafeDepDistBytes (if this
   /// distance is smaller than any other distance encountered so far).
-  /// Otherwise, this function returns true signaling a possible dependence.
-  bool isDependent(const MemAccessInfo &A, unsigned AIdx,
-                   const MemAccessInfo &B, unsigned BIdx,
-                   const ValueToValueMap &Strides);
+  /// The returned Dependence::DepType classifies the dependence (or NoDep).
+  Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
+                                  const MemAccessInfo &B, unsigned BIdx,
+                                  const ValueToValueMap &Strides);
 
   /// \brief Check whether the data dependence could prevent store-load
   /// forwarding.
Index: lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- lib/Analysis/LoopAccessAnalysis.cpp
+++ lib/Analysis/LoopAccessAnalysis.cpp
@@ -49,6 +49,12 @@
 /// Maximum SIMD width.
 const unsigned VectorizerParams::MaxVectorWidth = 64;
 
+/// \brief We collect interesting dependences up to this threshold.
+static cl::opt<unsigned> MaxInterestingDependence(
+    "max-interesting-dependences", cl::Hidden,
+    cl::desc("Maximum number of interesting dependences collected by "
+             "loop-access analysis (default = 100)"), cl::init(100));
+
 bool VectorizerParams::isInterleaveForced() {
   return ::VectorizationInterleave.getNumOccurrences() > 0;
 }
@@ -360,7 +366,7 @@
 
   DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
   DEBUG(dbgs() << " AST: "; AST.dump());
-  DEBUG(dbgs() << "LAA: Accesses:\n");
+  DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
   DEBUG({
     for (auto A : Accesses)
       dbgs() << "\t" << *A.getPointer() << " (" <<
@@ -545,6 +551,57 @@
   return Stride;
 }
 
+bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
+  switch (Type) {
+  case NoDep:
+  case Forward:
+  case BackwardVectorizable:
+    return true;
+
+  case Unknown:
+  case ForwardButPreventsForwarding:
+  case Backward:
+  case BackwardVectorizableButPreventsForwarding:
+    break;
+  }
+  // Also reached for out-of-range values: conservatively report unsafe.
+  return false;
+}
+
+bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
+  switch (Type) {
+  case NoDep:
+  case Forward:
+    return false;
+
+  case BackwardVectorizable:
+  case Unknown:
+  case ForwardButPreventsForwarding:
+  case Backward:
+  case BackwardVectorizableButPreventsForwarding:
+    break;
+  }
+  // Also reached for out-of-range values: conservatively record them.
+  return true;
+}
+
+bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
+  switch (Type) {
+  case NoDep:
+  case Forward:
+  case ForwardButPreventsForwarding:
+    return false;
+
+  case Unknown:
+  case BackwardVectorizable:
+  case Backward:
+  case BackwardVectorizableButPreventsForwarding:
+    break;
+  }
+  // Also reached for out-of-range values: conservatively assume backward.
+  return true;
+}
+
 bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
                                                     unsigned TypeByteSize) {
   // If loads occur at a distance that is not a multiple of a feasible vector
@@ -584,9 +641,10 @@
   return false;
 }
 
-bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
-                                   const MemAccessInfo &B, unsigned BIdx,
-                                   const ValueToValueMap &Strides) {
+MemoryDepChecker::Dependence::DepType
+MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+                              const MemAccessInfo &B, unsigned BIdx,
+                              const ValueToValueMap &Strides) {
   assert (AIdx < BIdx && "Must pass arguments in program order");
 
   Value *APtr = A.getPointer();
@@ -596,12 +654,12 @@
 
   // Two reads are independent.
   if (!AIsWrite && !BIsWrite)
-    return false;
+    return Dependence::NoDep;
 
   // We cannot check pointers in different address spaces.
   if (APtr->getType()->getPointerAddressSpace() !=
       BPtr->getType()->getPointerAddressSpace())
-    return true;
+    return Dependence::Unknown;
 
   const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
   const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
@@ -636,14 +694,14 @@
   // the address space.
   if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
     DEBUG(dbgs() << "Non-consecutive pointer access\n");
-    return true;
+    return Dependence::Unknown;
   }
 
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
   if (!C) {
     DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
     ShouldRetryWithRuntimeCheck = true;
-    return true;
+    return Dependence::Unknown;
   }
 
   Type *ATy = APtr->getType()->getPointerElementType();
@@ -657,19 +715,19 @@
     if (IsTrueDataDependence &&
         (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
          ATy != BTy))
-      return true;
+      return Dependence::ForwardButPreventsForwarding;
 
     DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n");
-    return false;
+    return Dependence::Forward;
   }
 
   // Write to the same location with the same size.
   // Could be improved to assert type sizes are the same (i32 == float, etc).
   if (Val == 0) {
     if (ATy == BTy)
-      return false;
+      return Dependence::NoDep;
     DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
-    return true;
+    return Dependence::Unknown;
   }
 
   assert(Val.isStrictlyPositive() && "Expect a positive value");
@@ -677,7 +735,7 @@
   if (ATy != BTy) {
     DEBUG(dbgs() <<
           "LAA: ReadWrite-Write positive dependency with different types\n");
-    return true;
+    return Dependence::Unknown;
   }
 
   unsigned Distance = (unsigned) Val.getZExtValue();
@@ -696,7 +754,7 @@
       Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
     DEBUG(dbgs() << "LAA: Failure because of Positive distance "
         << Val.getSExtValue() << '\n');
-    return true;
+    return Dependence::Backward;
   }
 
   // Positive distance bigger than max vectorization factor.
@@ -706,12 +764,12 @@
   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
   if (IsTrueDataDependence &&
       couldPreventStoreLoadForward(Distance, TypeByteSize))
-     return true;
+    return Dependence::BackwardVectorizableButPreventsForwarding;
 
   DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() <<
         " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
 
-  return false;
+  return Dependence::BackwardVectorizable;
 }
 
 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
@@ -740,9 +798,33 @@
              I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
           for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
                I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
-            if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2, Strides))
-              return false;
-            if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1, Strides))
+            auto A = std::make_pair(&*AI, *I1);
+            auto B = std::make_pair(&*OI, *I2);
+
+            assert(*I1 != *I2);
+            if (*I1 > *I2)
+              std::swap(A, B);
+
+            Dependence::DepType Type =
+                isDependent(*A.first, A.second, *B.first, B.second, Strides);
+            SafeForVectorization &= Dependence::isSafeForVectorization(Type);
+
+            // Gather dependences unless we accumulated MaxInterestingDependence
+            // dependences.  In that case return as soon as we find the first
+            // unsafe dependence.  This puts a limit on this quadratic
+            // algorithm.
+            if (RecordInterestingDependences) {
+              if (Dependence::isInterestingDependence(Type))
+                InterestingDependences.push_back(
+                    Dependence(A.second, B.second, Type));
+
+              if (InterestingDependences.size() >= MaxInterestingDependence) {
+                RecordInterestingDependences = false;
+                InterestingDependences.clear();
+                DEBUG(dbgs() << "Too many dependences, stopped recording\n");
+              }
+            }
+            if (!RecordInterestingDependences && !SafeForVectorization)
               return false;
           }
         ++OI;
@@ -750,7 +832,10 @@
       AI++;
     }
   }
-  return true;
+
+  DEBUG(dbgs() << "Total Interesting Dependences: "
+               << InterestingDependences.size() << "\n");
+  return SafeForVectorization;
 }
 
 bool LoopAccessInfo::canAnalyzeLoop() {