diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -397,6 +397,12 @@
     const SCEV *End;
     /// Holds the information if this pointer is used for writing to memory.
     bool IsWritePtr;
+    /// Contains information regarding the stride of the access, which could be
+    /// 0 (unknown), 1 (a normal contiguous access), or abs(Stride) > 1 (this
+    /// is considered in the vectorizer as a strided access).
+    int Stride;
+    /// True if the pointer expression is invariant in the enclosing loop.
+    bool LoopInv;
     /// Holds the id of the set of pointers that could be dependent because of a
     /// shared underlying object.
     unsigned DependencySetId;
@@ -408,11 +414,13 @@
     bool NeedsFreeze;
 
     PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End,
-                bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
-                const SCEV *Expr, bool NeedsFreeze)
+                bool IsWritePtr, int Stride, bool LoopInv,
+                unsigned DependencySetId, unsigned AliasSetId, const SCEV *Expr,
+                bool NeedsFreeze)
         : PointerValue(PointerValue), Start(Start), End(End),
-          IsWritePtr(IsWritePtr), DependencySetId(DependencySetId),
-          AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {}
+          IsWritePtr(IsWritePtr), Stride(Stride), LoopInv(LoopInv),
+          DependencySetId(DependencySetId), AliasSetId(AliasSetId),
+          Expr(Expr), NeedsFreeze(NeedsFreeze) {}
   };
 
   RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
@@ -431,12 +439,15 @@
   /// The method might also version the pointer stride according to \p Strides,
   /// and add new predicates to \p PSE.
   void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
-              bool WritePtr, unsigned DepSetId, unsigned ASId,
+              bool WritePtr, int Stride, unsigned DepSetId, unsigned ASId,
               PredicatedScalarEvolution &PSE, bool NeedsFreeze);
 
   /// No run-time memory checking is necessary.
   bool empty() const { return Pointers.empty(); }
 
+  /// Returns true if all gathered accesses which require checks are strided.
+  bool isStrided() const;
+
   /// Generate the checks and store it. This also performs the grouping
   /// of pointers to reduce the number of memchecks necessary.
   void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -198,7 +198,7 @@
 /// There is no conflict when the intervals are disjoint:
 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
-                                    Type *AccessTy, bool WritePtr,
+                                    Type *AccessTy, bool WritePtr, int Stride,
                                     unsigned DepSetId, unsigned ASId,
                                     PredicatedScalarEvolution &PSE,
                                     bool NeedsFreeze) {
@@ -207,7 +207,9 @@
   const SCEV *ScStart;
   const SCEV *ScEnd;
 
+  bool LoopInv = false;
   if (SE->isLoopInvariant(PtrExpr, Lp)) {
+    LoopInv = true;
     ScStart = ScEnd = PtrExpr;
   } else {
     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
@@ -237,8 +239,8 @@
   const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
 
-  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
-                        NeedsFreeze);
+  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, Stride, LoopInv,
+                        DepSetId, ASId, PtrExpr, NeedsFreeze);
 }
 
 void RuntimePointerChecking::tryToCreateDiffCheck(
@@ -463,7 +465,37 @@
   // checking pointer group for each pointer. This is also required
   // for correctness, because in this case we can have checking between
   // pointers to the same underlying object.
-  if (!UseDependencies) {
+  // If we have strided accesses we want to always group them to emit checks.
+  // Otherwise we can have two strided pointers with constant offset
+  // being compared against each other for the same underlying object.
+  //
+  // However we cannot merge loop invariants into any of the groups.
+  // Loop invariants need to be in separate groups to ensure that checks
+  // against all write accesses are emitted.
+  //
+  // In the example below:
+  // void foo(double _Complex *a, unsigned int N, unsigned int n) {
+  //   unsigned int i = 0;
+  //   for (i = 0; i < N; i++) {
+  //     a[i] = a[n] + 1;
+  //   }
+  // }
+  //
+  // We have strided writes to: creal(a[i]) and cimag(a[i]) and
+  // reads from loop invariants: creal(a[n]) and cimag(a[n]).
+  // If grouping is enabled we end up with just one group,
+  // and no checks are emitted. That leads to a wrong analysis,
+  // as there is a dependency between a[n] and a[i], if n is within the range
+  // of iteration space.
+  // A memory access is defined as strided if either:
+  // 1. The stride is unknown (0), or
+  // 2. The stride is known and has an absolute value > 1. A stride of 1
+  //    is considered to be contiguous.
+  bool StridedOrInv = all_of(Pointers, [](const PointerInfo &P) {
+    return std::abs(P.Stride) != 1 || P.LoopInv;
+  });
+
+  if (!StridedOrInv && !UseDependencies) {
     for (unsigned I = 0; I < Pointers.size(); ++I)
       CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
     return;
@@ -1027,7 +1059,9 @@
         DepId = RunningDepId++;
 
       bool IsWrite = Access.getInt();
-      RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
-                     NeedsFreeze);
+      int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, StridesMap,
+                                /*Assume=*/true).value_or(0);
+      RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, Stride, DepId,
+                     ASId, PSE, NeedsFreeze);
       LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
     }