Index: include/llvm/Analysis/LoopAccessAnalysis.h =================================================================== --- include/llvm/Analysis/LoopAccessAnalysis.h +++ include/llvm/Analysis/LoopAccessAnalysis.h @@ -374,7 +374,8 @@ LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, - DominatorTree *DT, const ValueToValueMap &Strides); + DominatorTree *DT, LoopInfo *LI, + const ValueToValueMap &Strides); /// Return true we can analyze the memory accesses in the loop and there are /// no memory dependence cycles. @@ -464,6 +465,7 @@ const TargetLibraryInfo *TLI; AliasAnalysis *AA; DominatorTree *DT; + LoopInfo *LI; unsigned NumLoads; unsigned NumStores; @@ -534,6 +536,7 @@ const TargetLibraryInfo *TLI; AliasAnalysis *AA; DominatorTree *DT; + LoopInfo *LI; }; } // End llvm namespace Index: include/llvm/Analysis/ValueTracking.h =================================================================== --- include/llvm/Analysis/ValueTracking.h +++ include/llvm/Analysis/ValueTracking.h @@ -28,6 +28,7 @@ class AssumptionCache; class DominatorTree; class TargetLibraryInfo; + class LoopInfo; /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. @@ -176,11 +177,15 @@ return GetUnderlyingObject(const_cast(V), DL, MaxLookup); } - /// GetUnderlyingObjects - This method is similar to GetUnderlyingObject - /// except that it can look through phi and select instructions and return - /// multiple objects. + /// \brief This method is similar to GetUnderlyingObject except that it can + /// look through phi and select instructions and return multiple objects. + /// + /// If LoopInfo is passed, loop phis are further analyzed. If a pointer + /// accesses different objects in each iteration, we don't look through the + /// phi node. void GetUnderlyingObjects(Value *V, SmallVectorImpl &Objects, - const DataLayout &DL, unsigned MaxLookup = 6); + const DataLayout &DL, LoopInfo *LI = nullptr, + unsigned MaxLookup = 6); /// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer /// are lifetime markers. Index: lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- lib/Analysis/LoopAccessAnalysis.cpp +++ lib/Analysis/LoopAccessAnalysis.cpp @@ -199,9 +199,9 @@ typedef PointerIntPair MemAccessInfo; typedef SmallPtrSet MemAccessInfoSet; - AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, + AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA) - : DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {} + : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {} /// \brief Register a load and whether it is only read from. void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) { @@ -264,6 +264,8 @@ //intrinsic property (such as TBAA metadata). AliasSetTracker AST; + LoopInfo *LI; + /// Sets of potentially dependent accesses - members of one set share an /// underlying pointer. The set "CheckDeps" identfies which sets really need a /// dependence check. @@ -481,7 +483,9 @@ // underlying object. typedef SmallVector ValueVector; ValueVector TempObjects; - GetUnderlyingObjects(Ptr, TempObjects, DL); + + GetUnderlyingObjects(Ptr, TempObjects, DL, LI); + DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n"); for (Value *UnderlyingObj : TempObjects) { UnderlyingObjToAccessMap::iterator Prev = ObjToLastAccess.find(UnderlyingObj); @@ -489,6 +493,7 @@ DepCands.unionSets(Access, Prev->second); ObjToLastAccess[UnderlyingObj] = Access; + DEBUG(dbgs() << " " << *UnderlyingObj << "\n"); } } } @@ -1035,7 +1040,7 @@ MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - AA, DependentAccesses); + AA, LI, DependentAccesses); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1314,10 +1319,10 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, - DominatorTree *DT, + DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL), - TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0), + TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false) { if (canAnalyzeLoop()) analyzeLoop(Strides); @@ -1359,7 +1364,8 @@ if (!LAI) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - LAI = llvm::make_unique(L, SE, DL, TLI, AA, DT, Strides); + LAI = llvm::make_unique(L, SE, DL, TLI, AA, DT, LI, + Strides); #ifndef NDEBUG LAI->NumSymbolicStrides = Strides.size(); #endif @@ -1370,7 +1376,6 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const { LoopAccessAnalysis &LAA = *const_cast(this); - LoopInfo *LI = &getAnalysis().getLoopInfo(); ValueToValueMap NoSymbolicStrides; for (Loop *TopLevelLoop : *LI) @@ -1387,6 +1392,7 @@ TLI = TLIP ? &TLIP->getTLI() : nullptr; AA = &getAnalysis(); DT = &getAnalysis().getDomTree(); + LI = &getAnalysis().getLoopInfo(); return false; } Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -2737,6 +2738,32 @@ return Len == ~0ULL ? 1 : Len; } +/// \brief \p PN defines a loop-variant pointer to an object. Check if the +/// previous iteration of the loop was referring to the same object as \p PN. +static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) { + // Find the loop-defined value. + Loop *L = LI->getLoopFor(PN->getParent()); + if (PN->getNumIncomingValues() != 2) + return true; + + // Find the value from previous iteration. + auto *PrevValue = dyn_cast(PN->getIncomingValue(0)); + if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) + PrevValue = dyn_cast(PN->getIncomingValue(1)); + if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L) + return true; + + // If a new pointer is loaded in the loop, the pointer references a different + // object in every iteration. E.g.: + // for (i) + // int *p = a[i]; + // ... + if (auto *Load = dyn_cast(PrevValue)) + if (!L->isLoopInvariant(Load->getPointerOperand())) + return false; + return true; +} + Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup) { if (!V->getType()->isPointerTy()) @@ -2768,7 +2795,8 @@ } void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl &Objects, - const DataLayout &DL, unsigned MaxLookup) { + const DataLayout &DL, LoopInfo *LI, + unsigned MaxLookup) { SmallPtrSet Visited; SmallVector Worklist; Worklist.push_back(V); @@ -2786,8 +2814,20 @@ } if (PHINode *PN = dyn_cast(P)) { - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - Worklist.push_back(PN->getIncomingValue(i)); + // If this PHI changes the underlying object in every iteration of the + // loop, don't look through it. Consider: + // int **A; + // for (i) { + // Prev = Curr; // Prev = PHI (Prev_0, Curr) + // Curr = A[i]; + // *Prev, *Curr; + // + // Prev is tracking Curr one iteration behind so they refer to different + // underlying objects. + if (!LI || !LI->isLoopHeader(PN->getParent()) || + isSameUnderlyingObjectInLoop(PN, LI)) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); continue; } Index: test/Analysis/LoopAccessAnalysis/underlying-objects-1.ll =================================================================== --- /dev/null +++ test/Analysis/LoopAccessAnalysis/underlying-objects-1.ll @@ -0,0 +1,47 @@ +; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s + +; In: +; +; store_ptr = A; +; load_ptr = &A[2]; +; for (i = 0; i < n; i++) +; *store_ptr++ = *load_ptr++ *10; // A[i] = A[i+2] * 10 +; +; make sure, we look through the PHI to conclude that store_ptr and load_ptr +; both have A as their underlying object. The dependence is safe for +; vectorization requiring no memchecks. +; +; Otherwise we would try to prove independence with a memcheck that is going +; to always fail. + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; CHECK: Memory dependences are safe{{$}} + +define void @f(i8* noalias %A, i64 %width) { +for.body.preheader: + %A_ahead = getelementptr inbounds i8, i8* %A, i64 2 + br label %for.body + +for.body: + %i = phi i64 [ %i.1, %for.body ], [ 0, %for.body.preheader ] + %load_ptr = phi i8* [ %load_ptr.1, %for.body ], [ %A_ahead, %for.body.preheader ] + %store_ptr = phi i8* [ %store_ptr.1, %for.body ], [ %A, %for.body.preheader ] + + %loadA = load i8, i8* %load_ptr, align 1 + + %mul = mul i8 %loadA, 10 + + store i8 %mul, i8* %store_ptr, align 1 + + %load_ptr.1 = getelementptr inbounds i8, i8* %load_ptr, i64 1 + %store_ptr.1 = getelementptr inbounds i8, i8* %store_ptr, i64 1 + %i.1 = add nuw i64 %i, 1 + + %exitcond = icmp eq i64 %i.1, %width + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} Index: test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll =================================================================== --- /dev/null +++ test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll @@ -0,0 +1,90 @@ +; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s + +; This loop: +; +; int **A; +; for (i) +; for (j) { +; A[i][j] = A[i-1][j] * B[j] +; B[j+1] = 2 // backward dep between this and the previous +; } +; +; is transformed by Load-PRE to stash away A[i] for the next iteration of the +; outer loop: +; +; Curr = A[0]; // Prev_0 +; for (i: 1..N) { +; Prev = Curr; // Prev = PHI (Prev_0, Curr) +; Curr = A[i]; +; for (j: 0..N) { +; Curr[j] = Prev[j] * B[j] +; B[j+1] = 2 // backward dep between this and the previous +; } +; } +; +; Since A[i] and A[i-1] are likely to be independent, getUnderlyingObjects +; should not assume that Curr and Prev share the same underlying object. +; +; If it did we would try to dependence-analyze Curr and Prev and the analysis +; would fail with non-constant distance. +; +; To illustrate one of the negative consequences of this, if the loop has a +; backward dependence we won't detect this but instead fully fall back on +; memchecks (that is what LAA does after encountering a case of non-constant +; distance). + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; CHECK: for_j.body: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop +; CHECK-NEXT: Interesting Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %loadB = load i8, i8* %gepB, align 1 -> +; CHECK-NEXT: store i8 2, i8* %gepB_plus_one, align 1 + +define void @f(i8** noalias %A, i8* noalias %B, i64 %N) { +for_i.preheader: + %prev_0 = load i8*, i8** %A, align 8 + br label %for_i.body + +for_i.body: + %i = phi i64 [1, %for_i.preheader], [%i.1, %for_j.end] + %prev = phi i8* [%prev_0, %for_i.preheader], [%curr, %for_j.end] + %gep = getelementptr inbounds i8*, i8** %A, i64 %i + %curr = load i8*, i8** %gep, align 8 + br label %for_j.preheader + +for_j.preheader: + br label %for_j.body + +for_j.body: + %j = phi i64 [0, %for_j.preheader], [%j.1, %for_j.body] + + %gepPrev = getelementptr inbounds i8, i8* %prev, i64 %j + %gepCurr = getelementptr inbounds i8, i8* %curr, i64 %j + %gepB = getelementptr inbounds i8, i8* %B, i64 %j + + %loadPrev = load i8, i8* %gepPrev, align 1 + %loadB = load i8, i8* %gepB, align 1 + + %mul = mul i8 %loadPrev, %loadB + + store i8 %mul, i8* %gepCurr, align 1 + + %gepB_plus_one = getelementptr inbounds i8, i8* %gepB, i64 1 + store i8 2, i8* %gepB_plus_one, align 1 + + %j.1 = add nuw i64 %j, 1 + %exitcondj = icmp eq i64 %j.1, %N + br i1 %exitcondj, label %for_j.end, label %for_j.body + +for_j.end: + + %i.1 = add nuw i64 %i, 1 + %exitcond = icmp eq i64 %i.1, %N + br i1 %exitcond, label %for_i.end, label %for_i.body + +for_i.end: + ret void +}