Index: lib/Analysis/MemorySSA.cpp =================================================================== --- lib/Analysis/MemorySSA.cpp +++ lib/Analysis/MemorySSA.cpp @@ -504,6 +504,7 @@ AliasAnalysisType &AA; DominatorTree &DT; UpwardsMemoryQuery *Query; + unsigned *UpwardWalkLimit; // Phi optimization bookkeeping SmallVector Paths; @@ -539,9 +540,11 @@ /// /// This does not test for whether StopAt is a clobber UpwardsWalkResult - walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr, + walkToPhiOrClobber(DefPath &Desc, + const MemoryAccess *StopAt = nullptr, const MemoryAccess *SkipStopAt = nullptr) const { assert(!isa(Desc.Last) && "Uses don't exist in my world"); + assert(UpwardWalkLimit && *UpwardWalkLimit > 0 && "Need a positive walk limit"); for (MemoryAccess *Current : def_chain(Desc.Last)) { Desc.Last = Current; @@ -551,6 +554,10 @@ if (auto *MD = dyn_cast(Current)) { if (MSSA.isLiveOnEntryDef(MD)) return {MD, true, MustAlias}; + + if (!--*UpwardWalkLimit) + return {Current, true, MayAlias}; + ClobberAlias CA = instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA); if (CA.IsClobber) @@ -589,11 +596,10 @@ /// /// If this returns None, NewPaused is a vector of searches that terminated /// at StopWhere. Otherwise, NewPaused is left in an unspecified state. - Optional - getBlockingAccess(const MemoryAccess *StopWhere, - SmallVectorImpl &PausedSearches, - SmallVectorImpl &NewPaused, - SmallVectorImpl &Terminated) { + Optional getBlockingAccess( + const MemoryAccess *StopWhere, SmallVectorImpl &PausedSearches, + SmallVectorImpl &NewPaused, + SmallVectorImpl &Terminated) { assert(!PausedSearches.empty() && "No searches to continue?"); // BFS vs DFS really doesn't make a difference here, so just do a DFS with @@ -629,10 +635,12 @@ SkipStopWhere = Query->OriginalAccess; } - UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere, + UpwardsWalkResult Res = walkToPhiOrClobber(Node, + /*StopAt=*/StopWhere, /*SkipStopAt=*/SkipStopWhere); if (Res.IsKnownClobber) { assert(Res.Result != StopWhere && Res.Result != SkipStopWhere); + // If this wasn't a cache hit, we hit a clobber when walking. That's a // failure. TerminatedPath Term{Res.Result, PathIndex}; @@ -774,8 +782,9 @@ // FIXME: This is broken, because the Blocker may be reported to be // liveOnEntry, and we'll happily wait for that to disappear (read: never) // For the moment, this is fine, since we do nothing with blocker info. - if (Optional Blocker = getBlockingAccess( - Target, PausedSearches, NewPaused, TerminatedPaths)) { + if (Optional Blocker = + getBlockingAccess(Target, PausedSearches, NewPaused, + TerminatedPaths)) { // Find the node we started at. We can't search based on N->Last, since // we may have gone around a loop with a different MemoryLocation. @@ -828,7 +837,8 @@ MemoryAccess *DefChainEnd = nullptr; SmallVector Clobbers; for (ListIndex Paused : NewPaused) { - UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]); + UpwardsWalkResult WR = + walkToPhiOrClobber(Paths[Paused]); if (WR.IsKnownClobber) Clobbers.push_back({WR.Result, Paused}); else @@ -896,8 +906,13 @@ AliasAnalysisType *getAA() { return &AA; } /// Finds the nearest clobber for the given query, optimizing phis if /// possible. - MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) { + MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q, + unsigned &UpWalkLimit) { Query = &Q; + UpwardWalkLimit = &UpWalkLimit; + // Starting limit must be > 0. + if (!UpWalkLimit) + UpWalkLimit++; MemoryAccess *Current = Start; // This walker pretends uses don't exist. If we're handed one, silently grab @@ -908,21 +923,23 @@ DefPath FirstDesc(Q.StartingLoc, Current, Current, None); // Fast path for the overly-common case (no crazy phi optimization // necessary) - UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); + UpwardsWalkResult WalkResult = + walkToPhiOrClobber(FirstDesc); MemoryAccess *Result; if (WalkResult.IsKnownClobber) { Result = WalkResult.Result; Q.AR = WalkResult.AR; } else { - OptznResult OptRes = tryOptimizePhi(cast(FirstDesc.Last), - Current, Q.StartingLoc); + OptznResult OptRes = + tryOptimizePhi(cast(FirstDesc.Last), Current, + Q.StartingLoc); verifyOptResult(OptRes); resetPhiOptznState(); Result = OptRes.PrimaryClobber.Clobber; } #ifdef EXPENSIVE_CHECKS - if (!Q.SkipSelfAccess) + if (!Q.SkipSelfAccess && *UpwardWalkLimit > 0) checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); #endif return Result; @@ -958,14 +975,15 @@ : Walker(*M, *A, *D), MSSA(M) {} MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, - const MemoryLocation &); - // Second argument (bool), defines whether the clobber search should skip the + const MemoryLocation &, + unsigned &); + // Third argument (bool), defines whether the clobber search should skip the // original queried access. If true, there will be a follow-up query searching // for a clobber access past "self". Note that the Optimized access is not // updated if a new clobber is found by this SkipSelf search. If this // additional query becomes heavily used we may decide to cache the result. // Walker instantiations will decide how to set the SkipSelf bool. - MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, bool); + MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool); }; /// A MemorySSAWalker that does AA walks to disambiguate accesses. It no @@ -982,12 +1000,23 @@ using MemorySSAWalker::getClobberingMemoryAccess; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) { + return Walker->getClobberingMemoryAccessBase(MA, UWL, false); + } + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, + const MemoryLocation &Loc, + unsigned &UWL) { + return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL); + } + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override { - return Walker->getClobberingMemoryAccessBase(MA, false); + unsigned UpwardWalkLimit = MaxCheckLimit; + return getClobberingMemoryAccess(MA, UpwardWalkLimit); } MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, const MemoryLocation &Loc) override { - return Walker->getClobberingMemoryAccessBase(MA, Loc); + unsigned UpwardWalkLimit = MaxCheckLimit; + return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit); } void invalidateInfo(MemoryAccess *MA) override { @@ -1007,12 +1036,23 @@ using MemorySSAWalker::getClobberingMemoryAccess; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) { + return Walker->getClobberingMemoryAccessBase(MA, UWL, true); + } + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, + const MemoryLocation &Loc, + unsigned &UWL) { + return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL); + } + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override { - return Walker->getClobberingMemoryAccessBase(MA, true); + unsigned UpwardWalkLimit = MaxCheckLimit; + return getClobberingMemoryAccess(MA, UpwardWalkLimit); } MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, const MemoryLocation &Loc) override { - return Walker->getClobberingMemoryAccessBase(MA, Loc); + unsigned UpwardWalkLimit = MaxCheckLimit; + return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit); } void invalidateInfo(MemoryAccess *MA) override { @@ -1205,8 +1245,8 @@ /// which is walking bottom-up. class MemorySSA::OptimizeUses { public: - OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, BatchAAResults *BAA, - DominatorTree *DT) + OptimizeUses(MemorySSA *MSSA, CachingWalker *Walker, + BatchAAResults *BAA, DominatorTree *DT) : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT) {} void optimizeUses(); @@ -1235,7 +1275,7 @@ DenseMap &); MemorySSA *MSSA; - MemorySSAWalker *Walker; + CachingWalker *Walker; BatchAAResults *AA; DominatorTree *DT; }; @@ -1353,11 +1393,12 @@ continue; } bool FoundClobberResult = false; + unsigned UpwardWalkLimit = MaxCheckLimit; while (UpperBound > LocInfo.LowerBound) { if (isa(VersionStack[UpperBound])) { // For phis, use the walker, see where we ended up, go there - Instruction *UseInst = MU->getMemoryInst(); - MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst); + MemoryAccess *Result = + Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit); // We are guaranteed to find it or something is wrong while (VersionStack[UpperBound] != Result) { assert(UpperBound != 0); @@ -2210,7 +2251,8 @@ template MemoryAccess * MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase( - MemoryAccess *StartingAccess, const MemoryLocation &Loc) { + MemoryAccess *StartingAccess, const MemoryLocation &Loc, + unsigned &UpwardWalkLimit) { if (isa(StartingAccess)) return StartingAccess; @@ -2238,7 +2280,8 @@ ? StartingUseOrDef->getDefiningAccess() : StartingUseOrDef; - MemoryAccess *Clobber = Walker.findClobber(DefiningAccess, Q); + MemoryAccess *Clobber = + Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit); LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n"); LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); @@ -2249,7 +2292,7 @@ template MemoryAccess * MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase( - MemoryAccess *MA, bool SkipSelf) { + MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) { auto *StartingAccess = dyn_cast(MA); // If this is a MemoryPhi, we can't do anything. if (!StartingAccess) @@ -2295,7 +2338,7 @@ return DefiningAccess; } - OptimizedAccess = Walker.findClobber(DefiningAccess, Q); + OptimizedAccess = Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit); StartingAccess->setOptimized(OptimizedAccess); if (MSSA->isLiveOnEntryDef(OptimizedAccess)) StartingAccess->setOptimizedAccessType(None); @@ -2311,10 +2354,10 @@ MemoryAccess *Result; if (SkipSelf && isa(OptimizedAccess) && - isa(StartingAccess)) { + isa(StartingAccess) && UpwardWalkLimit) { assert(isa(Q.OriginalAccess)); Q.SkipSelfAccess = true; - Result = Walker.findClobber(OptimizedAccess, Q); + Result = Walker.findClobber(OptimizedAccess, Q, UpwardWalkLimit); } else Result = OptimizedAccess; Index: test/Analysis/MemorySSA/optimize-use.ll =================================================================== --- test/Analysis/MemorySSA/optimize-use.ll +++ test/Analysis/MemorySSA/optimize-use.ll @@ -1,5 +1,7 @@ -; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='print,verify' -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NOLIMIT +; RUN: opt -memssa-check-limit=0 -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,LIMIT +; RUN: opt -aa-pipeline=basic-aa -passes='print,verify' -disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NOLIMIT +; RUN: opt -memssa-check-limit=0 -aa-pipeline=basic-aa -passes='print,verify' -disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,LIMIT ; Function Attrs: ssp uwtable define i32 @main() { @@ -18,20 +20,29 @@ ; CHECK: 4 = MemoryDef(3) ; CHECK-NEXT: store i32 7, i32* %1, align 4 store i32 7, i32* %1, align 4 -; CHECK: MemoryUse(3) MustAlias -; CHECK-NEXT: %2 = load i32, i32* %0, align 4 +; NOLIMIT: MemoryUse(3) MustAlias +; NOLIMIT-NEXT: %2 = load i32, i32* %0, align 4 +; LIMIT: MemoryUse(4) MayAlias +; LIMIT-NEXT: %2 = load i32, i32* %0, align 4 %2 = load i32, i32* %0, align 4 -; CHECK: MemoryUse(4) MustAlias -; CHECK-NEXT: %3 = load i32, i32* %1, align 4 +; NOLIMIT: MemoryUse(4) MustAlias +; NOLIMIT-NEXT: %3 = load i32, i32* %1, align 4 +; LIMIT: MemoryUse(4) MayAlias +; LIMIT-NEXT: %3 = load i32, i32* %1, align 4 %3 = load i32, i32* %1, align 4 -; CHECK: MemoryUse(3) MustAlias -; CHECK-NEXT: %4 = load i32, i32* %0, align 4 +; NOLIMIT: MemoryUse(3) MustAlias +; NOLIMIT-NEXT: %4 = load i32, i32* %0, align 4 +; LIMIT: MemoryUse(4) MayAlias +; LIMIT-NEXT: %4 = load i32, i32* %0, align 4 %4 = load i32, i32* %0, align 4 -; CHECK: MemoryUse(4) MustAlias -; CHECK-NEXT: %5 = load i32, i32* %1, align 4 +; NOLIMIT: MemoryUse(4) MustAlias +; NOLIMIT-NEXT: %5 = load i32, i32* %1, align 4 +; LIMIT: MemoryUse(4) MayAlias +; LIMIT-NEXT: %5 = load i32, i32* %1, align 4 %5 = load i32, i32* %1, align 4 %add = add nsw i32 %3, %5 ret i32 %add } + declare noalias i8* @_Znwm(i64) Index: test/Analysis/MemorySSA/phi-translation.ll =================================================================== --- test/Analysis/MemorySSA/phi-translation.ll +++ test/Analysis/MemorySSA/phi-translation.ll @@ -1,5 +1,7 @@ -; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='print,verify' -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NOLIMIT +; RUN: opt -memssa-check-limit=0 -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,LIMIT +; RUN: opt -aa-pipeline=basic-aa -passes='print,verify' -disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NOLIMIT +; RUN: opt -memssa-check-limit=0 -aa-pipeline=basic-aa -passes='print,verify' -disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,LIMIT ; %ptr can't alias %local, so we should be able to optimize the use of %local to ; point to the store to %local. @@ -21,8 +23,10 @@ if.end: ; CHECK: 3 = MemoryPhi({entry,1},{if.then,2}) -; CHECK: MemoryUse(1) -; CHECK-NEXT: load i8, i8* %local, align 1 +; NOLIMIT: MemoryUse(1) MayAlias +; NOLIMIT-NEXT: load i8, i8* %local, align 1 +; LIMIT: MemoryUse(3) MayAlias +; LIMIT-NEXT: load i8, i8* %local, align 1 load i8, i8* %local, align 1 ret void } @@ -62,8 +66,10 @@ ; Order matters here; phi.2 needs to come before phi.3, because that's the order ; they're visited in. ; CHECK: 6 = MemoryPhi({phi.2,4},{phi.3,3}) -; CHECK: MemoryUse(1) -; CHECK-NEXT: load i8, i8* %local +; NOLIMIT: MemoryUse(1) MayAlias +; NOLIMIT-NEXT: load i8, i8* %local +; LIMIT: MemoryUse(6) MayAlias +; LIMIT-NEXT: load i8, i8* %local load i8, i8* %local ret void } @@ -73,8 +79,10 @@ ; CHECK: 1 = MemoryDef(liveOnEntry) ; CHECK-NEXT: store i8 0, i8* %p1 store i8 0, i8* %p1 -; CHECK: MemoryUse(1) -; CHECK-NEXT: load i8, i8* %p1 +; NOLIMIT: MemoryUse(1) MustAlias +; NOLIMIT-NEXT: load i8, i8* %p1 +; LIMIT: MemoryUse(1) MayAlias +; LIMIT-NEXT: load i8, i8* %p1 load i8, i8* %p1 br i1 undef, label %a, label %b @@ -106,8 +114,10 @@ e: ; 8 = MemoryPhi({c,4},{d,5}) -; CHECK: MemoryUse(1) -; CHECK-NEXT: load i8, i8* %p1 +; NOLIMIT: MemoryUse(1) MustAlias +; NOLIMIT-NEXT: load i8, i8* %p1 +; LIMIT: MemoryUse(8) MayAlias +; LIMIT-NEXT: load i8, i8* %p1 load i8, i8* %p1 ret void } @@ -138,8 +148,10 @@ ; CHECK: 4 = MemoryDef(7) ; CHECK-NEXT: store i8 2, i8* %p2 store i8 2, i8* %p2 -; CHECK: MemoryUse(1) -; CHECK-NEXT: load i8, i8* %p1 +; NOLIMIT: MemoryUse(1) MayAlias +; NOLIMIT-NEXT: load i8, i8* %p1 +; LIMIT: MemoryUse(4) MayAlias +; LIMIT-NEXT: load i8, i8* %p1 load i8, i8* %p1 br i1 undef, label %loop.2, label %loop.1 } @@ -167,14 +179,16 @@ if.end: ; CHECK: 4 = MemoryPhi({while.cond,5},{if.then,1},{if.then2,2}) -; CHECK: MemoryUse(4) +; CHECK: MemoryUse(4) MayAlias ; CHECK-NEXT: load i8, i8* %p1 load i8, i8* %p1 ; CHECK: 3 = MemoryDef(4) ; CHECK-NEXT: store i8 2, i8* %p2 store i8 2, i8* %p2 -; CHECK: MemoryUse(4) -; CHECK-NEXT: load i8, i8* %p1 +; NOLIMIT: MemoryUse(4) MayAlias +; NOLIMIT-NEXT: load i8, i8* %p1 +; LIMIT: MemoryUse(3) MayAlias +; LIMIT-NEXT: load i8, i8* %p1 load i8, i8* %p1 br label %while.cond }