Index: include/llvm/Transforms/Utils/MemorySSA.h
===================================================================
--- include/llvm/Transforms/Utils/MemorySSA.h
+++ include/llvm/Transforms/Utils/MemorySSA.h
@@ -594,6 +594,8 @@
 private:
   class CachingWalker;
+
+  CachingWalker *getWalkerImpl();
   void buildMemorySSA();
   void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const;
   using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>;
@@ -605,7 +607,6 @@
   bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
   MemoryUseOrDef *createNewAccess(Instruction *);
   MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *);
-  MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
   void removeFromLookups(MemoryAccess *);
   MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *);
Index: lib/Transforms/Utils/MemorySSA.cpp
===================================================================
--- lib/Transforms/Utils/MemorySSA.cpp
+++ lib/Transforms/Utils/MemorySSA.cpp
@@ -76,7 +76,119 @@
     OS << "; " << *MA << "\n";
   }
 };
+}
+
+namespace {
+struct UpwardsMemoryQuery {
+  // True if our original query started off as a call
+  bool IsCall;
+  // The pointer location we started the query with. This will be empty if
+  // IsCall is true.
+  MemoryLocation StartingLoc;
+  // This is the instruction we were querying about.
+  const Instruction *Inst;
+  // Set of visited Instructions for this query.
+  DenseSet<ConstMemoryAccessPair> Visited;
+  // Vector of visited call accesses for this query. This is separated out
+  // because you can always cache and look up the result of call queries (IE
+  // when IsCall == true) for every call in the chain. The calls have no AA
+  // location associated with them, and thus, no context dependence.
+  SmallVector<const MemoryAccess *, 32> VisitedCalls;
+  // The MemoryAccess we actually got called with, used to test local domination
+  const MemoryAccess *OriginalAccess;
+
+  UpwardsMemoryQuery()
+      : IsCall(false), Inst(nullptr), OriginalAccess(nullptr) {}
+
+  UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
+      : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
+    if (!IsCall)
+      StartingLoc = MemoryLocation::get(Inst);
+  }
+};
+
+static bool instructionClobbersQuery(MemoryDef *MD, const MemoryLocation &Loc,
+                                     const UpwardsMemoryQuery &Query,
+                                     AliasAnalysis &AA) {
+  Instruction *DefMemoryInst = MD->getMemoryInst();
+  assert(DefMemoryInst && "Defining instruction not actually an instruction");
+
+  if (!Query.IsCall)
+    return AA.getModRefInfo(DefMemoryInst, Loc) & MRI_Mod;
+
+  ModRefInfo I = AA.getModRefInfo(DefMemoryInst, ImmutableCallSite(Query.Inst));
+  return I != MRI_NoModRef;
+}
+
+/// Cache for our caching MemorySSA walker.
+class WalkerCache {
+  DenseMap<ConstMemoryAccessPair, MemoryAccess *> Accesses;
+  DenseMap<const MemoryAccess *, MemoryAccess *> Calls;
+
+public:
+  MemoryAccess *lookup(const MemoryAccess *MA, const MemoryLocation &Loc,
+                       bool IsCall) const {
+    ++NumClobberCacheLookups;
+    MemoryAccess *R = IsCall ? Calls.lookup(MA) : Accesses.lookup({MA, Loc});
+    if (R)
+      ++NumClobberCacheHits;
+    return R;
+  }
+
+  bool insert(const MemoryAccess *MA, MemoryAccess *To,
+              const MemoryLocation &Loc, bool IsCall) {
+    // This is fine for Phis, since there are times where we can't optimize
+    // them. Making a def its own clobber is never correct, though.
+    assert((MA != To || isa<MemoryPhi>(MA)) &&
+           "Something can't clobber itself!");
+
+    ++NumClobberCacheInserts;
+    bool Inserted;
+    if (IsCall)
+      Inserted = Calls.insert({MA, To}).second;
+    else
+      Inserted = Accesses.insert({{MA, Loc}, To}).second;
+
+    return Inserted;
+  }
+
+  bool remove(const MemoryAccess *MA, const MemoryLocation &Loc, bool IsCall) {
+    return IsCall ? Calls.erase(MA) : Accesses.erase({MA, Loc});
+  }
+
+  void clear() {
+    Accesses.clear();
+    Calls.clear();
+  }
+
+  bool contains(const MemoryAccess *MA) const {
+    for (auto &P : Accesses)
+      if (P.first.first == MA || P.second == MA)
+        return true;
+    for (auto &P : Calls)
+      if (P.first == MA || P.second == MA)
+        return true;
+    return false;
+  }
+};
+
+struct RenamePassData {
+  DomTreeNode *DTN;
+  DomTreeNode::const_iterator ChildIt;
+  MemoryAccess *IncomingVal;
+
+  RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
+                 MemoryAccess *M)
+      : DTN(D), ChildIt(It), IncomingVal(M) {}
+  void swap(RenamePassData &RHS) {
+    std::swap(DTN, RHS.DTN);
+    std::swap(ChildIt, RHS.ChildIt);
+    std::swap(IncomingVal, RHS.IncomingVal);
+  }
+};
+} // anonymous namespace
+
+namespace llvm {
 /// \brief A MemorySSAWalker that does AA walks and caching of lookups to
 /// disambiguate accesses.
 ///
@@ -111,6 +223,15 @@
 ///   ret i32 %r
 /// }
 class MemorySSA::CachingWalker final : public MemorySSAWalker {
+  WalkerCache Cache;
+  AliasAnalysis *AA;
+  DominatorTree *DT;
+
+  MemoryAccessPair UpwardsDFSWalk(MemoryAccess *, const MemoryLocation &,
+                                  UpwardsMemoryQuery &, bool);
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
+  void verifyRemoved(MemoryAccess *);
+
 public:
   CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *);
   ~CachingWalker() override;
@@ -119,51 +240,8 @@
   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
                                           MemoryLocation &) override;
   void invalidateInfo(MemoryAccess *) override;
-
-protected:
-  struct UpwardsMemoryQuery;
-  MemoryAccess *doCacheLookup(const MemoryAccess *, const UpwardsMemoryQuery &,
-                              const MemoryLocation &);
-
-  void doCacheInsert(const MemoryAccess *, MemoryAccess *,
-                     const UpwardsMemoryQuery &, const MemoryLocation &);
-
-  void doCacheRemove(const MemoryAccess *, const UpwardsMemoryQuery &,
-                     const MemoryLocation &);
-
-private:
-  MemoryAccessPair UpwardsDFSWalk(MemoryAccess *, const MemoryLocation &,
-                                  UpwardsMemoryQuery &, bool);
-  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
-  bool instructionClobbersQuery(const MemoryDef *, UpwardsMemoryQuery &,
-                                const MemoryLocation &Loc) const;
-  void verifyRemoved(MemoryAccess *);
-  SmallDenseMap<ConstMemoryAccessPair, MemoryAccess *>
-      CachedUpwardsClobberingAccess;
-  DenseMap<const MemoryAccess *, MemoryAccess *> CachedUpwardsClobberingCall;
-  AliasAnalysis *AA;
-  DominatorTree *DT;
-};
-}
-
-namespace {
-struct RenamePassData {
-  DomTreeNode *DTN;
-  DomTreeNode::const_iterator ChildIt;
-  MemoryAccess *IncomingVal;
-
-  RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
-                 MemoryAccess *M)
-      : DTN(D), ChildIt(It), IncomingVal(M) {}
-  void swap(RenamePassData &RHS) {
-    std::swap(DTN, RHS.DTN);
-    std::swap(ChildIt, RHS.ChildIt);
-    std::swap(IncomingVal, RHS.IncomingVal);
-  }
 };
-}
 
-namespace llvm {
 /// \brief Rename a single basic block into MemorySSA form.
 /// Uses the standard SSA renaming algorithm.
 /// \returns The new incoming value.
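
For context on the new WalkerCache above: call queries carry no MemoryLocation and are therefore context-free, so the access alone is a sound cache key; pointer queries are only meaningful for a particular location, so their key must be the (access, location) pair. Below is a minimal standalone sketch of that two-map split (plain C++; Access, Location, and SplitCache are illustrative stand-ins, not LLVM types):

#include <cassert>
#include <map>
#include <utility>

struct Access {};
using Location = int; // stand-in for MemoryLocation

class SplitCache {
  // A pointer query's answer depends on the location asked about, so the
  // key must include it.
  std::map<std::pair<const Access *, Location>, Access *> Accesses;
  // Call queries have no location, so the access alone suffices as a key.
  std::map<const Access *, Access *> Calls;

public:
  Access *lookup(const Access *A, Location L, bool IsCall) const {
    if (IsCall) {
      auto It = Calls.find(A);
      return It == Calls.end() ? nullptr : It->second;
    }
    auto It = Accesses.find({A, L});
    return It == Accesses.end() ? nullptr : It->second;
  }

  bool insert(const Access *A, Access *To, Location L, bool IsCall) {
    // Same invariant the patch asserts (the real code exempts phis):
    // nothing may be cached as its own clobber.
    assert(A != To && "Something can't clobber itself!");
    return IsCall ? Calls.insert({A, To}).second
                  : Accesses.insert({{A, L}, To}).second;
  }

  bool remove(const Access *A, Location L, bool IsCall) {
    return IsCall ? Calls.erase(A) != 0 : Accesses.erase({A, L}) != 0;
  }

  void clear() {
    Accesses.clear();
    Calls.clear();
  }
};

The real WalkerCache additionally bumps the NumClobberCacheLookups/Hits/Inserts statistics and offers contains() so verifyRemoved() can check both maps at once.
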
@@ -393,7 +471,7 @@
   SmallPtrSet<BasicBlock *, 16> Visited;
   renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
 
-  MemorySSAWalker *Walker = getWalker();
+  CachingWalker *Walker = getWalkerImpl();
 
   // Now optimize the MemoryUse's defining access to point to the nearest
   // dominating clobbering def.
@@ -420,7 +498,9 @@
     markUnreachableAsLiveOnEntry(&BB);
 }
 
-MemorySSAWalker *MemorySSA::getWalker() {
+MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }
+
+MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
   if (Walker)
     return Walker.get();
 
@@ -522,33 +602,6 @@
   return MUD;
 }
 
-MemoryAccess *MemorySSA::findDominatingDef(BasicBlock *UseBlock,
-                                           enum InsertionPlace Where) {
-  // Handle the initial case
-  if (Where == Beginning)
-    // The only thing that could define us at the beginning is a phi node
-    if (MemoryPhi *Phi = getMemoryAccess(UseBlock))
-      return Phi;
-
-  DomTreeNode *CurrNode = DT->getNode(UseBlock);
-  // Need to be defined by our dominator
-  if (Where == Beginning)
-    CurrNode = CurrNode->getIDom();
-  Where = End;
-  while (CurrNode) {
-    auto It = PerBlockAccesses.find(CurrNode->getBlock());
-    if (It != PerBlockAccesses.end()) {
-      auto &Accesses = It->second;
-      for (MemoryAccess &RA : reverse(*Accesses)) {
-        if (isa<MemoryDef>(RA) || isa<MemoryPhi>(RA))
-          return &RA;
-      }
-    }
-    CurrNode = CurrNode->getIDom();
-  }
-  return LiveOnEntryDef.get();
-}
-
 /// \brief Returns true if \p Replacer dominates \p Replacee .
 bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
                              const MemoryAccess *Replacee) const {
@@ -796,12 +849,11 @@
 /// \returns True if \p Dominator dominates \p Dominatee.
 bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
                                  const MemoryAccess *Dominatee) const {
-
   assert((Dominator->getBlock() == Dominatee->getBlock()) &&
          "Asking for local domination when accesses are in different blocks!");
 
-  // A node dominates itself.
-  if (Dominatee == Dominator)
+  // A node dominates itself, and liveOnEntry dominates everything.
+  if (Dominatee == Dominator || isLiveOnEntryDef(Dominator))
     return true;
 
   // When Dominatee is defined on function entry, it is not dominated by another
@@ -809,17 +861,12 @@
   if (isLiveOnEntryDef(Dominatee))
     return false;
 
-  // When Dominator is defined on function entry, it dominates the other memory
-  // access.
-  if (isLiveOnEntryDef(Dominator))
-    return true;
-
   // Get the access list for the block
   const AccessList *AccessList = getBlockAccesses(Dominator->getBlock());
   AccessList::const_reverse_iterator It(Dominator->getIterator());
 
   // If we hit the beginning of the access list before we hit dominatee, we must
-  // dominate it
+  // dominate it.
   return std::none_of(It, AccessList->rend(),
                       [&](const MemoryAccess &MA) { return &MA == Dominatee; });
 }
@@ -938,37 +985,7 @@
 
 MemorySSA::CachingWalker::~CachingWalker() {}
 
-struct MemorySSA::CachingWalker::UpwardsMemoryQuery {
-  // True if we saw a phi whose predecessor was a backedge
-  bool SawBackedgePhi;
-  // True if our original query started off as a call
-  bool IsCall;
-  // The pointer location we started the query with. This will be empty if
-  // IsCall is true.
-  MemoryLocation StartingLoc;
-  // This is the instruction we were querying about.
-  const Instruction *Inst;
-  // Set of visited Instructions for this query.
-  DenseSet<ConstMemoryAccessPair> Visited;
-  // Vector of visited call accesses for this query. This is separated out
-  // because you can always cache and lookup the result of call queries (IE when
-  // IsCall == true) for every call in the chain. The calls have no AA location
-  // associated with them with them, and thus, no context dependence.
-  SmallVector<const MemoryAccess *, 32> VisitedCalls;
-  // The MemoryAccess we actually got called with, used to test local domination
-  const MemoryAccess *OriginalAccess;
-
-  UpwardsMemoryQuery()
-      : SawBackedgePhi(false), IsCall(false), Inst(nullptr),
-        OriginalAccess(nullptr) {}
-
-  UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
-      : SawBackedgePhi(false), IsCall(ImmutableCallSite(Inst)), Inst(Inst),
-        OriginalAccess(Access) {}
-};
-
 void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
-
   // TODO: We can do much better cache invalidation with differently stored
   // caches.  For now, for MemoryUses, we simply remove them
   // from the cache, and kill the entire call/non-call cache for everything
   // else.
@@ -982,17 +999,11 @@
   // itself.
 
   if (MemoryUse *MU = dyn_cast<MemoryUse>(MA)) {
-    UpwardsMemoryQuery Q;
-    Instruction *I = MU->getMemoryInst();
-    Q.IsCall = bool(ImmutableCallSite(I));
-    Q.Inst = I;
-    if (!Q.IsCall)
-      Q.StartingLoc = MemoryLocation::get(I);
-    doCacheRemove(MA, Q, Q.StartingLoc);
+    UpwardsMemoryQuery Q(MU->getMemoryInst(), MU);
+    Cache.remove(MU, Q.StartingLoc, Q.IsCall);
   } else {
     // If it is not a use, the best we can do right now is destroy the cache.
-    CachedUpwardsClobberingCall.clear();
-    CachedUpwardsClobberingAccess.clear();
+    Cache.clear();
   }
 
 #ifdef EXPENSIVE_CHECKS
@@ -1001,63 +1012,6 @@
 #endif
 }
 
-void MemorySSA::CachingWalker::doCacheRemove(const MemoryAccess *M,
-                                             const UpwardsMemoryQuery &Q,
-                                             const MemoryLocation &Loc) {
-  if (Q.IsCall)
-    CachedUpwardsClobberingCall.erase(M);
-  else
-    CachedUpwardsClobberingAccess.erase({M, Loc});
-}
-
-void MemorySSA::CachingWalker::doCacheInsert(const MemoryAccess *M,
-                                             MemoryAccess *Result,
-                                             const UpwardsMemoryQuery &Q,
-                                             const MemoryLocation &Loc) {
-  // This is fine for Phis, since there are times where we can't optimize them.
-  // Making a def its own clobber is never correct, though.
-  assert((Result != M || isa<MemoryPhi>(M)) &&
-         "Something can't clobber itself!");
-  ++NumClobberCacheInserts;
-  if (Q.IsCall)
-    CachedUpwardsClobberingCall[M] = Result;
-  else
-    CachedUpwardsClobberingAccess[{M, Loc}] = Result;
-}
-
-MemoryAccess *
-MemorySSA::CachingWalker::doCacheLookup(const MemoryAccess *M,
-                                        const UpwardsMemoryQuery &Q,
-                                        const MemoryLocation &Loc) {
-  ++NumClobberCacheLookups;
-  MemoryAccess *Result;
-
-  if (Q.IsCall)
-    Result = CachedUpwardsClobberingCall.lookup(M);
-  else
-    Result = CachedUpwardsClobberingAccess.lookup({M, Loc});
-
-  if (Result)
-    ++NumClobberCacheHits;
-  return Result;
-}
-
-bool MemorySSA::CachingWalker::instructionClobbersQuery(
-    const MemoryDef *MD, UpwardsMemoryQuery &Q,
-    const MemoryLocation &Loc) const {
-  Instruction *DefMemoryInst = MD->getMemoryInst();
-  assert(DefMemoryInst && "Defining instruction not actually an instruction");
-
-  if (!Q.IsCall)
-    return AA->getModRefInfo(DefMemoryInst, Loc) & MRI_Mod;
-
-  // If this is a call, mark it for caching
-  if (ImmutableCallSite(DefMemoryInst))
-    Q.VisitedCalls.push_back(MD);
-  ModRefInfo I = AA->getModRefInfo(DefMemoryInst, ImmutableCallSite(Q.Inst));
-  return I != MRI_NoModRef;
-}
-
 MemoryAccessPair MemorySSA::CachingWalker::UpwardsDFSWalk(
     MemoryAccess *StartingAccess, const MemoryLocation &Loc,
     UpwardsMemoryQuery &Q, bool FollowingBackedge) {
@@ -1078,12 +1032,12 @@
     // While we can do lookups, we can't sanely do inserts here unless we were
     // to track everything we saw along the way, since we don't know where we
     // will stop.
-    if (instructionClobbersQuery(MD, Q, Loc)) {
+    if (instructionClobbersQuery(MD, Loc, Q, *AA)) {
      ModifyingAccess = CurrAccess;
      break;
    }
  }
 
-  if (auto CacheResult = doCacheLookup(CurrAccess, Q, Loc))
+  if (auto CacheResult = Cache.lookup(CurrAccess, Loc, Q.IsCall))
     return {CacheResult, Loc};
 
   // We need to know whether it is a phi so we can track backedges.
@@ -1168,7 +1122,7 @@
     MemoryAccess *CacheAccess = DFI.getPath(N - 1);
     BasicBlock *CurrBlock = CacheAccess->getBlock();
     if (!FollowingBackedge)
-      doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc);
+      Cache.insert(CacheAccess, ModifyingAccess, Loc, Q.IsCall);
     if (DT->dominates(CurrBlock, OriginalBlock) &&
         (CurrBlock != OriginalBlock || !FollowingBackedge ||
          MSSA->locallyDominates(CacheAccess, StartingAccess)))
       break;
@@ -1179,7 +1133,7 @@
   // StartingAccess for us.
   for (; N > 1; --N) {
     MemoryAccess *CacheAccess = DFI.getPath(N - 1);
-    doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc);
+    Cache.insert(CacheAccess, ModifyingAccess, Loc, Q.IsCall);
   }
 
   assert(Q.Visited.size() < 1000 && "Visited too much");
@@ -1214,10 +1168,10 @@
   UpwardsMemoryQuery Q;
   Q.OriginalAccess = StartingUseOrDef;
   Q.StartingLoc = Loc;
-  Q.Inst = StartingUseOrDef->getMemoryInst();
+  Q.Inst = I;
   Q.IsCall = false;
 
-  if (auto CacheResult = doCacheLookup(StartingUseOrDef, Q, Q.StartingLoc))
+  if (auto *CacheResult = Cache.lookup(StartingUseOrDef, Loc, Q.IsCall))
     return CacheResult;
 
   // Unlike the other function, do not walk to the def of a def, because we are
@@ -1229,7 +1183,7 @@
   MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
   // Only cache this if it wouldn't make Clobber point to itself.
   if (Clobber != StartingAccess)
-    doCacheInsert(Q.OriginalAccess, Clobber, Q, Q.StartingLoc);
+    Cache.insert(Q.OriginalAccess, Clobber, Q.StartingLoc, Q.IsCall);
   DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
   DEBUG(dbgs() << *StartingUseOrDef << "\n");
   DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
@@ -1249,13 +1203,8 @@
   if (isa<FenceInst>(I))
     return StartingAccess;
 
-  UpwardsMemoryQuery Q;
-  Q.OriginalAccess = StartingAccess;
-  Q.IsCall = bool(ImmutableCallSite(I));
-  if (!Q.IsCall)
-    Q.StartingLoc = MemoryLocation::get(I);
-  Q.Inst = I;
-  if (auto CacheResult = doCacheLookup(StartingAccess, Q, Q.StartingLoc))
+  UpwardsMemoryQuery Q(I, StartingAccess);
+  if (auto *CacheResult = Cache.lookup(StartingAccess, Q.StartingLoc, Q.IsCall))
     return CacheResult;
 
   // Start with the thing we already think clobbers this location
@@ -1270,14 +1219,14 @@
   // DFS won't cache a result for DefiningAccess. So, if DefiningAccess isn't
   // our clobber, be sure that it gets a cache entry, too.
   if (Result != DefiningAccess)
-    doCacheInsert(DefiningAccess, Result, Q, Q.StartingLoc);
-  doCacheInsert(Q.OriginalAccess, Result, Q, Q.StartingLoc);
+    Cache.insert(DefiningAccess, Result, Q.StartingLoc, Q.IsCall);
+  Cache.insert(Q.OriginalAccess, Result, Q.StartingLoc, Q.IsCall);
   // TODO: When this implementation is more mature, we may want to figure out
   // what this additional caching buys us. It's most likely A Good Thing.
   if (Q.IsCall)
     for (const MemoryAccess *MA : Q.VisitedCalls)
       if (MA != Result)
-        doCacheInsert(MA, Result, Q, Q.StartingLoc);
+        Cache.insert(MA, Result, Q.StartingLoc, Q.IsCall);
 
   DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
   DEBUG(dbgs() << *DefiningAccess << "\n");
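
Note that the hunks above leave the public walker interface untouched; only the cache plumbing changed. For illustration, a hypothetical client loop (printLoadClobbers is not part of this patch, and it assumes this revision's overload of getClobberingMemoryAccess that takes the Instruction):

#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/MemorySSA.h"
using namespace llvm;

// Resolve each load's nearest dominating clobber through the walker.
static void printLoadClobbers(Function &F, MemorySSA &MSSA) {
  // getWalker() now simply forwards to the private getWalkerImpl().
  MemorySSAWalker *Walker = MSSA.getWalker();
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (isa<LoadInst>(&I)) {
        MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(&I);
        dbgs() << I << "\n  clobbered by: " << *Clobber << "\n";
      }
}

Repeated queries against the same load hit the WalkerCache instead of re-running the upwards DFS.
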
@@ -1289,14 +1238,7 @@
 
 // Verify that MA doesn't exist in any of the caches.
 void MemorySSA::CachingWalker::verifyRemoved(MemoryAccess *MA) {
-#ifndef NDEBUG
-  for (auto &P : CachedUpwardsClobberingAccess)
-    assert(P.first.first != MA && P.second != MA &&
-           "Found removed MemoryAccess in cache.");
-  for (auto &P : CachedUpwardsClobberingCall)
-    assert(P.first != MA && P.second != MA &&
-           "Found removed MemoryAccess in cache.");
-#endif // !NDEBUG
+  assert(!Cache.contains(MA) && "Found removed MemoryAccess in cache.");
 }
 
 MemoryAccess *
@@ -1313,4 +1255,4 @@
     return Use->getDefiningAccess();
   return StartingAccess;
 }
-}
+} // namespace llvm
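
One last note on the locallyDominates cleanup earlier in the patch: intra-block dominance is answered by scanning backwards from Dominator toward the head of the block's access list; if Dominatee never appears among Dominator's predecessors, it must occur later in the list, so Dominator dominates it. A standalone sketch of that scan (plain ints stand in for the ilist of MemoryAccesses; illustrative only):

#include <algorithm>
#include <vector>

// Returns true if Dominator appears at or before Dominatee in Accesses.
bool locallyDominates(const std::vector<int> &Accesses, int Dominator,
                      int Dominatee) {
  // A node dominates itself.
  if (Dominator == Dominatee)
    return true;
  auto DomIt = std::find(Accesses.begin(), Accesses.end(), Dominator);
  // A reverse iterator built from DomIt dereferences to the element just
  // before Dominator, mirroring AccessList::const_reverse_iterator above.
  std::vector<int>::const_reverse_iterator RIt(DomIt);
  return std::none_of(RIt, Accesses.rend(),
                      [&](int A) { return A == Dominatee; });
}

// E.g. locallyDominates({1, 2, 3}, 2, 3) is true, while
// locallyDominates({1, 2, 3}, 2, 1) is false.
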