Index: llvm/include/llvm/Analysis/MemorySSA.h
===================================================================
--- llvm/include/llvm/Analysis/MemorySSA.h
+++ llvm/include/llvm/Analysis/MemorySSA.h
@@ -794,6 +794,8 @@
   /// def-use chain of uses.
   void ensureOptimizedUses();

+  AliasAnalysis &getAA() { return *AA; }
+
 protected:
   // Used by Memory SSA dumpers and wrapper pass
   friend class MemorySSAPrinterLegacyPass;
@@ -840,12 +842,12 @@
                                       bool CreationMustSucceed = true);

 private:
-  template <class AliasAnalysisType> class ClobberWalkerBase;
-  template <class AliasAnalysisType> class CachingWalker;
-  template <class AliasAnalysisType> class SkipSelfWalker;
+  class ClobberWalkerBase;
+  class CachingWalker;
+  class SkipSelfWalker;
   class OptimizeUses;

-  CachingWalker<AliasAnalysis> *getWalkerImpl();
+  CachingWalker *getWalkerImpl();
   void buildMemorySSA(BatchAAResults &BAA);

   void prepareForMoveTo(MemoryAccess *, BasicBlock *);
@@ -892,9 +894,9 @@
   mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering;

   // Memory SSA building info
-  std::unique_ptr<ClobberWalkerBase<AliasAnalysis>> WalkerBase;
-  std::unique_ptr<CachingWalker<AliasAnalysis>> Walker;
-  std::unique_ptr<SkipSelfWalker<AliasAnalysis>> SkipWalker;
+  std::unique_ptr<ClobberWalkerBase> WalkerBase;
+  std::unique_ptr<CachingWalker> Walker;
+  std::unique_ptr<SkipSelfWalker> SkipWalker;
   unsigned NextID = 0;
   bool IsOptimized = false;
 };
@@ -1041,15 +1043,17 @@
   ///
   /// calling this API on load(%a) will return the MemoryPhi, not the MemoryDef
   /// in the if (a) branch.
-  MemoryAccess *getClobberingMemoryAccess(const Instruction *I) {
+  MemoryAccess *getClobberingMemoryAccess(const Instruction *I,
+                                          BatchAAResults &AA) {
     MemoryAccess *MA = MSSA->getMemoryAccess(I);
     assert(MA && "Handed an instruction that MemorySSA doesn't recognize?");
-    return getClobberingMemoryAccess(MA);
+    return getClobberingMemoryAccess(MA, AA);
   }

   /// Does the same thing as getClobberingMemoryAccess(const Instruction *I),
   /// but takes a MemoryAccess instead of an Instruction.
-  virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) = 0;
+  virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
+                                                  BatchAAResults &AA) = 0;

   /// Given a potentially clobbering memory access and a new location,
   /// calling this will give you the nearest dominating clobbering MemoryAccess
@@ -1063,7 +1067,8 @@
   /// will return that MemoryDef, whereas the above would return the clobber
   /// starting from the use side of the memory def.
   virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
-                                                  const MemoryLocation &) = 0;
+                                                  const MemoryLocation &,
+                                                  BatchAAResults &AA) = 0;

   /// Given a memory access, invalidate anything this walker knows about
   /// that access.
@@ -1086,9 +1091,11 @@
   // getClobberingMemoryAccess.
   using MemorySSAWalker::getClobberingMemoryAccess;

-  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
-                                          const MemoryLocation &) override;
+                                          BatchAAResults &) override;
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
+                                          const MemoryLocation &,
+                                          BatchAAResults &) override;
 };

 using MemoryAccessPair = std::pair<MemoryAccess *, MemoryLocation>;
Index: llvm/lib/Analysis/MemorySSA.cpp
===================================================================
--- llvm/lib/Analysis/MemorySSA.cpp
+++ llvm/lib/Analysis/MemorySSA.cpp
@@ -125,10 +125,11 @@
 class MemorySSAWalkerAnnotatedWriter : public AssemblyAnnotationWriter {
   MemorySSA *MSSA;
   MemorySSAWalker *Walker;
+  BatchAAResults BAA;

 public:
   MemorySSAWalkerAnnotatedWriter(MemorySSA *M)
-      : MSSA(M), Walker(M->getWalker()) {}
+      : MSSA(M), Walker(M->getWalker()), BAA(M->getAA()) {}

   void emitBasicBlockStartAnnot(const BasicBlock *BB,
                                 formatted_raw_ostream &OS) override {
@@ -139,7 +140,7 @@
   void emitInstructionAnnot(const Instruction *I,
                             formatted_raw_ostream &OS) override {
     if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
-      MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(MA);
+      MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(MA, BAA);
       OS << "; " << *MA;
       if (Clobber) {
         OS << " - clobbered by ";
@@ -373,8 +374,7 @@

 } // end anonymous namespace

-template <typename AliasAnalysisType>
-static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
+static bool isUseTriviallyOptimizableToLiveOnEntry(BatchAAResults &AA,
                                                    const Instruction *I) {
   // If the memory can't be changed, then loads of the memory can't be
   // clobbered.
@@ -398,11 +398,10 @@
 /// \param AA The AliasAnalysis we used for our search.
 /// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
-template <typename AliasAnalysisType>
 LLVM_ATTRIBUTE_UNUSED static void
 checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
                    const MemoryLocation &StartLoc, const MemorySSA &MSSA,
-                   const UpwardsMemoryQuery &Query, AliasAnalysisType &AA,
+                   const UpwardsMemoryQuery &Query, BatchAAResults &AA,
                    bool AllowImpreciseClobber = false) {
   assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");

@@ -493,7 +492,7 @@

 /// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
 /// in one class.
-template <class AliasAnalysisType> class ClobberWalker {
+class ClobberWalker {
   /// Save a few bytes by using unsigned instead of size_t.
   using ListIndex = unsigned;

@@ -517,7 +516,6 @@
   };

   const MemorySSA &MSSA;
-  AliasAnalysisType &AA;
   DominatorTree &DT;
   UpwardsMemoryQuery *Query;
   unsigned *UpwardWalkLimit;
@@ -558,7 +556,8 @@
   ///
   /// This does not test for whether StopAt is a clobber
   UpwardsWalkResult
-  walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr,
+  walkToPhiOrClobber(BatchAAResults &BAA, DefPath &Desc,
+                     const MemoryAccess *StopAt = nullptr,
                      const MemoryAccess *SkipStopAt = nullptr) const {
     assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
     assert(UpwardWalkLimit && "Need a valid walk limit");
@@ -584,7 +583,7 @@
         if (!--*UpwardWalkLimit)
           return {Current, true};

-        if (instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA))
+        if (instructionClobbersQuery(MD, Desc.Loc, Query->Inst, BAA))
           return {MD, true};
       }
     }
@@ -624,7 +623,7 @@
   /// If this returns None, NewPaused is a vector of searches that terminated
   /// at StopWhere. Otherwise, NewPaused is left in an unspecified state.
   Optional<TerminatedPath>
-  getBlockingAccess(const MemoryAccess *StopWhere,
+  getBlockingAccess(BatchAAResults &BAA, const MemoryAccess *StopWhere,
                     SmallVectorImpl<ListIndex> &PausedSearches,
                     SmallVectorImpl<ListIndex> &NewPaused,
                     SmallVectorImpl<TerminatedPath> &Terminated) {
@@ -663,7 +662,7 @@
         SkipStopWhere = Query->OriginalAccess;
     }

-    UpwardsWalkResult Res = walkToPhiOrClobber(Node,
+    UpwardsWalkResult Res = walkToPhiOrClobber(BAA, Node,
                                                /*StopAt=*/StopWhere,
                                                /*SkipStopAt=*/SkipStopWhere);
     if (Res.IsKnownClobber) {
@@ -766,8 +765,8 @@
   ///
   /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path
   /// terminates when a MemoryAccess that clobbers said MemoryLocation is found.
-  OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start,
-                             const MemoryLocation &Loc) {
+  OptznResult tryOptimizePhi(BatchAAResults &BAA, MemoryPhi *Phi,
+                             MemoryAccess *Start, const MemoryLocation &Loc) {
     assert(Paths.empty() && VisitedPhis.empty() &&
            "Reset the optimization state.");

@@ -811,7 +810,7 @@
       // liveOnEntry, and we'll happily wait for that to disappear (read: never)
       // For the moment, this is fine, since we do nothing with blocker info.
       if (Optional<TerminatedPath> Blocker = getBlockingAccess(
-              Target, PausedSearches, NewPaused, TerminatedPaths)) {
+              BAA, Target, PausedSearches, NewPaused, TerminatedPaths)) {

         // Find the node we started at. We can't search based on N->Last, since
         // we may have gone around a loop with a different MemoryLocation.
@@ -864,7 +863,7 @@
     MemoryAccess *DefChainEnd = nullptr;
     SmallVector<TerminatedPath, 4> Clobbers;
     for (ListIndex Paused : NewPaused) {
-      UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]);
+      UpwardsWalkResult WR = walkToPhiOrClobber(BAA, Paths[Paused]);
       if (WR.IsKnownClobber)
         Clobbers.push_back({WR.Result, Paused});
       else
@@ -927,14 +926,13 @@
   }

 public:
-  ClobberWalker(const MemorySSA &MSSA, AliasAnalysisType &AA, DominatorTree &DT)
-      : MSSA(MSSA), AA(AA), DT(DT) {}
+  ClobberWalker(const MemorySSA &MSSA, DominatorTree &DT)
+      : MSSA(MSSA), DT(DT) {}

-  AliasAnalysisType *getAA() { return &AA; }
   /// Finds the nearest clobber for the given query, optimizing phis if
   /// possible.
-  MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q,
-                            unsigned &UpWalkLimit) {
+  MemoryAccess *findClobber(BatchAAResults &BAA, MemoryAccess *Start,
+                            UpwardsMemoryQuery &Q, unsigned &UpWalkLimit) {
     Query = &Q;
     UpwardWalkLimit = &UpWalkLimit;
     // Starting limit must be > 0.
@@ -950,12 +948,12 @@
     DefPath FirstDesc(Q.StartingLoc, Current, Current, None);
     // Fast path for the overly-common case (no crazy phi optimization
     // necessary)
-    UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc);
+    UpwardsWalkResult WalkResult = walkToPhiOrClobber(BAA, FirstDesc);
     MemoryAccess *Result;
     if (WalkResult.IsKnownClobber) {
       Result = WalkResult.Result;
     } else {
-      OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last),
+      OptznResult OptRes = tryOptimizePhi(BAA, cast<MemoryPhi>(FirstDesc.Last),
                                           Current, Q.StartingLoc);
       verifyOptResult(OptRes);
       resetPhiOptznState();
@@ -964,7 +962,7 @@

 #ifdef EXPENSIVE_CHECKS
     if (!Q.SkipSelfAccess && *UpwardWalkLimit > 0)
-      checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
+      checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, BAA);
 #endif
     return Result;
   }
@@ -990,63 +988,65 @@

 namespace llvm {

-template <class AliasAnalysisType> class MemorySSA::ClobberWalkerBase {
-  ClobberWalker<AliasAnalysisType> Walker;
+class MemorySSA::ClobberWalkerBase {
+  ClobberWalker Walker;
   MemorySSA *MSSA;

 public:
-  ClobberWalkerBase(MemorySSA *M, AliasAnalysisType *A, DominatorTree *D)
-      : Walker(*M, *A, *D), MSSA(M) {}
+  ClobberWalkerBase(MemorySSA *M, DominatorTree *D) : Walker(*M, *D), MSSA(M) {}

   MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *,
                                               const MemoryLocation &,
-                                              unsigned &);
+                                              BatchAAResults &, unsigned &);
   // Third argument (bool), defines whether the clobber search should skip the
   // original queried access. If true, there will be a follow-up query searching
   // for a clobber access past "self". Note that the Optimized access is not
   // updated if a new clobber is found by this SkipSelf search. If this
   // additional query becomes heavily used we may decide to cache the result.
   // Walker instantiations will decide how to set the SkipSelf bool.
-  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool,
+  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, BatchAAResults &,
+                                              unsigned &, bool,
                                               bool UseInvariantGroup = true);
 };

 /// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
 /// longer does caching on its own, but the name has been retained for the
 /// moment.
-template <class AliasAnalysisType>
 class MemorySSA::CachingWalker final : public MemorySSAWalker {
-  ClobberWalkerBase<AliasAnalysisType> *Walker;
+  ClobberWalkerBase *Walker;

 public:
-  CachingWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W)
+  CachingWalker(MemorySSA *M, ClobberWalkerBase *W)
       : MemorySSAWalker(M), Walker(W) {}
   ~CachingWalker() override = default;

   using MemorySSAWalker::getClobberingMemoryAccess;

-  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) {
-    return Walker->getClobberingMemoryAccessBase(MA, UWL, false);
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, BatchAAResults &BAA,
+                                          unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, BAA, UWL, false);
   }
   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
                                           const MemoryLocation &Loc,
-                                          unsigned &UWL) {
-    return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
+                                          BatchAAResults &BAA, unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, Loc, BAA, UWL);
   }

   // This method is not accessible outside of this file.
-  MemoryAccess *getClobberingMemoryAccessWithoutInvariantGroup(MemoryAccess *MA,
-                                                               unsigned &UWL) {
-    return Walker->getClobberingMemoryAccessBase(MA, UWL, false, false);
+  MemoryAccess *getClobberingMemoryAccessWithoutInvariantGroup(
+      MemoryAccess *MA, BatchAAResults &BAA, unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, BAA, UWL, false, false);
   }

-  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+                                          BatchAAResults &BAA) override {
     unsigned UpwardWalkLimit = MaxCheckLimit;
-    return getClobberingMemoryAccess(MA, UpwardWalkLimit);
+    return getClobberingMemoryAccess(MA, BAA, UpwardWalkLimit);
   }
   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
-                                          const MemoryLocation &Loc) override {
+                                          const MemoryLocation &Loc,
+                                          BatchAAResults &BAA) override {
     unsigned UpwardWalkLimit = MaxCheckLimit;
-    return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit);
+    return getClobberingMemoryAccess(MA, Loc, BAA, UpwardWalkLimit);
   }

   void invalidateInfo(MemoryAccess *MA) override {
@@ -1055,34 +1055,36 @@
   }
 };

-template <class AliasAnalysisType>
 class MemorySSA::SkipSelfWalker final : public MemorySSAWalker {
-  ClobberWalkerBase<AliasAnalysisType> *Walker;
+  ClobberWalkerBase *Walker;

 public:
-  SkipSelfWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W)
+  SkipSelfWalker(MemorySSA *M, ClobberWalkerBase *W)
       : MemorySSAWalker(M), Walker(W) {}
   ~SkipSelfWalker() override = default;

   using MemorySSAWalker::getClobberingMemoryAccess;

-  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) {
-    return Walker->getClobberingMemoryAccessBase(MA, UWL, true);
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, BatchAAResults &BAA,
+                                          unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, BAA, UWL, true);
   }
   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
                                           const MemoryLocation &Loc,
-                                          unsigned &UWL) {
-    return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
+                                          BatchAAResults &BAA, unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, Loc, BAA, UWL);
   }

-  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
+  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+                                          BatchAAResults &BAA) override {
     unsigned UpwardWalkLimit = MaxCheckLimit;
-    return getClobberingMemoryAccess(MA, UpwardWalkLimit);
+    return getClobberingMemoryAccess(MA, BAA, UpwardWalkLimit);
   }
   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
-                                          const MemoryLocation &Loc) override {
+                                          const MemoryLocation &Loc,
+                                          BatchAAResults &BAA) override {
     unsigned UpwardWalkLimit = MaxCheckLimit;
-    return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit);
+    return getClobberingMemoryAccess(MA, Loc, BAA, UpwardWalkLimit);
   }

   void invalidateInfo(MemoryAccess *MA) override {
@@ -1283,8 +1285,8 @@
 /// which is walking bottom-up.
 class MemorySSA::OptimizeUses {
 public:
-  OptimizeUses(MemorySSA *MSSA, CachingWalker<BatchAAResults> *Walker,
-               BatchAAResults *BAA, DominatorTree *DT)
+  OptimizeUses(MemorySSA *MSSA, CachingWalker *Walker, BatchAAResults *BAA,
+               DominatorTree *DT)
       : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT) {}

   void optimizeUses();
@@ -1313,7 +1315,7 @@
                            DenseMap<MemoryLocOrCall, MemoryLocations> &);

   MemorySSA *MSSA;
-  CachingWalker<BatchAAResults> *Walker;
+  CachingWalker *Walker;
   BatchAAResults *AA;
   DominatorTree *DT;
 };
@@ -1441,7 +1443,7 @@
       // support updates, so don't use it to optimize uses.
       MemoryAccess *Result =
           Walker->getClobberingMemoryAccessWithoutInvariantGroup(
-              MU, UpwardWalkLimit);
+              MU, *AA, UpwardWalkLimit);
       // We are guaranteed to find it or something is wrong.
       while (VersionStack[UpperBound] != Result) {
         assert(UpperBound != 0);
@@ -1558,16 +1560,14 @@

 MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }

-MemorySSA::CachingWalker<AliasAnalysis> *MemorySSA::getWalkerImpl() {
+MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
   if (Walker)
     return Walker.get();

   if (!WalkerBase)
-    WalkerBase =
-        std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
+    WalkerBase = std::make_unique<ClobberWalkerBase>(this, DT);

-  Walker =
-      std::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get());
+  Walker = std::make_unique<CachingWalker>(this, WalkerBase.get());
   return Walker.get();
 }

@@ -1576,11 +1576,9 @@
     return SkipWalker.get();

   if (!WalkerBase)
-    WalkerBase =
-        std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
+    WalkerBase = std::make_unique<ClobberWalkerBase>(this, DT);

-  SkipWalker =
-      std::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get());
+  SkipWalker = std::make_unique<SkipSelfWalker>(this, WalkerBase.get());
   return SkipWalker.get();
 }

@@ -2143,8 +2141,8 @@
     return;

   BatchAAResults BatchAA(*AA);
-  ClobberWalkerBase<BatchAAResults> WalkerBase(this, &BatchAA, DT);
-  CachingWalker<BatchAAResults> WalkerLocal(this, &WalkerBase);
+  ClobberWalkerBase WalkerBase(this, DT);
+  CachingWalker WalkerLocal(this, &WalkerBase);
   OptimizeUses(this, &WalkerLocal, &BatchAA, DT).optimizeUses();
   IsOptimized = true;
 }
@@ -2413,11 +2411,9 @@
 /// the MemoryAccess that actually clobbers Loc.
 ///
 /// \returns our clobbering memory access
-template <typename AliasAnalysisType>
-MemoryAccess *
-MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
+MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
     MemoryAccess *StartingAccess, const MemoryLocation &Loc,
-    unsigned &UpwardWalkLimit) {
+    BatchAAResults &BAA, unsigned &UpwardWalkLimit) {
   assert(!isa<MemoryUse>(StartingAccess) && "Use cannot be defining access");

   Instruction *I = nullptr;
@@ -2443,7 +2439,7 @@
   // handed something we already believe is the clobbering access.
   // We never set SkipSelf to true in Q in this method.
   MemoryAccess *Clobber =
-      Walker.findClobber(StartingAccess, Q, UpwardWalkLimit);
+      Walker.findClobber(BAA, StartingAccess, Q, UpwardWalkLimit);
   LLVM_DEBUG({
     dbgs() << "Clobber starting at access " << *StartingAccess << "\n";
     if (I)
@@ -2512,11 +2508,9 @@
   return MostDominatingInstruction == &I ? nullptr : MostDominatingInstruction;
 }

-template <typename AliasAnalysisType>
-MemoryAccess *
-MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
-    MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf,
-    bool UseInvariantGroup) {
+MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
+    MemoryAccess *MA, BatchAAResults &BAA, unsigned &UpwardWalkLimit,
+    bool SkipSelf, bool UseInvariantGroup) {
   auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
   // If this is a MemoryPhi, we can't do anything.
   if (!StartingAccess)
@@ -2555,7 +2549,7 @@

   UpwardsMemoryQuery Q(I, StartingAccess);

-  if (isUseTriviallyOptimizableToLiveOnEntry(*Walker.getAA(), I)) {
+  if (isUseTriviallyOptimizableToLiveOnEntry(BAA, I)) {
     MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
     StartingAccess->setOptimized(LiveOnEntry);
     return LiveOnEntry;
@@ -2573,7 +2567,8 @@
       return DefiningAccess;
     }

-    OptimizedAccess = Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit);
+    OptimizedAccess =
+        Walker.findClobber(BAA, DefiningAccess, Q, UpwardWalkLimit);
     StartingAccess->setOptimized(OptimizedAccess);
   } else
     OptimizedAccess = StartingAccess->getOptimized();
@@ -2588,7 +2583,7 @@
       isa<MemoryDef>(StartingAccess) && UpwardWalkLimit) {
     assert(isa<MemoryDef>(Q.OriginalAccess));
     Q.SkipSelfAccess = true;
-    Result = Walker.findClobber(OptimizedAccess, Q, UpwardWalkLimit);
+    Result = Walker.findClobber(BAA, OptimizedAccess, Q, UpwardWalkLimit);
   } else
     Result = OptimizedAccess;

@@ -2599,14 +2594,15 @@
 }

 MemoryAccess *
-DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA,
+                                                    BatchAAResults &) {
   if (auto *Use = dyn_cast<MemoryUse>(MA))
     return Use->getDefiningAccess();
   return MA;
 }

 MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
-    MemoryAccess *StartingAccess, const MemoryLocation &) {
+    MemoryAccess *StartingAccess, const MemoryLocation &, BatchAAResults &) {
   if (auto *Use = dyn_cast<MemoryUse>(StartingAccess))
     return Use->getDefiningAccess();
   return StartingAccess;
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -149,7 +149,9 @@
 bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                            AAResults *AA) {
   MemorySSAWalker *Walker = MSSA->getWalker();
-  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
+  BatchAAResults BAA(*AA);
+  SmallVector<MemoryAccess *> WorkList{
+      Walker->getClobberingMemoryAccess(Load, BAA)};
   SmallSet<MemoryAccess *, 8> Visited;
   MemoryLocation Loc(MemoryLocation::get(Load));
@@ -179,8 +181,8 @@
       return true;
     }

-    WorkList.push_back(
-        Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
+    WorkList.push_back(Walker->getClobberingMemoryAccess(
+        Def->getDefiningAccess(), Loc, BAA));
     continue;
   }

Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1833,7 +1833,7 @@
       // can modify the memory location.
       if (InitC && InitC == StoredConstant)
         return MSSA.isLiveOnEntryDef(
-            MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def));
+            MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
     }

     if (!Store)
@@ -1852,7 +1852,7 @@
     // does not match LoadAccess.
     SetVector<MemoryAccess *> ToCheck;
     MemoryAccess *Current =
-        MSSA.getWalker()->getClobberingMemoryAccess(Def);
+        MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
     // We don't want to bail when we run into the store memory def. But,
     // the phi access may point to it. So, pretend like we've already
     // checked it.
Index: llvm/lib/Transforms/Scalar/EarlyCSE.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1052,7 +1052,8 @@
       // clobbers LaterInst.
       MemoryAccess *LaterDef;
       if (ClobberCounter < EarlyCSEMssaOptCap) {
-        LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+        BatchAAResults BAA(MSSA->getAA());
+        LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst, BAA);
         ClobberCounter++;
       } else
         LaterDef = LaterMA->getDefiningAccess();
Index: llvm/lib/Transforms/Scalar/LICM.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LICM.cpp
+++ llvm/lib/Transforms/Scalar/LICM.cpp
@@ -162,8 +162,9 @@
     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
     AssumptionCache *AC, bool AllowSpeculation);
-static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
-                                     Loop *CurLoop, Instruction &I,
+static bool pointerInvalidatedByLoop(MemorySSA *MSSA, BatchAAResults &BAA,
+                                     MemoryUse *MU, Loop *CurLoop,
+                                     Instruction &I,
                                      SinkAndHoistLICMFlags &Flags);
 static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
                                       MemoryUse &MU);
@@ -1173,8 +1174,10 @@
     if (isLoadInvariantInLoop(LI, DT, CurLoop))
       return true;

+    BatchAAResults BAA(*AA);
     bool Invalidated = pointerInvalidatedByLoop(
-        MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, Flags);
+        MSSA, BAA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I,
+        Flags);
     // Check loop-invariant address because this may also be a sinkable load
     // whose address is not necessarily loop-invariant.
     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -1221,11 +1224,12 @@
     // writes to this memory in the loop, we can hoist or sink.
     if (Behavior.onlyAccessesArgPointees()) {
       // TODO: expand to writeable arguments
+      BatchAAResults BAA(*AA);
       for (Value *Op : CI->args())
         if (Op->getType()->isPointerTy() &&
             pointerInvalidatedByLoop(
-                MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
-                Flags))
+                MSSA, BAA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)),
+                CurLoop, I, Flags))
           return false;
       return true;
     }
@@ -1264,6 +1268,7 @@
     // Could do better here, but this is conservatively correct.
     // TODO: Cache set of Uses on the first walk in runOnLoop, update when
    // moving accesses. Can also extend to dominating uses.
+    BatchAAResults BAA(*AA);
     auto *SIMD = MSSA->getMemoryAccess(SI);
     for (auto *BB : CurLoop->getBlocks())
       if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
@@ -1290,13 +1295,14 @@
             // Check if the call may read from the memory location written
             // to by SI. Check CI's attributes and arguments; the number of
             // such checks performed is limited above by NoOfMemAccTooLarge.
-            ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
+            ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
             if (isModOrRefSet(MRI))
               return false;
           }
         }
       }
-    auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
+    auto *Source =
+        MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI, BAA);
     Flags.incrementClobberingCalls();
     // If there are no clobbering Defs in the loop, store is safe to hoist.
     return MSSA->isLiveOnEntryDef(Source) ||
@@ -2301,8 +2307,9 @@
   return Result;
 }

-static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
-                                     Loop *CurLoop, Instruction &I,
+static bool pointerInvalidatedByLoop(MemorySSA *MSSA, BatchAAResults &BAA,
+                                     MemoryUse *MU, Loop *CurLoop,
+                                     Instruction &I,
                                      SinkAndHoistLICMFlags &Flags) {
   // For hoisting, use the walker to determine safety
   if (!Flags.getIsSink()) {
@@ -2311,7 +2318,7 @@
     if (Flags.tooManyClobberingCalls())
       Source = MU->getDefiningAccess();
     else {
-      Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
+      Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU, BAA);
       Flags.incrementClobberingCalls();
     }
     return !MSSA->isLiveOnEntryDef(Source) &&
Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -347,7 +347,7 @@

 // Check for mod of Loc between Start and End, excluding both boundaries.
 // Start and End can be in different blocks.
-static bool writtenBetween(MemorySSA *MSSA, AliasAnalysis &AA,
+static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
                            MemoryLocation Loc, const MemoryUseOrDef *Start,
                            const MemoryUseOrDef *End) {
   if (isa<MemoryUse>(End)) {
@@ -368,7 +368,7 @@

   // TODO: Only walk until we hit Start.
   MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
-      End->getDefiningAccess(), Loc);
+      End->getDefiningAccess(), Loc, AA);
   return !MSSA->dominates(Clobber, Start);
 }

@@ -766,8 +766,9 @@
     auto GetCall = [&]() -> CallInst * {
       // We defer this expensive clobber walk until the cheap checks
       // have been done on the source inside performCallSlotOptzn.
+      BatchAAResults BAA(*AA);
       if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
-              MSSA->getWalker()->getClobberingMemoryAccess(LI)))
+              MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
         return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
       return nullptr;
     };
@@ -1146,7 +1147,8 @@
   // then we could still perform the xform by moving M up to the first memcpy.
   // TODO: It would be sufficient to check the MDep source up to the memcpy
   // size of M, rather than MDep.
-  if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
+  BatchAAResults BAA(*AA);
+  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                      MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
     return false;

@@ -1369,8 +1371,9 @@
   MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
   bool CanReduceSize = false;
   MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
+  BatchAAResults BAA(*AA);
   MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
-      MemSetAccess->getDefiningAccess(), MemCpyLoc);
+      MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
   if (auto *MD = dyn_cast<MemoryDef>(Clobber))
     if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
       CanReduceSize = true;
@@ -1429,12 +1432,13 @@
     return true;
   }

+  BatchAAResults BAA(*AA);
   MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
   // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
   MemoryAccess *AnyClobber = MA->getDefiningAccess();
   MemoryLocation DestLoc = MemoryLocation::getForDest(M);
   const MemoryAccess *DestClobber =
-      MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
+      MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);

   // Try to turn a partially redundant memset + memcpy into
   // memcpy + smaller memset. We don't need the memcpy size for this.
@@ -1447,7 +1451,7 @@
     return true;

   MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
-      AnyClobber, MemoryLocation::getForSource(M));
+      AnyClobber, MemoryLocation::getForSource(M), BAA);

   // There are four possible optimizations we can do for memcpy:
   //   a) memcpy-memcpy xform which exposes redundance for DSE.
@@ -1536,8 +1540,9 @@
   if (!CallAccess)
     return false;
   MemCpyInst *MDep = nullptr;
+  BatchAAResults BAA(*AA);
   MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
-      CallAccess->getDefiningAccess(), Loc);
+      CallAccess->getDefiningAccess(), Loc, BAA);
   if (auto *MD = dyn_cast<MemoryDef>(Clobber))
     MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

@@ -1578,7 +1583,7 @@
   //    *b = 42;
   //    foo(*a)
   // It would be invalid to transform the second memcpy into foo(*b).
-  if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
+  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                      MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
     return false;

Index: llvm/lib/Transforms/Scalar/NewGVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1394,8 +1394,12 @@
   auto *StoreAccess = getMemoryAccess(SI);
   // Get the expression, if any, for the RHS of the MemoryDef.
   const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess();
-  if (EnableStoreRefinement)
-    StoreRHS = MSSAWalker->getClobberingMemoryAccess(StoreAccess);
+  if (EnableStoreRefinement) {
+    // TODO: It should be possible to use a single BatchAAResults instance for
+    // the whole NewGVN analysis phase, as it does not modify IR.
+    BatchAAResults BAA(*AA);
+    StoreRHS = MSSAWalker->getClobberingMemoryAccess(StoreAccess, BAA);
+  }
   // If we bypassed the use-def chains, make sure we add a use.
   StoreRHS = lookupMemoryLeader(StoreRHS);
   if (StoreRHS != StoreAccess->getDefiningAccess())
@@ -1526,8 +1530,9 @@
   if (isa<UndefValue>(LoadAddressLeader))
     return createConstantExpression(PoisonValue::get(LI->getType()));
   MemoryAccess *OriginalAccess = getMemoryAccess(I);
+  BatchAAResults BAA(*AA);
   MemoryAccess *DefiningAccess =
-      MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
+      MSSAWalker->getClobberingMemoryAccess(OriginalAccess, BAA);

   if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
     if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
@@ -1623,7 +1628,8 @@
           createCallExpression(CI, TOPClass->getMemoryLeader()));
     } else if (AA->onlyReadsMemory(CI)) {
       if (auto *MA = MSSA->getMemoryAccess(CI)) {
-        auto *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(MA);
+        BatchAAResults BAA(*AA);
+        auto *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(MA, BAA);
         return ExprResult::some(createCallExpression(CI, DefiningAccess));
       } else // MSSA determined that CI does not access memory.
         return ExprResult::some(
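Note (illustrative only, not part of the patch): a minimal sketch of the calling convention this change introduces, as the patch applies it in each pass — the caller constructs one BatchAAResults over its AAResults while the IR is not being mutated and threads it through every getClobberingMemoryAccess query. The helper name loadClobberIsLiveOnEntry is hypothetical; the MemorySSA/BatchAAResults calls are the ones used above.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: returns true if the load's nearest clobber is the
// live-on-entry def, i.e. nothing in the function clobbers the loaded memory.
static bool loadClobberIsLiveOnEntry(LoadInst *LI, MemorySSA &MSSA,
                                     AAResults &AA) {
  // One batched AA instance per walk; it caches alias queries and must not
  // outlive modifications to the IR.
  BatchAAResults BAA(AA);
  MemoryAccess *MA = MSSA.getMemoryAccess(LI);
  if (!MA)
    return false; // MemorySSA does not model this instruction.
  MemoryAccess *Clobber = MSSA.getWalker()->getClobberingMemoryAccess(MA, BAA);
  return MSSA.isLiveOnEntryDef(Clobber);
}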