Index: include/llvm/Analysis/LoopAnalysisManager.h =================================================================== --- include/llvm/Analysis/LoopAnalysisManager.h +++ include/llvm/Analysis/LoopAnalysisManager.h @@ -37,6 +37,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -58,6 +59,7 @@ ScalarEvolution &SE; TargetLibraryInfo &TLI; TargetTransformInfo &TTI; + MemorySSA &MSSA; }; /// Extern template declaration for the analysis set for this IR unit. Index: include/llvm/Analysis/MemorySSAAliasSets.h =================================================================== --- /dev/null +++ include/llvm/Analysis/MemorySSAAliasSets.h @@ -0,0 +1,314 @@ +//===-- MemorySSAAliasSets.h - Alias sets using MemorySSA -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This a a helper class that creates alias sets using MemorySSA, each +// set being marked with properties that enablei/disable promotion. +// It is used for promotion in the Loop Invariant Code Motion Pass. +// It also stores insertion points into MemorySSA for a particular loop. +// These need to be reset if the sets are reused. +//===----------------------------------------------------------------------===// +#ifndef LLVM_ANALYSIS_MEMORYSSA_ALIASSETS_H +#define LLVM_ANALYSIS_MEMORYSSA_ALIASSETS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace llvm { +class MemorySSAAliasSets { +public: + MemorySSA *MSSA; + MemorySSAUpdater *MSSAUpdater; + +private: + Loop *L; + LoopInfo *LI; + AliasAnalysis *AA; + DominatorTree *DT; + SmallVector MSSAInsertPts; + + class SingleMustAliasVec { + SmallSetVector Vec; + MemoryLocation Loc; + + public: + // CanNeverPromote trumps everything else + // Set is either may-alias, has a volatile store or aliases an instruction + // witout a MemoryLocation. + bool CanNeverPromote = false; + bool LoopInvariantAccess = false; + bool HasAGoodStore = false; + SingleMustAliasVec(MemoryLocation loc) : Loc(loc), CanNeverPromote(false) { + Vec.insert(const_cast(Loc.Ptr)); + } + void addMemoryLocation(Optional MemLoc) { + if (MemLoc == None) + return; + Vec.insert(const_cast(MemLoc.getValue().Ptr)); + } + MemoryLocation getMemoryLocation() { return Loc; } + SmallSetVector *getVecRef() { return &Vec; } + bool isGoodForPromotion() { + return !CanNeverPromote && LoopInvariantAccess && HasAGoodStore; + } + void setMemoryLocation(Value *LI, Value *Replacement) { + Loc = MemoryLocation(Replacement, Loc.Size, Loc.AATags); + Vec.remove(LI); + Vec.insert(Replacement); + } + }; + + SmallVector AllSets; + DenseMap StoredDefs; + + // FIXME: This should not need to query AA, MemorySSA should cache this. + AliasResult alias(Optional MemLocI, Instruction *InstrI, + MemoryLocation &MemLocJ) { + if (MemLocI == None) { + ModRefInfo ModRef = AA->getModRefInfo(InstrI, MemLocJ); + if (bool(ModRef & MRI_Mod)) { + return MustAlias; + } + if (bool(ModRef & MRI_Ref)) { + return MayAlias; + } + return NoAlias; + } else { + AliasResult Result = AA->alias(MemLocI.getValue(), MemLocJ); + return Result; + } + } + + void addToAliasSets(const MemoryUse *Use, + SmallPtrSetImpl &Processed) { + Processed.insert(Use); + // Follow-up to the BIG NOTE: + // Because we allowed some imprecision, it is possible that the + // definingAccess is not the clobberingAccess. We *really* want the + // correct aliasing definition here, otherwise we're creating a set with + // things that don't alias. + // This is currently visible in "LoopSimplify/ashr-crash.ll"(Puts @a,@c,@d + // in the same set because its uses all point to the newly promoted Def) + // We *have to* use getClobberingAccess here for correctness! + // To avoid incurring the performance penalty of complete info, we can + // either: + // a) Create sets in BB order, without walking MemoryAccess uses + // recursively. Then, we'll need to check alias information with all + // existing sets and cap the number of sets created, much like the + // AliasSetTracker :/ Working version available, but, well, yuck! b) Figure + // out why the MemorySSA uses are not updated to be accurate after a + // promotion. The problem here is that when MemorySSA reaches the + // "memssa-check-limit", results will still be incorrect. + // c) Add another way to cap this in MemorySSA. + // e.g. Query MemorySSA on how many calls to getClobberingMemoryAccess have + // passed the "memssa-check-limit", and cap that. + + // MemoryDef* DefiningAcc = + // dyn_cast_or_null(Use->getDefiningAccess()); + MemoryDef *DefiningAcc = dyn_cast_or_null( + MSSA->getWalker()->getClobberingMemoryAccess( + const_cast(Use))); + if (!DefiningAcc || !DefiningAcc->getMemoryInst()) + return; + + SingleMustAliasVec *OneAliasSet = StoredDefs[DefiningAcc]; + if (!OneAliasSet) + return; + + Instruction *InstrI = Use->getMemoryInst(); + Optional MemLocI = MemoryLocation::getOrNone(InstrI); + MemoryLocation MemLocJ = OneAliasSet->getMemoryLocation(); + AliasResult Result = alias(MemLocI, InstrI, MemLocJ); + if (Result == MayAlias) { + OneAliasSet->CanNeverPromote = true; + } + OneAliasSet->addMemoryLocation(MemLocI); + } + + void addDefsHelper(MemoryLocation &MemLocJ, + const MemoryAccess *DefOrUseOfADef, + SingleMustAliasVec *newEntry, + SmallPtrSetImpl &Processed, + SmallPtrSetImpl &Phis, + bool skipUses) { + for (const User *U : DefOrUseOfADef->users()) { + /// No need ot look at ProcessedPhis; need to looks at ProcessedDefs! + if (Phis.count(dyn_cast(U))) + continue; + + /// Process MemoryPhis + const MemoryPhi *Phi = dyn_cast(U); + if (Phi && L->contains(Phi->getBlock())) { + Phis.insert(Phi); + addDefsHelper(MemLocJ, Phi, newEntry, Processed, Phis, true); + continue; + } + + const MemoryUseOrDef *UseOrDef = dyn_cast(U); + if (!UseOrDef || !UseOrDef->getMemoryInst()) { + continue; /*FIXME: What does this mean? U=unknown value.*/ + } + Instruction *InstrI = UseOrDef->getMemoryInst(); + if (!InstrI || !L->contains(InstrI)) + continue; + Optional MemLocI = MemoryLocation::getOrNone(InstrI); + bool LoopInvariantAccess = + ((MemLocI != None) && L->isLoopInvariant(MemLocI.getValue().Ptr)); + + /// Process MemoryDefs + const MemoryDef *UserDef = dyn_cast_or_null(UseOrDef); + if (UserDef) { + AliasResult Result = alias(MemLocI, InstrI, MemLocJ); + if (Result == MayAlias || Result == MustAlias) { + Processed.insert(UseOrDef); + if (MemLocI != None) + newEntry->addMemoryLocation(MemLocI.getValue()); + else // Cannot promote stuff with no memory locations + newEntry->CanNeverPromote = true; + StoredDefs[UseOrDef] = newEntry; + + StoreInst *USIn = dyn_cast_or_null(InstrI); + + // TODO: minor optimization: only check if !CanNeverPromote + if (Result == MayAlias || (USIn && USIn->isVolatile())) + newEntry->CanNeverPromote = true; + if (USIn && !USIn->isVolatile()) + newEntry->HasAGoodStore = true; + newEntry->LoopInvariantAccess |= LoopInvariantAccess; + } else { + addDefsHelper(MemLocJ, UserDef, newEntry, Processed, Phis, true); + } + continue; + } + + if (skipUses) + continue; + + /// Process MemoryUses + const MemoryUse *UserUse = dyn_cast_or_null(UseOrDef); + if (UserUse) { + Processed.insert(UseOrDef); + if (MemLocI != None) + newEntry->addMemoryLocation(MemLocI.getValue()); + + if (!newEntry->CanNeverPromote) { + AliasResult Result = alias(MemLocI, InstrI, MemLocJ); + if (Result == MayAlias) + newEntry->CanNeverPromote = true; + newEntry->LoopInvariantAccess |= LoopInvariantAccess; + } + } + } + } + + void addToAliasSets(const MemoryDef *Def, + SmallPtrSetImpl &Processed) { + Processed.insert(Def); + Instruction *InstrJ = Def->getMemoryInst(); + Optional OptMemLocJ = MemoryLocation::getOrNone(InstrJ); + if (OptMemLocJ == None) + return; + MemoryLocation MemLocJ = OptMemLocJ.getValue(); + + StoreInst *SIn = dyn_cast_or_null(InstrJ); + SingleMustAliasVec *newEntry = new SingleMustAliasVec(MemLocJ); + newEntry->CanNeverPromote = SIn && SIn->isVolatile(); + newEntry->LoopInvariantAccess = L->isLoopInvariant(MemLocJ.Ptr); + newEntry->HasAGoodStore = SIn && !SIn->isVolatile(); + + SmallPtrSet Phis; + addDefsHelper(MemLocJ, Def, newEntry, Processed, Phis, false); + AllSets.push_back(newEntry); + } + +public: + MemorySSAAliasSets(MemorySSA *mssa, MemorySSAUpdater *mssaupdater, Loop *l, + LoopInfo *li, AliasAnalysis *aa, DominatorTree *dt, + SmallVectorImpl &ExitBlocks) + : MSSA(mssa), MSSAUpdater(mssaupdater), L(l), LI(li), AA(aa), DT(dt), + AllSets() { + resetMSSAInsertPts(ExitBlocks); + } + + void resetMSSAInsertPts(SmallVectorImpl &ExitBlocks) { + // TODO: Better way to fill this in. + MSSAInsertPts.reserve(ExitBlocks.size()); + for (unsigned I = 0; I < ExitBlocks.size(); ++I) { + MSSAInsertPts.push_back(nullptr); + } + } + + // Creates all alias sets in L + void createSets() { + SmallPtrSet Processed; + addToSets(DT->getNode(L->getHeader()), Processed); + } + + void addToSets(DomTreeNode *N, + SmallPtrSetImpl &Processed) { + // We want to visit parents before children. We will enque all the parents + // before their children in the worklist and process the worklist in order. + SmallVector Worklist = collectChildrenInLoop(N, L); + + for (DomTreeNode *DTN : Worklist) { + BasicBlock *BB = DTN->getBlock(); + auto *Accesses = MSSA->getBlockAccesses(BB); + if (Accesses) + for (auto &Acc : make_range(Accesses->begin(), Accesses->end())) { + if (Processed.count(&Acc)) + continue; + // Process uses + const MemoryUse *Use = dyn_cast_or_null(&Acc); + if (Use) + addToAliasSets(Use, Processed); + // Process defs + const MemoryDef *Def = dyn_cast_or_null(&Acc); + if (Def) + addToAliasSets(Def, Processed); + } + } + } + + SmallSetVector *getSetIfPromotableOrNull(unsigned I) { + if (AllSets[I]->isGoodForPromotion()) { + SmallSetVector *MustAliasPtrs = AllSets[I]->getVecRef(); + assert(MustAliasPtrs->size() > 0 && "Alias set cannot be empty!!!\n"); + return MustAliasPtrs; + } + return nullptr; + } + + unsigned size() { return AllSets.size(); } + + void replace(Value *LoadI, Value *Replacement) { + for (auto OneAliasSet : AllSets) { + MemoryLocation MemLocJ = OneAliasSet->getMemoryLocation(); + if (MemLocJ.Ptr == LoadI) { + OneAliasSet->setMemoryLocation(LoadI, Replacement); + if (!OneAliasSet->CanNeverPromote && OneAliasSet->HasAGoodStore && + !OneAliasSet->LoopInvariantAccess && + L->isLoopInvariant(Replacement)) { + OneAliasSet->LoopInvariantAccess = true; + } + } + } + } + + MemoryAccess *getInsertPoint(int I) { return MSSAInsertPts[I]; } + void setInsertPoint(MemoryAccess *Acc, int I) { MSSAInsertPts[I] = Acc; } +}; +} // end namespace llvm + +#endif // LLVM_ANALYSIS_MEMORYSSA_ALIASSETS_H Index: include/llvm/Support/Debug.h =================================================================== --- include/llvm/Support/Debug.h +++ include/llvm/Support/Debug.h @@ -93,6 +93,14 @@ /// extern bool VerifyLoopInfo; +/// Enables memory ssa as a dependency for loop passes. +/// +extern bool EnableMSSALoopDep; + +/// Enables LICM to use MSSA if available +/// +extern bool UseLICMwMSSA; + ///\} /// EnableDebugBuffering - This defaults to false. If true, the debug Index: include/llvm/Transforms/Scalar/LoopPassManager.h =================================================================== --- include/llvm/Transforms/Scalar/LoopPassManager.h +++ include/llvm/Transforms/Scalar/LoopPassManager.h @@ -271,13 +271,15 @@ return PA; // Get the analysis results needed by loop passes. - LoopStandardAnalysisResults LAR = {AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F)}; + LoopStandardAnalysisResults LAR = { + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F).getMSSA()}; // Setup the loop analysis manager from its proxy. It is important that // this is only done when there are loops to process and we have built the @@ -345,6 +347,9 @@ PA.preserve(); PA.preserve(); PA.preserve(); + if (EnableMSSALoopDep) { + PA.preserve(); + } // FIXME: What we really want to do here is preserve an AA category, but // that concept doesn't exist yet. PA.preserve(); Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -38,6 +38,9 @@ class DataLayout; class Loop; class LoopInfo; +class MemorySSA; +class MemorySSAUpdater; +class MemorySSAAliasSets; class OptimizationRemarkEmitter; class PredicatedScalarEvolution; class PredIteratorCache; @@ -425,7 +428,8 @@ /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, - LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); + MemorySSAUpdater *, MemorySSA *, LoopSafetyInfo *, + OptimizationRemarkEmitter *ORE); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth @@ -437,7 +441,8 @@ /// ORE. It returns changed status. bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, - LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); + MemorySSAUpdater *, MemorySSA *, LoopSafetyInfo *, + OptimizationRemarkEmitter *ORE); /// \brief Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over @@ -446,14 +451,13 @@ /// vector, loop exit blocks insertion point vector, PredIteratorCache, /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions /// of the loop and loop safety information as arguments. +/// The AliasSetTracker and MemorySSAAliasSets are alternatives to one another. /// Diagnostics is emitted via \p ORE. It returns changed status. -bool promoteLoopAccessesToScalars(const SmallSetVector &, - SmallVectorImpl &, - SmallVectorImpl &, - PredIteratorCache &, LoopInfo *, - DominatorTree *, const TargetLibraryInfo *, - Loop *, AliasSetTracker *, LoopSafetyInfo *, - OptimizationRemarkEmitter *); +bool promoteLoopAccessesToScalars( + const SmallSetVector &, SmallVectorImpl &, + SmallVectorImpl &, PredIteratorCache &, LoopInfo *, + DominatorTree *, const TargetLibraryInfo *, Loop *, AliasSetTracker *, + MemorySSAAliasSets *, LoopSafetyInfo *, OptimizationRemarkEmitter *); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. @@ -506,9 +510,9 @@ /// instructions from loop body to preheader/exit. Check if the instruction /// can execute speculatively. /// If \p ORE is set use it to emit optimization remarks. -bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, - Loop *CurLoop, AliasSetTracker *CurAST, - LoopSafetyInfo *SafetyInfo, +bool canSinkOrHoistInst(Instruction &, AAResults *, DominatorTree *, Loop *, + AliasSetTracker *, MemorySSAUpdater *, MemorySSA *, + LoopSafetyInfo *, OptimizationRemarkEmitter *ORE = nullptr); /// Generates a vector reduction using shufflevectors to reduce the value. Index: lib/Analysis/LoopAnalysisManager.cpp =================================================================== --- lib/Analysis/LoopAnalysisManager.cpp +++ lib/Analysis/LoopAnalysisManager.cpp @@ -11,6 +11,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" @@ -45,12 +46,16 @@ // loop analyses declare any dependencies on these and use the more general // invalidation logic below to act on that. auto PAC = PA.getChecker(); + bool invalidateMemorySSAAnalysis = false; + if (UseLICMwMSSA && !EnableMSSALoopDep) + invalidateMemorySSAAnalysis = Inv.invalidate(F, PA); if (!(PAC.preserved() || PAC.preservedSet>()) || Inv.invalidate(F, PA) || Inv.invalidate(F, PA) || Inv.invalidate(F, PA) || Inv.invalidate(F, PA) || - Inv.invalidate(F, PA)) { + Inv.invalidate(F, PA) || + invalidateMemorySSAAnalysis) { // Note that the LoopInfo may be stale at this point, however the loop // objects themselves remain the only viable keys that could be in the // analysis manager's cache. So we just walk the keys and forcibly clear @@ -135,6 +140,9 @@ PA.preserve(); PA.preserve(); PA.preserve(); + if (EnableMSSALoopDep) { + PA.preserve(); + } // TODO: What we really want to do here is preserve an AA category, but that // concept doesn't exist yet. PA.preserve(); Index: lib/Analysis/MemorySSA.cpp =================================================================== --- lib/Analysis/MemorySSA.cpp +++ lib/Analysis/MemorySSA.cpp @@ -55,6 +55,8 @@ #include using namespace llvm; +bool llvm::EnableMSSALoopDep = false; +bool llvm::UseLICMwMSSA = true; #define DEBUG_TYPE "memoryssa" Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -42,6 +42,9 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAAliasSets.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" @@ -91,10 +94,12 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo); static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool isSafeToExecuteUnconditionally(Instruction &Inst, @@ -106,15 +111,21 @@ static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST); +static bool pointerInvalidatedByLoopWithMSSA(Value *V, MemorySSA *MSSA, + MemoryUseOrDef *MUD, + Loop *CurLoop); +static void removeFromMSSA(Instruction *I, MemorySSAUpdater *MSSAUpdater, + MemorySSA *MSSA); static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo); namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, - TargetLibraryInfo *TLI, ScalarEvolution *SE, + TargetLibraryInfo *TLI, ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST); DenseMap &getLoopToAliasSetMap() { @@ -126,6 +137,7 @@ AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA); + MemorySSAUpdater *MSSAUpdater = nullptr; }; struct LegacyLICMPass : public LoopPass { @@ -150,12 +162,13 @@ // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); - return LICM.runOnLoop(L, - &getAnalysis().getAAResults(), - &getAnalysis().getLoopInfo(), - &getAnalysis().getDomTree(), - &getAnalysis().getTLI(), - SE ? &SE->getSE() : nullptr, &ORE, false); + return LICM.runOnLoop( + L, &getAnalysis().getAAResults(), + &getAnalysis().getLoopInfo(), + &getAnalysis().getDomTree(), + &getAnalysis().getTLI(), + SE ? &SE->getSE() : nullptr, + &getAnalysis().getMSSA(), &ORE, false); } /// This transformation requires natural loop information & requires that @@ -164,6 +177,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); + AU.addRequired(); getLoopAnalysisUsage(AU); } @@ -204,7 +218,11 @@ "cached at a higher level"); LoopInvariantCodeMotion LICM; - if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true)) + // MemorySSA *MSSA = EnableMSSALoopDep ? (&AR.MSSA) : nullptr; + MemorySSA *MSSA = UseLICMwMSSA ? (&AR.MSSA) : nullptr; + // MemorySSA *MSSA = &AR.MSSA; + if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, MSSA, ORE, + true)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); @@ -217,6 +235,7 @@ false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) @@ -231,14 +250,24 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, - ScalarEvolution *SE, + ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); - AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA); + AliasSetTracker *CurAST = nullptr; + if (!MSSA) { + // Note: MSSA disabled in the new PM until all loop passes preserve MSSA + DEBUG(dbgs() << "LICM: Using Alias Set Tracker\n"); + CurAST = collectAliasInfoForLoop(L, LI, AA); + MSSAUpdater = nullptr; + } else { + DEBUG(dbgs() << "LICM: Using MemorySSA\n"); + CurAST = nullptr; + MSSAUpdater = new MemorySSAUpdater(MSSA); + } // Get the preheader block to move instructions into... BasicBlock *Preheader = L->getLoopPreheader(); @@ -259,10 +288,11 @@ // if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, - CurAST, &SafetyInfo, ORE); + CurAST, MSSAUpdater, MSSA, &SafetyInfo, ORE); + if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, - CurAST, &SafetyInfo, ORE); + CurAST, MSSAUpdater, MSSA, &SafetyInfo, ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -291,26 +321,41 @@ bool Promoted = false; - // Loop over all of the alias sets in the tracker object. - for (AliasSet &AS : *CurAST) { - // We can promote this alias set if it has a store, if it is a "Must" - // alias set, if the pointer is loop invariant, and if we are not - // eliminating any volatile loads or stores. - if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || - AS.isVolatile() || !L->isLoopInvariant(AS.begin()->getValue())) - continue; - - assert( - !AS.empty() && - "Must alias set should have at least one pointer element in it!"); - - SmallSetVector PointerMustAliases; - for (const auto &ASI : AS) - PointerMustAliases.insert(ASI.getValue()); - - Promoted |= promoteLoopAccessesToScalars(PointerMustAliases, ExitBlocks, - InsertPts, PIC, LI, DT, TLI, L, - CurAST, &SafetyInfo, ORE); + if (CurAST) { + // Loop over all of the alias sets in the tracker object. + for (AliasSet &AS : *CurAST) { + // We can promote this alias set if it has a store, if it is a "Must" + // alias set, if the pointer is loop invariant, and if we are not + // eliminating any volatile loads or stores. + if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || + AS.isVolatile() || !L->isLoopInvariant(AS.begin()->getValue())) + continue; + + assert( + !AS.empty() && + "Must alias set should have at least one pointer element in it!"); + + SmallSetVector PointerMustAliases; + for (const auto &ASI : AS) + PointerMustAliases.insert(ASI.getValue()); + + Promoted |= promoteLoopAccessesToScalars( + PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, + CurAST, nullptr, &SafetyInfo, ORE); + } + } else { + MemorySSAAliasSets AS(MSSA, MSSAUpdater, L, LI, AA, DT, ExitBlocks); + AS.createSets(); + + for (unsigned I = 0; I < AS.size(); ++I) { + SmallSetVector *MustAliasPtrs = + AS.getSetIfPromotableOrNull(I); + if (MustAliasPtrs) { + Promoted |= promoteLoopAccessesToScalars( + *MustAliasPtrs, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, + CurAST, &AS, &SafetyInfo, ORE); + } + } } // Once we have promoted values across the loop body we have to @@ -333,12 +378,15 @@ assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) && "Parent loop not left in LCSSA form after LICM!"); - // If this loop is nested inside of another one, save the alias information - // for when we process the outer loop. - if (L->getParentLoop() && !DeleteAST) - LoopToAliasSetMap[L] = CurAST; - else - delete CurAST; + if (CurAST) { + // If this loop is nested inside of another one, save the alias information + // for when we process the outer loop. + if (L->getParentLoop() && !DeleteAST) + LoopToAliasSetMap[L] = CurAST; + else + delete CurAST; + } else + delete MSSAUpdater; if (Changed && SE) SE->forgetLoopDispositions(L); @@ -352,12 +400,15 @@ /// bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, + AliasSetTracker *CurAST, MemorySSAUpdater *MSSAUpdater, + MemorySSA *MSSA, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && SafetyInfo != nullptr && + "Unexpected input to sinkRegion"); + assert((CurAST != nullptr || (MSSAUpdater != nullptr && MSSA != nullptr)) && "Unexpected input to sinkRegion"); // We want to visit children before parents. We will enque all the parents @@ -381,7 +432,10 @@ if (isInstructionTriviallyDead(&I, TLI)) { DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); ++II; - CurAST->deleteValue(&I); + if (CurAST) + CurAST->deleteValue(&I); + if (MSSA) + removeFromMSSA(&I, MSSAUpdater, MSSA); I.eraseFromParent(); Changed = true; continue; @@ -393,9 +447,11 @@ // operands of the instruction are loop invariant. // if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAUpdater, MSSA, + SafetyInfo, ORE)) { ++II; - Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE); + Changed |= sink(I, LI, DT, CurLoop, CurAST, MSSAUpdater, MSSA, + SafetyInfo, ORE); } } } @@ -409,11 +465,14 @@ /// bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, + AliasSetTracker *CurAST, MemorySSAUpdater *MSSAUpdater, + MemorySSA *MSSA, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && SafetyInfo != nullptr && + "Unexpected input to hoistRegion"); + assert((CurAST != nullptr || (MSSAUpdater != nullptr && MSSA != nullptr)) && "Unexpected input to hoistRegion"); // We want to visit parents before children. We will enque all the parents @@ -434,10 +493,17 @@ if (Constant *C = ConstantFoldInstruction( &I, I.getModule()->getDataLayout(), TLI)) { DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); - CurAST->copyValue(&I, C); + if (CurAST) + CurAST->copyValue(&I, C); + // TODO: update sets if built beforehand; otherwise, no-op + if (MSSA) { + } I.replaceAllUsesWith(C); if (isInstructionTriviallyDead(&I, TLI)) { - CurAST->deleteValue(&I); + if (CurAST) + CurAST->deleteValue(&I); + if (MSSA) + removeFromMSSA(&I, MSSAUpdater, MSSA); I.eraseFromParent(); } Changed = true; @@ -462,7 +528,8 @@ I.replaceAllUsesWith(Product); I.eraseFromParent(); - hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); + hoist(*ReciprocalDivisor, DT, CurLoop, MSSAUpdater, MSSA, SafetyInfo, + ORE); Changed = true; continue; } @@ -472,11 +539,12 @@ // if it is safe to hoist the instruction. // if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAUpdater, MSSA, + SafetyInfo, ORE) && isSafeToExecuteUnconditionally( I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) - Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); + Changed |= hoist(I, DT, CurLoop, MSSAUpdater, MSSA, SafetyInfo, ORE); } } @@ -575,6 +643,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Loads have extra constraints we have to verify before we can hoist them. @@ -601,8 +670,14 @@ AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); - bool Invalidated = - pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); + bool Invalidated = false; + if (CurAST) + Invalidated = + pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); + else // if (MSSA) + Invalidated = pointerInvalidatedByLoopWithMSSA( + LI->getOperand(0), MSSA, MSSA->getMemoryAccess(LI), CurLoop); + // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -630,20 +705,38 @@ // it's arguments with arbitrary offsets. If we can prove there are no // writes to this memory in the loop, we can hoist or sink. if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { - for (Value *Op : CI->arg_operands()) - if (Op->getType()->isPointerTy() && - pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize, - AAMDNodes(), CurAST)) + for (Value *Op : CI->arg_operands()) { + bool Invalidated = false; + if (CurAST) + Invalidated = pointerInvalidatedByLoop( + Op, MemoryLocation::UnknownSize, AAMDNodes(), CurAST); + else // if (MSSA) + Invalidated = pointerInvalidatedByLoopWithMSSA( + Op, MSSA, MSSA->getMemoryAccess(CI), CurLoop); + if (Op->getType()->isPointerTy() && Invalidated) return false; + } return true; } // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. bool FoundMod = false; - for (AliasSet &AS : *CurAST) { - if (!AS.isForwardingAliasSet() && AS.isMod()) { - FoundMod = true; - break; + if (CurAST) { + for (AliasSet &AS : *CurAST) { + if (!AS.isForwardingAliasSet() && AS.isMod()) { + FoundMod = true; + break; + } + } + } else if (MSSA) { + // TODO: need "if this call only reads from memory" + for (auto BBIt = (CurLoop->block_begin()); BBIt != CurLoop->block_end(); + BBIt++) { + auto *Defs = MSSA->getBlockDefs(*BBIt); + if (Defs) { + FoundMod = true; + break; + } } } if (!FoundMod) @@ -739,6 +832,7 @@ static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo) { Instruction *New; @@ -776,6 +870,23 @@ if (!I.getName().empty()) New->setName(I.getName() + ".le"); + MemoryAccess *OldMemAcc; + if (MSSA && (OldMemAcc = MSSA->getMemoryAccess(&I))) { + MemoryAccess *NewMemAcc = MSSAUpdater->createMemoryAccessInBB( + New, nullptr, New->getParent(), MemorySSA::Beginning); + MemoryDef *NewMemDef = dyn_cast_or_null(NewMemAcc); + if (NewMemDef) + MSSAUpdater->insertDef(NewMemDef, true); + else { + MemoryUse *NewMemUse = dyn_cast_or_null(NewMemAcc); + if (NewMemUse) { + MSSAUpdater->insertUse(NewMemUse); + // This is only called to properly update the defining access + MSSA->getWalker()->getClobberingMemoryAccess(NewMemUse); + } + } + } + // Build LCSSA PHI nodes for any in-loop operands. Note that this is // particularly cheap because we can rip off the PHI node that we're // replacing for the number and blocks of the predecessors. @@ -805,6 +916,7 @@ /// static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); @@ -860,14 +972,17 @@ if (It != SunkCopies.end()) New = It->second; else - New = SunkCopies[ExitBlock] = - CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo); + New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock( + I, *ExitBlock, *PN, MSSAUpdater, MSSA, LI, SafetyInfo); PN->replaceAllUsesWith(New); PN->eraseFromParent(); } - CurAST->deleteValue(&I); + if (CurAST) + CurAST->deleteValue(&I); + if (MSSA) + removeFromMSSA(&I, MSSAUpdater, MSSA); I.eraseFromParent(); return Changed; } @@ -876,6 +991,7 @@ /// is safe to hoist, this instruction is called to do the dirty work. /// static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { auto *Preheader = CurLoop->getLoopPreheader(); @@ -897,6 +1013,16 @@ // Move the new node to the Preheader, before its terminator. I.moveBefore(Preheader->getTerminator()); + if (MSSA) { + // Update MemorySSA if moving I just moved a load or store + MemoryUseOrDef *OldMemAcc = + dyn_cast_or_null(MSSA->getMemoryAccess(&I)); + if (OldMemAcc) { + MSSAUpdater->moveToPlace(OldMemAcc, Preheader, MemorySSA::End); + // This is only called to properly update the defining access + MSSA->getWalker()->getClobberingMemoryAccess(OldMemAcc); + } + } // Do not retain debug locations when we are moving instructions to different // basic blocks, because we want to avoid jumpy line tables. Calls, however, @@ -948,7 +1074,11 @@ SmallVectorImpl &LoopExitBlocks; SmallVectorImpl &LoopInsertPts; PredIteratorCache &PredCache; - AliasSetTracker &AST; + + // AST and AS are alternatives! + AliasSetTracker *AST; + MemorySSAAliasSets *AS; + LoopInfo &LI; DebugLoc DL; int Alignment; @@ -975,11 +1105,12 @@ const SmallSetVector &PMA, SmallVectorImpl &LEB, SmallVectorImpl &LIP, PredIteratorCache &PIC, - AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, - bool UnorderedAtomic, const AAMDNodes &AATags) + AliasSetTracker *ast, MemorySSAAliasSets *as, LoopInfo &li, + DebugLoc dl, int alignment, bool UnorderedAtomic, + const AAMDNodes &AATags) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), - LI(li), DL(std::move(dl)), Alignment(alignment), + AS(as), LI(li), DL(std::move(dl)), Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags) {} bool isInstInList(Instruction *I, @@ -1010,14 +1141,36 @@ NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); + if (AS) { + MemoryAccess *MSSAInsertPoint = AS->getInsertPoint(i); + MemoryAccess *NewMemAcc; + if (!MSSAInsertPoint) { + NewMemAcc = AS->MSSAUpdater->createMemoryAccessInBB( + NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning); + } else { + NewMemAcc = AS->MSSAUpdater->createMemoryAccessAfter(NewSI, nullptr, + MSSAInsertPoint); + } + AS->setInsertPoint(NewMemAcc, i); + AS->MSSAUpdater->insertDef(cast(NewMemAcc), true); + // FIXME: true for safety, false may still be correct. + } } } void replaceLoadWithValue(LoadInst *LI, Value *V) const override { // Update alias analysis. - AST.copyValue(LI, V); + if (AST) + AST->copyValue(LI, V); + if (AS) + AS->replace(LI, V); + } + void instructionDeleted(Instruction *I) const override { + if (AST) + AST->deleteValue(I); + if (AS) + removeFromMSSA(I, AS->MSSAUpdater, AS->MSSA); } - void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); } }; } // namespace @@ -1031,12 +1184,14 @@ SmallVectorImpl &ExitBlocks, SmallVectorImpl &InsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE) { + Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAAliasSets *AS, + LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && - CurAST != nullptr && SafetyInfo != nullptr && + SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars"); + assert((CurAST != nullptr || (AS != nullptr)) && + "Unexpected input to promoteLoopAccessesToScalars"); Value *SomePtr = *PointerMustAliases.begin(); BasicBlock *Preheader = CurLoop->getLoopPreheader(); @@ -1266,7 +1421,7 @@ SmallVector NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - InsertPts, PIC, *CurAST, *LI, DL, Alignment, + InsertPts, PIC, CurAST, AS, *LI, DL, Alignment, SawUnorderedAtomic, AATags); // Set up the preheader to have a definition of the value. It is the live-out @@ -1281,13 +1436,27 @@ PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); + MemoryAccess *PreheaderLoadMemoryAccess; + if (AS) { + // Update MSSA + PreheaderLoadMemoryAccess = AS->MSSAUpdater->createMemoryAccessInBB( + PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); + MemoryUse *NewMemUse = cast(PreheaderLoadMemoryAccess); + AS->MSSAUpdater->insertUse(NewMemUse); + // This is only called to properly update the defining access + AS->MSSA->getWalker()->getClobberingMemoryAccess(NewMemUse); + } + // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); // If the SSAUpdater didn't use the load in the preheader, just zap it now. - if (PreheaderLoad->use_empty()) + if (PreheaderLoad->use_empty()) { + if (AS) + AS->MSSAUpdater->removeMemoryAccess(PreheaderLoadMemoryAccess); PreheaderLoad->eraseFromParent(); + } return true; } @@ -1387,6 +1556,75 @@ return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); } +// Checks arguments of a Phi, so Defs and other Phis. +// returns true if Acc is not a load or a live on entry +static bool checkNotLoadOrLOE(MemoryAccess *Acc, MemorySSA *MSSA, + SmallPtrSetImpl &Processed) { + if (Processed.count(Acc)) + return true; + if (MSSA->isLiveOnEntryDef(Acc)) + return false; + + MemoryPhi *Phi = dyn_cast_or_null(Acc); + MemoryDef *Def = dyn_cast_or_null(Acc); + if (Phi) { + bool MSSAResult = false; + int NoArgsPhi = Phi->getNumIncomingValues(); + Processed.insert(Acc); + for (int I = 0; I < NoArgsPhi; I++) { + MemoryAccess *Arg = Phi->getIncomingValue(I); + MSSAResult |= checkNotLoadOrLOE(Arg, MSSA, Processed); + } + return MSSAResult; + } else if (Def) { + Instruction *AccI = Def->getMemoryInst(); + if (AccI && isa(AccI)) // a load is a def if volatile + return false; + } + return true; +} + +static bool pointerInvalidatedByLoopWithMSSAHelper(MemorySSA *MSSA, + MemoryUseOrDef *MUD, + Loop *CurLoop) { + bool MSSAResult = false; + MemoryUse *MU = nullptr; + // MUD is from a LoopInstr or CallInstr, so both Uses. + if (MUD && (MU = dyn_cast_or_null(MUD))) { + // BIG NOTE: getDefiningAccess may not be precise, since optimizeUses + // in MemorySSA is capped. This is good! It means that this becomes best + // effort, rather than incur a performance penalty for loops with large load + // count. Perfect accuracy can be obtained with: + // MSSA->getWalker()->getClobberingMemoryAccess(MU) + // For the updates done by sink/hoist/promotion (insertUse, moveToPlace), + // we *explicitly* call getClobberingMemoryAccesses. This will get perfect + // results for small tests, and imprecise but fast for large ones. + // MemoryAccess *Source = MSSA->getWalker()->getClobberingMemoryAccess(MU); + MemoryAccess *Source = MU->getDefiningAccess(); + + if (MSSA->isLiveOnEntryDef(Source)) { + MSSAResult = false; + } else { + BasicBlock *DBB = Source->getBlock(); + if (CurLoop->contains(DBB)) { + // if the Source is a combination of liveOnEntry and + // volatile loads (treated as Defs), it's not invalidated. + SmallPtrSet Processed; + MSSAResult = checkNotLoadOrLOE(Source, MSSA, Processed); + } + } + } + return MSSAResult; +} +static bool pointerInvalidatedByLoopWithMSSA(Value *V, MemorySSA *MSSA, + MemoryUseOrDef *MUD, + Loop *CurLoop) { + if (isa(V)) + return true; + bool MSSAResult = pointerInvalidatedByLoopWithMSSAHelper(MSSA, MUD, CurLoop); + return MSSAResult; +} + /// Little predicate that returns true if the specified basic block is in /// a subloop of the current one, not the current one itself. /// @@ -1394,3 +1632,11 @@ assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); return LI->getLoopFor(BB) != CurLoop; } + +static void removeFromMSSA(Instruction *I, MemorySSAUpdater *MSSAUpdater, + MemorySSA *MSSA) { + MemoryUseOrDef *AccI = MSSA->getMemoryAccess(I); + if (AccI) { + MSSAUpdater->removeMemoryAccess(AccI); + } +} Index: lib/Transforms/Scalar/LoopDistribute.cpp =================================================================== --- lib/Transforms/Scalar/LoopDistribute.cpp +++ lib/Transforms/Scalar/LoopDistribute.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -952,11 +953,12 @@ auto &AC = AM.getResult(F); auto &TTI = AM.getResult(F); auto &TLI = AM.getResult(F); + auto &MSSA = AM.getResult(F).getMSSA(); auto &LAM = AM.getResult(F).getManager(); std::function GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; return LAM.getResult(L, AR); }; Index: lib/Transforms/Scalar/LoopLoadElimination.cpp =================================================================== --- lib/Transforms/Scalar/LoopLoadElimination.cpp +++ lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -648,11 +649,12 @@ auto &TLI = AM.getResult(F); auto &AA = AM.getResult(F); auto &AC = AM.getResult(F); + auto &MSSA = AM.getResult(F).getMSSA(); auto &LAM = AM.getResult(F).getManager(); bool Changed = eliminateLoadsAcrossLoops( F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; return LAM.getResult(L, AR); }); Index: lib/Transforms/Scalar/LoopSink.cpp =================================================================== --- lib/Transforms/Scalar/LoopSink.cpp +++ lib/Transforms/Scalar/LoopSink.cpp @@ -288,7 +288,8 @@ // No need to check for instruction's operands are loop invariant. assert(L.hasLoopInvariantOperands(I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr)) + if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, nullptr, + nullptr, nullptr)) continue; if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI)) Changed = true; Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8710,11 +8710,12 @@ auto &AC = AM.getResult(F); auto &DB = AM.getResult(F); auto &ORE = AM.getResult(F); + auto &MSSA = AM.getResult(F).getMSSA(); auto &LAM = AM.getResult(F).getManager(); std::function GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; return LAM.getResult(L, AR); }; bool Changed = Index: test/Other/loop-pm-invalidation.ll =================================================================== --- test/Other/loop-pm-invalidation.ll +++ test/Other/loop-pm-invalidation.ll @@ -51,6 +51,7 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-LOOP-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass @@ -78,6 +79,7 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-SCEV-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass @@ -115,6 +117,7 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-LOOP-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass @@ -149,6 +152,7 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-SCEV-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass @@ -200,6 +204,7 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-LOOP-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass @@ -227,6 +232,7 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-SCEV-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass @@ -252,6 +258,7 @@ ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: TargetIRAnalysis +; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: NoOpLoopPass @@ -259,10 +266,11 @@ ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Clearing all analysis results for: ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating all non-preserved analyses +; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating analysis: MemorySSAAnalysis +; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ScalarEvolutionAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating all non-preserved analyses ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating analysis: ScalarEvolutionAnalysis -; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}> on dead_loop ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Finished {{.*}}Function pass manager run. Index: test/Other/new-pass-manager.ll =================================================================== --- test/Other/new-pass-manager.ll +++ test/Other/new-pass-manager.ll @@ -458,6 +458,7 @@ ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: TargetIRAnalysis +; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}> ; CHECK-REPEAT-LOOP-PASS-NEXT: Starting Loop pass manager run ; CHECK-REPEAT-LOOP-PASS-NEXT: Running pass: RepeatedPass Index: test/Other/pass-pipelines.ll =================================================================== --- test/Other/pass-pipelines.ll +++ test/Other/pass-pipelines.ll @@ -37,6 +37,9 @@ ; CHECK-O2-NEXT: FunctionPass Manager ; CHECK-O2-NOT: Manager ; CHECK-O2: Loop Pass Manager +; FIXME: Adition of MemorySSA splits the Loop Pass Manager into 3. +; CHECK-O2: Loop Pass Manager +; CHECK-O2: Loop Pass Manager ; CHECK-O2-NOT: Manager ; FIXME: We shouldn't be pulling out to simplify-cfg and instcombine and ; causing new loop pass managers. Index: unittests/Transforms/Scalar/LoopPassManagerTest.cpp =================================================================== --- unittests/Transforms/Scalar/LoopPassManagerTest.cpp +++ unittests/Transforms/Scalar/LoopPassManagerTest.cpp @@ -304,6 +304,7 @@ // those. FAM.registerPass([&] { return AAManager(); }); FAM.registerPass([&] { return AssumptionAnalysis(); }); + FAM.registerPass([&] { return MemorySSAAnalysis(); }); FAM.registerPass([&] { return ScalarEvolutionAnalysis(); }); FAM.registerPass([&] { return TargetLibraryAnalysis(); }); FAM.registerPass([&] { return TargetIRAnalysis(); });