Index: include/llvm/Analysis/LoopAnalysisManager.h =================================================================== --- include/llvm/Analysis/LoopAnalysisManager.h +++ include/llvm/Analysis/LoopAnalysisManager.h @@ -37,6 +37,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -58,6 +59,7 @@ ScalarEvolution &SE; TargetLibraryInfo &TLI; TargetTransformInfo &TTI; + MemorySSA &MSSA; }; /// Extern template declaration for the analysis set for this IR unit. Index: include/llvm/Support/Debug.h =================================================================== --- include/llvm/Support/Debug.h +++ include/llvm/Support/Debug.h @@ -93,6 +93,14 @@ /// extern bool VerifyLoopInfo; +/// Enables memory ssa as a dependency for loop passes. +/// +extern bool EnableMSSALoopDep; + +/// Enables LICM to use MSSA if available +/// +extern bool UseLICMwMSSA; + ///\} /// EnableDebugBuffering - This defaults to false. If true, the debug Index: include/llvm/Transforms/Scalar/LoopPassManager.h =================================================================== --- include/llvm/Transforms/Scalar/LoopPassManager.h +++ include/llvm/Transforms/Scalar/LoopPassManager.h @@ -271,13 +271,15 @@ return PA; // Get the analysis results needed by loop passes. - LoopStandardAnalysisResults LAR = {AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F)}; + LoopStandardAnalysisResults LAR = { + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F).getMSSA()}; // Setup the loop analysis manager from its proxy. It is important that // this is only done when there are loops to process and we have built the @@ -345,6 +347,9 @@ PA.preserve(); PA.preserve(); PA.preserve(); + if (EnableMSSALoopDep) { + PA.preserve(); + } // FIXME: What we really want to do here is preserve an AA category, but // that concept doesn't exist yet. PA.preserve(); Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -38,6 +38,9 @@ class DataLayout; class Loop; class LoopInfo; +class MemorySSA; +class MemorySSAUpdater; +class MemorySSAAliasSets; class OptimizationRemarkEmitter; class PredicatedScalarEvolution; class PredIteratorCache; @@ -425,7 +428,8 @@ /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, - LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); + MemorySSAUpdater *, MemorySSA *, LoopSafetyInfo *, + OptimizationRemarkEmitter *ORE); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth @@ -437,7 +441,8 @@ /// ORE. It returns changed status. bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, - LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); + MemorySSAUpdater *, MemorySSA *, LoopSafetyInfo *, + OptimizationRemarkEmitter *ORE); /// \brief Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over @@ -446,19 +451,18 @@ /// vector, loop exit blocks insertion point vector, PredIteratorCache, /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions /// of the loop and loop safety information as arguments. +/// The AliasSetTracker and MemorySSAAliasSets are alternatives to one another. /// Diagnostics is emitted via \p ORE. It returns changed status. -bool promoteLoopAccessesToScalars(const SmallSetVector &, - SmallVectorImpl &, - SmallVectorImpl &, - PredIteratorCache &, LoopInfo *, - DominatorTree *, const TargetLibraryInfo *, - Loop *, AliasSetTracker *, LoopSafetyInfo *, - OptimizationRemarkEmitter *); +bool promoteLoopAccessesToScalars( + const SmallSetVector &, SmallVectorImpl &, + SmallVectorImpl &, PredIteratorCache &, LoopInfo *, + DominatorTree *, const TargetLibraryInfo *, Loop *, AliasSetTracker *, + MemorySSAAliasSets *, LoopSafetyInfo *, OptimizationRemarkEmitter *); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. SmallVector collectChildrenInLoop(DomTreeNode *N, - const Loop *CurLoop); + Loop *CurLoop); /// \brief Computes safety information for a loop /// checks loop body & header for the possibility of may throw @@ -506,9 +510,9 @@ /// instructions from loop body to preheader/exit. Check if the instruction /// can execute speculatively. /// If \p ORE is set use it to emit optimization remarks. -bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, - Loop *CurLoop, AliasSetTracker *CurAST, - LoopSafetyInfo *SafetyInfo, +bool canSinkOrHoistInst(Instruction &, AAResults *, DominatorTree *, Loop *, + AliasSetTracker *, MemorySSAUpdater *, MemorySSA *, + LoopSafetyInfo *, OptimizationRemarkEmitter *ORE = nullptr); /// Generates a vector reduction using shufflevectors to reduce the value. Index: include/llvm/Transforms/Utils/MemorySSAAliasSets.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Utils/MemorySSAAliasSets.h @@ -0,0 +1,303 @@ +//===-- MemorySSAAliasSets.h - Alias sets using MemorySSA -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This a a helper class that creates alias sets using MemorySSA, each +// set being marked with properties that enablei/disable promotion. +// It is used for promotion in the Loop Invariant Code Motion Pass. +// It also stores insertion points into MemorySSA for a particular loop. +// These need to be reset if the sets are reused. +//===----------------------------------------------------------------------===// +#ifndef LLVM_ANALYSIS_MEMORYSSA_ALIASSETS_H +#define LLVM_ANALYSIS_MEMORYSSA_ALIASSETS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +using namespace llvm; + +namespace llvm { +class MemorySSAAliasSets { +public: + MemorySSA *MSSA; + MemorySSAUpdater *MSSAUpdater; + +private: + Loop *L; + LoopInfo *LI; + AliasAnalysis *AA; + DominatorTree *DT; + SmallVector MSSAInsertPts; + + class SingleMustAliasVec { + MemoryLocation Loc; + SmallSetVector Vec; + + public: + // CanNeverPromote trumps everything else + // Set is either may-alias, has a volatile store or aliases an instruction + // witout a MemoryLocation. + bool CanNeverPromote = false; + bool LoopInvariantAccess = false; + bool HasAGoodStore = false; + SingleMustAliasVec(MemoryLocation loc) : Loc(loc), CanNeverPromote(false) { + Vec.insert(const_cast(Loc.Ptr)); + } + void addMemoryLocation(Optional MemLoc) { + if (MemLoc == None) + return; + Vec.insert(const_cast(MemLoc.getValue().Ptr)); + } + MemoryLocation getMemoryLocation() { return Loc; } + SmallSetVector *getVecRef() { return &Vec; } + bool isGoodForPromotion() { + return !CanNeverPromote && LoopInvariantAccess && HasAGoodStore; + } + void setMemoryLocation(Value *LI, Value *Replacement) { + Loc = MemoryLocation(Replacement, Loc.Size, Loc.AATags); + Vec.remove(LI); + Vec.insert(Replacement); + } + void dump() { + dbgs() << "Set cannot be promoted: " << CanNeverPromote << "\n"; + dbgs() << "Has only loop invariant accesses: " << LoopInvariantAccess + << "\n"; + dbgs() << "Contains a promotable store: " << HasAGoodStore << "\n"; + dbgs() << "Contents: \n"; + for (Value *V : Vec) + dbgs() << " Value: " << *V << "\n"; + } + }; + + SmallVector AllSets; + DenseMap StoredDefs; + unsigned MemorySSATraversalPenalty = 0; + unsigned CAP_NO_ALIAS_SETS = 30; + unsigned CAP_MSSA_TRAVERSALS = 30; + + void dump() { + for (SingleMustAliasVec *Set : AllSets) + Set->dump(); + } + + bool isLoopInvariant(const Value *V) { + if(!L) + return true; + return L->isLoopInvariant(V); + } + bool loopContains(Instruction *I) { + if(!L) + return true; + return L->contains(I); + } + bool loopContains(BasicBlock *B) { + if(!L) + return true; + return L->contains(B); + } + + AliasResult alias(Optional MemLocI, Instruction *InstrI, + MemoryLocation &MemLocJ) { + if (MemLocI == None) { + ModRefInfo ModRef = AA->getModRefInfo(InstrI, MemLocJ); + if (bool(ModRef & MRI_Mod)) { + return MustAlias; + } + if (bool(ModRef & MRI_Ref)) { + return MayAlias; + } + return NoAlias; + } else { + AliasResult Result = AA->alias(MemLocI.getValue(), MemLocJ); + return Result; + } + } + + void addToAliasSets(const MemoryUse *Use) { + // Follow-up to the BIG NOTE: + // Because we allowed some imprecision, it is possible that the + // definingAccess is not the clobberingAccess. We *really* want the + // correct aliasing definition here, otherwise we're creating a set with + // things that don't alias. + // This is currently visible in "LoopSimplify/ashr-crash.ll"(Puts @a,@c,@d + // in the same set because its uses all point to the newly promoted Def) + // We *have to* use getClobberingAccess here for correctness! + // To avoid incurring the performance penalty of complete info: + // - we don't check against existing sets + // - we get first defining access, then clobbering access. If the two are + // different, then we increment a local cost - the cost of optimizing the + // info for this use past the "memssa-check-limit". + // - we stop creating sets after CAP_MSSA_TRAVERSALS calls to getClobbering + // that did work (result != getDefining). + // - we do *not* have partial info to reuse, so nothing is promoted when + // cost is exceeded. + MemoryDef *DefiningAcc = dyn_cast_or_null( + const_cast(Use)->getDefiningAccess()); + MemoryDef *ClobberingAcc = dyn_cast_or_null( + MSSA->getWalker()->getClobberingMemoryAccess( + const_cast(Use))); + + if (DefiningAcc != ClobberingAcc) + MemorySSATraversalPenalty++; + if (!ClobberingAcc || !ClobberingAcc->getMemoryInst()) + return; + + SingleMustAliasVec *OneAliasSet = StoredDefs[ClobberingAcc]; + if (!OneAliasSet) + return; + + Instruction *InstrI = Use->getMemoryInst(); + Optional MemLocI = MemoryLocation::getOrNone(InstrI); + MemoryLocation MemLocJ = OneAliasSet->getMemoryLocation(); + // FIXME: This is KNOWN by MSSA, i.e. if may or must. TODO: expose this. + AliasResult Result = alias(MemLocI, InstrI, MemLocJ); + if (Result == MayAlias) { + OneAliasSet->CanNeverPromote = true; + } + OneAliasSet->addMemoryLocation(MemLocI); + } + + void addToAliasSets(const MemoryDef *Def) { + Instruction *InstrI = Def->getMemoryInst(); + Optional MemLocI = MemoryLocation::getOrNone(InstrI); + + SingleMustAliasVec *FoundMustAlias = nullptr; + bool FoundMayAlias = false; + + StoreInst *SIn = dyn_cast_or_null(InstrI); + bool LoopInvariantAccess = + ((MemLocI != None) && isLoopInvariant(MemLocI.getValue().Ptr)); + bool CanNeverPromote = (SIn && SIn->isVolatile()) || MemLocI == None; + bool HasAGoodStore = SIn && !SIn->isVolatile(); + + for (auto OneAliasSet : AllSets) { + MemoryLocation MemLocJ = OneAliasSet->getMemoryLocation(); + AliasResult Result = alias(MemLocI, InstrI, MemLocJ); + if (Result == MustAlias) { + OneAliasSet->addMemoryLocation(MemLocI); + if (FoundMustAlias != nullptr || CanNeverPromote) { + OneAliasSet->CanNeverPromote = true; + FoundMayAlias = true; + if (FoundMustAlias) { + FoundMustAlias->CanNeverPromote = true; + } + } + OneAliasSet->LoopInvariantAccess |= LoopInvariantAccess; + OneAliasSet->HasAGoodStore |= HasAGoodStore; + FoundMustAlias = OneAliasSet; + StoredDefs[Def] = FoundMustAlias; + } + if (Result == MayAlias) { + FoundMayAlias = true; + OneAliasSet->CanNeverPromote = true; + if (FoundMustAlias) { + FoundMustAlias->CanNeverPromote = true; + } + } + } + + // Still need to add this to a new set + if (!FoundMustAlias && MemLocI != None) { + SingleMustAliasVec *newEntry = new SingleMustAliasVec(MemLocI.getValue()); + newEntry->CanNeverPromote = CanNeverPromote || FoundMayAlias; + newEntry->LoopInvariantAccess = LoopInvariantAccess; + newEntry->HasAGoodStore = HasAGoodStore; + StoredDefs[Def] = newEntry; + AllSets.push_back(newEntry); + } + } + +public: + MemorySSAAliasSets(MemorySSA *mssa, MemorySSAUpdater *mssaupdater, Loop *l, + LoopInfo *li, AliasAnalysis *aa, DominatorTree *dt, + SmallVectorImpl &ExitBlocks) + : MSSA(mssa), MSSAUpdater(mssaupdater), L(l), LI(li), AA(aa), DT(dt), + AllSets() { + resetMSSAInsertPts(ExitBlocks); + } + + void resetMSSAInsertPts(SmallVectorImpl &ExitBlocks) { + // TODO: Better way to fill this in. + MSSAInsertPts.reserve(ExitBlocks.size()); + for (unsigned I = 0; I < ExitBlocks.size(); ++I) { + MSSAInsertPts.push_back(nullptr); + } + } + + // Creates all alias sets in L + void createSets() { addToSets(DT->getNode(L->getHeader())); } + + // Create all alias sets given a basic block (for testing) + void createSets(BasicBlock *B) { addToSets(DT->getNode(B)); } + + void addToSets(DomTreeNode *N) { + // We want to visit parents before children. We will enque all the parents + // before their children in the worklist and process the worklist in order. + SmallVector Worklist = collectChildrenInLoop(N, L); + + for (DomTreeNode *DTN : Worklist) { + BasicBlock *BB = DTN->getBlock(); + auto *Accesses = MSSA->getBlockAccesses(BB); + if (Accesses) + for (auto &Acc : make_range(Accesses->begin(), Accesses->end())) { + if (AllSets.size() > CAP_NO_ALIAS_SETS || + MemorySSATraversalPenalty > CAP_MSSA_TRAVERSALS) { + AllSets.clear(); + return; + } + // Process uses + const MemoryUse *Use = dyn_cast_or_null(&Acc); + if (Use) + addToAliasSets(Use); + // Process defs + const MemoryDef *Def = dyn_cast_or_null(&Acc); + if (Def) + addToAliasSets(Def); + } + } + // dbgs()<<"No sets: "< *getSetIfPromotableOrNull(unsigned I) { + if (AllSets[I]->isGoodForPromotion()) { + SmallSetVector *MustAliasPtrs = AllSets[I]->getVecRef(); + assert(MustAliasPtrs->size() > 0 && "Alias set cannot be empty!!!\n"); + return MustAliasPtrs; + } + return nullptr; + } + + unsigned size() { return AllSets.size(); } + + void replace(Value *LoadI, Value *Replacement) { + for (auto OneAliasSet : AllSets) { + MemoryLocation MemLocJ = OneAliasSet->getMemoryLocation(); + if (MemLocJ.Ptr == LoadI) { + OneAliasSet->setMemoryLocation(LoadI, Replacement); + if (!OneAliasSet->CanNeverPromote && OneAliasSet->HasAGoodStore && + !OneAliasSet->LoopInvariantAccess && + isLoopInvariant(Replacement)) { + OneAliasSet->LoopInvariantAccess = true; + } + } + } + } + + MemoryAccess *getInsertPoint(int I) { return MSSAInsertPts[I]; } + void setInsertPoint(MemoryAccess *Acc, int I) { MSSAInsertPts[I] = Acc; } +}; +} // end namespace llvm + +#endif // LLVM_ANALYSIS_MEMORYSSA_ALIASSETS_H Index: lib/Analysis/LoopAnalysisManager.cpp =================================================================== --- lib/Analysis/LoopAnalysisManager.cpp +++ lib/Analysis/LoopAnalysisManager.cpp @@ -11,6 +11,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" @@ -45,12 +46,22 @@ // loop analyses declare any dependencies on these and use the more general // invalidation logic below to act on that. auto PAC = PA.getChecker(); + bool invalidateMemorySSAAnalysis = false; + // Only invalidate MSSA when used in LICM. Caveat: Building MSSA for nothing! + // The alternative: always invalidate MemorySSA, until all loop passes preserve it + // This alternative change will need to fix: + // unittests/Scalar/ScalarTests/LoopPassManagerTest, + // test/Transforms/LoopUnroll/unroll-loop-invalidation.ll and + // test/Other/loop-pm-invalidation.ll + if (UseLICMwMSSA && !EnableMSSALoopDep) + invalidateMemorySSAAnalysis = Inv.invalidate(F, PA); if (!(PAC.preserved() || PAC.preservedSet>()) || Inv.invalidate(F, PA) || Inv.invalidate(F, PA) || Inv.invalidate(F, PA) || Inv.invalidate(F, PA) || - Inv.invalidate(F, PA)) { + Inv.invalidate(F, PA) || + invalidateMemorySSAAnalysis) { // Note that the LoopInfo may be stale at this point, however the loop // objects themselves remain the only viable keys that could be in the // analysis manager's cache. So we just walk the keys and forcibly clear @@ -135,6 +146,9 @@ PA.preserve(); PA.preserve(); PA.preserve(); + if (EnableMSSALoopDep) { + PA.preserve(); + } // TODO: What we really want to do here is preserve an AA category, but that // concept doesn't exist yet. PA.preserve(); Index: lib/Analysis/MemorySSA.cpp =================================================================== --- lib/Analysis/MemorySSA.cpp +++ lib/Analysis/MemorySSA.cpp @@ -55,6 +55,8 @@ #include using namespace llvm; +bool llvm::EnableMSSALoopDep = false; +bool llvm::UseLICMwMSSA = false; #define DEBUG_TYPE "memoryssa" Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -42,6 +42,8 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" @@ -64,6 +66,7 @@ #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/MemorySSAAliasSets.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include #include @@ -91,10 +94,12 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo); static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool isSafeToExecuteUnconditionally(Instruction &Inst, @@ -106,15 +111,21 @@ static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST); +static bool pointerInvalidatedByLoopWithMSSA(Value *V, MemorySSA *MSSA, + MemoryUseOrDef *MUD, + Loop *CurLoop); +static void removeFromMSSA(Instruction *I, MemorySSAUpdater *MSSAUpdater, + MemorySSA *MSSA); static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo); namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, - TargetLibraryInfo *TLI, ScalarEvolution *SE, + TargetLibraryInfo *TLI, ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST); DenseMap &getLoopToAliasSetMap() { @@ -126,6 +137,7 @@ AliasSetTracker *collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA); + MemorySSAUpdater *MSSAUpdater = nullptr; }; struct LegacyLICMPass : public LoopPass { @@ -150,12 +162,13 @@ // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); - return LICM.runOnLoop(L, - &getAnalysis().getAAResults(), - &getAnalysis().getLoopInfo(), - &getAnalysis().getDomTree(), - &getAnalysis().getTLI(), - SE ? &SE->getSE() : nullptr, &ORE, false); + return LICM.runOnLoop( + L, &getAnalysis().getAAResults(), + &getAnalysis().getLoopInfo(), + &getAnalysis().getDomTree(), + &getAnalysis().getTLI(), + SE ? &SE->getSE() : nullptr, + &getAnalysis().getMSSA(), &ORE, false); } /// This transformation requires natural loop information & requires that @@ -164,6 +177,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); + AU.addRequired(); getLoopAnalysisUsage(AU); } @@ -204,7 +218,9 @@ "cached at a higher level"); LoopInvariantCodeMotion LICM; - if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true)) + MemorySSA *MSSA = UseLICMwMSSA ? (&AR.MSSA) : nullptr; + if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, MSSA, ORE, + true)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); @@ -217,6 +233,7 @@ false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) @@ -231,14 +248,24 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, - ScalarEvolution *SE, + ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); - AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA); + AliasSetTracker *CurAST = nullptr; + if (!MSSA) { + // Note: MSSA disabled in the new PM until all loop passes preserve MSSA + DEBUG(dbgs() << "LICM: Using Alias Set Tracker\n"); + CurAST = collectAliasInfoForLoop(L, LI, AA); + MSSAUpdater = nullptr; + } else { + DEBUG(dbgs() << "LICM: Using MemorySSA\n"); + CurAST = nullptr; + MSSAUpdater = new MemorySSAUpdater(MSSA); + } // Get the preheader block to move instructions into... BasicBlock *Preheader = L->getLoopPreheader(); @@ -259,10 +286,11 @@ // if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, - CurAST, &SafetyInfo, ORE); + CurAST, MSSAUpdater, MSSA, &SafetyInfo, ORE); + if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, - CurAST, &SafetyInfo, ORE); + CurAST, MSSAUpdater, MSSA, &SafetyInfo, ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -291,26 +319,41 @@ bool Promoted = false; - // Loop over all of the alias sets in the tracker object. - for (AliasSet &AS : *CurAST) { - // We can promote this alias set if it has a store, if it is a "Must" - // alias set, if the pointer is loop invariant, and if we are not - // eliminating any volatile loads or stores. - if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || - AS.isVolatile() || !L->isLoopInvariant(AS.begin()->getValue())) - continue; - - assert( - !AS.empty() && - "Must alias set should have at least one pointer element in it!"); - - SmallSetVector PointerMustAliases; - for (const auto &ASI : AS) - PointerMustAliases.insert(ASI.getValue()); - - Promoted |= promoteLoopAccessesToScalars(PointerMustAliases, ExitBlocks, - InsertPts, PIC, LI, DT, TLI, L, - CurAST, &SafetyInfo, ORE); + if (CurAST) { + // Loop over all of the alias sets in the tracker object. + for (AliasSet &AS : *CurAST) { + // We can promote this alias set if it has a store, if it is a "Must" + // alias set, if the pointer is loop invariant, and if we are not + // eliminating any volatile loads or stores. + if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || + AS.isVolatile() || !L->isLoopInvariant(AS.begin()->getValue())) + continue; + + assert( + !AS.empty() && + "Must alias set should have at least one pointer element in it!"); + + SmallSetVector PointerMustAliases; + for (const auto &ASI : AS) + PointerMustAliases.insert(ASI.getValue()); + + Promoted |= promoteLoopAccessesToScalars( + PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, + CurAST, nullptr, &SafetyInfo, ORE); + } + } else { + MemorySSAAliasSets AS(MSSA, MSSAUpdater, L, LI, AA, DT, ExitBlocks); + AS.createSets(); + + for (unsigned I = 0; I < AS.size(); ++I) { + SmallSetVector *MustAliasPtrs = + AS.getSetIfPromotableOrNull(I); + if (MustAliasPtrs) { + Promoted |= promoteLoopAccessesToScalars( + *MustAliasPtrs, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, + CurAST, &AS, &SafetyInfo, ORE); + } + } } // Once we have promoted values across the loop body we have to @@ -333,12 +376,15 @@ assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) && "Parent loop not left in LCSSA form after LICM!"); - // If this loop is nested inside of another one, save the alias information - // for when we process the outer loop. - if (L->getParentLoop() && !DeleteAST) - LoopToAliasSetMap[L] = CurAST; - else - delete CurAST; + if (CurAST) { + // If this loop is nested inside of another one, save the alias information + // for when we process the outer loop. + if (L->getParentLoop() && !DeleteAST) + LoopToAliasSetMap[L] = CurAST; + else + delete CurAST; + } else + delete MSSAUpdater; if (Changed && SE) SE->forgetLoopDispositions(L); @@ -352,12 +398,15 @@ /// bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, + AliasSetTracker *CurAST, MemorySSAUpdater *MSSAUpdater, + MemorySSA *MSSA, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && SafetyInfo != nullptr && + "Unexpected input to sinkRegion"); + assert((CurAST != nullptr || (MSSAUpdater != nullptr && MSSA != nullptr)) && "Unexpected input to sinkRegion"); // We want to visit children before parents. We will enque all the parents @@ -381,7 +430,10 @@ if (isInstructionTriviallyDead(&I, TLI)) { DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); ++II; - CurAST->deleteValue(&I); + if (CurAST) + CurAST->deleteValue(&I); + if (MSSA) + removeFromMSSA(&I, MSSAUpdater, MSSA); I.eraseFromParent(); Changed = true; continue; @@ -393,9 +445,11 @@ // operands of the instruction are loop invariant. // if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAUpdater, MSSA, + SafetyInfo, ORE)) { ++II; - Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE); + Changed |= sink(I, LI, DT, CurLoop, CurAST, MSSAUpdater, MSSA, + SafetyInfo, ORE); } } } @@ -409,11 +463,14 @@ /// bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, + AliasSetTracker *CurAST, MemorySSAUpdater *MSSAUpdater, + MemorySSA *MSSA, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && SafetyInfo != nullptr && + "Unexpected input to hoistRegion"); + assert((CurAST != nullptr || (MSSAUpdater != nullptr && MSSA != nullptr)) && "Unexpected input to hoistRegion"); // We want to visit parents before children. We will enque all the parents @@ -434,10 +491,17 @@ if (Constant *C = ConstantFoldInstruction( &I, I.getModule()->getDataLayout(), TLI)) { DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); - CurAST->copyValue(&I, C); + if (CurAST) + CurAST->copyValue(&I, C); + // TODO: update sets if built beforehand; otherwise, no-op + if (MSSA) { + } I.replaceAllUsesWith(C); if (isInstructionTriviallyDead(&I, TLI)) { - CurAST->deleteValue(&I); + if (CurAST) + CurAST->deleteValue(&I); + if (MSSA) + removeFromMSSA(&I, MSSAUpdater, MSSA); I.eraseFromParent(); } Changed = true; @@ -462,7 +526,8 @@ I.replaceAllUsesWith(Product); I.eraseFromParent(); - hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); + hoist(*ReciprocalDivisor, DT, CurLoop, MSSAUpdater, MSSA, SafetyInfo, + ORE); Changed = true; continue; } @@ -472,11 +537,12 @@ // if it is safe to hoist the instruction. // if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAUpdater, MSSA, + SafetyInfo, ORE) && isSafeToExecuteUnconditionally( I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) - Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); + Changed |= hoist(I, DT, CurLoop, MSSAUpdater, MSSA, SafetyInfo, ORE); } } @@ -575,6 +641,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Loads have extra constraints we have to verify before we can hoist them. @@ -601,8 +668,14 @@ AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); - bool Invalidated = - pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); + bool Invalidated = false; + if (CurAST) + Invalidated = + pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); + else // if (MSSA) + Invalidated = pointerInvalidatedByLoopWithMSSA( + LI->getOperand(0), MSSA, MSSA->getMemoryAccess(LI), CurLoop); + // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -630,20 +703,38 @@ // it's arguments with arbitrary offsets. If we can prove there are no // writes to this memory in the loop, we can hoist or sink. if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { - for (Value *Op : CI->arg_operands()) - if (Op->getType()->isPointerTy() && - pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize, - AAMDNodes(), CurAST)) + for (Value *Op : CI->arg_operands()) { + bool Invalidated = false; + if (CurAST) + Invalidated = pointerInvalidatedByLoop( + Op, MemoryLocation::UnknownSize, AAMDNodes(), CurAST); + else // if (MSSA) + Invalidated = pointerInvalidatedByLoopWithMSSA( + Op, MSSA, MSSA->getMemoryAccess(CI), CurLoop); + if (Op->getType()->isPointerTy() && Invalidated) return false; + } return true; } // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. bool FoundMod = false; - for (AliasSet &AS : *CurAST) { - if (!AS.isForwardingAliasSet() && AS.isMod()) { - FoundMod = true; - break; + if (CurAST) { + for (AliasSet &AS : *CurAST) { + if (!AS.isForwardingAliasSet() && AS.isMod()) { + FoundMod = true; + break; + } + } + } else if (MSSA) { + // TODO: need "if this call only reads from memory" + for (auto BBIt = (CurLoop->block_begin()); BBIt != CurLoop->block_end(); + BBIt++) { + auto *Defs = MSSA->getBlockDefs(*BBIt); + if (Defs) { + FoundMod = true; + break; + } } } if (!FoundMod) @@ -739,6 +830,7 @@ static Instruction * CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo) { Instruction *New; @@ -776,6 +868,23 @@ if (!I.getName().empty()) New->setName(I.getName() + ".le"); + MemoryAccess *OldMemAcc; + if (MSSA && (OldMemAcc = MSSA->getMemoryAccess(&I))) { + MemoryAccess *NewMemAcc = MSSAUpdater->createMemoryAccessInBB( + New, nullptr, New->getParent(), MemorySSA::Beginning); + MemoryDef *NewMemDef = dyn_cast_or_null(NewMemAcc); + if (NewMemDef) + MSSAUpdater->insertDef(NewMemDef, true); + else { + MemoryUse *NewMemUse = dyn_cast_or_null(NewMemAcc); + if (NewMemUse) { + MSSAUpdater->insertUse(NewMemUse); + // This is only called to properly update the defining access + MSSA->getWalker()->getClobberingMemoryAccess(NewMemUse); + } + } + } + // Build LCSSA PHI nodes for any in-loop operands. Note that this is // particularly cheap because we can rip off the PHI node that we're // replacing for the number and blocks of the predecessors. @@ -805,6 +914,7 @@ /// static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); @@ -860,14 +970,17 @@ if (It != SunkCopies.end()) New = It->second; else - New = SunkCopies[ExitBlock] = - CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo); + New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock( + I, *ExitBlock, *PN, MSSAUpdater, MSSA, LI, SafetyInfo); PN->replaceAllUsesWith(New); PN->eraseFromParent(); } - CurAST->deleteValue(&I); + if (CurAST) + CurAST->deleteValue(&I); + if (MSSA) + removeFromMSSA(&I, MSSAUpdater, MSSA); I.eraseFromParent(); return Changed; } @@ -876,6 +989,7 @@ /// is safe to hoist, this instruction is called to do the dirty work. /// static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, + MemorySSAUpdater *MSSAUpdater, MemorySSA *MSSA, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { auto *Preheader = CurLoop->getLoopPreheader(); @@ -897,6 +1011,16 @@ // Move the new node to the Preheader, before its terminator. I.moveBefore(Preheader->getTerminator()); + if (MSSA) { + // Update MemorySSA if moving I just moved a load or store + MemoryUseOrDef *OldMemAcc = + dyn_cast_or_null(MSSA->getMemoryAccess(&I)); + if (OldMemAcc) { + MSSAUpdater->moveToPlace(OldMemAcc, Preheader, MemorySSA::End); + // This is only called to properly update the defining access + MSSA->getWalker()->getClobberingMemoryAccess(OldMemAcc); + } + } // Do not retain debug locations when we are moving instructions to different // basic blocks, because we want to avoid jumpy line tables. Calls, however, @@ -948,7 +1072,11 @@ SmallVectorImpl &LoopExitBlocks; SmallVectorImpl &LoopInsertPts; PredIteratorCache &PredCache; - AliasSetTracker &AST; + + // AST and AS are alternatives! + AliasSetTracker *AST; + MemorySSAAliasSets *AS; + LoopInfo &LI; DebugLoc DL; int Alignment; @@ -975,11 +1103,12 @@ const SmallSetVector &PMA, SmallVectorImpl &LEB, SmallVectorImpl &LIP, PredIteratorCache &PIC, - AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, - bool UnorderedAtomic, const AAMDNodes &AATags) + AliasSetTracker *ast, MemorySSAAliasSets *as, LoopInfo &li, + DebugLoc dl, int alignment, bool UnorderedAtomic, + const AAMDNodes &AATags) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), - LI(li), DL(std::move(dl)), Alignment(alignment), + AS(as), LI(li), DL(std::move(dl)), Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags) {} bool isInstInList(Instruction *I, @@ -1010,14 +1139,36 @@ NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); + if (AS) { + MemoryAccess *MSSAInsertPoint = AS->getInsertPoint(i); + MemoryAccess *NewMemAcc; + if (!MSSAInsertPoint) { + NewMemAcc = AS->MSSAUpdater->createMemoryAccessInBB( + NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning); + } else { + NewMemAcc = AS->MSSAUpdater->createMemoryAccessAfter(NewSI, nullptr, + MSSAInsertPoint); + } + AS->setInsertPoint(NewMemAcc, i); + AS->MSSAUpdater->insertDef(cast(NewMemAcc), true); + // FIXME: true for safety, false may still be correct. + } } } void replaceLoadWithValue(LoadInst *LI, Value *V) const override { // Update alias analysis. - AST.copyValue(LI, V); + if (AST) + AST->copyValue(LI, V); + if (AS) + AS->replace(LI, V); + } + void instructionDeleted(Instruction *I) const override { + if (AST) + AST->deleteValue(I); + if (AS) + removeFromMSSA(I, AS->MSSAUpdater, AS->MSSA); } - void instructionDeleted(Instruction *I) const override { AST.deleteValue(I); } }; } // namespace @@ -1031,12 +1182,14 @@ SmallVectorImpl &ExitBlocks, SmallVectorImpl &InsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE) { + Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAAliasSets *AS, + LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && - CurAST != nullptr && SafetyInfo != nullptr && + SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars"); + assert((CurAST != nullptr || (AS != nullptr)) && + "Unexpected input to promoteLoopAccessesToScalars"); Value *SomePtr = *PointerMustAliases.begin(); BasicBlock *Preheader = CurLoop->getLoopPreheader(); @@ -1266,7 +1419,7 @@ SmallVector NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - InsertPts, PIC, *CurAST, *LI, DL, Alignment, + InsertPts, PIC, CurAST, AS, *LI, DL, Alignment, SawUnorderedAtomic, AATags); // Set up the preheader to have a definition of the value. It is the live-out @@ -1281,13 +1434,27 @@ PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); + MemoryAccess *PreheaderLoadMemoryAccess; + if (AS) { + // Update MSSA + PreheaderLoadMemoryAccess = AS->MSSAUpdater->createMemoryAccessInBB( + PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); + MemoryUse *NewMemUse = cast(PreheaderLoadMemoryAccess); + AS->MSSAUpdater->insertUse(NewMemUse); + // This is only called to properly update the defining access + AS->MSSA->getWalker()->getClobberingMemoryAccess(NewMemUse); + } + // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); // If the SSAUpdater didn't use the load in the preheader, just zap it now. - if (PreheaderLoad->use_empty()) + if (PreheaderLoad->use_empty()) { + if (AS) + AS->MSSAUpdater->removeMemoryAccess(PreheaderLoadMemoryAccess); PreheaderLoad->eraseFromParent(); + } return true; } @@ -1387,6 +1554,75 @@ return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod(); } +// Checks arguments of a Phi, so Defs and other Phis. +// returns true if Acc is not a load or a live on entry +static bool checkNotLoadOrLOE(MemoryAccess *Acc, MemorySSA *MSSA, + SmallPtrSetImpl &Processed) { + if (Processed.count(Acc)) + return true; + if (MSSA->isLiveOnEntryDef(Acc)) + return false; + + MemoryPhi *Phi = dyn_cast_or_null(Acc); + MemoryDef *Def = dyn_cast_or_null(Acc); + if (Phi) { + bool MSSAResult = false; + int NoArgsPhi = Phi->getNumIncomingValues(); + Processed.insert(Acc); + for (int I = 0; I < NoArgsPhi; I++) { + MemoryAccess *Arg = Phi->getIncomingValue(I); + MSSAResult |= checkNotLoadOrLOE(Arg, MSSA, Processed); + } + return MSSAResult; + } else if (Def) { + Instruction *AccI = Def->getMemoryInst(); + if (AccI && isa(AccI)) // a load is a def if volatile + return false; + } + return true; +} + +static bool pointerInvalidatedByLoopWithMSSAHelper(MemorySSA *MSSA, + MemoryUseOrDef *MUD, + Loop *CurLoop) { + bool MSSAResult = false; + MemoryUse *MU = nullptr; + // MUD is from a LoopInstr or CallInstr, so both Uses. + if (MUD && (MU = dyn_cast_or_null(MUD))) { + // BIG NOTE: getDefiningAccess may not be precise, since optimizeUses + // in MemorySSA is capped. This is good! It means that this becomes best + // effort, rather than incur a performance penalty for loops with large load + // count. Perfect accuracy can be obtained with: + // MSSA->getWalker()->getClobberingMemoryAccess(MU) + // For the updates done by sink/hoist/promotion (insertUse, moveToPlace), + // we *explicitly* call getClobberingMemoryAccesses. This will get perfect + // results for small tests, and imprecise but fast for large ones. + // MemoryAccess *Source = MSSA->getWalker()->getClobberingMemoryAccess(MU); + MemoryAccess *Source = MU->getDefiningAccess(); + + if (MSSA->isLiveOnEntryDef(Source)) { + MSSAResult = false; + } else { + BasicBlock *DBB = Source->getBlock(); + if (CurLoop->contains(DBB)) { + // if the Source is a combination of liveOnEntry and + // volatile loads (treated as Defs), it's not invalidated. + SmallPtrSet Processed; + MSSAResult = checkNotLoadOrLOE(Source, MSSA, Processed); + } + } + } + return MSSAResult; +} +static bool pointerInvalidatedByLoopWithMSSA(Value *V, MemorySSA *MSSA, + MemoryUseOrDef *MUD, + Loop *CurLoop) { + if (isa(V)) + return true; + bool MSSAResult = pointerInvalidatedByLoopWithMSSAHelper(MSSA, MUD, CurLoop); + return MSSAResult; +} + /// Little predicate that returns true if the specified basic block is in /// a subloop of the current one, not the current one itself. /// @@ -1394,3 +1630,11 @@ assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); return LI->getLoopFor(BB) != CurLoop; } + +static void removeFromMSSA(Instruction *I, MemorySSAUpdater *MSSAUpdater, + MemorySSA *MSSA) { + MemoryUseOrDef *AccI = MSSA->getMemoryAccess(I); + if (AccI) { + MSSAUpdater->removeMemoryAccess(AccI); + } +} Index: lib/Transforms/Scalar/LoopDistribute.cpp =================================================================== --- lib/Transforms/Scalar/LoopDistribute.cpp +++ lib/Transforms/Scalar/LoopDistribute.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -952,11 +953,12 @@ auto &AC = AM.getResult(F); auto &TTI = AM.getResult(F); auto &TLI = AM.getResult(F); + auto &MSSA = AM.getResult(F).getMSSA(); auto &LAM = AM.getResult(F).getManager(); std::function GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; return LAM.getResult(L, AR); }; Index: lib/Transforms/Scalar/LoopLoadElimination.cpp =================================================================== --- lib/Transforms/Scalar/LoopLoadElimination.cpp +++ lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -648,11 +649,12 @@ auto &TLI = AM.getResult(F); auto &AA = AM.getResult(F); auto &AC = AM.getResult(F); + auto &MSSA = AM.getResult(F).getMSSA(); auto &LAM = AM.getResult(F).getManager(); bool Changed = eliminateLoadsAcrossLoops( F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; return LAM.getResult(L, AR); }); Index: lib/Transforms/Scalar/LoopSink.cpp =================================================================== --- lib/Transforms/Scalar/LoopSink.cpp +++ lib/Transforms/Scalar/LoopSink.cpp @@ -288,7 +288,8 @@ // No need to check for instruction's operands are loop invariant. assert(L.hasLoopInvariantOperands(I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr)) + if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, nullptr, + nullptr, nullptr)) continue; if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI)) Changed = true; Index: lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopUtils.cpp +++ lib/Transforms/Utils/LoopUtils.cpp @@ -1119,12 +1119,12 @@ /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. SmallVector -llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { +llvm::collectChildrenInLoop(DomTreeNode *N, Loop *CurLoop) { SmallVector Worklist; auto AddRegionToWorklist = [&](DomTreeNode *DTN) { // Only include subregions in the top level loop. BasicBlock *BB = DTN->getBlock(); - if (CurLoop->contains(BB)) + if (!CurLoop || CurLoop->contains(BB)) Worklist.push_back(DTN); }; Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8710,11 +8710,12 @@ auto &AC = AM.getResult(F); auto &DB = AM.getResult(F); auto &ORE = AM.getResult(F); + auto &MSSA = AM.getResult(F).getMSSA(); auto &LAM = AM.getResult(F).getManager(); std::function GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; return LAM.getResult(L, AR); }; bool Changed = Index: test/Other/loop-pm-invalidation.ll =================================================================== --- test/Other/loop-pm-invalidation.ll +++ test/Other/loop-pm-invalidation.ll @@ -51,6 +51,7 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-LOOP-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass @@ -78,6 +79,7 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-SCEV-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass @@ -115,6 +117,7 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-LOOP-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass @@ -149,6 +152,7 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-SCEV-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass @@ -200,6 +204,7 @@ ; CHECK-LOOP-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-LOOP-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-LOOP-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass @@ -227,6 +232,7 @@ ; CHECK-SCEV-INV-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis +; CHECK-SCEV-INV-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass @@ -252,6 +258,7 @@ ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: TargetIRAnalysis +; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Starting {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: NoOpLoopPass @@ -259,6 +266,7 @@ ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Clearing all analysis results for: ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Finished {{.*}}Loop pass manager run. ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating all non-preserved analyses +; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating analysis: MemorySSAAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ScalarEvolutionAnalysis ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating all non-preserved analyses ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Invalidating analysis: ScalarEvolutionAnalysis Index: test/Other/new-pass-manager.ll =================================================================== --- test/Other/new-pass-manager.ll +++ test/Other/new-pass-manager.ll @@ -458,6 +458,7 @@ ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: ScalarEvolutionAnalysis ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: TargetIRAnalysis +; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: MemorySSAAnalysis ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}> ; CHECK-REPEAT-LOOP-PASS-NEXT: Starting Loop pass manager run ; CHECK-REPEAT-LOOP-PASS-NEXT: Running pass: RepeatedPass Index: test/Other/pass-pipelines.ll =================================================================== --- test/Other/pass-pipelines.ll +++ test/Other/pass-pipelines.ll @@ -37,6 +37,9 @@ ; CHECK-O2-NEXT: FunctionPass Manager ; CHECK-O2-NOT: Manager ; CHECK-O2: Loop Pass Manager +; FIXME: Adition of MemorySSA splits the Loop Pass Manager into 3. +; CHECK-O2: Loop Pass Manager +; CHECK-O2: Loop Pass Manager ; CHECK-O2-NOT: Manager ; FIXME: We shouldn't be pulling out to simplify-cfg and instcombine and ; causing new loop pass managers. Index: unittests/Transforms/Scalar/LoopPassManagerTest.cpp =================================================================== --- unittests/Transforms/Scalar/LoopPassManagerTest.cpp +++ unittests/Transforms/Scalar/LoopPassManagerTest.cpp @@ -304,6 +304,7 @@ // those. FAM.registerPass([&] { return AAManager(); }); FAM.registerPass([&] { return AssumptionAnalysis(); }); + FAM.registerPass([&] { return MemorySSAAnalysis(); }); FAM.registerPass([&] { return ScalarEvolutionAnalysis(); }); FAM.registerPass([&] { return TargetLibraryAnalysis(); }); FAM.registerPass([&] { return TargetIRAnalysis(); });