Index: include/llvm/Analysis/MemorySSAUpdater.h =================================================================== --- include/llvm/Analysis/MemorySSAUpdater.h +++ include/llvm/Analysis/MemorySSAUpdater.h @@ -35,6 +35,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/BasicBlock.h" Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -41,6 +41,7 @@ class DataLayout; class Loop; class LoopInfo; +class MemorySSAUpdater; class OptimizationRemarkEmitter; class PredicatedScalarEvolution; class PredIteratorCache; @@ -109,7 +110,7 @@ /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, - AliasSetTracker *, LoopSafetyInfo *, + AliasSetTracker *, MemorySSAUpdater *, LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); /// Walk the specified region of the CFG (defined by all blocks @@ -122,7 +123,8 @@ /// ORE. It returns changed status. bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, - LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); + MemorySSAUpdater *, LoopSafetyInfo *, + OptimizationRemarkEmitter *ORE); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. @@ -202,7 +204,7 @@ /// If \p ORE is set use it to emit optimization remarks. bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, - bool TargetExecutesOncePerLoop, + MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, OptimizationRemarkEmitter *ORE = nullptr); /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -44,11 +44,11 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -67,6 +67,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include @@ -98,16 +99,36 @@ LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), cl::desc("How many instruction to cross product using AA")); +// Experimental option to allow imprecision in LICM (use MemorySSA cap) in +// pathological cases, in exchange for faster compile. This is to be removed +// if MemorySSA starts to address the same issue. + +// [Verbose] This flag applies only when LICM uses MemorySSA instead on +// AliasSetTracker. +// When flag is disabled (default), LICM calls MemorySSAWalker's +// getClobberingMemoryAccess, which gets perfect accuracy. When flag is enabled, +// LICM will call into MemorySSA's getDefiningAccess, which may not be precise, +// since optimizeUses is capped. This means that doing sink, hoist and promotion +// will not incur a performance penalty for loops with large load and store +// count. For the updates done by sink/hoist/promotion (insertUse, moveToPlace), +// we will need to *explicitly* call getClobberingMemoryAccesses. This will get +// perfect results for small tests, and imprecise but fast for large ones. +static cl::opt EnableLicmCap( + "enable-licm-cap", cl::init(false), cl::Hidden, + cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in " + "pathological cases, in exchange for faster compile")); + static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop); static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, - LoopSafetyInfo *SafetyInfo, + LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, bool FreeInLoop); + MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE, + bool FreeInLoop); static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, @@ -117,11 +138,14 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AliasAnalysis *AA); - -static Instruction * -CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, - const LoopInfo *LI, - const LoopSafetyInfo *SafetyInfo); +static bool pointerInvalidatedByLoopWithMSSA(Value *V, MemorySSA *MSSA, + MemoryUseOrDef *MUD, + Loop *CurLoop); +static void removeFromAnalyses(Instruction *I, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAU); +static Instruction *CloneInstructionInExitBlock( + Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, + const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); namespace { struct LoopInvariantCodeMotion { @@ -261,7 +285,15 @@ assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); - std::unique_ptr CurAST = collectAliasInfoForLoop(L, LI, AA); + std::unique_ptr CurAST; + std::unique_ptr MSSAU; + if (!MSSA) { + LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n"); + CurAST = collectAliasInfoForLoop(L, LI, AA); + } else { + LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA. Promotion disabled.\n"); + MSSAU = make_unique(MSSA); + } // Get the preheader block to move instructions into... BasicBlock *Preheader = L->getLoopPreheader(); @@ -282,10 +314,10 @@ // if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, - CurAST.get(), &SafetyInfo, ORE); + CurAST.get(), MSSAU.get(), &SafetyInfo, ORE); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, - CurAST.get(), &SafetyInfo, ORE); + CurAST.get(), MSSAU.get(), &SafetyInfo, ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -314,27 +346,30 @@ bool Promoted = false; - // Loop over all of the alias sets in the tracker object. - for (AliasSet &AS : *CurAST) { - // We can promote this alias set if it has a store, if it is a "Must" - // alias set, if the pointer is loop invariant, and if we are not - // eliminating any volatile loads or stores. - if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || - !L->isLoopInvariant(AS.begin()->getValue())) - continue; - - assert( - !AS.empty() && - "Must alias set should have at least one pointer element in it!"); - - SmallSetVector PointerMustAliases; - for (const auto &ASI : AS) - PointerMustAliases.insert(ASI.getValue()); - - Promoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, - CurAST.get(), &SafetyInfo, ORE); + if (CurAST.get()) { + // Loop over all of the alias sets in the tracker object. + for (AliasSet &AS : *CurAST) { + // We can promote this alias set if it has a store, if it is a "Must" + // alias set, if the pointer is loop invariant, and if we are not + // eliminating any volatile loads or stores. + if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || + !L->isLoopInvariant(AS.begin()->getValue())) + continue; + + assert( + !AS.empty() && + "Must alias set should have at least one pointer element in it!"); + + SmallSetVector PointerMustAliases; + for (const auto &ASI : AS) + PointerMustAliases.insert(ASI.getValue()); + + Promoted |= promoteLoopAccessesToScalars( + PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, + CurAST.get(), &SafetyInfo, ORE); + } } + // FIXME: Promotion initially disabled when using MemorySSA. // Once we have promoted values across the loop body we have to // recursively reform LCSSA as any nested loop may now have values defined @@ -358,9 +393,12 @@ // If this loop is nested inside of another one, save the alias information // for when we process the outer loop. - if (L->getParentLoop() && !DeleteAST) + if (CurAST.get() && L->getParentLoop() && !DeleteAST) LoopToAliasSetMap[L] = std::move(CurAST); + if (MSSAU.get() && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + if (Changed && SE) SE->forgetLoopDispositions(L); return Changed; @@ -374,13 +412,16 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, - AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, + AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, + LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && CurAST && SafetyInfo != nullptr && - "Unexpected input to sinkRegion"); + CurLoop != nullptr && SafetyInfo != nullptr && + "Unexpected input to sinkRegion."); + assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) && + "Either AliasSetTracker or MemorySSA should be initialized."); // We want to visit children before parents. We will enque all the parents // before their children in the worklist and process the worklist in reverse @@ -404,7 +445,7 @@ LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); salvageDebugInfo(I); ++II; - CurAST->deleteValue(&I); + removeFromAnalyses(&I, CurAST, MSSAU); I.eraseFromParent(); Changed = true; continue; @@ -417,12 +458,12 @@ // bool FreeInLoop = false; if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) && !I.mayHaveSideEffects()) { - if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) { + if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) { if (!FreeInLoop) { ++II; - CurAST->deleteValue(&I); + removeFromAnalyses(&I, CurAST, MSSAU); I.eraseFromParent(); } Changed = true; @@ -430,6 +471,8 @@ } } } + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); return Changed; } @@ -440,12 +483,15 @@ /// bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, + AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, + LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && - "Unexpected input to hoistRegion"); + CurLoop != nullptr && SafetyInfo != nullptr && + "Unexpected input to hoistRegion."); + assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) && + "Either AliasSetTracker or MemorySSA should be initialized."); // We want to visit parents before children. We will enque all the parents // before their children in the worklist and process the worklist in order. @@ -478,10 +524,12 @@ &I, I.getModule()->getDataLayout(), TLI)) { LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); - CurAST->copyValue(&I, C); + if (CurAST) + CurAST->copyValue(&I, C); + // FIXME MSSA: Such replacements may make accesses unoptimized (D51960). I.replaceAllUsesWith(C); if (isInstructionTriviallyDead(&I, TLI)) { - CurAST->deleteValue(&I); + removeFromAnalyses(&I, CurAST, MSSAU); I.eraseFromParent(); } Changed = true; @@ -493,12 +541,12 @@ // if it is safe to hoist the instruction. // if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) && (IsMustExecute || isSafeToExecuteUnconditionally( I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator()))) { - hoist(I, DT, CurLoop, SafetyInfo, ORE); + hoist(I, DT, CurLoop, SafetyInfo, MSSAU, ORE); Changed = true; continue; } @@ -521,7 +569,7 @@ I.replaceAllUsesWith(Product); I.eraseFromParent(); - hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); + hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, MSSAU, ORE); Changed = true; continue; } @@ -532,7 +580,7 @@ isGuard(&I)) && IsMustExecute && IsMemoryNotModified && CurLoop->hasLoopInvariantOperands(&I)) { - hoist(I, DT, CurLoop, SafetyInfo, ORE); + hoist(I, DT, CurLoop, SafetyInfo, MSSAU, ORE); Changed = true; continue; } @@ -543,6 +591,8 @@ IsMemoryNotModified = !I.mayWriteToMemory(); } } + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); return Changed; } @@ -616,24 +666,49 @@ } /// Return true if all of the alias sets within this AST are known not to /// contain a Mod. -bool isReadOnly(AliasSetTracker *CurAST) { - for (AliasSet &AS : *CurAST) { - if (!AS.isForwardingAliasSet() && AS.isMod()) { - return false; +bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU, + const Loop *L) { + if (CurAST) { + for (AliasSet &AS : *CurAST) { + if (!AS.isForwardingAliasSet() && AS.isMod()) { + return false; + } } + return true; + } else { /*MSSAU*/ + for (auto *BB : L->getBlocks()) + if (MSSAU->getMemorySSA()->getBlockDefs(BB)) + return false; + return true; } +} + +/// Return true if I is the only Instruction with a MemoryAccess in L. +bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, + const MemorySSAUpdater *MSSAU) { + for (auto *BB : L->getBlocks()) + if (auto *Acc = MSSAU->getMemorySSA()->getBlockAccesses(BB)) { + if (Acc->size() > 1) + return false; + if (const auto *MUD = dyn_cast(&*Acc->begin())) + if (MUD->getMemoryInst() != I) + return false; + } return true; } } bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, OptimizationRemarkEmitter *ORE) { // If we don't understand the instruction, bail early. if (!isHoistableAndSinkableInst(I)) return false; - + + MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr; + // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast(&I)) { if (!LI->isUnordered()) @@ -653,8 +728,13 @@ if (isLoadInvariantInLoop(LI, DT, CurLoop)) return true; - bool Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), - CurAST, CurLoop, AA); + bool Invalidated; + if (CurAST) + Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST, + CurLoop, AA); + else + Invalidated = pointerInvalidatedByLoopWithMSSA( + LI->getOperand(0), MSSA, MSSA->getMemoryAccess(LI), CurLoop); // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -679,7 +759,7 @@ if (match(CI, m_Intrinsic())) // Assumes don't actually alias anything or throw return true; - + // Handle simple cases by querying alias analysis. FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == FMRB_DoesNotAccessMemory) @@ -691,17 +771,24 @@ if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { // TODO: expand to writeable arguments for (Value *Op : CI->arg_operands()) - if (Op->getType()->isPointerTy() && - pointerInvalidatedByLoop( + if (Op->getType()->isPointerTy()) { + bool Invalidated; + if (CurAST) + Invalidated = pointerInvalidatedByLoop( MemoryLocation(Op, MemoryLocation::UnknownSize, AAMDNodes()), - CurAST, CurLoop, AA)) - return false; + CurAST, CurLoop, AA); + else + Invalidated = pointerInvalidatedByLoopWithMSSA( + Op, MSSA, MSSA->getMemoryAccess(CI), CurLoop); + if (Invalidated) + return false; + } return true; } // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. - if (isReadOnly(CurAST)) + if (isReadOnly(CurAST, MSSAU, CurLoop)) return true; } @@ -712,18 +799,24 @@ } else if (auto *FI = dyn_cast(&I)) { // Fences alias (most) everything to provide ordering. For the moment, // just give up if there are any other memory operations in the loop. - auto Begin = CurAST->begin(); - assert(Begin != CurAST->end() && "must contain FI"); - if (std::next(Begin) != CurAST->end()) - // constant memory for instance, TODO: handle better - return false; - auto *UniqueI = Begin->getUniqueInstruction(); - if (!UniqueI) - // other memory op, give up + if (CurAST) { + auto Begin = CurAST->begin(); + assert(Begin != CurAST->end() && "must contain FI"); + if (std::next(Begin) != CurAST->end()) + // constant memory for instance, TODO: handle better + return false; + auto *UniqueI = Begin->getUniqueInstruction(); + if (!UniqueI) + // other memory op, give up + return false; + (void)FI; // suppress unused variable warning + assert(UniqueI == FI && "AS must contain FI"); + return true; + } else { // MSSAU + if (isOnlyMemoryAccess(FI, CurLoop, MSSAU)) + return true; return false; - (void)FI; //suppress unused variable warning - assert(UniqueI == FI && "AS must contain FI"); - return true; + } } else if (auto *SI = dyn_cast(&I)) { if (!SI->isUnordered()) return false; // Don't sink/hoist volatile or ordered atomic store! @@ -733,17 +826,26 @@ // load store promotion instead. TODO: We can extend this to cases where // there is exactly one write to the location and that write dominates an // arbitrary number of reads in the loop. - auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI)); + if (CurAST) { + auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI)); - if (AS.isRef() || !AS.isMustAlias()) - // Quick exit test, handled by the full path below as well. - return false; - auto *UniqueI = AS.getUniqueInstruction(); - if (!UniqueI) - // other memory op, give up + if (AS.isRef() || !AS.isMustAlias()) + // Quick exit test, handled by the full path below as well. + return false; + auto *UniqueI = AS.getUniqueInstruction(); + if (!UniqueI) + // other memory op, give up + return false; + assert(UniqueI == SI && "AS must contain SI"); + return true; + } else { // MSSAU + if (isOnlyMemoryAccess(SI, CurLoop, MSSAU)) + return true; + // FIXME: Checking alias() of SI against all other memory instructions is + // initially disabled when using MemorySSA. + // example: test/Transforms/LICM/atomics.ll:test7b return false; - assert(UniqueI == SI && "AS must contain SI"); - return true; + } } assert(!I.mayReadOrWriteMemory() && "unhandled aliasing"); @@ -827,10 +929,9 @@ return true; } -static Instruction * -CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, - const LoopInfo *LI, - const LoopSafetyInfo *SafetyInfo) { +static Instruction *CloneInstructionInExitBlock( + Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, + const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) { Instruction *New; if (auto *CI = dyn_cast(&I)) { const auto &BlockColors = SafetyInfo->BlockColors; @@ -866,6 +967,24 @@ if (!I.getName().empty()) New->setName(I.getName() + ".le"); + MemoryAccess *OldMemAcc; + if (MSSAU && (OldMemAcc = MSSAU->getMemorySSA()->getMemoryAccess(&I))) { + // Create a new MemoryAccess and let MemorySSA set its defining access. + MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( + New, nullptr, New->getParent(), MemorySSA::Beginning); + if (NewMemAcc) { + if (auto *MemDef = dyn_cast(NewMemAcc)) + MSSAU->insertDef(MemDef, /*RenameUses=*/true); + else { + auto *MemUse = cast(NewMemAcc); + MSSAU->insertUse(MemUse); + // This is only called to properly update the defining access. + if (EnableLicmCap) + MSSAU->getMemorySSA()->getWalker()->getClobberingMemoryAccess(MemUse); + } + } + } + // Build LCSSA PHI nodes for any in-loop operands. Note that this is // particularly cheap because we can rip off the PHI node that we're // replacing for the number and blocks of the predecessors. @@ -891,7 +1010,8 @@ static Instruction *sinkThroughTriviallyReplaceablePHI( PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap &SunkCopies, - const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop) { + const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, + MemorySSAUpdater *MSSAU) { assert(isTriviallyReplaceablePHI(*TPN, *I) && "Expect only trivially replaceable PHI"); BasicBlock *ExitBlock = TPN->getParent(); @@ -900,8 +1020,8 @@ if (It != SunkCopies.end()) New = It->second; else - New = SunkCopies[ExitBlock] = - CloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI, SafetyInfo); + New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock( + *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU); return New; } @@ -925,7 +1045,8 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, LoopInfo *LI, const Loop *CurLoop, - LoopSafetyInfo *SafetyInfo) { + LoopSafetyInfo *SafetyInfo, + MemorySSAUpdater *MSSAU) { #ifndef NDEBUG SmallVector ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); @@ -975,7 +1096,7 @@ "Expect all predecessors are in the loop"); if (PN->getBasicBlockIndex(PredBB) >= 0) { BasicBlock *NewPred = SplitBlockPredecessors( - ExitBB, PredBB, ".split.loop.exit", DT, LI, nullptr, true); + ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true); // Since we do not allow splitting EH-block with BlockColors in // canSplitPredecessors(), we can simply assign predecessor's color to // the new block. @@ -999,7 +1120,8 @@ /// static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, bool FreeInLoop) { + MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE, + bool FreeInLoop) { LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) @@ -1051,7 +1173,7 @@ // Split predecessors of the PHI so that we can make users trivially // replaceable. - splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo); + splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU); // Should rebuild the iterators, as they may be invalidated by // splitPredecessorsOfLoopExit(). @@ -1086,8 +1208,8 @@ assert(ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!"); // The PHI must be trivially replaceable. - Instruction *New = sinkThroughTriviallyReplaceablePHI(PN, &I, LI, SunkCopies, - SafetyInfo, CurLoop); + Instruction *New = sinkThroughTriviallyReplaceablePHI( + PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU); PN->replaceAllUsesWith(New); PN->eraseFromParent(); Changed = true; @@ -1099,7 +1221,8 @@ /// is safe to hoist, this instruction is called to do the dirty work. /// static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, - LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { + LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, + OptimizationRemarkEmitter *ORE) { auto *Preheader = CurLoop->getLoopPreheader(); LLVM_DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); @@ -1121,6 +1244,18 @@ // Move the new node to the Preheader, before its terminator. I.moveBefore(Preheader->getTerminator()); + if (MSSAU) { + // If moving, I just moved a load or store, so update MemorySSA. + MemoryUseOrDef *OldMemAcc = cast_or_null( + MSSAU->getMemorySSA()->getMemoryAccess(&I)); + if (OldMemAcc) { + MSSAU->moveToPlace(OldMemAcc, Preheader, MemorySSA::End); + // This is only called to properly update the defining access. + if (EnableLicmCap) + MSSAU->getMemorySSA()->getWalker()->getClobberingMemoryAccess( + OldMemAcc); + } + } // Do not retain debug locations when we are moving instructions to different // basic blocks, because we want to avoid jumpy line tables. Calls, however, @@ -1649,6 +1784,28 @@ return false; } +static bool pointerInvalidatedByLoopWithMSSA(Value *V, MemorySSA *MSSA, + MemoryUseOrDef *MUD, + Loop *CurLoop) { + assert(V->getType()->isPointerTy() && "Value tested is not a pointer."); + if (isa(V)) + return true; + + // MUD is from a LoadInst or CallInst, so both Uses. + if (MemoryUse *MU = cast_or_null(MUD)) { + MemoryAccess *Source; + // See declaration of EnableLicmCap for usage details. + if (EnableLicmCap) + Source = MU->getDefiningAccess(); + else + Source = MSSA->getWalker()->getClobberingMemoryAccess(MU); + if (MSSA->isLiveOnEntryDef(Source) || + !CurLoop->contains(Source->getBlock())) + return false; + } + return true; +} + /// Little predicate that returns true if the specified basic block is in /// a subloop of the current one, not the current one itself. /// @@ -1656,3 +1813,11 @@ assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); return LI->getLoopFor(BB) != CurLoop; } + +static void removeFromAnalyses(Instruction *I, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAU) { + if (CurAST) + CurAST->deleteValue(I); + else + MSSAU->removeMemoryAccess(I); +} Index: lib/Transforms/Scalar/LoopSink.cpp =================================================================== --- lib/Transforms/Scalar/LoopSink.cpp +++ lib/Transforms/Scalar/LoopSink.cpp @@ -298,7 +298,7 @@ // No need to check for instruction's operands are loop invariant. assert(L.hasLoopInvariantOperands(I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, false)) + if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false)) continue; if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI)) Changed = true; Index: test/Transforms/LICM/argmemonly-call.ll =================================================================== --- test/Transforms/LICM/argmemonly-call.ll +++ test/Transforms/LICM/argmemonly-call.ll @@ -2,6 +2,7 @@ ; RUN: opt -licm -basicaa -licm-n2-threshold=200 < %s -S | FileCheck %s --check-prefix=ALIAS-N2 ; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=0 -passes='require,require,require,require,loop(licm)' < %s -S | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -licm-n2-threshold=200 -passes='require,require,require,require,loop(licm)' < %s -S | FileCheck %s --check-prefix=ALIAS-N2 +; RUN: opt -S -basicaa -licm -licm-n2-threshold=0 -enable-mssa-loop-dependency=true -verify-memoryssa %s | FileCheck %s --check-prefix=ALIAS-N2 declare i32 @foo() readonly argmemonly nounwind declare i32 @foo2() readonly nounwind @@ -11,6 +12,9 @@ ; CHECK-LABEL: @test ; CHECK: @foo ; CHECK-LABEL: loop: +; ALIAS-N2-LABEL: @test +; ALIAS-N2: @foo +; ALIAS-N2-LABEL: loop: br label %loop loop: @@ -24,6 +28,9 @@ ; CHECK-LABEL: @test_neg ; CHECK-LABEL: loop: ; CHECK: @foo +; ALIAS-N2-LABEL: @test_neg +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: @foo br label %loop loop: @@ -36,6 +43,9 @@ ; CHECK-LABEL: @test2 ; CHECK: @bar ; CHECK-LABEL: loop: +; ALIAS-N2-LABEL: @test2 +; ALIAS-N2: @bar +; ALIAS-N2-LABEL: loop: br label %loop loop: @@ -49,6 +59,9 @@ ; CHECK-LABEL: @test3 ; CHECK-LABEL: loop: ; CHECK: @bar +; ALIAS-N2-LABEL: @test3 +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: @bar br label %loop loop: @@ -64,6 +77,9 @@ ; CHECK-LABEL: @test4 ; CHECK-LABEL: loop: ; CHECK: @bar +; ALIAS-N2-LABEL: @test4 +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: @bar br label %loop loop: @@ -77,6 +93,7 @@ ; we clump foo_new with bar. ; With the N2 Alias analysis diagnostic tool, we are able to hoist the ; argmemonly bar call out of the loop. +; Using MemorySSA we can also hoist bar. define void @test5(i32* %loc2, i32* noalias %loc) { ; ALIAS-N2-LABEL: @test5 @@ -103,6 +120,10 @@ ; CHECK: %val = load i32, i32* %loc2 ; CHECK-LABEL: loop: ; CHECK: @llvm.memcpy +; ALIAS-N2-LABEL: @test6 +; ALIAS-N2: %val = load i32, i32* %loc2 +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: @llvm.memcpy br label %loop loop: @@ -119,6 +140,10 @@ ; CHECK: %val = load i32, i32* %loc2 ; CHECK-LABEL: loop: ; CHECK: @custom_memcpy +; ALIAS-N2-LABEL: @test7 +; ALIAS-N2: %val = load i32, i32* %loc2 +; ALIAS-N2-LABEL: loop: +; ALIAS-N2: @custom_memcpy br label %loop loop: Index: test/Transforms/LICM/hoist-bitcast-load.ll =================================================================== --- test/Transforms/LICM/hoist-bitcast-load.ll +++ test/Transforms/LICM/hoist-bitcast-load.ll @@ -1,5 +1,6 @@ ; RUN: opt -S -basicaa -licm < %s | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='require,loop(simplify-cfg,licm)' -S < %s | FileCheck %s +; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LICM/hoist-debuginvariant.ll =================================================================== --- test/Transforms/LICM/hoist-debuginvariant.ll +++ test/Transforms/LICM/hoist-debuginvariant.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -licm -S | FileCheck %s ; RUN: opt < %s -strip-debug -licm -S | FileCheck %s +; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LICM/hoist-deref-load.ll =================================================================== --- test/Transforms/LICM/hoist-deref-load.ll +++ test/Transforms/LICM/hoist-deref-load.ll @@ -1,5 +1,7 @@ ; RUN: opt -S -basicaa -licm < %s | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='require,loop(simplify-cfg,licm)' -S < %s | FileCheck %s +; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require,loop(simplify-cfg,licm)' -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LICM/hoist-fast-fdiv.ll =================================================================== --- test/Transforms/LICM/hoist-fast-fdiv.ll +++ test/Transforms/LICM/hoist-fast-fdiv.ll @@ -1,4 +1,5 @@ ; RUN: opt -licm -S < %s | FileCheck %s +; RUN: opt -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s ; Function Attrs: noinline norecurse nounwind readnone ssp uwtable define zeroext i1 @invariant_denom(double %v) #0 { Index: test/Transforms/LICM/hoist-invariant-load.ll =================================================================== --- test/Transforms/LICM/hoist-invariant-load.ll +++ test/Transforms/LICM/hoist-invariant-load.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts ; RUN: opt < %s -licm -disable-basicaa -stats -S 2>&1 | grep "1 licm" +; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -disable-basicaa -stats -S 2>&1 | grep "1 licm" @"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1 @"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" Index: test/Transforms/LICM/hoist-nounwind.ll =================================================================== --- test/Transforms/LICM/hoist-nounwind.ll +++ test/Transforms/LICM/hoist-nounwind.ll @@ -1,5 +1,6 @@ ; RUN: opt -S -basicaa -licm < %s | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='require,loop(licm)' -S %s | FileCheck %s +; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LICM/hoist-round.ll =================================================================== --- test/Transforms/LICM/hoist-round.ll +++ test/Transforms/LICM/hoist-round.ll @@ -1,5 +1,6 @@ ; RUN: opt -S -licm < %s | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='require,require,require,require,loop(licm)' -S %s | FileCheck %s +; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:32-v128:32:32-a0:0:32-n32" Index: test/Transforms/LICM/hoisting.ll =================================================================== --- test/Transforms/LICM/hoisting.ll +++ test/Transforms/LICM/hoisting.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -licm -S | FileCheck %s ; RUN: opt < %s -aa-pipeline=basic-aa -passes='require,loop(licm)' -S | FileCheck %s +; RUN: opt < %s -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s @X = global i32 0 ; [#uses=1] Index: test/Transforms/LICM/sink-promote.ll =================================================================== --- /dev/null +++ test/Transforms/LICM/sink-promote.ll @@ -0,0 +1,50 @@ +; RUN: opt < %s -basicaa -licm -S | FileCheck %s + +; Test moved from sinking.ll, as it tests sinking of a store who alone touches +; a memory location in a loop. +; Store can be sunk out of exit block containing indirectbr instructions after +; D50925. Updated to use an argument instead of undef, due to PR38989. +define void @test12(i32* %ptr) { +; CHECK-LABEL: @test12 +; CHECK: store +; CHECK-NEXT: br label %lab4 + br label %lab4 + +lab4: + br label %lab20 + +lab5: + br label %lab20 + +lab6: + br label %lab4 + +lab7: + br i1 undef, label %lab8, label %lab13 + +lab8: + br i1 undef, label %lab13, label %lab10 + +lab10: + br label %lab7 + +lab13: + ret void + +lab20: + br label %lab21 + +lab21: +; CHECK: lab21: +; CHECK-NOT: store +; CHECK: br i1 false, label %lab21, label %lab22 + store i32 36127957, i32* %ptr, align 4 + br i1 undef, label %lab21, label %lab22 + +lab22: +; CHECK: lab22: +; CHECK-NOT: store +; CHECK-NEXT: indirectbr i8* undef + indirectbr i8* undef, [label %lab5, label %lab6, label %lab7] +} + Index: test/Transforms/LICM/sink.ll =================================================================== --- test/Transforms/LICM/sink.ll +++ test/Transforms/LICM/sink.ll @@ -2,6 +2,7 @@ ; RUN: opt -S -licm < %s | opt -S -loop-sink | FileCheck %s --check-prefix=CHECK-SINK ; RUN: opt -S < %s -passes='require,loop(licm),loop-sink' \ ; RUN: | FileCheck %s --check-prefix=CHECK-SINK +; RUN: opt -S -licm -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s --check-prefix=CHECK-LICM ; Original source code: ; int g; Index: test/Transforms/LICM/sinking.ll =================================================================== --- test/Transforms/LICM/sinking.ll +++ test/Transforms/LICM/sinking.ll @@ -1,5 +1,7 @@ ; RUN: opt < %s -basicaa -licm -S | FileCheck %s ; RUN: opt < %s -debugify -basicaa -licm -S | FileCheck %s -check-prefix=DEBUGIFY +; RUN: opt < %s -basicaa -licm -S -enable-mssa-loop-dependency=true -verify-memoryssa | FileCheck %s + declare i32 @strlen(i8*) readonly nounwind @@ -358,50 +360,7 @@ ret i32 %lcssa } -; Can't sink stores out of exit blocks containing indirectbr instructions -; because loop simplify does not create dedicated exits for such blocks. Test -; that by sinking the store from lab21 to lab22, but not further. -define void @test12() { -; CHECK-LABEL: @test12 - br label %lab4 - -lab4: - br label %lab20 - -lab5: - br label %lab20 - -lab6: - br label %lab4 - -lab7: - br i1 undef, label %lab8, label %lab13 - -lab8: - br i1 undef, label %lab13, label %lab10 - -lab10: - br label %lab7 - -lab13: - ret void - -lab20: - br label %lab21 - -lab21: -; CHECK: lab21: -; CHECK-NOT: store -; CHECK: br i1 false, label %lab21, label %lab22 - store i32 36127957, i32* undef, align 4 - br i1 undef, label %lab21, label %lab22 - -lab22: -; CHECK: lab22: -; CHECK: store -; CHECK-NEXT: indirectbr i8* undef - indirectbr i8* undef, [label %lab5, label %lab6, label %lab7] -} +; @test12 moved to sink-promote.ll, as it tests sinking and promotion. ; Test that we don't crash when trying to sink stores and there's no preheader ; available (which is used for creating loads that may be used by the SSA Index: test/Transforms/LICM/volatile-alias.ll =================================================================== --- test/Transforms/LICM/volatile-alias.ll +++ test/Transforms/LICM/volatile-alias.ll @@ -1,5 +1,6 @@ ; RUN: opt -basicaa -sroa -loop-rotate -licm -S < %s | FileCheck %s ; RUN: opt -basicaa -sroa -loop-rotate %s | opt -aa-pipeline=basic-aa -passes='require,require,require,require,loop(licm)' -S | FileCheck %s +; RUN: opt -basicaa -sroa -loop-rotate -licm -enable-mssa-loop-dependency=true -verify-memoryssa -S < %s | FileCheck %s ; The objects *p and *q are aliased to each other, but even though *q is ; volatile, *p can be considered invariant in the loop. Check if it is moved ; out of the loop.