Index: include/llvm/Analysis/AliasSetTracker.h =================================================================== --- include/llvm/Analysis/AliasSetTracker.h +++ include/llvm/Analysis/AliasSetTracker.h @@ -36,6 +36,8 @@ class AliasSetTracker; class BasicBlock; class LoadInst; +class Loop; +class MemorySSA; class AnyMemSetInst; class AnyMemTransferInst; class raw_ostream; @@ -341,6 +343,8 @@ struct ASTCallbackVHDenseMapInfo : public DenseMapInfo {}; AliasAnalysis &AA; + MemorySSA *MSSA; + Loop *L; ilist AliasSets; using PointerMapType = DenseMap &, - SmallVectorImpl &, - SmallVectorImpl &, - PredIteratorCache &, LoopInfo *, - DominatorTree *, const TargetLibraryInfo *, - Loop *, AliasSetTracker *, - ICFLoopSafetyInfo *, - OptimizationRemarkEmitter *); +bool promoteLoopAccessesToScalars( + const SmallSetVector &, SmallVectorImpl &, + SmallVectorImpl &, SmallVectorImpl &, + PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, + Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, + OptimizationRemarkEmitter *); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. Index: include/llvm/Transforms/Utils/SSAUpdater.h =================================================================== --- include/llvm/Transforms/Utils/SSAUpdater.h +++ include/llvm/Transforms/Utils/SSAUpdater.h @@ -147,7 +147,7 @@ /// Insts is a list of loads and stores to promote, and Name is the basename /// for the PHIs to insert. After this is complete, the loads and stores are /// removed from the code. - void run(const SmallVectorImpl &Insts) const; + void run(const SmallVectorImpl &Insts); /// Return true if the specified instruction is in the Inst list. /// @@ -158,7 +158,7 @@ /// This hook is invoked after all the stores are found and inserted as /// available values. - virtual void doExtraRewritesBeforeFinalDeletion() const {} + virtual void doExtraRewritesBeforeFinalDeletion() {} /// Clients can choose to implement this to get notified right before /// a load is RAUW'd another value. Index: lib/Analysis/AliasSetTracker.cpp =================================================================== --- lib/Analysis/AliasSetTracker.cpp +++ lib/Analysis/AliasSetTracker.cpp @@ -13,7 +13,9 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/GuardUtils.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -523,6 +525,15 @@ } } +void AliasSetTracker::addAllInstructionsInLoopUsingMSSA() { + assert(MSSA && L && "MSSA and L must be available"); + for (const BasicBlock *BB : L->blocks()) + if (auto *Accesses = MSSA->getBlockAccesses(BB)) + for (auto &Access : *Accesses) + if (auto *MUD = dyn_cast(&Access)) + add(MUD->getMemoryInst()); +} + // deleteValue method - This method is used to remove a pointer value from the // AliasSetTracker entirely. It should be used when an instruction is deleted // from the program to update the AST. If you don't use this, you would have Index: lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- lib/Transforms/Instrumentation/InstrProfiling.cpp +++ lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -186,7 +186,7 @@ SSA.AddAvailableValue(PH, Init); } - void doExtraRewritesBeforeFinalDeletion() const override { + void doExtraRewritesBeforeFinalDeletion() override { for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; Instruction *InsertPos = InsertPts[i]; Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -118,6 +118,16 @@ cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in " "pathological cases, in exchange for faster compile")); +// Experimentally, memory promotion carries less importance than sinking and +// hoisting. Limit when we do promotion when using MemorySSA, in order to save +// compile time. +static cl::opt AccessCapForMSSAPromotion( + "max-acc-licm-promotion", cl::init(250), cl::Hidden, + cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no " + "effect. When MSSA in LICM is enabled, then this is the maximum " + "number of accesses allowed to be present in a loop in order to " + "enable memory promotion.")); + static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, @@ -166,6 +176,9 @@ std::unique_ptr collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA); + std::unique_ptr + collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA, + MemorySSAUpdater *MSSAU); }; struct LegacyLICMPass : public LoopPass { @@ -293,12 +306,29 @@ std::unique_ptr CurAST; std::unique_ptr MSSAU; + bool LocalDisablePromotion = false; if (!MSSA) { LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n"); CurAST = collectAliasInfoForLoop(L, LI, AA); } else { - LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA. Promotion disabled.\n"); + LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n"); MSSAU = make_unique(MSSA); + + unsigned AccessCapCount = 0; + for (auto *BB : L->getBlocks()) { + if (auto *Accesses = MSSA->getBlockAccesses(BB)) { + for (const auto &MA : *Accesses) { + (void)MA; + AccessCapCount++; + if (AccessCapCount > AccessCapForMSSAPromotion) { + LocalDisablePromotion = true; + break; + } + } + } + if (LocalDisablePromotion) + break; + } } // Get the preheader block to move instructions into... @@ -332,7 +362,8 @@ // make sure we catch that. An additional load may be generated in the // preheader for SSA updater, so also avoid sinking when no preheader // is available. - if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + if (!DisablePromotion && Preheader && L->hasDedicatedExits() && + !LocalDisablePromotion) { // Figure out the loop exits and their insertion points SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -344,38 +375,45 @@ if (!HasCatchSwitch) { SmallVector InsertPts; + SmallVector MSSAInsertPts; InsertPts.reserve(ExitBlocks.size()); - for (BasicBlock *ExitBlock : ExitBlocks) + if (MSSAU) + MSSAInsertPts.reserve(ExitBlocks.size()); + for (BasicBlock *ExitBlock : ExitBlocks) { InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); + if (MSSAU) + MSSAInsertPts.push_back(nullptr); + } PredIteratorCache PIC; bool Promoted = false; - if (CurAST.get()) { - // Loop over all of the alias sets in the tracker object. - for (AliasSet &AS : *CurAST) { - // We can promote this alias set if it has a store, if it is a "Must" - // alias set, if the pointer is loop invariant, and if we are not - // eliminating any volatile loads or stores. - if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || - !L->isLoopInvariant(AS.begin()->getValue())) - continue; - - assert( - !AS.empty() && - "Must alias set should have at least one pointer element in it!"); - - SmallSetVector PointerMustAliases; - for (const auto &ASI : AS) - PointerMustAliases.insert(ASI.getValue()); - - Promoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, - CurAST.get(), &SafetyInfo, ORE); - } + // Build an AST using MSSA. + if (!CurAST.get()) + CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get()); + + // Loop over all of the alias sets in the tracker object. + for (AliasSet &AS : *CurAST) { + // We can promote this alias set if it has a store, if it is a "Must" + // alias set, if the pointer is loop invariant, and if we are not + // eliminating any volatile loads or stores. + if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || + !L->isLoopInvariant(AS.begin()->getValue())) + continue; + + assert( + !AS.empty() && + "Must alias set should have at least one pointer element in it!"); + + SmallSetVector PointerMustAliases; + for (const auto &ASI : AS) + PointerMustAliases.insert(ASI.getValue()); + + Promoted |= promoteLoopAccessesToScalars( + PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, + DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE); } - // FIXME: Promotion initially disabled when using MemorySSA. // Once we have promoted values across the loop body we have to // recursively reform LCSSA as any nested loop may now have values defined @@ -399,7 +437,7 @@ // If this loop is nested inside of another one, save the alias information // for when we process the outer loop. - if (CurAST.get() && L->getParentLoop() && !DeleteAST) + if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST) LoopToAliasSetMap[L] = std::move(CurAST); if (MSSAU.get() && VerifyMemorySSA) @@ -1609,8 +1647,10 @@ const SmallSetVector &PointerMustAliases; SmallVectorImpl &LoopExitBlocks; SmallVectorImpl &LoopInsertPts; + SmallVectorImpl &MSSAInsertPts; PredIteratorCache &PredCache; AliasSetTracker &AST; + MemorySSAUpdater *MSSAU; LoopInfo &LI; DebugLoc DL; int Alignment; @@ -1637,15 +1677,16 @@ LoopPromoter(Value *SP, ArrayRef Insts, SSAUpdater &S, const SmallSetVector &PMA, SmallVectorImpl &LEB, - SmallVectorImpl &LIP, PredIteratorCache &PIC, - AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment, - bool UnorderedAtomic, const AAMDNodes &AATags, - ICFLoopSafetyInfo &SafetyInfo) + SmallVectorImpl &LIP, + SmallVectorImpl &MSSAIP, PredIteratorCache &PIC, + AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li, + DebugLoc dl, int alignment, bool UnorderedAtomic, + const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), - LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast), - LI(li), DL(std::move(dl)), Alignment(alignment), - UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo) - {} + LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), + PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)), + Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags), + SafetyInfo(SafetyInfo) {} bool isInstInList(Instruction *I, const SmallVectorImpl &) const override { @@ -1657,7 +1698,7 @@ return PointerMustAliases.count(Ptr); } - void doExtraRewritesBeforeFinalDeletion() const override { + void doExtraRewritesBeforeFinalDeletion() override { // Insert stores after in the loop exit blocks. Each exit block gets a // store of the live-out values that feed them. Since we've already told // the SSA updater about the defs in the loop and the preheader @@ -1675,6 +1716,21 @@ NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); + + if (MSSAU) { + MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i]; + MemoryAccess *NewMemAcc; + if (!MSSAInsertPoint) { + NewMemAcc = MSSAU->createMemoryAccessInBB( + NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning); + } else { + NewMemAcc = + MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint); + } + MSSAInsertPts[i] = NewMemAcc; + MSSAU->insertDef(cast(NewMemAcc), true); + // FIXME: true for safety, false may still be correct. + } } } @@ -1685,6 +1741,8 @@ void instructionDeleted(Instruction *I) const override { SafetyInfo.removeInstruction(I); AST.deleteValue(I); + if (MSSAU) + MSSAU->removeMemoryAccess(I); } }; @@ -1721,10 +1779,11 @@ bool llvm::promoteLoopAccessesToScalars( const SmallSetVector &PointerMustAliases, SmallVectorImpl &ExitBlocks, - SmallVectorImpl &InsertPts, PredIteratorCache &PIC, + SmallVectorImpl &InsertPts, + SmallVectorImpl &MSSAInsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - Loop *CurLoop, AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE) { + Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, + ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && @@ -1941,8 +2000,8 @@ SmallVector NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - InsertPts, PIC, *CurAST, *LI, DL, Alignment, - SawUnorderedAtomic, AATags, *SafetyInfo); + InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL, + Alignment, SawUnorderedAtomic, AATags, *SafetyInfo); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. @@ -1956,13 +2015,21 @@ PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); + MemoryAccess *PreheaderLoadMemoryAccess; + if (MSSAU) { + PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( + PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); + MemoryUse *NewMemUse = cast(PreheaderLoadMemoryAccess); + MSSAU->insertUse(NewMemUse); + } + // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); // If the SSAUpdater didn't use the load in the preheader, just zap it now. if (PreheaderLoad->use_empty()) - eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, nullptr); + eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU); return true; } @@ -2015,6 +2082,15 @@ return CurAST; } +std::unique_ptr +LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA( + Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) { + auto *MSSA = MSSAU->getMemorySSA(); + auto CurAST = make_unique(*AA, MSSA, L); + CurAST->addAllInstructionsInLoopUsingMSSA(); + return CurAST; +} + /// Simple analysis hook. Clone alias set info. /// void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Index: lib/Transforms/Utils/SSAUpdater.cpp =================================================================== --- lib/Transforms/Utils/SSAUpdater.cpp +++ lib/Transforms/Utils/SSAUpdater.cpp @@ -349,8 +349,7 @@ SSA.Initialize(SomeVal->getType(), BaseName); } -void LoadAndStorePromoter:: -run(const SmallVectorImpl &Insts) const { +void LoadAndStorePromoter::run(const SmallVectorImpl &Insts) { // First step: bucket up uses of the alloca by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. Index: test/CodeGen/PowerPC/pr35688.ll =================================================================== --- test/CodeGen/PowerPC/pr35688.ll +++ test/CodeGen/PowerPC/pr35688.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | \ +; RUN: llc -enable-mssa-loop-dependency=false -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | \ ; RUN: FileCheck %s +; RUN: llc -enable-mssa-loop-dependency=true -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | \ +; RUN: FileCheck %s --check-prefix=MSSA ; Function Attrs: nounwind define void @ec_GFp_nistp256_points_mul() { ; CHECK-LABEL: ec_GFp_nistp256_points_mul: @@ -14,6 +16,19 @@ ; CHECK: subfc 5, 5, 7 ; CHECK: subfe 5, 4, 6 ; CHECK: sradi 5, 5, 63 + +; With MemorySSA, everything is taken out of the loop by licm. +; Loads and stores to undef are treated as non-aliasing. +; MSSA-LABEL: ec_GFp_nistp256_points_mul +; MSSA: ld 3, 0(3) +; MSSA: li 4, 0 +; MSSA: subfic 5, 3, 0 +; MSSA: subfze 5, 4 +; MSSA: sradi 5, 5, 63 +; MSSA: subfc 3, 3, 5 +; MSSA: subfe 3, 4, 5 +; MSSA: sradi 3, 3, 63 +; MSSA: std 3, 0(3) entry: br label %fe_cmovznz.exit.i534.i.15 @@ -32,3 +47,4 @@ store i64 %conv4.i60.i.i, i64* undef, align 16 br label %fe_cmovznz.exit.i534.i.15 } +