Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -110,9 +110,28 @@ bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE); -struct SinkAndHoistLICMFlags { - bool NoOfMemAccTooLarge; - unsigned LicmMssaOptCounter; +/// Flags controlling how much is checked when sinking or hoisting +/// instructions. The number of memory access in the loop (and whether there +/// are too many) is determined in the constructors when using MemorySSA. +class SinkAndHoistLICMFlags { +public: + // Explicitly set limits. + SinkAndHoistLICMFlags(unsigned LicmMssaOptCap, + unsigned LicmMssaNoAccForPromotionCap, bool IsSink, + Loop *L = nullptr, MemorySSA *MSSA = nullptr); + // Use default limits. + SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr, + MemorySSA *MSSA = nullptr); + + void setIsSink(bool B) { IsSink = B; } + bool getIsSink() { return IsSink; } + bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; } + bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; } + void incrementClobberingCalls() { ++LicmMssaOptCounter; } + +protected: + bool NoOfMemAccTooLarge = false; + unsigned LicmMssaOptCounter = 0; unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; bool IsSink; Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -163,8 +163,10 @@ AliasSetTracker *CurAST, Loop *CurLoop, AAResults *AA); static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop, + Loop *CurLoop, Instruction &I, SinkAndHoistLICMFlags &Flags); +static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, + MemoryUse &MU); static Instruction *cloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); @@ -297,6 +299,36 @@ return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); } +llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L, + MemorySSA *MSSA) + : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap, + IsSink, L, MSSA) {} + +llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags( + unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, + bool IsSink, Loop *L, MemorySSA *MSSA) + : LicmMssaOptCap(LicmMssaOptCap), + LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), + IsSink(IsSink) { + assert(((L != nullptr) == (MSSA != nullptr)) && + "Unexpected values for SinkAndHoistLICMFlags"); + if (!MSSA) + return; + + unsigned AccessCapCount = 0; + for (auto *BB : L->getBlocks()) + if (auto *Accesses = MSSA->getBlockAccesses(BB)) + for (const auto &MA : *Accesses) { + (void)MA; + ++AccessCapCount; + if (AccessCapCount > LicmMssaNoAccForPromotionCap) { + NoOfMemAccTooLarge = true; + return; + } + } +} + + /// Hoist expressions out of the specified loop. Note, alias info for inner /// loop is not preserved so it is not a good idea to run LICM multiple /// times on one loop. @@ -316,31 +348,18 @@ std::unique_ptr CurAST; std::unique_ptr MSSAU; - bool NoOfMemAccTooLarge = false; - unsigned LicmMssaOptCounter = 0; + std::unique_ptr Flags; if (!MSSA) { LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n"); CurAST = collectAliasInfoForLoop(L, LI, AA); + Flags = std::make_unique( + LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true); } else { LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n"); MSSAU = std::make_unique(MSSA); - - unsigned AccessCapCount = 0; - for (auto *BB : L->getBlocks()) { - if (auto *Accesses = MSSA->getBlockAccesses(BB)) { - for (const auto &MA : *Accesses) { - (void)MA; - AccessCapCount++; - if (AccessCapCount > LicmMssaNoAccForPromotionCap) { - NoOfMemAccTooLarge = true; - break; - } - } - } - if (NoOfMemAccTooLarge) - break; - } + Flags = std::make_unique( + LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA); } // Get the preheader block to move instructions into... @@ -359,18 +378,16 @@ // that we are guaranteed to see definitions before we see uses. This allows // us to sink instructions in one pass, without iteration. After sinking // instructions, we perform another pass to hoist them out of the loop. - SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter, - LicmMssaOptCap, LicmMssaNoAccForPromotionCap, - /*IsSink=*/true}; if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE); - Flags.IsSink = false; + CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE); + Flags->setIsSink(false); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, - CurAST.get(), MSSAU.get(), SE, &SafetyInfo, Flags, ORE); + CurAST.get(), MSSAU.get(), SE, &SafetyInfo, *Flags.get(), + ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -380,7 +397,7 @@ // preheader for SSA updater, so also avoid sinking when no preheader // is available. if (!DisablePromotion && Preheader && L->hasDedicatedExits() && - !NoOfMemAccTooLarge) { + !Flags->tooManyMemoryAccesses()) { // Figure out the loop exits and their insertion points SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -1139,7 +1156,7 @@ CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast(MSSA->getMemoryAccess(LI)), CurLoop, *Flags); + MSSA, cast(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags); // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -1188,7 +1205,7 @@ CurAST, CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast(MSSA->getMemoryAccess(CI)), CurLoop, + MSSA, cast(MSSA->getMemoryAccess(CI)), CurLoop, I, *Flags); if (Invalidated) return false; @@ -1248,12 +1265,9 @@ } else { // MSSAU if (isOnlyMemoryAccess(SI, CurLoop, MSSAU)) return true; - // If there are more accesses than the Promotion cap, give up, we're not - // walking a list that long. - if (Flags->NoOfMemAccTooLarge) - return false; - // Check store only if there's still "quota" to check clobber. - if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap) + // If there are more accesses than the Promotion cap or no "quota" to + // check clobber, then give up as we're not walking a list that long. + if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls()) return false; // If there are interfering Uses (i.e. their defining access is in the // loop), or ordered loads (stored as Defs!), don't move this store. @@ -1273,7 +1287,7 @@ // Uses may point to an access outside the loop, as getClobbering // checks the previous iteration when walking the backedge. // FIXME: More precise: no Uses that alias SI. - if (!Flags->IsSink && !MSSA->dominates(SIMD, MU)) + if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU)) return false; } else if (const auto *MD = dyn_cast(&MA)) { if (auto *LI = dyn_cast(MD->getMemoryInst())) { @@ -1292,9 +1306,8 @@ } } } - auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); - Flags->LicmMssaOptCounter++; + Flags->incrementClobberingCalls(); // If there are no clobbering Defs in the loop, store is safe to hoist. return MSSA->isLiveOnEntryDef(Source) || !CurLoop->contains(Source->getBlock()); @@ -2264,18 +2277,18 @@ return false; } -static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop, - SinkAndHoistLICMFlags &Flags) { +bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, + Loop *CurLoop, Instruction &I, + SinkAndHoistLICMFlags &Flags) { // For hoisting, use the walker to determine safety - if (!Flags.IsSink) { + if (!Flags.getIsSink()) { MemoryAccess *Source; // See declaration of SetLicmMssaOptCap for usage details. - if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap) + if (Flags.tooManyClobberingCalls()) Source = MU->getDefiningAccess(); else { Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU); - Flags.LicmMssaOptCounter++; + Flags.incrementClobberingCalls(); } return !MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()); @@ -2298,15 +2311,25 @@ // FIXME: Increase precision: Safe to sink if Use post dominates the Def; // needs PostDominatorTreeAnalysis. // FIXME: More precise: no Defs that alias this Use. - if (Flags.NoOfMemAccTooLarge) + if (Flags.tooManyMemoryAccesses()) return true; for (auto *BB : CurLoop->getBlocks()) - if (auto *Accesses = MSSA->getBlockDefs(BB)) - for (const auto &MA : *Accesses) - if (const auto *MD = dyn_cast(&MA)) - if (MU->getBlock() != MD->getBlock() || - !MSSA->locallyDominates(MD, MU)) - return true; + if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU)) + return true; + // When sinking, the source block may not be part of the loop so check it. + if (!CurLoop->contains(&I)) + return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU); + + return false; +} + +bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, + MemoryUse &MU) { + if (auto *Accesses = MSSA.getBlockDefs(&BB)) + for (const auto &MA : *Accesses) + if (const auto *MD = dyn_cast(&MA)) + if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU)) + return true; return false; } Index: llvm/lib/Transforms/Scalar/LoopSink.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopSink.cpp +++ llvm/lib/Transforms/Scalar/LoopSink.cpp @@ -39,6 +39,8 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" @@ -172,11 +174,10 @@ // sinking is successful. // \p LoopBlockNumber is used to sort the insertion blocks to ensure // determinism. -static bool sinkInstruction(Loop &L, Instruction &I, - const SmallVectorImpl &ColdLoopBBs, - const SmallDenseMap &LoopBlockNumber, - LoopInfo &LI, DominatorTree &DT, - BlockFrequencyInfo &BFI) { +static bool sinkInstruction( + Loop &L, Instruction &I, const SmallVectorImpl &ColdLoopBBs, + const SmallDenseMap &LoopBlockNumber, LoopInfo &LI, + DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) { // Compute the set of blocks in loop L which contain a use of I. SmallPtrSet BBs; for (auto &U : I.uses()) { @@ -230,6 +231,21 @@ Instruction *IC = I.clone(); IC->setName(I.getName()); IC->insertBefore(&*N->getFirstInsertionPt()); + + if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { + // Create a new MemoryAccess and let MemorySSA set its defining access. + MemoryAccess *NewMemAcc = + MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning); + if (NewMemAcc) { + if (auto *MemDef = dyn_cast(NewMemAcc)) + MSSAU->insertDef(MemDef, /*RenameUses=*/true); + else { + auto *MemUse = cast(NewMemAcc); + MSSAU->insertUse(MemUse, /*RenameUses=*/true); + } + } + } + // Replaces uses of I with IC in N I.replaceUsesWithIf(IC, [N](Use &U) { return cast(U.getUser())->getParent() == N; @@ -244,18 +260,22 @@ NumLoopSunk++; I.moveBefore(&*MoveBB->getFirstInsertionPt()); + if (MSSAU) + if (MemoryUseOrDef *OldMemAcc = cast_or_null( + MSSAU->getMemorySSA()->getMemoryAccess(&I))) + MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning); + return true; } /// Sinks instructions from loop's preheader to the loop body if the /// sum frequency of inserted copy is smaller than preheader's frequency. -static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, - DominatorTree &DT, - BlockFrequencyInfo &BFI, - ScalarEvolution *SE) { +static bool sinkLoopInvariantInstructions( + Loop &L, AAResults &AA, LoopInfo &LI, DominatorTree &DT, + BlockFrequencyInfo &BFI, ScalarEvolution *SE, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAU, SinkAndHoistLICMFlags *LICMFlags) { BasicBlock *Preheader = L.getLoopPreheader(); - if (!Preheader) - return false; + assert(Preheader && "Expected loop to have preheader"); // Enable LoopSink only when runtime profile is available. // With static profile, the sinking decision may be sub-optimal. @@ -272,12 +292,6 @@ return false; bool Changed = false; - AliasSetTracker CurAST(AA); - - // Compute alias set. - for (BasicBlock *BB : L.blocks()) - CurAST.add(*BB); - CurAST.add(*Preheader); // Sort loop's basic blocks by frequency SmallVector ColdLoopBBs; @@ -300,9 +314,10 @@ // No need to check for instruction's operands are loop invariant. assert(L.hasLoopInvariantOperands(I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false)) + if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU, false, LICMFlags)) continue; - if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI)) + if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI, + MSSAU)) Changed = true; } @@ -320,6 +335,8 @@ AAResults &AA = FAM.getResult(F); DominatorTree &DT = FAM.getResult(F); BlockFrequencyInfo &BFI = FAM.getResult(F); + MemorySSA &MSSA = FAM.getResult(F).getMSSA(); + MemorySSAUpdater MSSAU(&MSSA); // We want to do a postorder walk over the loops. Since loops are a tree this // is equivalent to a reversed preorder walk and preorder is easy to compute @@ -329,19 +346,29 @@ SmallVector PreorderLoops = LI.getLoopsInPreorder(); bool Changed = false; + do { Loop &L = *PreorderLoops.pop_back_val(); + BasicBlock *Preheader = L.getLoopPreheader(); + if (!Preheader) + continue; + + SinkAndHoistLICMFlags LICMFlags(/*IsSink=*/true, &L, &MSSA); // Note that we don't pass SCEV here because it is only used to invalidate // loops in SCEV and we don't preserve (or request) SCEV at all making that // unnecessary. Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI, - /*ScalarEvolution*/ nullptr); + /*ScalarEvolution*/ nullptr, + nullptr, &MSSAU, &LICMFlags); } while (!PreorderLoops.empty()); if (!Changed) return PreservedAnalyses::all(); + if (VerifyMemorySSA) + MSSA.verifyMemorySSA(); + PreservedAnalyses PA; PA.preserveSet(); return PA; @@ -358,13 +385,25 @@ if (skipLoop(L)) return false; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) + return false; + auto *SE = getAnalysisIfAvailable(); + + AAResults &AA = getAnalysis().getAAResults(); + AliasSetTracker CurAST(AA); + + // Compute alias set. + for (BasicBlock *BB : L->blocks()) + CurAST.add(*BB); + CurAST.add(*Preheader); + return sinkLoopInvariantInstructions( - *L, getAnalysis().getAAResults(), - getAnalysis().getLoopInfo(), + *L, AA, getAnalysis().getLoopInfo(), getAnalysis().getDomTree(), getAnalysis().getBFI(), - SE ? &SE->getSE() : nullptr); + SE ? &SE->getSE() : nullptr, &CurAST, nullptr, nullptr); } void getAnalysisUsage(AnalysisUsage &AU) const override { Index: llvm/test/Transforms/LICM/loopsink-pr38462.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink-pr38462.ll +++ llvm/test/Transforms/LICM/loopsink-pr38462.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc19.13.26128" Index: llvm/test/Transforms/LICM/loopsink-pr39570.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink-pr39570.ll +++ llvm/test/Transforms/LICM/loopsink-pr39570.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s ; CHECK: pr39570 ; Make sure not to assert. Index: llvm/test/Transforms/LICM/loopsink-pr39695.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink-pr39695.ll +++ llvm/test/Transforms/LICM/loopsink-pr39695.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s ; The load instruction should not be sunk into following loop. ; CHECK: @foo Index: llvm/test/Transforms/LICM/loopsink.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink.ll +++ llvm/test/Transforms/LICM/loopsink.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s -; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s @g = global i32 0, align 4