Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -163,8 +163,10 @@ AliasSetTracker *CurAST, Loop *CurLoop, AAResults *AA); static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop, + Loop *CurLoop, Instruction &I, SinkAndHoistLICMFlags &Flags); +static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, + MemoryUse &MU); static Instruction *cloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); @@ -1155,7 +1157,7 @@ CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast(MSSA->getMemoryAccess(LI)), CurLoop, *Flags); + MSSA, cast(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags); // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -1211,7 +1213,7 @@ CurAST, CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast(MSSA->getMemoryAccess(CI)), CurLoop, + MSSA, cast(MSSA->getMemoryAccess(CI)), CurLoop, I, *Flags); if (Invalidated) return false; @@ -1312,7 +1314,6 @@ } } } - auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); Flags->incrementClobberingCalls(); // If there are no clobbering Defs in the loop, store is safe to hoist. @@ -2285,7 +2286,7 @@ } bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop, + Loop *CurLoop, Instruction &I, SinkAndHoistLICMFlags &Flags) { // For hoisting, use the walker to determine safety if (!Flags.getIsSink()) { @@ -2321,12 +2322,22 @@ if (Flags.tooManyMemoryAccesses()) return true; for (auto *BB : CurLoop->getBlocks()) - if (auto *Accesses = MSSA->getBlockDefs(BB)) - for (const auto &MA : *Accesses) - if (const auto *MD = dyn_cast(&MA)) - if (MU->getBlock() != MD->getBlock() || - !MSSA->locallyDominates(MD, MU)) - return true; + if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU)) + return true; + // When sinking, the source block may not be part of the loop so check it. + if (!CurLoop->contains(&I)) + return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU); + + return false; +} + +bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, + MemoryUse &MU) { + if (const auto *Accesses = MSSA.getBlockDefs(&BB)) + for (const auto &MA : *Accesses) + if (const auto *MD = dyn_cast(&MA)) + if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU)) + return true; return false; } Index: llvm/lib/Transforms/Scalar/LoopSink.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopSink.cpp +++ llvm/lib/Transforms/Scalar/LoopSink.cpp @@ -39,6 +39,8 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" @@ -67,6 +69,14 @@ "max-uses-for-sinking", cl::Hidden, cl::init(30), cl::desc("Do not sink instructions that have too many uses.")); +static cl::opt EnableMSSAInLoopSink( + "enable-mssa-in-loop-sink", cl::Hidden, cl::init(false), + cl::desc("Enable MemorySSA for LoopSink in new pass manager")); + +static cl::opt EnableMSSAInLegacyLoopSink( + "enable-mssa-in-legacy-loop-sink", cl::Hidden, cl::init(false), + cl::desc("Enable MemorySSA for LoopSink in legacy pass manager")); + /// Return adjusted total frequency of \p BBs. /// /// * If there is only one BB, sinking instruction will not introduce code @@ -172,11 +182,10 @@ // sinking is successful. // \p LoopBlockNumber is used to sort the insertion blocks to ensure // determinism. -static bool sinkInstruction(Loop &L, Instruction &I, - const SmallVectorImpl &ColdLoopBBs, - const SmallDenseMap &LoopBlockNumber, - LoopInfo &LI, DominatorTree &DT, - BlockFrequencyInfo &BFI) { +static bool sinkInstruction( + Loop &L, Instruction &I, const SmallVectorImpl &ColdLoopBBs, + const SmallDenseMap &LoopBlockNumber, LoopInfo &LI, + DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) { // Compute the set of blocks in loop L which contain a use of I. SmallPtrSet BBs; for (auto &U : I.uses()) { @@ -230,6 +239,21 @@ Instruction *IC = I.clone(); IC->setName(I.getName()); IC->insertBefore(&*N->getFirstInsertionPt()); + + if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { + // Create a new MemoryAccess and let MemorySSA set its defining access. + MemoryAccess *NewMemAcc = + MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning); + if (NewMemAcc) { + if (auto *MemDef = dyn_cast(NewMemAcc)) + MSSAU->insertDef(MemDef, /*RenameUses=*/true); + else { + auto *MemUse = cast(NewMemAcc); + MSSAU->insertUse(MemUse, /*RenameUses=*/true); + } + } + } + // Replaces uses of I with IC in N I.replaceUsesWithIf(IC, [N](Use &U) { return cast(U.getUser())->getParent() == N; @@ -244,6 +268,11 @@ NumLoopSunk++; I.moveBefore(&*MoveBB->getFirstInsertionPt()); + if (MSSAU) + if (MemoryUseOrDef *OldMemAcc = cast_or_null( + MSSAU->getMemorySSA()->getMemoryAccess(&I))) + MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning); + return true; } @@ -252,15 +281,14 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo &BFI, - ScalarEvolution *SE) { + ScalarEvolution *SE, + AliasSetTracker *CurAST, + MemorySSA *MSSA) { BasicBlock *Preheader = L.getLoopPreheader(); - if (!Preheader) - return false; + assert(Preheader && "Expected loop to have preheader"); - // Enable LoopSink only when runtime profile is available. - // With static profile, the sinking decision may be sub-optimal. - if (!Preheader->getParent()->hasProfileData()) - return false; + assert(Preheader->getParent()->hasProfileData() && + "Unexpected call when profile data unavailable."); const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader); // If there are no basic blocks with lower frequency than the preheader then @@ -271,13 +299,15 @@ })) return false; - bool Changed = false; - AliasSetTracker CurAST(AA); + std::unique_ptr MSSAU; + std::unique_ptr LICMFlags; + if (MSSA) { + MSSAU = std::make_unique(MSSA); + LICMFlags = + std::make_unique(/*IsSink=*/true, &L, MSSA); + } - // Compute alias set. - for (BasicBlock *BB : L.blocks()) - CurAST.add(*BB); - CurAST.add(*Preheader); + bool Changed = false; // Sort loop's basic blocks by frequency SmallVector ColdLoopBBs; @@ -300,9 +330,11 @@ // No need to check for instruction's operands are loop invariant. assert(L.hasLoopInvariantOperands(I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false)) + if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false, + LICMFlags.get())) continue; - if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI)) + if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI, + MSSAU.get())) Changed = true; } @@ -311,6 +343,13 @@ return Changed; } +static void computeAliasSet(Loop &L, BasicBlock &Preheader, + AliasSetTracker &CurAST) { + for (BasicBlock *BB : L.blocks()) + CurAST.add(*BB); + CurAST.add(Preheader); +} + PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { LoopInfo &LI = FAM.getResult(F); // Nothing to do if there are no loops. @@ -321,6 +360,10 @@ DominatorTree &DT = FAM.getResult(F); BlockFrequencyInfo &BFI = FAM.getResult(F); + MemorySSA *MSSA = EnableMSSAInLoopSink + ? &FAM.getResult(F).getMSSA() + : nullptr; + // We want to do a postorder walk over the loops. Since loops are a tree this // is equivalent to a reversed preorder walk and preorder is easy to compute // without recursion. Since we reverse the preorder, we will visit siblings @@ -332,11 +375,27 @@ do { Loop &L = *PreorderLoops.pop_back_val(); + BasicBlock *Preheader = L.getLoopPreheader(); + if (!Preheader) + continue; + + // Enable LoopSink only when runtime profile is available. + // With static profile, the sinking decision may be sub-optimal. + if (!Preheader->getParent()->hasProfileData()) + continue; + + std::unique_ptr CurAST; + if (!EnableMSSAInLoopSink) { + CurAST = std::make_unique(AA); + computeAliasSet(L, *Preheader, *CurAST.get()); + } + // Note that we don't pass SCEV here because it is only used to invalidate // loops in SCEV and we don't preserve (or request) SCEV at all making that // unnecessary. Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI, - /*ScalarEvolution*/ nullptr); + /*ScalarEvolution*/ nullptr, + CurAST.get(), MSSA); } while (!PreorderLoops.empty()); if (!Changed) @@ -344,6 +403,14 @@ PreservedAnalyses PA; PA.preserveSet(); + + if (MSSA) { + PA.preserve(); + + if (VerifyMemorySSA) + MSSA->verifyMemorySSA(); + } + return PA; } @@ -358,19 +425,46 @@ if (skipLoop(L)) return false; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) + return false; + + // Enable LoopSink only when runtime profile is available. + // With static profile, the sinking decision may be sub-optimal. + if (!Preheader->getParent()->hasProfileData()) + return false; + + AAResults &AA = getAnalysis().getAAResults(); auto *SE = getAnalysisIfAvailable(); - return sinkLoopInvariantInstructions( - *L, getAnalysis().getAAResults(), - getAnalysis().getLoopInfo(), + std::unique_ptr CurAST; + MemorySSA *MSSA = nullptr; + if (EnableMSSAInLegacyLoopSink) + MSSA = &getAnalysis().getMSSA(); + else { + CurAST = std::make_unique(AA); + computeAliasSet(*L, *Preheader, *CurAST.get()); + } + + bool Changed = sinkLoopInvariantInstructions( + *L, AA, getAnalysis().getLoopInfo(), getAnalysis().getDomTree(), getAnalysis().getBFI(), - SE ? &SE->getSE() : nullptr); + SE ? &SE->getSE() : nullptr, CurAST.get(), MSSA); + + if (MSSA && VerifyMemorySSA) + MSSA->verifyMemorySSA(); + + return Changed; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); getLoopAnalysisUsage(AU); + if (EnableMSSAInLegacyLoopSink) { + AU.addRequired(); + AU.addPreserved(); + } } }; } @@ -380,6 +474,7 @@ false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false, false) Pass *llvm::createLoopSinkPass() { return new LegacyLoopSinkPass(); } Index: llvm/test/Transforms/LICM/loopsink-pr38462.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink-pr38462.ll +++ llvm/test/Transforms/LICM/loopsink-pr38462.ll @@ -1,4 +1,7 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s +; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc19.13.26128" Index: llvm/test/Transforms/LICM/loopsink-pr39570.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink-pr39570.ll +++ llvm/test/Transforms/LICM/loopsink-pr39570.ll @@ -1,4 +1,7 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s +; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s ; CHECK: pr39570 ; Make sure not to assert. Index: llvm/test/Transforms/LICM/loopsink-pr39695.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink-pr39695.ll +++ llvm/test/Transforms/LICM/loopsink-pr39695.ll @@ -1,4 +1,7 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s +; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s ; The load instruction should not be sunk into following loop. ; CHECK: @foo Index: llvm/test/Transforms/LICM/loopsink.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink.ll +++ llvm/test/Transforms/LICM/loopsink.ll @@ -1,5 +1,7 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s ; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s @g = global i32 0, align 4