Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -163,8 +163,10 @@ AliasSetTracker *CurAST, Loop *CurLoop, AAResults *AA); static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop, + Loop *CurLoop, Instruction &I, SinkAndHoistLICMFlags &Flags); +static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, + MemoryUse &MU); static Instruction *cloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); @@ -1152,7 +1154,7 @@ CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags); + MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags); // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -1201,7 +1203,7 @@ CurAST, CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, + MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I, *Flags); if (Invalidated) return false; @@ -1302,7 +1304,6 @@ } } } - auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); Flags->incrementClobberingCalls(); // If there are no clobbering Defs in the loop, store is safe to hoist. 
@@ -2275,7 +2276,7 @@ } bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop, + Loop *CurLoop, Instruction &I, SinkAndHoistLICMFlags &Flags) { // For hoisting, use the walker to determine safety if (!Flags.getIsSink()) { @@ -2311,12 +2312,22 @@ if (Flags.tooManyMemoryAccesses()) return true; for (auto *BB : CurLoop->getBlocks()) - if (auto *Accesses = MSSA->getBlockDefs(BB)) - for (const auto &MA : *Accesses) - if (const auto *MD = dyn_cast<MemoryDef>(&MA)) - if (MU->getBlock() != MD->getBlock() || - !MSSA->locallyDominates(MD, MU)) - return true; + if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU)) + return true; + // When sinking, the source block may not be part of the loop so check it. + if (!CurLoop->contains(&I)) + return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU); + + return false; +} + +bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, + MemoryUse &MU) { + if (const auto *Accesses = MSSA.getBlockDefs(&BB)) + for (const auto &MA : *Accesses) + if (const auto *MD = dyn_cast<MemoryDef>(&MA)) + if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU)) + return true; return false; } Index: llvm/lib/Transforms/Scalar/LoopSink.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopSink.cpp +++ llvm/lib/Transforms/Scalar/LoopSink.cpp @@ -39,6 +39,8 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Dominators.h" @@ -172,11 +174,10 @@ // sinking is successful. // \p LoopBlockNumber is used to sort the insertion blocks to ensure // determinism. 
-static bool sinkInstruction(Loop &L, Instruction &I, - const SmallVectorImpl<BasicBlock *> &ColdLoopBBs, - const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, - LoopInfo &LI, DominatorTree &DT, - BlockFrequencyInfo &BFI) { +static bool sinkInstruction( + Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs, + const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI, + DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) { // Compute the set of blocks in loop L which contain a use of I. SmallPtrSet<BasicBlock *, 2> BBs; for (auto &U : I.uses()) { @@ -230,6 +231,21 @@ Instruction *IC = I.clone(); IC->setName(I.getName()); IC->insertBefore(&*N->getFirstInsertionPt()); + + if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { + // Create a new MemoryAccess and let MemorySSA set its defining access. + MemoryAccess *NewMemAcc = + MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning); + if (NewMemAcc) { + if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc)) + MSSAU->insertDef(MemDef, /*RenameUses=*/true); + else { + auto *MemUse = cast<MemoryUse>(NewMemAcc); + MSSAU->insertUse(MemUse, /*RenameUses=*/true); + } + } + } + // Replaces uses of I with IC in N I.replaceUsesWithIf(IC, [N](Use &U) { return cast<Instruction>(U.getUser())->getParent() == N; }); @@ -244,18 +260,22 @@ NumLoopSunk++; I.moveBefore(&*MoveBB->getFirstInsertionPt()); + if (MSSAU) + if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>( + MSSAU->getMemorySSA()->getMemoryAccess(&I))) + MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning); + return true; } /// Sinks instructions from loop's preheader to the loop body if the /// sum frequency of inserted copy is smaller than preheader's frequency. 
-static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, - DominatorTree &DT, - BlockFrequencyInfo &BFI, - ScalarEvolution *SE) { +static bool sinkLoopInvariantInstructions( + Loop &L, AAResults &AA, LoopInfo &LI, DominatorTree &DT, + BlockFrequencyInfo &BFI, ScalarEvolution *SE, AliasSetTracker *CurAST, + MemorySSAUpdater *MSSAU, SinkAndHoistLICMFlags *LICMFlags) { BasicBlock *Preheader = L.getLoopPreheader(); - if (!Preheader) - return false; + assert(Preheader && "Expected loop to have preheader"); // Enable LoopSink only when runtime profile is available. // With static profile, the sinking decision may be sub-optimal. @@ -272,12 +292,6 @@ return false; bool Changed = false; - AliasSetTracker CurAST(AA); - - // Compute alias set. - for (BasicBlock *BB : L.blocks()) - CurAST.add(*BB); - CurAST.add(*Preheader); // Sort loop's basic blocks by frequency SmallVector<BasicBlock *, 10> ColdLoopBBs; @@ -300,9 +314,10 @@ // No need to check for instruction's operands are loop invariant. assert(L.hasLoopInvariantOperands(I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false)) + if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU, false, LICMFlags)) continue; - if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI)) + if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI, + MSSAU)) Changed = true; } @@ -320,6 +335,8 @@ AAResults &AA = FAM.getResult<AAManager>(F); DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F); BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); + MemorySSA &MSSA = FAM.getResult<MemorySSAAnalysis>(F).getMSSA(); + MemorySSAUpdater MSSAU(&MSSA); // We want to do a postorder walk over the loops. 
Since loops are a tree this // is equivalent to a reversed preorder walk and preorder is easy to compute @@ -332,16 +349,25 @@ do { Loop &L = *PreorderLoops.pop_back_val(); + BasicBlock *Preheader = L.getLoopPreheader(); + if (!Preheader) + continue; + + SinkAndHoistLICMFlags LICMFlags(/*IsSink=*/true, &L, &MSSA); // Note that we don't pass SCEV here because it is only used to invalidate // loops in SCEV and we don't preserve (or request) SCEV at all making that // unnecessary. Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI, - /*ScalarEvolution*/ nullptr); + /*ScalarEvolution*/ nullptr, + nullptr, &MSSAU, &LICMFlags); } while (!PreorderLoops.empty()); if (!Changed) return PreservedAnalyses::all(); + if (VerifyMemorySSA) + MSSA.verifyMemorySSA(); + PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); return PA; @@ -358,13 +384,25 @@ if (skipLoop(L)) return false; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) + return false; + auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); + + AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + AliasSetTracker CurAST(AA); + + // Compute alias set. + for (BasicBlock *BB : L->blocks()) + CurAST.add(*BB); + CurAST.add(*Preheader); + return sinkLoopInvariantInstructions( - *L, getAnalysis<AAResultsWrapperPass>().getAAResults(), - getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), + *L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), getAnalysis<DominatorTreeWrapperPass>().getDomTree(), getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(), - SE ? &SE->getSE() : nullptr); + SE ? 
&SE->getSE() : nullptr, &CurAST, nullptr, nullptr); } void getAnalysisUsage(AnalysisUsage &AU) const override { Index: llvm/test/Transforms/LICM/loopsink-pr38462.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink-pr38462.ll +++ llvm/test/Transforms/LICM/loopsink-pr38462.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc19.13.26128" Index: llvm/test/Transforms/LICM/loopsink-pr39570.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink-pr39570.ll +++ llvm/test/Transforms/LICM/loopsink-pr39570.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s ; CHECK: pr39570 ; Make sure not to assert. Index: llvm/test/Transforms/LICM/loopsink-pr39695.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink-pr39695.ll +++ llvm/test/Transforms/LICM/loopsink-pr39695.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s ; The load instruction should not be sunk into following loop. ; CHECK: @foo Index: llvm/test/Transforms/LICM/loopsink.ll =================================================================== --- llvm/test/Transforms/LICM/loopsink.ll +++ llvm/test/Transforms/LICM/loopsink.ll @@ -1,5 +1,5 @@ ; RUN: opt -S -loop-sink < %s | FileCheck %s -; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s +; RUN: opt -S -verify-memoryssa -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s @g = global i32 0, align 4