Index: include/llvm/Transforms/Utils/MemorySSA.h
===================================================================
--- include/llvm/Transforms/Utils/MemorySSA.h
+++ include/llvm/Transforms/Utils/MemorySSA.h
@@ -536,6 +536,31 @@
     return It == PerBlockAccesses.end() ? nullptr : It->second.get();
   }
 
+  enum InsertionPlace { Beginning, End };
+
+  /// \brief Create a MemoryAccess in MemorySSA at a specified point in a block,
+  /// with a specified clobbering definition.
+  /// This should be called when a memory instruction is created that is being
+  /// used to replace an existing memory instruction. It will *not* create PHI
+  /// nodes, or verify the clobbering definition. The insertion place is used
+  /// solely to determine where in the MemorySSA access lists the instruction
+  /// will be placed. It will return the new MemoryAccess.
+  MemoryAccess *createMemoryAccess(Instruction *I, MemoryAccess *Definition,
+                                   const BasicBlock *BB, InsertionPlace Point);
+
+  /// \brief Create a MemoryAccess in MemorySSA before or after an existing
+  /// MemoryAccess, with a specified clobbering definition.
+  /// This should be called when a memory instruction is created that is being
+  /// used to replace an existing memory instruction. It will *not* create PHI
+  /// nodes, or verify the clobbering definition. \p InsertPt is used solely
+  /// to determine where in the MemorySSA access list the new access will be
+  /// placed. It will return the new MemoryAccess.
+  MemoryAccess *createMemoryAccessBefore(Instruction *I,
+                                         MemoryAccess *Definition,
+                                         MemoryAccess *InsertPt);
+  MemoryAccess *createMemoryAccessAfter(Instruction *I,
+                                        MemoryAccess *Definition,
+                                        MemoryAccess *InsertPt);
+
   /// \brief Remove a MemoryAccess from MemorySSA, including updating all
   /// definitions and uses.
   /// This should be called when a memory instruction that has a MemoryAccess
@@ -544,8 +569,6 @@
   /// on the MemoryAccess for that store/load.
   void removeMemoryAccess(MemoryAccess *);
 
-  enum InsertionPlace { Beginning, End };
-
   /// \brief Given two memory accesses in the same basic block, determine
   /// whether MemoryAccess \p A dominates MemoryAccess \p B.
   bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const;
@@ -572,13 +595,14 @@
   void markUnreachableAsLiveOnEntry(BasicBlock *BB);
   bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
   MemoryUseOrDef *createNewAccess(Instruction *);
+  MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *);
   MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
   void removeFromLookups(MemoryAccess *);
   MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *);
   void renamePass(DomTreeNode *, MemoryAccess *IncomingVal,
                   SmallPtrSet<BasicBlock *, 16> &Visited);
-  AccessListType *getOrCreateAccessList(BasicBlock *);
+  AccessListType *getOrCreateAccessList(const BasicBlock *);
   AliasAnalysis *AA;
   DominatorTree *DT;
   Function &F;
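A minimal usage sketch of the update API declared above (illustrative, not part of the patch): replacing one store with an equivalent new one while keeping MemorySSA consistent. The helper name replaceStoreKeepingMemorySSA and its setup are hypothetical; the MemorySSA calls are the ones this patch introduces.

  #include "llvm/IR/Instructions.h"
  #include "llvm/Transforms/Utils/MemorySSA.h"
  using namespace llvm;

  // Hypothetical client: NewStore has already been created and inserted
  // immediately after OldStore in the IR. Reuse OldStore's clobbering
  // definition; as documented above, no phi nodes are created and the
  // defining access is not verified.
  static void replaceStoreKeepingMemorySSA(MemorySSA &MSSA,
                                           StoreInst *OldStore,
                                           StoreInst *NewStore) {
    auto *OldAccess = cast<MemoryUseOrDef>(MSSA.getMemoryAccess(OldStore));
    // Place the new access right after the old one in the access list.
    MSSA.createMemoryAccessAfter(NewStore, OldAccess->getDefiningAccess(),
                                 OldAccess);
    // Removing the old access updates all definitions and uses; afterwards
    // the dead store itself can be erased.
    MSSA.removeMemoryAccess(OldAccess);
    OldStore->eraseFromParent();
  }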
Index: lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
===================================================================
--- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -86,25 +86,91 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
+#include <queue>
+#include <unordered_set>
+#include <vector>
 using namespace llvm;
 
 #define DEBUG_TYPE "mldst-motion"
 
+static cl::opt<bool> UseMemorySSA("use-memoryssa", cl::Hidden,
+                                  cl::desc("Use MemorySSA for optimizations"));
+
 //===----------------------------------------------------------------------===//
 //                         MergedLoadStoreMotion Pass
 //===----------------------------------------------------------------------===//
 
 namespace {
+
+// This hash matches the most common things isSameOperationAs checks. It must
+// always check a subset of what isSameOperationAs checks for everything to
+// function properly.
+struct StoreInstHash {
+  size_t operator()(const StoreInst *A) const {
+    return hash_combine(A->getType(), A->getPointerOperand()->getType(),
+                        A->getValueOperand()->getType(), A->isVolatile(),
+                        A->getAlignment(), A->getOrdering(),
+                        A->getSynchScope());
+  }
+};
+
+struct StoreInstEq {
+  bool operator()(const StoreInst *A, const StoreInst *B) const {
+    return (A == B || A->isSameOperationAs(B));
+  }
+};
+
+typedef std::pair<LoadInst *, const MemoryAccess *> LoadPair;
+
+// This hash has two parts: the first is the instruction, the second the
+// clobbering MemorySSA access. For the instruction part, this hash matches
+// the most common things isSameOperationAs checks, and it must always check
+// a subset of what isSameOperationAs checks for everything to function
+// properly.
+struct LoadPairHash {
+  size_t operator()(const LoadPair &LP) const {
+    const LoadInst *A = LP.first;
+    return hash_combine(A->getType(), A->getPointerOperand()->getType(),
+                        A->isVolatile(), A->getAlignment(), A->getOrdering(),
+                        A->getSynchScope(), LP.second);
+  }
+};
+
+struct LoadPairEq {
+  bool operator()(const LoadPair &A, const LoadPair &B) const {
+    return (A.second == B.second &&
+            (A.first == B.first || A.first->isSameOperationAs(B.first)));
+  }
+};
+
 class MergedLoadStoreMotion : public FunctionPass {
   AliasAnalysis *AA;
   MemoryDependenceResults *MD;
+  MemorySSA *MSSA;
+  MemorySSAWalker *CachingWalker;
+  DominatorTree *DT;
+  // The non-MemorySSA versions of mergeLoad/Store algorithms could have Size0 *
+  // Size1 complexity, where Size0 and Size1 are the #instructions on the two
+  // sides of the diamond. The constant chosen here is arbitrary. Compiler Time
+  // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
+  const int MagicCompileTimeControl;
+  // We DFS-number instructions, and avoid hoisting or sinking past may-throw
+  // instructions and instructions not guaranteed to transfer execution to
+  // their successors.
+  DenseMap<const Instruction *, unsigned> DFSNumberMap;
+  DenseMap<const BasicBlock *, unsigned> FirstHoistBarrier;
+  DenseMap<const BasicBlock *, unsigned> LastSinkBarrier;
+  SmallPtrSet<MemoryAccess *, 8> AccessesToDelete;
+  std::unordered_multiset<LoadPair, LoadPairHash, LoadPairEq> LoadInfo;
+  std::unordered_multiset<StoreInst *, StoreInstHash, StoreInstEq> StoreInfo;
 
 public:
   static char ID; // Pass identification, replacement for typeid
   MergedLoadStoreMotion()
       : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
+    initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
   }
 
@@ -114,8 +180,12 @@
   // This transformation requires dominator postdominator info
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<MemorySSAWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<MemorySSAWrapperPass>();
     AU.addPreserved<MemoryDependenceWrapperPass>();
   }
 
@@ -130,26 +200,25 @@
   bool isDiamondHead(BasicBlock *BB);
   // Routines for hoisting loads
   bool isLoadHoistBarrierInRange(const Instruction &Start,
-                                 const Instruction &End, LoadInst *LI,
-                                 bool SafeToLoadUnconditionally);
+                                 const Instruction &End, LoadInst *LI);
   LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
   void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
                         Instruction *ElseInst);
   bool isSafeToHoist(Instruction *I) const;
   bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
   bool mergeLoads(BasicBlock *BB);
+  bool mergeLoadsMemorySSA(BasicBlock *BB);
   // Routines for sinking stores
   StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
   PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
   bool isStoreSinkBarrierInRange(const Instruction &Start,
                                  const Instruction &End, MemoryLocation Loc);
+  bool isStoreSinkBarrierInRangeMemorySSA(MemoryLocation &Loc,
+                                          MemoryAccess *Start);
   bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
   bool mergeStores(BasicBlock *BB);
-  // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
-  // where Size0 and Size1 are the #instructions on the two sides of
-  // the diamond. The constant chosen here is arbitrary. Compiler Time
-  // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
-  const int MagicCompileTimeControl;
+  bool mergeStoresMemorySSA(BasicBlock *BB);
+  void computeBarriers();
 };
 
 char MergedLoadStoreMotion::ID = 0;
@@ -227,14 +296,9 @@
 /// being loaded or protect against the load from happening
 /// it is considered a hoist barrier.
 ///
-bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(
-    const Instruction &Start, const Instruction &End, LoadInst *LI,
-    bool SafeToLoadUnconditionally) {
-  if (!SafeToLoadUnconditionally)
-    for (const Instruction &Inst :
-         make_range(Start.getIterator(), End.getIterator()))
-      if (!isGuaranteedToTransferExecutionToSuccessor(&Inst))
-        return true;
+bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction &Start,
+                                                      const Instruction &End,
+                                                      LoadInst *LI) {
   MemoryLocation Loc = MemoryLocation::get(LI);
   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
 }
 
@@ -265,11 +329,20 @@
     MemoryLocation Loc0 = MemoryLocation::get(Load0);
     MemoryLocation Loc1 = MemoryLocation::get(Load1);
+
+    if (!SafeToLoadUnconditionally) {
+      // If the first hoist barrier in the block is before the load, we
+      // can't hoist.
+      unsigned int BB0HoistBarrier = FirstHoistBarrier.lookup(BB0);
+      if (BB0HoistBarrier != 0 && DFSNumberMap.lookup(Load0) > BB0HoistBarrier)
+        continue;
+      unsigned int BB1HoistBarrier = FirstHoistBarrier.lookup(BB1);
+      if (BB1HoistBarrier != 0 && DFSNumberMap.lookup(Load1) > BB1HoistBarrier)
+        continue;
+    }
+
     if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) &&
-        !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1,
-                                   SafeToLoadUnconditionally) &&
-        !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0,
-                                   SafeToLoadUnconditionally)) {
+        !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) &&
+        !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) {
       return Load1;
     }
   }
@@ -303,6 +376,20 @@
 
   // Hoist instruction.
   HoistedInst->insertBefore(HoistPt);
+  if (UseMemorySSA) {
+    // We also hoist operands of loads using this function, so check to see if
+    // this is really a memory access before we try to update MemorySSA for it.
+    MemoryAccess *HoistCandAccess = MSSA->getMemoryAccess(HoistCand);
+    if (HoistCandAccess) {
+      MemoryUseOrDef *MUD = cast<MemoryUseOrDef>(HoistCandAccess);
+      // Create the access for the hoisted instruction, then destroy the old
+      // accesses.
+      MSSA->createMemoryAccess(HoistedInst, MUD->getDefiningAccess(), BB,
+                               MemorySSA::End);
+      MSSA->removeMemoryAccess(HoistCandAccess);
+      MSSA->removeMemoryAccess(MSSA->getMemoryAccess(ElseInst));
+    }
+  }
 
   HoistCand->replaceAllUsesWith(HoistedInst);
   removeInstruction(HoistCand);
@@ -395,10 +482,6 @@
 bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
                                                       const Instruction &End,
                                                       MemoryLocation Loc) {
-  for (const Instruction &Inst :
-       make_range(Start.getIterator(), End.getIterator()))
-    if (Inst.mayThrow())
-      return true;
   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
 }
 
@@ -419,8 +502,17 @@
     if (!Store1)
       continue;
 
+    // If the last sink barrier in the block is after us, we can't sink out
+    // of the block.
+    unsigned int BB0SinkBarrier = LastSinkBarrier.lookup(BB0);
+    if (BB0SinkBarrier != 0 && DFSNumberMap.lookup(Store0) < BB0SinkBarrier)
+      continue;
+    unsigned int BB1SinkBarrier = LastSinkBarrier.lookup(BB1);
+    if (BB1SinkBarrier != 0 && DFSNumberMap.lookup(Store1) < BB1SinkBarrier)
+      continue;
     MemoryLocation Loc0 = MemoryLocation::get(Store0);
     MemoryLocation Loc1 = MemoryLocation::get(Store1);
+
     if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
         !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
         !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
@@ -480,6 +572,13 @@
 
   assert(S0->getParent() == A0->getParent());
   assert(S1->getParent() == A1->getParent());
+  if (UseMemorySSA) {
+    MSSA->removeMemoryAccess(MSSA->getMemoryAccess(S0));
+    // ilist's reverse-iterator invalidation semantics mean we need to wait
+    // until the loop in our caller is done before we delete these.
+    AccessesToDelete.insert(MSSA->getMemoryAccess(S1));
+  }
 
   // New PHI operand? Use it.
   if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
@@ -553,6 +652,31 @@
   return MergedStores;
 }
 
+// \brief DFS-number instructions in dominator-tree preorder, recording the
+// first hoist barrier and the last sink barrier in each block.
+void MergedLoadStoreMotion::computeBarriers() {
+  // Numbering starts at 1 so the default-constructed value of 0 can be used
+  // to say we didn't find anything.
+  unsigned DFSNum = 1;
+  for (auto DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode());
+       DI != DE; ++DI) {
+    BasicBlock *DomBlock = DI->getBlock();
+    bool FoundHoistBarrierInBlock = false;
+    for (const auto &Inst : *DomBlock) {
+      DFSNumberMap[&Inst] = DFSNum;
+
+      if (!FoundHoistBarrierInBlock &&
+          !isGuaranteedToTransferExecutionToSuccessor(&Inst)) {
+        FirstHoistBarrier[DomBlock] = DFSNum;
+        FoundHoistBarrierInBlock = true;
+      }
+      if (Inst.mayThrow()) {
+        LastSinkBarrier[DomBlock] = DFSNum;
+      }
+      ++DFSNum;
+    }
+  }
+}
+
 ///
 /// \brief Run the transformation for each function
 ///
@@ -563,21 +687,305 @@
   auto *MDWP = getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
   MD = MDWP ? &MDWP->getMemDep() : nullptr;
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  DT->updateDFSNumbers();
+  if (UseMemorySSA) {
+    MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+    CachingWalker = MSSA->getWalker();
+  }
 
   bool Changed = false;
   DEBUG(dbgs() << "Instruction Merger\n");
+  computeBarriers();
 
   // Merge unconditional branches, allowing PRE to catch more
   // optimization opportunities.
-  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
-    BasicBlock *BB = &*FI++;
-
+  for (BasicBlock &BB : F) {
     // Hoist equivalent loads and sink stores
     // outside diamonds when possible
-    if (isDiamondHead(BB)) {
-      Changed |= mergeLoads(BB);
-      Changed |= mergeStores(getDiamondTail(BB));
+    if (isDiamondHead(&BB)) {
+      if (UseMemorySSA) {
+        Changed |= mergeLoadsMemorySSA(&BB);
+        Changed |= mergeStoresMemorySSA(getDiamondTail(&BB));
+      } else {
+        Changed |= mergeLoads(&BB);
+        Changed |= mergeStores(getDiamondTail(&BB));
+      }
     }
   }
 
   return Changed;
 }
+
+/// This file contains a MemorySSA and a non-MemorySSA version of the same
+/// optimization; to keep it less confusing, the functions specific to the
+/// MemorySSA version are placed below this comment.
+
+///
+/// \brief Try to hoist two loads to the same address into the diamond header
+///
+/// Starting from a diamond head block, iterate over the loads in one
+/// successor block, putting them in a hash table.
+/// Then walk through the loads in the other successor block, and see if they
+/// match any in the hash table and can be hoisted.
+///
+bool MergedLoadStoreMotion::mergeLoadsMemorySSA(BasicBlock *BB) {
+  bool MergedLoads = false;
+  assert(isDiamondHead(BB));
+  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+  BasicBlock *Succ0 = BI->getSuccessor(0);
+  BasicBlock *Succ1 = BI->getSuccessor(1);
+  // #Instructions in Succ1 for Compile Time Control
+  int Size1 = Succ1->size();
+  int NLoads = 0;
+
+  // Skip if we don't have memory accesses in both blocks.
+  auto *Succ0Accesses = MSSA->getBlockAccesses(Succ0);
+  if (!Succ0Accesses)
+    return false;
+  auto *Succ1Accesses = MSSA->getBlockAccesses(Succ1);
+  if (!Succ1Accesses)
+    return false;
+
+  // Walk all accesses in the first block, putting the loads into a hash
+  // table.
+  for (auto &Access : *Succ0Accesses) {
+    const MemoryUse *MU = dyn_cast<MemoryUse>(&Access);
+    if (!MU)
+      continue;
+    Instruction *I = MU->getMemoryInst();
+    // Only hoist simple (non-atomic, non-volatile) loads.
+    LoadInst *Load0 = dyn_cast<LoadInst>(I);
+    // FIXME: The isUsedOutsideOfBlock check is super-conservative, and is
+    // used so we don't lengthen live ranges. There are better ways.
+    if (!Load0 || !Load0->isSimple() || Load0->isUsedOutsideOfBlock(Succ0))
+      continue;
+
+    LoadInfo.insert({Load0, CachingWalker->getClobberingMemoryAccess(Load0)});
+    ++NLoads;
+    if (NLoads * Size1 >= MagicCompileTimeControl)
+      break;
+  }
+
+  // Walk all accesses in the second block, and see if we can match them
+  // against accesses in the first.
+  for (auto AI = Succ1Accesses->begin(), AE = Succ1Accesses->end(); AI != AE;) {
+    auto CurrIter = AI;
+    ++AI;
+    const MemoryUse *MU = dyn_cast<MemoryUse>(&*CurrIter);
+    if (!MU)
+      continue;
+    LoadInst *Load1 = dyn_cast<LoadInst>(MU->getMemoryInst());
+    // This isUsedOutsideOfBlock check is also conservative.
+    if (!Load1 || Load1->isUsedOutsideOfBlock(Succ1))
+      continue;
+    // We know that if two loads have the same clobbering access, they must
+    // not be killed until the same point. That is, we are guaranteed that
+    // all the loads that could possibly be merged must have a common
+    // MemoryDef (or MemoryPhi) that they reach. If not, then we can't merge
+    // them, because something is in the way on one of the branches. If we
+    // *do* find a possible match, the only further checking we need to do
+    // is to ensure they are loads of the same pointer. We could simply
+    // check the pointer operands, but isMustAlias can do a better job of it.
+    auto LookupResult = LoadInfo.equal_range(
+        {Load1, CachingWalker->getClobberingMemoryAccess(Load1)});
+    bool Res = false;
+    auto LookupIter = LookupResult.first;
+    bool SafeToLoadUnconditionally =
+        (LookupResult.first != LookupResult.second) &&
+        isSafeToLoadUnconditionally(Load1->getPointerOperand(),
+                                    Load1->getAlignment(),
+                                    Load1->getModule()->getDataLayout(),
+                                    /*ScanFrom=*/BB->getTerminator());
+
+    while (!Res && LookupIter != LookupResult.second) {
+      LoadInst *Load0 = LookupIter->first;
+      auto OldIter = LookupIter;
+      ++LookupIter;
+      if (!SafeToLoadUnconditionally) {
+        // If the first hoist barrier in the block is before the load, we
+        // can't hoist.
+        unsigned int BB0HoistBarrier =
+            FirstHoistBarrier.lookup(Load0->getParent());
+        if (BB0HoistBarrier != 0 &&
+            DFSNumberMap.lookup(Load0) > BB0HoistBarrier)
+          continue;
+        unsigned int BB1HoistBarrier =
+            FirstHoistBarrier.lookup(Load1->getParent());
+        if (BB1HoistBarrier != 0 &&
+            DFSNumberMap.lookup(Load1) > BB1HoistBarrier)
+          continue;
+      }
+
+      MemoryLocation Loc0 = MemoryLocation::get(Load0);
+      MemoryLocation Loc1 = MemoryLocation::get(Load1);
+      if (AA->isMustAlias(Loc0, Loc1))
+        Res = hoistLoad(BB, Load0, Load1);
+      MergedLoads |= Res;
+      // Don't attempt to hoist above loads that have not been hoisted.
+      if (Res)
+        LoadInfo.erase(OldIter);
+    }
+  }
+  LoadInfo.clear();
+  return MergedLoads;
+}
+
+///
+/// \brief Try to sink equivalent stores from both predecessors into the
+/// footer
+///
+/// Starting from a diamond tail block, place all the stores in one
+/// predecessor in a hash table, and try to match them against stores in
+/// the second predecessor.
+///
+bool MergedLoadStoreMotion::mergeStoresMemorySSA(BasicBlock *T) {
+  bool MergedStores = false;
+  assert(T && "Footer of a diamond cannot be empty");
+
+  pred_iterator PI = pred_begin(T), E = pred_end(T);
+  assert(PI != E);
+  BasicBlock *Pred0 = *PI;
+  ++PI;
+  BasicBlock *Pred1 = *PI;
+  ++PI;
+  // Is this the tail block of a diamond/hammock?
+  if (Pred0 == Pred1)
+    return false; // No.
+  if (PI != E)
+    return false; // No. More than 2 predecessors.
+
+  // #Instructions in Pred1 for Compile Time Control
+  int Size1 = Pred1->size();
+  int NStores = 0;
+
+  // Skip all this if we don't have any memory accesses to look at.
+  auto *Pred0Accesses = MSSA->getBlockAccesses(Pred0);
+  if (!Pred0Accesses)
+    return false;
+  auto *Pred1Accesses = MSSA->getBlockAccesses(Pred1);
+  if (!Pred1Accesses)
+    return false;
+
+  for (auto AI = Pred0Accesses->rbegin(), AE = Pred0Accesses->rend(); AI != AE;
+       ++AI) {
+    if (const MemoryDef *MD = dyn_cast<MemoryDef>(&*AI)) {
+      Instruction *I = MD->getMemoryInst();
+
+      // Only sink simple (non-atomic, non-volatile) stores.
+      if (!isa<StoreInst>(I))
+        continue;
+      StoreInst *S0 = cast<StoreInst>(I);
+      if (!S0->isSimple())
+        continue;
+      StoreInfo.insert(S0);
+
+      ++NStores;
+      if (NStores * Size1 >= MagicCompileTimeControl)
+        break;
+    }
+  }
+
+  for (auto AI = Pred1Accesses->rbegin(), AE = Pred1Accesses->rend(); AI != AE;
+       ++AI) {
+    const MemoryDef *MD = dyn_cast<MemoryDef>(&*AI);
+    if (!MD)
+      continue;
+    Instruction *Inst = MD->getMemoryInst();
+    if (!isa<StoreInst>(Inst))
+      continue;
+
+    StoreInst *Store1 = cast<StoreInst>(Inst);
+    auto LookupResult = StoreInfo.equal_range(Store1);
+    bool Res = false;
+    auto LookupIter = LookupResult.first;
+    while (!Res && LookupIter != LookupResult.second) {
+      StoreInst *Store0 = *LookupIter;
+      auto OldIter = LookupIter;
+      ++LookupIter;
+
+      // If the last sink barrier in the block is after us, we can't sink out
+      // of the block.
+      unsigned int BB0SinkBarrier =
+          LastSinkBarrier.lookup(Store0->getParent());
+      if (BB0SinkBarrier != 0 && DFSNumberMap.lookup(Store0) < BB0SinkBarrier)
+        continue;
+      unsigned int BB1SinkBarrier =
+          LastSinkBarrier.lookup(Store1->getParent());
+      if (BB1SinkBarrier != 0 && DFSNumberMap.lookup(Store1) < BB1SinkBarrier)
+        continue;
+
+      MemoryLocation Loc0 = MemoryLocation::get(Store0);
+      MemoryLocation Loc1 = MemoryLocation::get(Store1);
+      if (!AA->isMustAlias(Loc0, Loc1) ||
+          isStoreSinkBarrierInRangeMemorySSA(Loc0,
+                                             MSSA->getMemoryAccess(Store0)) ||
+          isStoreSinkBarrierInRangeMemorySSA(Loc1,
+                                             MSSA->getMemoryAccess(Store1)))
+        continue;
+      Res = sinkStore(T, Store0, Store1);
+      if (Res)
+        StoreInfo.erase(OldIter);
+      MergedStores |= Res;
+    }
+  }
+
+  for (auto V : AccessesToDelete) {
+    MSSA->removeMemoryAccess(V);
+  }
+  AccessesToDelete.clear();
+  StoreInfo.clear();
+  return MergedStores;
+}
+
+///
+/// \brief True when the store is not downwards exposed to the sink point,
+/// i.e., some access below it may read or write the store's location.
+///
+bool MergedLoadStoreMotion::isStoreSinkBarrierInRangeMemorySSA(
+    MemoryLocation &Loc, MemoryAccess *Start) {
+  DomTreeNode *StartNode = DT->getNode(Start->getBlock());
+  std::pair<unsigned, unsigned> StartDFS = {StartNode->getDFSNumIn(),
+                                            StartNode->getDFSNumOut()};
+
+  // Seed with current users that are in the block range.
+  std::queue<MemoryAccess *> CurrentUses;
+  SmallPtrSet<const MemoryAccess *, 8> Visited;
+  for (auto U : Start->users())
+    CurrentUses.emplace(cast<MemoryAccess>(U));
+
+  // Process all the uses and see if they are below the end.
+  while (!CurrentUses.empty()) {
+    MemoryAccess *MA = CurrentUses.front();
+    CurrentUses.pop();
+
+    BasicBlock *BB = MA->getBlock();
+    DomTreeNode *BBNode = DT->getNode(BB);
+    std::pair<unsigned, unsigned> CurrDFS = {BBNode->getDFSNumIn(),
+                                             BBNode->getDFSNumOut()};
+    // The only way an access could affect us is if it's below us in the
+    // dominator tree. The start of the final sink point is not below us in
+    // a hammock's dominator tree, because in a hammock the merge block must
+    // be a sibling of the split block in the dominator tree. Thus, the
+    // things below us in the dominator tree are all things that lead to the
+    // sink point. A node is below us iff our DFS interval contains its.
+    if (CurrDFS.first >= StartDFS.first && CurrDFS.second <= StartDFS.second) {
+      // A phi node is not a memory access on its own, and we may have
+      // deleted this access already. Because we are walking backwards, we
+      // don't perform updates on the fly.
+      if (AccessesToDelete.count(MA) || isa<MemoryPhi>(MA))
+        continue;
+      Instruction *Use = cast<MemoryUseOrDef>(MA)->getMemoryInst();
+
+      // If it really conflicts, we have a barrier.
+      if (AA->getModRefInfo(Use, Loc) & MRI_ModRef)
+        return true;
+
+      // If not, add its immediate uses to keep walking.
+      for (auto U : MA->users()) {
+        MemoryAccess *MU = cast<MemoryAccess>(U);
+        if (Visited.insert(MU).second)
+          CurrentUses.emplace(MU);
+      }
+    }
+  }
+  return false;
+}
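A sketch of the matching scheme mergeLoadsMemorySSA relies on (illustrative, not from the patch): two loads are merge candidates only when MemorySSA reports the same clobbering access for both, so candidates in the other branch can be found by hash lookup instead of a quadratic scan. The helper names indexLoads and findMergeCandidate, and the single-value map, are hypothetical simplifications of the pass's unordered_multiset.

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/Transforms/Utils/MemorySSA.h"
  #include <unordered_map>
  using namespace llvm;

  // Hypothetical: index one branch's loads by their clobbering access.
  using ClobberMap = std::unordered_map<const MemoryAccess *, LoadInst *>;

  static ClobberMap indexLoads(MemorySSAWalker &Walker,
                               ArrayRef<LoadInst *> Branch0Loads) {
    ClobberMap Map;
    for (LoadInst *LI : Branch0Loads)
      Map.insert({Walker.getClobberingMemoryAccess(LI), LI});
    return Map;
  }

  // Probe with a load from the other branch. Loads that share a clobbering
  // MemoryDef/MemoryPhi reach the same program point unkilled, so on a hit
  // only the pointer check (isMustAlias in the pass) remains to be done.
  static LoadInst *findMergeCandidate(MemorySSAWalker &Walker,
                                      const ClobberMap &Map, LoadInst *LI) {
    auto It = Map.find(Walker.getClobberingMemoryAccess(LI));
    return It == Map.end() ? nullptr : It->second;
  }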
Index: lib/Transforms/Utils/MemorySSA.cpp
===================================================================
--- lib/Transforms/Utils/MemorySSA.cpp
+++ lib/Transforms/Utils/MemorySSA.cpp
@@ -225,7 +225,8 @@
     MA.dropAllReferences();
 }
 
-MemorySSA::AccessListType *MemorySSA::getOrCreateAccessList(BasicBlock *BB) {
+MemorySSA::AccessListType *
+MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
   auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
 
   if (Res.second)
@@ -356,6 +357,57 @@
   return Walker.get();
 }
 
+MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
+                                               MemoryAccess *Definition) {
+  assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
+  MemoryUseOrDef *NewAccess = createNewAccess(I);
+  assert(
+      NewAccess != nullptr &&
+      "Tried to create a memory access for a non-memory touching instruction");
+  NewAccess->setDefiningAccess(Definition);
+  return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccess(Instruction *I,
+                                            MemoryAccess *Definition,
+                                            const BasicBlock *BB,
+                                            InsertionPlace Point) {
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(BB);
+  if (Point == Beginning) {
+    // The new access goes after any phi nodes.
+    auto AI = std::find_if(
+        Accesses->begin(), Accesses->end(),
+        [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); });
+    Accesses->insert(AI, NewAccess);
+  } else {
+    Accesses->push_back(NewAccess);
+  }
+
+  return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccessBefore(Instruction *I,
+                                                  MemoryAccess *Definition,
+                                                  MemoryAccess *InsertPt) {
+  assert(I->getParent() == InsertPt->getBlock() &&
+         "New and old access must be in the same block");
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+  Accesses->insert(AccessListType::iterator(InsertPt), NewAccess);
+  return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccessAfter(Instruction *I,
+                                                 MemoryAccess *Definition,
+                                                 MemoryAccess *InsertPt) {
+  assert(I->getParent() == InsertPt->getBlock() &&
+         "New and old access must be in the same block");
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+  Accesses->insertAfter(AccessListType::iterator(InsertPt), NewAccess);
+  return NewAccess;
+}
+
 /// \brief Helper function to create new memory accesses
 MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
@@ -551,7 +603,8 @@
       continue;
 
     for (User *U : MD->users()) {
-      BasicBlock *UseBlock; (void)UseBlock;
+      BasicBlock *UseBlock;
+      (void)UseBlock;
 
       // Things are allowed to flow to phi nodes over their predecessor edge.
       if (auto *P = dyn_cast<MemoryPhi>(U)) {
         for (const auto &Arg : P->operands()) {
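To make the InsertionPlace semantics implemented above concrete, a hypothetical before/after of a block's access list (the block contents are illustrative only):

  // Suppose BB's access list is currently:
  //   [MemoryPhi, MemoryUse(a), MemoryDef(b)]
  //
  // createMemoryAccess(I, Def, BB, MemorySSA::Beginning) skips the phis
  // (the std::find_if above) and yields:
  //   [MemoryPhi, New, MemoryUse(a), MemoryDef(b)]
  //
  // createMemoryAccess(I, Def, BB, MemorySSA::End) simply appends:
  //   [MemoryPhi, MemoryUse(a), MemoryDef(b), New]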
Index: test/Transforms/InstMerge/exceptions.ll
===================================================================
--- test/Transforms/InstMerge/exceptions.ll
+++ test/Transforms/InstMerge/exceptions.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
Index: test/Transforms/InstMerge/ld_hoist1.ll
===================================================================
--- test/Transforms/InstMerge/ld_hoist1.ll
+++ test/Transforms/InstMerge/ld_hoist1.ll
@@ -1,5 +1,6 @@
 ; Test load hoist
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc_linux"
Index: test/Transforms/InstMerge/ld_hoist_st_sink.ll
===================================================================
--- test/Transforms/InstMerge/ld_hoist_st_sink.ll
+++ test/Transforms/InstMerge/ld_hoist_st_sink.ll
@@ -1,6 +1,7 @@
 ; Tests to make sure that loads and stores in a diamond get merged
 ; Loads are hoisted into the header. Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i64, %struct.node*, %struct.node*, %struct.node*, i64, %struct.arc*, i64, i64, i64 }
Index: test/Transforms/InstMerge/st_sink_barrier_call.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_barrier_call.ll
+++ test/Transforms/InstMerge/st_sink_barrier_call.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that a function call that needs to be a barrier to sinking stores is indeed a barrier.
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_bugfix_22613.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_bugfix_22613.ll
+++ test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -3,6 +3,7 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 ; RUN: opt -O2 -S < %s | FileCheck %s
+; RUN: opt -O2 -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 
 ; CHECK-LABEL: main
 ; CHECK: if.end
Index: test/Transforms/InstMerge/st_sink_no_barrier_call.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_call.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_call.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier function call after the store instruction
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_no_barrier_load.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_load.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_load.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier load after the store instruction
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_no_barrier_store.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_store.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_store.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier store after the store instruction to be sunk
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_two_stores.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_two_stores.ll
+++ test/Transforms/InstMerge/st_sink_two_stores.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_with_barrier.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_with_barrier.ll
+++ test/Transforms/InstMerge/st_sink_with_barrier.ll
@@ -1,5 +1,6 @@
 ; Test to make sure that load from the same address as a store and appears after the store prevents the store from being sunk
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
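Finally, a compact restatement of the barrier bookkeeping computeBarriers sets up (hypothetical helper names, not code from the patch; the lookups mirror the FirstHoistBarrier/LastSinkBarrier checks in canHoistFromBlock, canSinkFromBlock, and the MemorySSA merge routines):

  #include "llvm/ADT/DenseMap.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // A load may be hoisted out of its block only if no earlier instruction
  // (by DFS number) can fail to transfer execution to its successor. A
  // lookup result of 0 means no barrier was recorded for the block.
  static bool hoistBlockedByBarrier(
      const Instruction *I,
      const DenseMap<const BasicBlock *, unsigned> &FirstHoistBarrier,
      const DenseMap<const Instruction *, unsigned> &DFSNumberMap) {
    unsigned Barrier = FirstHoistBarrier.lookup(I->getParent());
    return Barrier != 0 && DFSNumberMap.lookup(I) > Barrier;
  }

  // A store may be sunk out of its block only if no may-throw instruction
  // appears after it in the block.
  static bool sinkBlockedByBarrier(
      const Instruction *I,
      const DenseMap<const BasicBlock *, unsigned> &LastSinkBarrier,
      const DenseMap<const Instruction *, unsigned> &DFSNumberMap) {
    unsigned Barrier = LastSinkBarrier.lookup(I->getParent());
    return Barrier != 0 && DFSNumberMap.lookup(I) < Barrier;
  }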