Index: include/llvm/Transforms/Utils/MemorySSA.h
===================================================================
--- include/llvm/Transforms/Utils/MemorySSA.h
+++ include/llvm/Transforms/Utils/MemorySSA.h
@@ -398,8 +398,6 @@
   MemoryAccess *getIncomingValue(unsigned I) const { return getOperand(I); }
   void setIncomingValue(unsigned I, MemoryAccess *V) {
     assert(V && "PHI node got a null value!");
-    assert(getType() == V->getType() &&
-           "All operands to PHI node must be the same type as the PHI node!");
     setOperand(I, V);
   }
   static unsigned getOperandNumForIncomingValue(unsigned I) { return I; }
@@ -536,6 +534,35 @@
     return It == PerBlockAccesses.end() ? nullptr : It->second.get();
   }
 
+  enum InsertionPlace { Beginning, End };
+
+  /// \brief Create a MemoryPhi in MemorySSA.
+  /// Note that MemorySSA only allows one MemoryPhi per block.
+  MemoryPhi *createMemoryPhi(BasicBlock *BB);
+
+  /// \brief Create a MemoryAccess in MemorySSA at a specified point in a
+  /// block, with a specified clobbering definition.
+  ///
+  /// This should be called when a memory instruction is created that is being
+  /// used to replace an existing memory instruction. It will *not* create PHI
+  /// nodes, or verify the clobbering definition. The insertion place is used
+  /// solely to determine where in the MemorySSA access lists the new access
+  /// will be placed. It will return the new MemoryAccess.
+  MemoryAccess *createMemoryAccess(Instruction *I, MemoryAccess *Definition,
+                                   const BasicBlock *BB, InsertionPlace Point);
+
+  /// \brief Create a MemoryAccess in MemorySSA before or after an existing
+  /// MemoryAccess, with a specified clobbering definition.
+  ///
+  /// As above, this should be called when a memory instruction is created
+  /// that is being used to replace an existing memory instruction. It will
+  /// *not* create PHI nodes, or verify the clobbering definition. The new
+  /// access is placed directly before (or after) \p InsertPt in the access
+  /// list of its block. It will return the new MemoryAccess.
+  MemoryAccess *createMemoryAccessBefore(Instruction *I,
+                                         MemoryAccess *Definition,
+                                         MemoryAccess *InsertPt);
+  MemoryAccess *createMemoryAccessAfter(Instruction *I,
+                                        MemoryAccess *Definition,
+                                        MemoryAccess *InsertPt);
+
   /// \brief Remove a MemoryAccess from MemorySSA, including updating all
   /// definitions and uses.
   /// This should be called when a memory instruction that has a MemoryAccess
@@ -544,8 +571,6 @@
   /// on the MemoryAccess for that store/load.
   void removeMemoryAccess(MemoryAccess *);
 
-  enum InsertionPlace { Beginning, End };
-
   /// \brief Given two memory accesses in the same basic block, determine
   /// whether MemoryAccess \p A dominates MemoryAccess \p B.
   bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const;
@@ -572,13 +597,14 @@
   void markUnreachableAsLiveOnEntry(BasicBlock *BB);
   bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
   MemoryUseOrDef *createNewAccess(Instruction *);
+  MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *);
   MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
   void removeFromLookups(MemoryAccess *);
   MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *);
   void renamePass(DomTreeNode *, MemoryAccess *IncomingVal,
                   SmallPtrSet<BasicBlock *, 16> &Visited);
-  AccessListType *getOrCreateAccessList(BasicBlock *);
+  AccessListType *getOrCreateAccessList(const BasicBlock *);
   AliasAnalysis *AA;
   DominatorTree *DT;
   Function &F;
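
A note on the intended use of the new creation API above: when an optimization replaces one memory instruction with another at the same position, the replacement access should take over the old access's defining access, and only then is the old access removed. A minimal sketch, assuming a hypothetical caller that already holds MSSA (a MemorySSA *) and is replacing OldStore with NewStore in the same block:

    // Create the replacement access directly before the old one, reusing its
    // clobbering definition; no phi nodes are created or verified here.
    MemoryUseOrDef *OldMA =
        cast<MemoryUseOrDef>(MSSA->getMemoryAccess(OldStore));
    MemoryAccess *NewMA = MSSA->createMemoryAccessBefore(
        NewStore, OldMA->getDefiningAccess(), OldMA);
    OldMA->replaceAllUsesWith(NewMA); // only needed if OldMA is a MemoryDef
    MSSA->removeMemoryAccess(OldMA);

This is the same pattern hoistInstruction and updateMemorySSAForSunkStores follow later in this patch, using the block-position variant of createMemoryAccess instead.
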
Index: lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
===================================================================
--- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -86,25 +86,91 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
+#include <queue>
+#include <unordered_set>
+#include <utility>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "mldst-motion"
 
+static cl::opt<bool> UseMemorySSA("use-memoryssa", cl::Hidden,
+                                  cl::desc("Use MemorySSA for optimizations"));
+
 //===----------------------------------------------------------------------===//
 //                         MergedLoadStoreMotion Pass
 //===----------------------------------------------------------------------===//
 
 namespace {
 
+// This hash matches the most common things isSameOperationAs checks. It must
+// always check a subset of (or exactly) what isSameOperationAs checks for
+// everything to function properly.
+
+struct StoreInstHash {
+  size_t operator()(const StoreInst *A) const {
+    return hash_combine(A->getType(), A->getPointerOperand()->getType(),
+                        A->getValueOperand()->getType(), A->isVolatile(),
+                        A->getAlignment(), A->getOrdering(),
+                        A->getSynchScope());
+  }
+};
+
+struct StoreInstEq {
+  bool operator()(const StoreInst *A, const StoreInst *B) const {
+    return (A == B || A->isSameOperationAs(B));
+  }
+};
+
+typedef std::pair<LoadInst *, MemoryAccess *> LoadPair;
+
+// This hash has two parts. The first is the instruction; the second, the
+// clobbering MemorySSA access. For the instruction part, this hash matches
+// the most common things isSameOperationAs checks. It must always check a
+// subset of (or exactly) what isSameOperationAs checks for everything to
+// function properly.
+
+struct LoadPairHash {
+  size_t operator()(const LoadPair &LP) const {
+    const LoadInst *A = LP.first;
+    return hash_combine(A->getType(), A->getPointerOperand()->getType(),
+                        A->isVolatile(), A->getAlignment(), A->getOrdering(),
+                        A->getSynchScope(), LP.second);
+  }
+};
+
+struct LoadPairEq {
+  bool operator()(const LoadPair &A, const LoadPair &B) const {
+    return (A.second == B.second &&
+            (A.first == B.first || A.first->isSameOperationAs(B.first)));
+  }
+};
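
The comments above state the invariant the functors must maintain: whenever the equality predicate accepts two instructions, their hashes must be equal, so the hash may only consume a subset of the fields the predicate compares. A unit-test-style sketch of the property (hypothetical checker, not part of the patch):

    // If the equality functor accepts two stores, their hashes must collide;
    // otherwise unordered_multiset lookups would miss valid merge candidates.
    static void checkHashInvariant(const StoreInst *A, const StoreInst *B) {
      if (StoreInstEq()(A, B))
        assert(StoreInstHash()(A) == StoreInstHash()(B) &&
               "hash must be a coarsening of the equality predicate");
    }
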
 class MergedLoadStoreMotion : public FunctionPass {
   AliasAnalysis *AA;
   MemoryDependenceResults *MD;
+  MemorySSA *MSSA;
+  MemorySSAWalker *CachingWalker;
+  DominatorTree *DT;
+  // The non-MemorySSA versions of the mergeLoad/Store algorithms could have
+  // Size0 * Size1 complexity, where Size0 and Size1 are the #instructions on
+  // the two sides of the diamond. The constant chosen here is arbitrary.
+  // Compile-time control is enforced by the check
+  // Size0 * Size1 < MagicCompileTimeControl.
+  const int MagicCompileTimeControl;
+  // We DFS-number instructions and avoid hoisting or sinking things past
+  // may-throw instructions and instructions not guaranteed to transfer
+  // execution to their successors.
+  DenseMap<const Instruction *, unsigned> DFSNumberMap;
+  DenseMap<const BasicBlock *, unsigned> FirstHoistBarrier;
+  DenseMap<const BasicBlock *, unsigned> LastSinkBarrier;
+  SmallPtrSet<MemoryAccess *, 8> AccessesToDelete;
+  std::unordered_multiset<LoadPair, LoadPairHash, LoadPairEq> LoadInfo;
+  std::unordered_multiset<StoreInst *, StoreInstHash, StoreInstEq> StoreInfo;
 
 public:
   static char ID; // Pass identification, replacement for typeid
   MergedLoadStoreMotion()
       : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
+    initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
   }
 
@@ -114,8 +180,12 @@
   // This transformation requires dominator postdominator info
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<MemorySSAWrapperPass>();
+    AU.addRequired();
     AU.addRequired<AAResultsWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<MemorySSAWrapperPass>();
     AU.addPreserved<MemoryDependenceWrapperPass>();
   }
 
@@ -130,26 +200,27 @@
   bool isDiamondHead(BasicBlock *BB);
   // Routines for hoisting loads
   bool isLoadHoistBarrierInRange(const Instruction &Start,
-                                 const Instruction &End, LoadInst *LI,
-                                 bool SafeToLoadUnconditionally);
+                                 const Instruction &End, LoadInst *LI);
   LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
   void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
                         Instruction *ElseInst);
   bool isSafeToHoist(Instruction *I) const;
   bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
   bool mergeLoads(BasicBlock *BB);
+  bool mergeLoadsMemorySSA(BasicBlock *BB);
   // Routines for sinking stores
   StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
   PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
   bool isStoreSinkBarrierInRange(const Instruction &Start,
                                  const Instruction &End, MemoryLocation Loc);
+  bool isStoreSinkBarrierInRangeMemorySSA(MemoryLocation &Loc,
+                                          MemoryAccess *Start);
   bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
   bool mergeStores(BasicBlock *BB);
-  // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
-  // where Size0 and Size1 are the #instructions on the two sides of
-  // the diamond. The constant chosen here is arbitrary. Compiler Time
-  // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
-  const int MagicCompileTimeControl;
+  bool mergeStoresMemorySSA(BasicBlock *BB);
+  void computeBarriers();
+  void updateMemorySSAForSunkStores(BasicBlock *SinkBlock, StoreInst *S0,
+                                    StoreInst *S1, StoreInst *SNew);
 };
 
 char MergedLoadStoreMotion::ID = 0;
@@ -227,14 +298,9 @@
 /// being loaded or protect against the load from happening
 /// it is considered a hoist barrier.
 ///
-bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(
-    const Instruction &Start, const Instruction &End, LoadInst *LI,
-    bool SafeToLoadUnconditionally) {
-  if (!SafeToLoadUnconditionally)
-    for (const Instruction &Inst :
-         make_range(Start.getIterator(), End.getIterator()))
-      if (!isGuaranteedToTransferExecutionToSuccessor(&Inst))
-        return true;
+bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction &Start,
+                                                      const Instruction &End,
+                                                      LoadInst *LI) {
   MemoryLocation Loc = MemoryLocation::get(LI);
   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
 }
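
The two barrier maps declared above turn "is there a barrier between the block entry and this instruction" (and the dual question for sinking) into O(1) lookups against the single DFS numbering that computeBarriers, further down in this patch, assigns to every instruction. A sketch of the query side, as used in canHoistFromBlock below, written as a hypothetical free function with the maps passed in:

    // A value of 0 means the block contains no barrier at all. Barriers and
    // instructions share one numbering, so one comparison answers the
    // range query.
    static bool hoistBarrierPrecedes(
        const LoadInst *LI,
        const DenseMap<const Instruction *, unsigned> &DFSNumberMap,
        const DenseMap<const BasicBlock *, unsigned> &FirstHoistBarrier) {
      unsigned Barrier = FirstHoistBarrier.lookup(LI->getParent());
      return Barrier != 0 && DFSNumberMap.lookup(LI) > Barrier;
    }
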
@@ -265,11 +331,20 @@
   MemoryLocation Loc0 = MemoryLocation::get(Load0);
   MemoryLocation Loc1 = MemoryLocation::get(Load1);
 
+  if (!SafeToLoadUnconditionally) {
+    // If the first hoist barrier in the block is before the load, we can't
+    // hoist.
+    unsigned int BB0HoistBarrier = FirstHoistBarrier.lookup(BB0);
+    if (BB0HoistBarrier != 0 && DFSNumberMap.lookup(Load0) > BB0HoistBarrier)
+      continue;
+    unsigned int BB1HoistBarrier = FirstHoistBarrier.lookup(BB1);
+    if (BB1HoistBarrier != 0 && DFSNumberMap.lookup(Load1) > BB1HoistBarrier)
+      continue;
+  }
+
   if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) &&
-      !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1,
-                                 SafeToLoadUnconditionally) &&
-      !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0,
-                                 SafeToLoadUnconditionally)) {
+      !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) &&
+      !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) {
     return Load1;
   }
 }
@@ -303,6 +378,20 @@
   // Hoist instruction.
   HoistedInst->insertBefore(HoistPt);
 
+  if (UseMemorySSA) {
+    // We also hoist operands of loads using this function, so check to see if
+    // this is really a memory access before we try to update MemorySSA for it.
+    MemoryAccess *HoistCandAccess = MSSA->getMemoryAccess(HoistCand);
+    if (HoistCandAccess) {
+      MemoryUseOrDef *MUD = cast<MemoryUseOrDef>(HoistCandAccess);
+      // What is happening here is that we are creating the hoisted access
+      // and destroying the old accesses.
+      MSSA->createMemoryAccess(HoistedInst, MUD->getDefiningAccess(), BB,
+                               MemorySSA::End);
+      MSSA->removeMemoryAccess(HoistCandAccess);
+      MSSA->removeMemoryAccess(MSSA->getMemoryAccess(ElseInst));
+    }
+  }
   HoistCand->replaceAllUsesWith(HoistedInst);
   removeInstruction(HoistCand);
@@ -395,10 +484,6 @@
 bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
                                                       const Instruction &End,
                                                       MemoryLocation Loc) {
-  for (const Instruction &Inst :
-       make_range(Start.getIterator(), End.getIterator()))
-    if (Inst.mayThrow())
-      return true;
   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
 }
@@ -419,8 +504,17 @@
     if (!Store1)
       continue;
 
+    // If the last sink barrier in the block is after us, we can't sink out
+    // of the block.
+    unsigned int BB0SinkBarrier = LastSinkBarrier.lookup(BB0);
+    if (BB0SinkBarrier != 0 && DFSNumberMap.lookup(Store0) < BB0SinkBarrier)
+      continue;
+    unsigned int BB1SinkBarrier = LastSinkBarrier.lookup(BB1);
+    if (BB1SinkBarrier != 0 && DFSNumberMap.lookup(Store1) < BB1SinkBarrier)
+      continue;
+
     MemoryLocation Loc0 = MemoryLocation::get(Store0);
     MemoryLocation Loc1 = MemoryLocation::get(Store1);
+
     if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
         !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
         !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
@@ -480,6 +574,9 @@
   assert(S0->getParent() == A0->getParent());
   assert(S1->getParent() == A1->getParent());
 
+  if (UseMemorySSA) {
+    updateMemorySSAForSunkStores(BB, S0, S1, SNew);
+  }
 
   // New PHI operand? Use it.
   if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
@@ -553,6 +650,31 @@
   return MergedStores;
 }
 
+// \brief DFS-number instructions in blocks in dominator order, tracking the
+// first hoist barrier and the last sink barrier in each block.
+void MergedLoadStoreMotion::computeBarriers() {
+  // This starts at 1 so the default-constructed value of 0 can be used to
+  // say we didn't find anything.
+  unsigned DFSNum = 1;
+  for (auto DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode());
+       DI != DE; ++DI) {
+    BasicBlock *DomBlock = DI->getBlock();
+    bool FoundHoistBarrierInBlock = false;
+    for (const auto &Inst : *DomBlock) {
+      DFSNumberMap[&Inst] = DFSNum;
+
+      if (!FoundHoistBarrierInBlock &&
+          !isGuaranteedToTransferExecutionToSuccessor(&Inst)) {
+        FirstHoistBarrier[DomBlock] = DFSNum;
+        FoundHoistBarrierInBlock = true;
+      }
+      if (Inst.mayThrow()) {
+        LastSinkBarrier[DomBlock] = DFSNum;
+      }
+      ++DFSNum;
+    }
+  }
+}
+
 ///
 /// \brief Run the transformation for each function
 ///
@@ -563,21 +685,394 @@
   auto *MDWP = getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
   MD = MDWP ? &MDWP->getMemDep() : nullptr;
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  DT->updateDFSNumbers();
+  if (UseMemorySSA) {
+    MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+    CachingWalker = MSSA->getWalker();
+  }
 
   bool Changed = false;
   DEBUG(dbgs() << "Instruction Merger\n");
 
+  computeBarriers();
+
   // Merge unconditional branches, allowing PRE to catch more
   // optimization opportunities.
-  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
-    BasicBlock *BB = &*FI++;
-
+  for (BasicBlock &BB : F) {
     // Hoist equivalent loads and sink stores
     // outside diamonds when possible
-    if (isDiamondHead(BB)) {
-      Changed |= mergeLoads(BB);
-      Changed |= mergeStores(getDiamondTail(BB));
+    if (isDiamondHead(&BB)) {
+      if (UseMemorySSA) {
+        Changed |= mergeLoadsMemorySSA(&BB);
+        Changed |= mergeStoresMemorySSA(getDiamondTail(&BB));
+      } else {
+        Changed |= mergeLoads(&BB);
+        Changed |= mergeStores(getDiamondTail(&BB));
+      }
     }
   }
   return Changed;
 }
 
+/// This file contains a MemorySSA and a non-MemorySSA version of the same
+/// optimization. To keep it less confusing, the functions specific to the
+/// MemorySSA version are placed below this comment.
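
computeBarriers above records two different barrier kinds because the two directions have different legality constraints: hoisting must not move a load above an instruction execution might not get past, while sinking must not move a store below a point where a throw would make it observable. A hedged sketch of the per-instruction classification, isolated from the numbering walk:

    // Hoist barriers: execution might not continue past this instruction,
    // so nothing may be hoisted from below it to above it.
    static bool isHoistBarrier(const Instruction &I) {
      return !isGuaranteedToTransferExecutionToSuccessor(&I);
    }
    // Sink barriers: a throw here makes an earlier store observable, so no
    // store may sink past it.
    static bool isSinkBarrier(const Instruction &I) { return I.mayThrow(); }
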
+
+///
+/// \brief Try to hoist two loads to the same address into the diamond header
+///
+/// Starting from a diamond head block, iterate over the loads in one
+/// successor block and put them in a hash table. Then walk through the loads
+/// in the other successor block, and see if they match any in the hash table
+/// and can be hoisted.
+///
+bool MergedLoadStoreMotion::mergeLoadsMemorySSA(BasicBlock *BB) {
+  bool MergedLoads = false;
+  assert(isDiamondHead(BB));
+  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+  BasicBlock *Succ0 = BI->getSuccessor(0);
+  BasicBlock *Succ1 = BI->getSuccessor(1);
+  // #Instructions in Succ1 for Compile Time Control
+  int Size1 = Succ1->size();
+  int NLoads = 0;
+
+  // Skip if we don't have memory accesses in both blocks.
+  auto *Succ0Accesses = MSSA->getBlockAccesses(Succ0);
+  if (!Succ0Accesses)
+    return false;
+  auto *Succ1Accesses = MSSA->getBlockAccesses(Succ1);
+  if (!Succ1Accesses)
+    return false;
+
+  // Walk all accesses in the first block, putting them into a hash table.
+  for (auto &Access : *Succ0Accesses) {
+    const MemoryUse *MU = dyn_cast<MemoryUse>(&Access);
+    if (!MU)
+      continue;
+    Instruction *I = MU->getMemoryInst();
+    // Don't hoist non-simple (atomic, volatile) loads.
+    LoadInst *Load0 = dyn_cast<LoadInst>(I);
+    // FIXME: The isUsedOutsideOfBlock is super-conservative, and used not to
+    // lengthen live ranges. There are better ways.
+    if (!Load0 || !Load0->isSimple() || Load0->isUsedOutsideOfBlock(Succ0))
+      continue;
+
+    LoadInfo.insert({Load0, CachingWalker->getClobberingMemoryAccess(Load0)});
+    ++NLoads;
+    if (NLoads * Size1 >= MagicCompileTimeControl)
+      break;
+  }
+
+  // Walk all accesses in the second block, and see if we can match them
+  // against accesses in the first.
+  for (auto AI = Succ1Accesses->begin(), AE = Succ1Accesses->end(); AI != AE;) {
+    auto CurrIter = AI;
+    ++AI;
+    const MemoryUse *MU = dyn_cast<MemoryUse>(&*CurrIter);
+    if (!MU)
+      continue;
+    LoadInst *Load1 = dyn_cast<LoadInst>(MU->getMemoryInst());
+    // This isUsedOutsideOfBlock is also conservative.
+    if (!Load1 || Load1->isUsedOutsideOfBlock(Succ1))
+      continue;
+
+    // We know that if the load has the same clobbering access as this one,
+    // they must not be killed until the same point. That is, we are
+    // guaranteed that all the loads that could possibly be merged must have
+    // a common MemoryDef (or MemoryPhi) that they reach. If not, then we
+    // can't merge them because something is in the way on one of the
+    // branches. If we *do* find a possible match, the only further checking
+    // we need to do is to ensure they are loads of the same pointer. We
+    // could simply check the pointer operands, but isMustAlias can do a
+    // better job of it.
+    auto LookupResult = LoadInfo.equal_range(
+        {Load1, CachingWalker->getClobberingMemoryAccess(Load1)});
+    bool Res = false;
+    auto LookupIter = LookupResult.first;
+    bool SafeToLoadUnconditionally =
+        (LookupResult.first != LookupResult.second) &&
+        isSafeToLoadUnconditionally(Load1->getPointerOperand(),
+                                    Load1->getAlignment(),
+                                    Load1->getModule()->getDataLayout(),
+                                    /*ScanFrom=*/BB->getTerminator());
+
+    while (!Res && LookupIter != LookupResult.second) {
+      LoadInst *Load0 = LookupIter->first;
+      auto OldIter = LookupIter;
+      ++LookupIter;
+      if (!SafeToLoadUnconditionally) {
+        // If the first hoist barrier in the block is before the load, we
+        // can't hoist.
+        unsigned int BB0HoistBarrier =
+            FirstHoistBarrier.lookup(Load0->getParent());
+        if (BB0HoistBarrier != 0 &&
+            DFSNumberMap.lookup(Load0) > BB0HoistBarrier)
+          continue;
+        unsigned int BB1HoistBarrier =
+            FirstHoistBarrier.lookup(Load1->getParent());
+        if (BB1HoistBarrier != 0 &&
+            DFSNumberMap.lookup(Load1) > BB1HoistBarrier)
+          continue;
+      }
+
+      MemoryLocation Loc0 = MemoryLocation::get(Load0);
+      MemoryLocation Loc1 = MemoryLocation::get(Load1);
+      if (AA->isMustAlias(Loc0, Loc1))
+        Res = hoistLoad(BB, Load0, Load1);
+      MergedLoads |= Res;
+      // Don't attempt to hoist above loads that had not been hoisted.
+      if (Res)
+        LoadInfo.erase(OldIter);
+    }
+  }
+  LoadInfo.clear();
+  return MergedLoads;
+}
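
The clobber-equality check above is the heart of the MemorySSA rewrite: instead of scanning instruction ranges with AA, two loads are merge candidates only if the walker reports the same clobbering access for both. A condensed sketch of that necessary condition, assuming Load0 and Load1 sit in the two arms of the diamond and Walker is a MemorySSAWalker *:

    // Loads reaching different clobbers cannot be merged: something writes
    // memory on one path but not the other. "Same clobber, must-alias
    // pointers" is exactly what the hash table lookup above encodes.
    static bool mayMergeLoads(LoadInst *Load0, LoadInst *Load1,
                              MemorySSAWalker *Walker, AliasAnalysis *AA) {
      if (Walker->getClobberingMemoryAccess(Load0) !=
          Walker->getClobberingMemoryAccess(Load1))
        return false;
      return AA->isMustAlias(MemoryLocation::get(Load0),
                             MemoryLocation::get(Load1));
    }
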
+
+///
+/// \brief Try to merge equivalent stores into the diamond footer
+///
+/// Starting from a diamond tail block, place all the stores in one
+/// predecessor in a hash table, and try to match them against stores in the
+/// second predecessor.
+///
+bool MergedLoadStoreMotion::mergeStoresMemorySSA(BasicBlock *T) {
+  bool MergedStores = false;
+  assert(T && "Footer of a diamond cannot be empty");
+
+  pred_iterator PI = pred_begin(T), E = pred_end(T);
+  assert(PI != E);
+  BasicBlock *Pred0 = *PI;
+  ++PI;
+  BasicBlock *Pred1 = *PI;
+  ++PI;
+  // tail block of a diamond/hammock?
+  if (Pred0 == Pred1)
+    return false; // No.
+  if (PI != E)
+    return false; // No. More than 2 predecessors.
+
+  // #Instructions in Pred1 for Compile Time Control
+  int Size1 = Pred1->size();
+  int NStores = 0;
+
+  // Skip all this if we don't have any memory accesses to look at.
+  auto *Pred0Accesses = MSSA->getBlockAccesses(Pred0);
+  if (!Pred0Accesses)
+    return false;
+  auto *Pred1Accesses = MSSA->getBlockAccesses(Pred1);
+  if (!Pred1Accesses)
+    return false;
+
+  for (auto AI = Pred0Accesses->rbegin(), AE = Pred0Accesses->rend(); AI != AE;
+       ++AI) {
+    if (const MemoryDef *MD = dyn_cast<MemoryDef>(&*AI)) {
+      Instruction *I = MD->getMemoryInst();
+
+      // Don't sink non-simple (atomic, volatile) stores.
+      if (!isa<StoreInst>(I))
+        continue;
+      StoreInst *S0 = cast<StoreInst>(I);
+      if (!S0->isSimple())
+        continue;
+      StoreInfo.insert(S0);
+
+      ++NStores;
+      if (NStores * Size1 >= MagicCompileTimeControl)
+        break;
+    }
+  }
+
+  for (auto AI = Pred1Accesses->rbegin(), AE = Pred1Accesses->rend(); AI != AE;
+       ++AI) {
+    const MemoryDef *MD = dyn_cast<MemoryDef>(&*AI);
+    if (!MD)
+      continue;
+    Instruction *Inst = MD->getMemoryInst();
+    if (!isa<StoreInst>(Inst))
+      continue;
+
+    StoreInst *Store1 = cast<StoreInst>(Inst);
+    auto LookupResult = StoreInfo.equal_range(Store1);
+    bool Res = false;
+    auto LookupIter = LookupResult.first;
+    while (!Res && LookupIter != LookupResult.second) {
+      StoreInst *Store0 = *LookupIter;
+      auto OldIter = LookupIter;
+      ++LookupIter;
+
+      // If the last sink barrier in the block is after us, we can't sink
+      // out of the block.
+      unsigned int BB0SinkBarrier = LastSinkBarrier.lookup(Store0->getParent());
+      if (BB0SinkBarrier != 0 && DFSNumberMap.lookup(Store0) < BB0SinkBarrier)
+        continue;
+      unsigned int BB1SinkBarrier = LastSinkBarrier.lookup(Store1->getParent());
+      if (BB1SinkBarrier != 0 && DFSNumberMap.lookup(Store1) < BB1SinkBarrier)
+        continue;
+
+      MemoryLocation Loc0 = MemoryLocation::get(Store0);
+      MemoryLocation Loc1 = MemoryLocation::get(Store1);
+      if (!AA->isMustAlias(Loc0, Loc1) ||
+          isStoreSinkBarrierInRangeMemorySSA(Loc0,
+                                             MSSA->getMemoryAccess(Store0)) ||
+          isStoreSinkBarrierInRangeMemorySSA(Loc1,
+                                             MSSA->getMemoryAccess(Store1)))
+        continue;
+      Res = sinkStore(T, Store0, Store1);
+      if (Res)
+        StoreInfo.erase(OldIter);
+      MergedStores |= Res;
+    }
+  }
+  for (auto V : AccessesToDelete) {
+    MSSA->removeMemoryAccess(V);
+  }
+  AccessesToDelete.clear();
+  StoreInfo.clear();
+  return MergedStores;
+}
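
Note the deferred deletion at the end of the function: the access lists are being walked in reverse while sinkStore (via updateMemorySSAForSunkStores) queues S1's access in AccessesToDelete, and the batch removal only happens after both walks finish. A generic sketch of that pattern, under the same ilist reverse-iterator constraint noted near the end of this patch:

    // Erasing the element a reverse_iterator points at invalidates it, so
    // defer: collect victims during the walk, erase them afterwards.
    SmallPtrSet<MemoryAccess *, 8> ToDelete;
    for (auto AI = Accesses->rbegin(), AE = Accesses->rend(); AI != AE; ++AI)
      if (shouldRemove(&*AI)) // hypothetical predicate
        ToDelete.insert(&*AI);
    for (MemoryAccess *MA : ToDelete)
      MSSA->removeMemoryAccess(MA);
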
+
+///
+/// \brief True when the store whose MemoryDef is \p Start is not downward
+/// exposed to the sink point
+///
+bool MergedLoadStoreMotion::isStoreSinkBarrierInRangeMemorySSA(
+    MemoryLocation &Loc, MemoryAccess *Start) {
+  DomTreeNode *StartNode = DT->getNode(Start->getBlock());
+  std::pair<unsigned, unsigned> StartDFS = {StartNode->getDFSNumIn(),
+                                            StartNode->getDFSNumOut()};
+
+  // Seed with the current users that are in the block range.
+  std::queue<MemoryAccess *> CurrentUses;
+  SmallPtrSet<MemoryAccess *, 16> Visited;
+  for (auto U : Start->users())
+    CurrentUses.emplace(cast<MemoryAccess>(U));
+
+  // Process all the uses and see if they are below the sink point.
+  while (!CurrentUses.empty()) {
+    MemoryAccess *MA = CurrentUses.front();
+    CurrentUses.pop();
+
+    BasicBlock *BB = MA->getBlock();
+    DomTreeNode *BBNode = DT->getNode(BB);
+    std::pair<unsigned, unsigned> CurrDFS = {BBNode->getDFSNumIn(),
+                                             BBNode->getDFSNumOut()};
+    // First see whether the use is inside the dominator tree range.
+    // The only way it could affect us is if it's below us in the dominator
+    // tree.
+    // The start of the final sink point is not below us in a hammock's
+    // dominator tree, because in a hammock, the merge block must be a
+    // sibling of the split block in the dominator tree.
+    // Thus, the things below us in the dominator tree are all things
+    // that lead to the sink point.
+    if (CurrDFS.first >= StartDFS.first && CurrDFS.second <= StartDFS.second) {
+      // A phi node is not a memory instruction we can query, and we may
+      // have deleted this access already. Because we are walking backwards,
+      // we don't perform updates on the fly.
+      if (AccessesToDelete.count(MA) || isa<MemoryPhi>(MA))
+        continue;
+      Instruction *Use = cast<MemoryUseOrDef>(MA)->getMemoryInst();
+
+      // If it really conflicts, we have a barrier.
+      if (AA->getModRefInfo(Use, Loc) & MRI_ModRef)
+        return true;
+
+      // If not, add its immediate uses to keep walking.
+      for (auto U : MA->users()) {
+        MemoryAccess *MU = cast<MemoryAccess>(U);
+        if (Visited.insert(MU).second)
+          CurrentUses.emplace(MU);
+      }
+    }
+  }
+  return false;
+}
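
The interval test above relies on the standard DFS-number encoding of the dominator tree, which the DT->updateDFSNumbers() call in runOnFunction makes valid: node A dominates node B exactly when B's [in, out] interval nests inside A's. The check in isolation, as a sketch (equivalent to DT->dominates once the numbers are up to date, but without walking the tree):

    // B is in A's dominator subtree iff A.in <= B.in and B.out <= A.out.
    static bool dominatesByDFS(const DomTreeNode *A, const DomTreeNode *B) {
      return A->getDFSNumIn() <= B->getDFSNumIn() &&
             A->getDFSNumOut() >= B->getDFSNumOut();
    }
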
+
+void MergedLoadStoreMotion::updateMemorySSAForSunkStores(
+    BasicBlock *SinkLocation, StoreInst *S0, StoreInst *S1, StoreInst *SNew) {
+  // There are three basic store-sinking cases that we have to handle when
+  // updating. Because we are sinking to the bottom of a diamond, we may need
+  // to place a phi node, depending on what the old memory states of the
+  // MemoryDefs were. This breaks down into three cases.
+  //
+  // 1. There are no stores above S0 or S1 in either block.
+  // In this case, no phi node is necessary: both MemoryDefs must have the
+  // same defining access, as the top of the diamond can only have one
+  // reaching MemoryDef/MemoryPhi at the end. After use replacement, we may
+  // end up with a phi in the sink location block having all the same
+  // operands, which we check for.
+  //
+  // 2. There are stores above either S0 or S1, and uses below the block.
+  // In this case, a phi node will already exist in the sink location, but
+  // may need to be updated: if a phi node argument is S0's or S1's
+  // MemoryDef, we replace it with that MemoryDef's defining access.
+  //
+  // 3. There are stores above either S0 or S1, and no uses below the block.
+  // In this case, no phi node will exist, but we need one to merge the
+  // MemoryDefs in order to produce the defining access for the MemoryDef we
+  // will place in the sink location block.
+  //
+  // After handling the above, we create a new MemoryDef with either the phi
+  // node, or the correct version, in the sink location block.
+
+  MemoryDef *MD0 = cast<MemoryDef>(MSSA->getMemoryAccess(S0));
+  MemoryDef *MD1 = cast<MemoryDef>(MSSA->getMemoryAccess(S1));
+
+  bool MayCreateDegeneratePhi = false;
+  MemoryAccess *NewDefiningAccess;
+  if (MD0->getDefiningAccess() == MD1->getDefiningAccess()) {
+    // Case 1
+    NewDefiningAccess = MD0->getDefiningAccess();
+    // This may create a phi node that has the same version for every
+    // operand after replaceAllUsesWith.
+    MayCreateDegeneratePhi = true;
+  } else if (MemoryAccess *MA = MSSA->getMemoryAccess(SinkLocation)) {
+    // Case 2
+    MemoryPhi *MP = cast<MemoryPhi>(MA);
+    for (Use &Op : MP->incoming_values()) {
+      if (Op.get() == MD0)
+        Op.set(MD0->getDefiningAccess());
+      else if (Op.get() == MD1)
+        Op.set(MD1->getDefiningAccess());
+    }
+    NewDefiningAccess = MP;
+  } else {
+    // Case 3
+    MemoryPhi *MP = MSSA->createMemoryPhi(SinkLocation);
+    // Being a diamond, the only incoming predecessors should be the blocks
+    // for the two stores.
+    MP->addIncoming(MD0->getDefiningAccess(), MD0->getBlock());
+    MP->addIncoming(MD1->getDefiningAccess(), MD1->getBlock());
+    NewDefiningAccess = MP;
+  }
+
+  // Create the new MemoryDef.
+  MemoryAccess *SNewMA = MSSA->createMemoryAccess(
+      SNew, NewDefiningAccess, SinkLocation, MemorySSA::Beginning);
+  MD1->replaceAllUsesWith(SNewMA);
+  MD0->replaceAllUsesWith(SNewMA);
+  MSSA->removeMemoryAccess(MSSA->getMemoryAccess(S0));
+  // ilist's reverse iterator invalidation semantics basically mean we need
+  // to wait until the loop in our caller is done before we kill these.
+  AccessesToDelete.insert(MSSA->getMemoryAccess(S1));
+  if (MayCreateDegeneratePhi) {
+    if (MemoryAccess *MA = MSSA->getMemoryAccess(SinkLocation)) {
+      MemoryPhi *MP = cast<MemoryPhi>(MA);
+      MemoryAccess *SamePhiOp = nullptr;
+      for (auto &Op : MP->incoming_values()) {
+        if (!SamePhiOp)
+          SamePhiOp = cast<MemoryAccess>(Op.get());
+        else if (SamePhiOp != Op.get()) {
+          SamePhiOp = nullptr;
+          break;
+        }
+      }
+      // If this is still set, it means all operands of the phi are now the
+      // same.
+      if (SamePhiOp) {
+        MP->replaceAllUsesWith(SamePhiOp);
+        MSSA->removeMemoryAccess(MP);
+      }
+    }
+  }
+}
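
Case 1 can leave a MemoryPhi in the sink block whose incoming values are all identical after the two replaceAllUsesWith calls; the cleanup at the end of updateMemorySSAForSunkStores is this pattern, pulled out here as a standalone sketch (hypothetical helper, not part of the patch):

    // If every incoming value of MP is the same access, the phi carries no
    // information: forward its uses to that access and delete it.
    static void collapseDegenerateMemoryPhi(MemoryPhi *MP, MemorySSA *MSSA) {
      MemoryAccess *Same = nullptr;
      for (Use &Op : MP->incoming_values()) {
        if (!Same)
          Same = cast<MemoryAccess>(Op.get());
        else if (Same != Op.get())
          return; // not degenerate
      }
      if (!Same)
        return; // no operands at all
      MP->replaceAllUsesWith(Same);
      MSSA->removeMemoryAccess(MP);
    }
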
Index: lib/Transforms/Utils/MemorySSA.cpp
===================================================================
--- lib/Transforms/Utils/MemorySSA.cpp
+++ lib/Transforms/Utils/MemorySSA.cpp
@@ -225,7 +225,8 @@
   MA.dropAllReferences();
 }
 
-MemorySSA::AccessListType *MemorySSA::getOrCreateAccessList(BasicBlock *BB) {
+MemorySSA::AccessListType *
+MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
   auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
 
   if (Res.second)
@@ -319,7 +320,7 @@
   for (auto &BB : IDFBlocks) {
     // Insert phi node
     AccessListType *Accesses = getOrCreateAccessList(BB);
-    MemoryPhi *Phi = new MemoryPhi(F.getContext(), BB, NextID++);
+    MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
     ValueToMemoryAccess.insert(std::make_pair(BB, Phi));
     // Phi's always are placed at the front of the block.
     Accesses->push_front(Phi);
@@ -357,6 +358,68 @@
   return Walker.get();
 }
 
+MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
+  assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
+  AccessListType *Accesses = getOrCreateAccessList(BB);
+  MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
+  ValueToMemoryAccess.insert(std::make_pair(BB, Phi));
+  // Phi's always are placed at the front of the block.
+  Accesses->push_front(Phi);
+  return Phi;
+}
+
+MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
+                                               MemoryAccess *Definition) {
+  assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
+  MemoryUseOrDef *NewAccess = createNewAccess(I);
+  assert(
+      NewAccess != nullptr &&
+      "Tried to create a memory access for a non-memory touching instruction");
+  NewAccess->setDefiningAccess(Definition);
+  return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccess(Instruction *I,
+                                            MemoryAccess *Definition,
+                                            const BasicBlock *BB,
+                                            InsertionPlace Point) {
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(BB);
+  if (Point == Beginning) {
+    // It goes after any phi nodes.
+    auto AI = std::find_if(
+        Accesses->begin(), Accesses->end(),
+        [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); });
+
+    Accesses->insert(AI, NewAccess);
+  } else {
+    Accesses->push_back(NewAccess);
+  }
+
+  return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccessBefore(Instruction *I,
+                                                  MemoryAccess *Definition,
+                                                  MemoryAccess *InsertPt) {
+  assert(I->getParent() == InsertPt->getBlock() &&
+         "New and old access must be in the same block");
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+  Accesses->insert(AccessListType::iterator(InsertPt), NewAccess);
+  return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccessAfter(Instruction *I,
+                                                 MemoryAccess *Definition,
+                                                 MemoryAccess *InsertPt) {
+  assert(I->getParent() == InsertPt->getBlock() &&
+         "New and old access must be in the same block");
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+  Accesses->insertAfter(AccessListType::iterator(InsertPt), NewAccess);
+  return NewAccess;
+}
+
 /// \brief Helper function to create new memory accesses
 MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
   // The assume intrinsic has a control dependency which we model by claiming
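
One subtlety in createMemoryAccess above: "Beginning" cannot mean the literal front of the list, because MemoryPhis always lead a block's access list; the find_if skips them. An equivalent explicit form, as a sketch:

    // The first non-phi position in an access list is where a "Beginning"
    // insertion lands; phis, if any, always come first.
    static MemorySSA::AccessListType::iterator
    firstNonPhiPosition(MemorySSA::AccessListType *Accesses) {
      auto AI = Accesses->begin();
      while (AI != Accesses->end() && isa<MemoryPhi>(&*AI))
        ++AI;
      return AI;
    }
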
@@ -551,7 +614,8 @@
       continue;
 
     for (User *U : MD->users()) {
-      BasicBlock *UseBlock; (void)UseBlock;
+      BasicBlock *UseBlock;
+      (void)UseBlock;
       // Things are allowed to flow to phi nodes over their predecessor edge.
       if (auto *P = dyn_cast<MemoryPhi>(U)) {
         for (const auto &Arg : P->operands()) {
@@ -593,9 +657,13 @@
 void MemorySSA::verifyDefUses(Function &F) const {
   for (BasicBlock &B : F) {
     // Phi nodes are attached to basic blocks
-    if (MemoryPhi *Phi = getMemoryAccess(&B))
+    if (MemoryPhi *Phi = getMemoryAccess(&B)) {
+      assert(Phi->getNumOperands() ==
+                 std::distance(pred_begin(&B), pred_end(&B)) &&
+             "Incomplete MemoryPhi Node");
       for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
         verifyUseInDefs(Phi->getIncomingValue(I), Phi);
+    }
 
     for (Instruction &I : B) {
       if (MemoryAccess *MA = getMemoryAccess(&I)) {
Index: test/Transforms/InstMerge/exceptions.ll
===================================================================
--- test/Transforms/InstMerge/exceptions.ll
+++ test/Transforms/InstMerge/exceptions.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
Index: test/Transforms/InstMerge/ld_hoist1.ll
===================================================================
--- test/Transforms/InstMerge/ld_hoist1.ll
+++ test/Transforms/InstMerge/ld_hoist1.ll
@@ -1,5 +1,6 @@
 ; Test load hoist
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc_linux"
Index: test/Transforms/InstMerge/ld_hoist_st_sink.ll
===================================================================
--- test/Transforms/InstMerge/ld_hoist_st_sink.ll
+++ test/Transforms/InstMerge/ld_hoist_st_sink.ll
@@ -1,6 +1,7 @@
 ; Tests to make sure that loads and stores in a diamond get merged
 ; Loads are hoisted into the header. Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i64, %struct.node*, %struct.node*, %struct.node*, i64, %struct.arc*, i64, i64, i64 }
Index: test/Transforms/InstMerge/st_sink_barrier_call.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_barrier_call.ll
+++ test/Transforms/InstMerge/st_sink_barrier_call.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that a function call that needs to be a barrier to sinking stores is indeed a barrier.
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_bugfix_22613.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_bugfix_22613.ll
+++ test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -3,6 +3,7 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 ; RUN: opt -O2 -S < %s | FileCheck %s
+; RUN: opt -O2 -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 
 ; CHECK-LABEL: main
 ; CHECK: if.end
Index: test/Transforms/InstMerge/st_sink_no_barrier_call.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_call.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_call.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier function call after the store instruction
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_no_barrier_load.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_load.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_load.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier load after the store instruction
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_no_barrier_store.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_store.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_store.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier store after the store instruction to be sunk
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_two_stores.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_two_stores.ll
+++ test/Transforms/InstMerge/st_sink_two_stores.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_with_barrier.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_with_barrier.ll
+++ test/Transforms/InstMerge/st_sink_with_barrier.ll
@@ -1,5 +1,6 @@
 ; Test to make sure that load from the same address as a store and appears after the store prevents the store from being sunk
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }