Index: lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
===================================================================
--- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -87,11 +87,56 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
+#include <queue>
+#include <unordered_set>
+#include <utility>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "mldst-motion"
 
+static cl::opt<bool>
+    UseMemorySSA("use-memoryssa-mldst", cl::Hidden,
+                 cl::desc("Use MemorySSA for MergedLoadStoreMotion"));
+
+//===----------------------------------------------------------------------===//
+//                         MergedLoadStoreMotion Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+// This hash matches the most common things isSameOperationAs checks. It must
+// always check a subset of (or exactly) what isSameOperationAs checks for
+// everything to function properly.
+const auto StoreInstHash = [](const StoreInst *A) {
+  return hash_combine(A->getType(), A->getPointerOperand()->getType(),
+                      A->getValueOperand()->getType(), A->isVolatile(),
+                      A->getAlignment(), A->getOrdering(), A->getSynchScope());
+};
+
+const auto StoreInstEq = [](const StoreInst *A, const StoreInst *B) {
+  return (A == B || A->isSameOperationAs(B));
+};
+
+typedef std::pair<LoadInst *, MemoryAccess *> LoadPair;
+
+// This hash has two parts: the first is the instruction, the second the
+// clobbering MemorySSA access. For the instruction part, this hash matches
+// the most common things isSameOperationAs checks. It must always check a
+// subset of (or exactly) what isSameOperationAs checks for everything to
+// function properly.
+const auto LoadPairHash = [](const LoadPair &LP) {
+  const LoadInst *A = LP.first;
+  return hash_combine(A->getType(), A->getPointerOperand()->getType(),
+                      A->isVolatile(), A->getAlignment(), A->getOrdering(),
+                      A->getSynchScope(), LP.second);
+};
+
+const auto LoadPairEq = [](const LoadPair &A, const LoadPair &B) {
+  return (A.second == B.second &&
+          (A.first == B.first || A.first->isSameOperationAs(B.first)));
+};
+}
 
 //===----------------------------------------------------------------------===//
 //                         MergedLoadStoreMotion Pass
 //===----------------------------------------------------------------------===//
@@ -99,15 +144,34 @@
 class MergedLoadStoreMotion {
   MemoryDependenceResults *MD = nullptr;
   AliasAnalysis *AA = nullptr;
-
-  // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
-  // where Size0 and Size1 are the #instructions on the two sides of
-  // the diamond. The constant chosen here is arbitrary. Compiler Time
+  MemorySSA *MSSA = nullptr;
+  MemorySSAWalker *CachingWalker = nullptr;
+  DominatorTree *DT = nullptr;
+  // The non-MemorySSA versions of the mergeLoad/Store algorithms could have
+  // Size0 * Size1 complexity, where Size0 and Size1 are the #instructions on
+  // the two sides of the diamond. The constant chosen here is arbitrary. Compiler Time
   // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
   const int MagicCompileTimeControl = 250;
+  // We DFS number instructions and avoid hoisting or sinking things past
+  // may-throw instructions and instructions not guaranteed to transfer
+  // execution to their successors.
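+  // DFS numbers are assigned in dominator-tree DFS order, starting at 1;
+  // FirstHoistBarrier/LastSinkBarrier map a block to the DFS number of its
+  // first hoist barrier/last sink barrier, with the DenseMap-default value
+  // of 0 meaning no barrier was found in that block.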
+  DenseMap<const Instruction *, unsigned> DFSNumberMap;
+  DenseMap<const BasicBlock *, unsigned> FirstHoistBarrier;
+  DenseMap<const BasicBlock *, unsigned> LastSinkBarrier;
+  SmallPtrSet<MemoryAccess *, 8> AccessesToDelete;
+  std::unordered_multiset<LoadPair, decltype(LoadPairHash),
+                          decltype(LoadPairEq)>
+      LoadInfo;
+  std::unordered_multiset<StoreInst *, decltype(StoreInstHash),
+                          decltype(StoreInstEq)>
+      StoreInfo;
 
 public:
-  bool run(Function &F, MemoryDependenceResults *MD, AliasAnalysis &AA);
+  bool run(Function &F, MemoryDependenceResults *MD, AliasAnalysis &AA,
+           DominatorTree *DT, MemorySSA *MSSA);
+  MergedLoadStoreMotion()
+      : LoadInfo(1, LoadPairHash, LoadPairEq),
+        StoreInfo(1, StoreInstHash, StoreInstEq) {}
 
 private:
   ///
@@ -119,21 +183,28 @@
   bool isDiamondHead(BasicBlock *BB);
   // Routines for hoisting loads
   bool isLoadHoistBarrierInRange(const Instruction &Start,
-                                 const Instruction &End, LoadInst *LI,
-                                 bool SafeToLoadUnconditionally);
+                                 const Instruction &End, LoadInst *LI);
   LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
   void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
                         Instruction *ElseInst);
   bool isSafeToHoist(Instruction *I) const;
   bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
   bool mergeLoads(BasicBlock *BB);
+  bool mergeLoadsMemorySSA(BasicBlock *BB);
+  bool mergeLoadMemorySSA(const MemoryUse *Cand, BasicBlock *HoistBlock);
   // Routines for sinking stores
   StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
   PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
   bool isStoreSinkBarrierInRange(const Instruction &Start,
                                  const Instruction &End, MemoryLocation Loc);
+  bool isStoreSinkBarrierInRangeMemorySSA(MemoryLocation &Loc,
+                                          MemoryAccess *Start);
   bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
   bool mergeStores(BasicBlock *BB);
+  bool mergeStoresMemorySSA(BasicBlock *BB);
+  void computeBarriers();
+  void updateMemorySSAForSunkStores(BasicBlock *SinkBlock, StoreInst *S0,
+                                    StoreInst *S1, StoreInst *SNew);
 };
 
 ///
@@ -193,14 +264,9 @@
 /// being loaded or protect against the load from happening
 /// it is considered a hoist barrier.
 ///
-bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(
-    const Instruction &Start, const Instruction &End, LoadInst *LI,
-    bool SafeToLoadUnconditionally) {
-  if (!SafeToLoadUnconditionally)
-    for (const Instruction &Inst :
-         make_range(Start.getIterator(), End.getIterator()))
-      if (!isGuaranteedToTransferExecutionToSuccessor(&Inst))
-        return true;
+bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction &Start,
+                                                      const Instruction &End,
+                                                      LoadInst *LI) {
   MemoryLocation Loc = MemoryLocation::get(LI);
   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
 }
@@ -229,13 +295,22 @@
     if (!Load1 || Inst->isUsedOutsideOfBlock(BB1))
       continue;
 
+    if (!SafeToLoadUnconditionally) {
+      // If the first hoist barrier in the block is before the load, we
+      // can't hoist.
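+      // The precomputed DFS numbers replace the per-query instruction-range
+      // scan the old isLoadHoistBarrierInRange did: a barrier precedes the
+      // load iff the block's first-barrier number is nonzero and smaller
+      // than the load's number.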
+      unsigned int BB0HoistBarrier = FirstHoistBarrier.lookup(BB0);
+      if (BB0HoistBarrier != 0 && DFSNumberMap.lookup(Load0) > BB0HoistBarrier)
+        continue;
+      unsigned int BB1HoistBarrier = FirstHoistBarrier.lookup(BB1);
+      if (BB1HoistBarrier != 0 && DFSNumberMap.lookup(Load1) > BB1HoistBarrier)
+        continue;
+    }
     MemoryLocation Loc0 = MemoryLocation::get(Load0);
     MemoryLocation Loc1 = MemoryLocation::get(Load1);
+
     if (Load0->isSameOperationAs(Load1) && AA->isMustAlias(Loc0, Loc1) &&
-        !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1,
-                                   SafeToLoadUnconditionally) &&
-        !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0,
-                                   SafeToLoadUnconditionally)) {
+        !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) &&
+        !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) {
       return Load1;
     }
   }
@@ -269,6 +344,20 @@
   // Hoist instruction.
   HoistedInst->insertBefore(HoistPt);
 
+  if (MSSA) {
+    // We also hoist operands of loads using this function, so check to see if
+    // this is really a memory access before we try to update MemorySSA for it.
+    MemoryAccess *HoistCandAccess = MSSA->getMemoryAccess(HoistCand);
+    if (HoistCandAccess) {
+      MemoryUseOrDef *MUD = cast<MemoryUseOrDef>(HoistCandAccess);
+      // What is happening here is that we are creating the hoisted access
+      // and destroying the old accesses.
+      MSSA->createMemoryAccessInBB(HoistedInst, MUD->getDefiningAccess(), BB,
+                                   MemorySSA::End);
+      MSSA->removeMemoryAccess(HoistCandAccess);
+      MSSA->removeMemoryAccess(MSSA->getMemoryAccess(ElseInst));
+    }
+  }
   HoistCand->replaceAllUsesWith(HoistedInst);
   removeInstruction(HoistCand);
@@ -343,6 +432,7 @@
     bool Res = hoistLoad(BB, L0, L1);
     MergedLoads |= Res;
     // Don't attempt to hoist above loads that had not been hoisted.
+    // FIXME: This is very restrictive, and could be fixed.
     if (!Res)
       break;
   }
@@ -361,10 +451,6 @@
 bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
                                                       const Instruction &End,
                                                       MemoryLocation Loc) {
-  for (const Instruction &Inst :
-       make_range(Start.getIterator(), End.getIterator()))
-    if (Inst.mayThrow())
-      return true;
   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
 }
@@ -382,8 +468,17 @@
     if (!Store1)
       continue;
 
+    // If the last sink barrier in the block is after us, we can't sink out
+    // of the block.
+    unsigned int BB0SinkBarrier = LastSinkBarrier.lookup(BB0);
+    if (BB0SinkBarrier != 0 && DFSNumberMap.lookup(Store0) < BB0SinkBarrier)
+      continue;
+    unsigned int BB1SinkBarrier = LastSinkBarrier.lookup(BB1);
+    if (BB1SinkBarrier != 0 && DFSNumberMap.lookup(Store1) < BB1SinkBarrier)
+      continue;
     MemoryLocation Loc0 = MemoryLocation::get(Store0);
     MemoryLocation Loc1 = MemoryLocation::get(Store1);
+
     if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
         !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
         !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
@@ -443,6 +538,7 @@
     assert(S0->getParent() == A0->getParent());
     assert(S1->getParent() == A1->getParent());
 
+    updateMemorySSAForSunkStores(BB, S0, S1, SNew);
     // New PHI operand? Use it.
     if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
@@ -516,24 +612,60 @@
   return MergedStores;
 }
 
+/// \brief Visit blocks in DFS order and number instructions. For each block,
+/// track the first hoist barrier and the last sink barrier.
+void MergedLoadStoreMotion::computeBarriers() {
+  // This is 1 so the default constructed value of 0 can be used to say we
+  // didn't find anything.
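+  // A hoist barrier is the first instruction in a block that is not
+  // guaranteed to transfer execution to its successor; a sink barrier is the
+  // last instruction that may throw (see the loop below).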
+  unsigned DFSNum = 1;
+  for (auto DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode());
+       DI != DE; ++DI) {
+    BasicBlock *DomBlock = DI->getBlock();
+    bool FoundHoistBarrierInBlock = false;
+    for (const auto &Inst : *DomBlock) {
+      DFSNumberMap[&Inst] = DFSNum;
+
+      if (!FoundHoistBarrierInBlock &&
+          !isGuaranteedToTransferExecutionToSuccessor(&Inst)) {
+        FirstHoistBarrier[DomBlock] = DFSNum;
+        FoundHoistBarrierInBlock = true;
+      }
+      if (Inst.mayThrow()) {
+        LastSinkBarrier[DomBlock] = DFSNum;
+      }
+      ++DFSNum;
+    }
+  }
+}
+
 bool MergedLoadStoreMotion::run(Function &F, MemoryDependenceResults *MD,
-                                AliasAnalysis &AA) {
+                                AliasAnalysis &AA, DominatorTree *DT,
+                                MemorySSA *MSSA) {
   this->MD = MD;
   this->AA = &AA;
+  this->MSSA = MSSA;
+  this->DT = DT;
+  DT->updateDFSNumbers();
+  if (MSSA)
+    CachingWalker = MSSA->getWalker();
 
   bool Changed = false;
   DEBUG(dbgs() << "Instruction Merger\n");
 
+  computeBarriers();
   // Merge unconditional branches, allowing PRE to catch more
   // optimization opportunities.
-  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
-    BasicBlock *BB = &*FI++;
-
+  for (BasicBlock &BB : F) {
     // Hoist equivalent loads and sink stores
     // outside diamonds when possible
-    if (isDiamondHead(BB)) {
-      Changed |= mergeLoads(BB);
-      Changed |= mergeStores(getDiamondTail(BB));
+    if (isDiamondHead(&BB)) {
+      if (MSSA) {
+        Changed |= mergeLoadsMemorySSA(&BB);
+        Changed |= mergeStoresMemorySSA(getDiamondTail(&BB));
+      } else {
+        Changed |= mergeLoads(&BB);
+        Changed |= mergeStores(getDiamondTail(&BB));
+      }
     }
   }
   return Changed;
@@ -556,8 +688,14 @@
       return false;
     MergedLoadStoreMotion Impl;
     auto *MDWP = getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
+    MemorySSA *MSSA = nullptr;
+    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    if (UseMemorySSA)
+      MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+
     return Impl.run(F, MDWP ? &MDWP->getMemDep() : nullptr,
-                    getAnalysis<AAResultsWrapperPass>().getAAResults());
+                    getAnalysis<AAResultsWrapperPass>().getAAResults(), DT,
+                    MSSA);
   }
 
 private:
@@ -565,6 +703,11 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
     AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    if (UseMemorySSA) {
+      AU.addRequired<MemorySSAWrapperPass>();
+      AU.addPreserved<MemorySSAWrapperPass>();
+    }
     AU.addPreserved<GlobalsAAWrapperPass>();
     AU.addPreserved<MemoryDependenceWrapperPass>();
   }
@@ -584,6 +727,8 @@
                       "MergedLoadStoreMotion", false, false)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
 INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion",
                     "MergedLoadStoreMotion", false, false)
 
@@ -592,12 +737,420 @@
   MergedLoadStoreMotion Impl;
   auto *MD = AM.getCachedResult<MemoryDependenceAnalysis>(F);
   auto &AA = AM.getResult<AAManager>(F);
-  if (!Impl.run(F, MD, AA))
+
+  MemorySSA *MSSA = nullptr;
+  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+  if (UseMemorySSA)
+    MSSA = &AM.getResult<MemorySSAAnalysis>(F);
+
+  if (!Impl.run(F, MD, AA, DT, MSSA))
     return PreservedAnalyses::all();
 
   // FIXME: This should also 'preserve the CFG'.
   PreservedAnalyses PA;
   PA.preserve<GlobalsAA>();
   PA.preserve<MemoryDependenceAnalysis>();
+  PA.preserve<DominatorTreeAnalysis>();
+  if (MSSA)
+    PA.preserve<MemorySSAAnalysis>();
   return PA;
 }
+
+/// This file contains both a MemorySSA and a non-MemorySSA version of the
+/// same optimization; to keep it less confusing, the functions specific to
+/// the MemorySSA version are placed below this comment.
+
+// Try to hoist a given load candidate into HoistBlock by trying to merge it
+// with any equivalent loads.
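+// The caller (mergeLoadsMemorySSA) has already populated LoadInfo with the
+// loads from the other arm of the diamond, keyed by (load, clobbering
+// access); here we probe that table with Cand and hoist any profitable
+// match into HoistBlock.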
+bool MergedLoadStoreMotion::mergeLoadMemorySSA(const MemoryUse *Cand,
+                                               BasicBlock *HoistBlock) {
+  bool MergedLoads = false;
+  BasicBlock *BB = Cand->getBlock();
+  LoadInst *Load1 = dyn_cast<LoadInst>(Cand->getMemoryInst());
+  // This isUsedOutsideOfBlock is also conservative.
+  if (!Load1 || Load1->isUsedOutsideOfBlock(BB))
+    return false;
+  // We know that if the load has the same clobbering access as this one, they
+  // must not be killed until the same point. That is, we are guaranteed that
+  // all the loads that could possibly be merged must have a common MemoryDef
+  // (or MemoryPhi) that they reach. If not, then we can't merge them because
+  // something is in the way on one of the branches. If we *do* find a
+  // possible match, the only further checking we need to do is to ensure they
+  // are loads of the same pointer. We could simply check the pointer
+  // operands, but isMustAlias can do a better job of it.
+
+  auto LookupResult = LoadInfo.equal_range(
+      {Load1, CachingWalker->getClobberingMemoryAccess(Load1)});
+  bool Res = false;
+  auto LookupIter = LookupResult.first;
+  bool SafeToLoadUnconditionally =
+      (LookupResult.first != LookupResult.second) &&
+      isSafeToLoadUnconditionally(Load1->getPointerOperand(),
+                                  Load1->getAlignment(),
+                                  Load1->getModule()->getDataLayout(),
+                                  /*ScanFrom=*/HoistBlock->getTerminator());
+  // If we won't be able to hoist the load out of the block, there is
+  // no point in walking all the equivalent loads in the other block.
+  unsigned int BB1HoistBarrier = FirstHoistBarrier.lookup(BB);
+  if (!SafeToLoadUnconditionally &&
+      (BB1HoistBarrier != 0 && DFSNumberMap.lookup(Load1) > BB1HoistBarrier))
+    return false;
+
+  while (!Res && LookupIter != LookupResult.second) {
+    LoadInst *Load0 = LookupIter->first;
+    auto OldIter = LookupIter;
+    ++LookupIter;
+    if (!SafeToLoadUnconditionally) {
+      // If the first hoist barrier in the block is before the load, we
+      // can't hoist.
+      unsigned int BB0HoistBarrier =
+          FirstHoistBarrier.lookup(Load0->getParent());
+      if (BB0HoistBarrier != 0 && DFSNumberMap.lookup(Load0) > BB0HoistBarrier)
+        continue;
+    }
+
+    MemoryLocation Loc0 = MemoryLocation::get(Load0);
+    MemoryLocation Loc1 = MemoryLocation::get(Load1);
+    if (AA->isMustAlias(Loc0, Loc1))
+      Res |= hoistLoad(HoistBlock, Load0, Load1);
+    MergedLoads |= Res;
+    if (Res)
+      LoadInfo.erase(OldIter);
+  }
+  return MergedLoads;
+}
+
+///
+/// \brief Try to hoist two loads of the same address into the diamond header
+///
+/// Starting from a diamond head block, iterate over the loads in one
+/// successor block and put them in a hash table. Then walk through the loads
+/// in the other successor block, and see if they match any in the hash table
+/// and can be hoisted.
+///
+bool MergedLoadStoreMotion::mergeLoadsMemorySSA(BasicBlock *BB) {
+  bool MergedLoads = false;
+  assert(isDiamondHead(BB));
+  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+  BasicBlock *Succ0 = BI->getSuccessor(0);
+  BasicBlock *Succ1 = BI->getSuccessor(1);
+  // #Instructions in Succ1 for Compile Time Control
+  int Size1 = Succ1->size();
+  int NLoads = 0;
+
+  // Skip if we don't have memory accesses in both blocks.
+  auto *Succ0Accesses = MSSA->getBlockAccesses(Succ0);
+  if (!Succ0Accesses)
+    return false;
+  auto *Succ1Accesses = MSSA->getBlockAccesses(Succ1);
+  if (!Succ1Accesses)
+    return false;
+
+  // Walk all accesses in the first block, and put them into a hash table.
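+  // Because the hash key includes the clobbering MemorySSA access, only
+  // loads that reach the same MemoryDef/MemoryPhi as a probe load can
+  // collide with it, leaving must-alias and hoist-barrier checks as the
+  // only per-pair work.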
+  for (auto &Access : *Succ0Accesses) {
+    const MemoryUse *MU = dyn_cast<MemoryUse>(&Access);
+    if (!MU)
+      continue;
+    Instruction *I = MU->getMemoryInst();
+    // Only move simple (non-atomic and non-volatile) loads.
+    LoadInst *Load0 = dyn_cast<LoadInst>(I);
+    // FIXME: The isUsedOutsideOfBlock is super-conservative, and used not to
+    // lengthen live ranges. There are better ways.
+    if (!Load0 || !Load0->isSimple() || Load0->isUsedOutsideOfBlock(Succ0))
+      continue;
+
+    LoadInfo.insert({Load0, CachingWalker->getClobberingMemoryAccess(Load0)});
+    if (++NLoads * Size1 >= MagicCompileTimeControl)
+      break;
+  }
+  // Walk all accesses in the second block, and see if we can match them
+  // against accesses in the first.
+  for (auto AI = Succ1Accesses->begin(), AE = Succ1Accesses->end(); AI != AE;) {
+    const MemoryUse *MU = dyn_cast<MemoryUse>(&*AI);
+    ++AI;
+    if (!MU)
+      continue;
+    MergedLoads |= mergeLoadMemorySSA(MU, BB);
+  }
+  LoadInfo.clear();
+  return MergedLoads;
+}
+
+///
+/// \brief Try to sink equivalent stores from a diamond's arms into the footer
+///
+/// Starting from a diamond tail block, place all the stores in one
+/// predecessor in a hash table, and try to match them against stores in the
+/// second predecessor.
+///
+bool MergedLoadStoreMotion::mergeStoresMemorySSA(BasicBlock *T) {
+  bool MergedStores = false;
+  assert(T && "Footer of a diamond cannot be empty");
+
+  pred_iterator PI = pred_begin(T), E = pred_end(T);
+  assert(PI != E);
+  BasicBlock *Pred0 = *PI;
+  ++PI;
+  BasicBlock *Pred1 = *PI;
+  ++PI;
+  // Tail block of a diamond/hammock?
+  if (Pred0 == Pred1)
+    return false; // No.
+  if (PI != E)
+    return false; // No. More than 2 predecessors.
+
+  // #Instructions in Pred1 for Compile Time Control
+  int Size1 = Pred1->size();
+  int NStores = 0;
+
+  // Skip all this if we don't have any memory accesses to look at.
+  auto *Pred0Accesses = MSSA->getBlockAccesses(Pred0);
+  if (!Pred0Accesses)
+    return false;
+  auto *Pred1Accesses = MSSA->getBlockAccesses(Pred1);
+  if (!Pred1Accesses)
+    return false;
+
+  for (auto AI = Pred0Accesses->rbegin(), AE = Pred0Accesses->rend(); AI != AE;
+       ++AI) {
+    if (const MemoryDef *MD = dyn_cast<MemoryDef>(&*AI)) {
+      Instruction *I = MD->getMemoryInst();
+
+      // Don't sink non-simple (atomic, volatile) stores.
+      if (!isa<StoreInst>(I))
+        continue;
+      StoreInst *S0 = cast<StoreInst>(I);
+      if (!S0->isSimple())
+        continue;
+      StoreInfo.insert(S0);
+
+      ++NStores;
+      if (NStores * Size1 >= MagicCompileTimeControl)
+        break;
+    }
+  }
+
+  for (auto AI = Pred1Accesses->rbegin(), AE = Pred1Accesses->rend(); AI != AE;
+       ++AI) {
+    const MemoryDef *MD = dyn_cast<MemoryDef>(&*AI);
+    if (!MD)
+      continue;
+    Instruction *Inst = MD->getMemoryInst();
+    if (!isa<StoreInst>(Inst))
+      continue;
+
+    StoreInst *Store1 = cast<StoreInst>(Inst);
+    // If we won't be able to sink the store out of the block, there is no
+    // point in looking for equal stores.
+    unsigned int BB1SinkBarrier = LastSinkBarrier.lookup(Store1->getParent());
+    if (BB1SinkBarrier != 0 && DFSNumberMap.lookup(Store1) < BB1SinkBarrier)
+      continue;
+
+    auto LookupResult = StoreInfo.equal_range(Store1);
+    bool Res = false;
+    auto LookupIter = LookupResult.first;
+
+    while (!Res && LookupIter != LookupResult.second) {
+      StoreInst *Store0 = *LookupIter;
+      auto OldIter = LookupIter;
+      ++LookupIter;
+
+      // If the last sink barrier in the block is after us, we can't sink out
+      // of the block.
+      unsigned int BB0SinkBarrier = LastSinkBarrier.lookup(Store0->getParent());
+      if (BB0SinkBarrier != 0 && DFSNumberMap.lookup(Store0) < BB0SinkBarrier)
+        continue;
+
+      MemoryLocation Loc0 = MemoryLocation::get(Store0);
+      MemoryLocation Loc1 = MemoryLocation::get(Store1);
+      if (!AA->isMustAlias(Loc0, Loc1) ||
+          isStoreSinkBarrierInRangeMemorySSA(Loc0,
+                                             MSSA->getMemoryAccess(Store0)) ||
+          isStoreSinkBarrierInRangeMemorySSA(Loc1,
+                                             MSSA->getMemoryAccess(Store1)))
+        continue;
+      Res = sinkStore(T, Store0, Store1);
+      if (Res)
+        StoreInfo.erase(OldIter);
+      MergedStores |= Res;
+    }
+  }
+  for (auto V : AccessesToDelete) {
+    MSSA->removeMemoryAccess(V);
+  }
+  AccessesToDelete.clear();
+  StoreInfo.clear();
+  return MergedStores;
+}
+
+///
+/// \brief True when the store whose access is \p Start is not downwards
+/// exposed to the sink point, i.e., some access below it may read or write
+/// the location \p Loc.
+///
+bool MergedLoadStoreMotion::isStoreSinkBarrierInRangeMemorySSA(
+    MemoryLocation &Loc, MemoryAccess *Start) {
+  DomTreeNode *StartNode = DT->getNode(Start->getBlock());
+  std::pair<unsigned, unsigned> StartDFS = {StartNode->getDFSNumIn(),
+                                            StartNode->getDFSNumOut()};
+
+  // Seed the worklist with the immediate users of Start.
+  std::queue<MemoryAccess *> CurrentUses;
+  SmallPtrSet<MemoryAccess *, 8> Visited;
+  for (auto U : Start->users())
+    CurrentUses.emplace(cast<MemoryAccess>(U));
+
+  // Process all the uses and see if any of them conflicts with the location.
+  while (!CurrentUses.empty()) {
+    MemoryAccess *MA = CurrentUses.front();
+    CurrentUses.pop();
+
+    BasicBlock *BB = MA->getBlock();
+    DomTreeNode *BBNode = DT->getNode(BB);
+    std::pair<unsigned, unsigned> CurrDFS = {BBNode->getDFSNumIn(),
+                                             BBNode->getDFSNumOut()};
+    // First see if it's outside the dominator tree range.
+    // The only way it could affect us is if it's below us in the dominator
+    // tree.
+    // The start of the final sink point is not below us in a hammock's
+    // dominator tree, because in a hammock, the merge block must be a
+    // sibling of the split block in the dominator tree.
+    // Thus, the things below us in the dominator tree are all things
+    // that lead to the sink point. A block is below us exactly when its
+    // DFS interval is contained in ours.
+    if (CurrDFS.first >= StartDFS.first && CurrDFS.second <= StartDFS.second) {
+      // A phi node is not a memory access on its own, and we may have deleted
+      // this access already. Because we are walking backwards, we don't
+      // perform updates on the fly.
+      if (AccessesToDelete.count(MA) || isa<MemoryPhi>(MA))
+        continue;
+      Instruction *Use = cast<MemoryUseOrDef>(MA)->getMemoryInst();
+
+      // If it really conflicts, we have a barrier.
+      if (AA->getModRefInfo(Use, Loc) & MRI_ModRef)
+        return true;
+
+      // If not, add its immediate uses to keep walking.
+      for (auto U : MA->users()) {
+        MemoryAccess *MU = cast<MemoryAccess>(U);
+        if (Visited.insert(MU).second)
+          CurrentUses.emplace(MU);
+      }
+    }
+  }
+  return false;
+}
+
+void MergedLoadStoreMotion::updateMemorySSAForSunkStores(
+    BasicBlock *SinkLocation, StoreInst *S0, StoreInst *S1, StoreInst *SNew) {
+  // There are three basic store sinking cases that we have to handle when
+  // updating.
+  // Because we are sinking to the bottom of a diamond, we may need to place a
+  // phi node depending on what the old memory states of the MemoryDefs were.
+  // This breaks down into three cases.
+  //
+  // 1. There are no stores above S0 or S1 in either block.
+  // In this case, no phi node is necessary; both MemoryDefs must have the
+  // same defining access, as the top of the diamond can only have one
+  // reaching MemoryDef/MemoryPhi at the end.
+  // After use replacement, we may end up with a phi in the sink location
+  // block having all the same operands, which we check for.
+  //
+  // 2. There are stores above either S0 or S1, and uses below the block.
+  // In this case, a phi node will already exist in the sink location, but
+  // may need to be updated. If a phi node argument is S0's or S1's
+  // MemoryDef, we replace it with that MemoryDef's defining access.
+  //
+  // 3. There are stores above either S0 or S1, and no uses below the block.
+  // In this case, no phi node will exist, but we need one to merge the
+  // MemoryDefs in order to produce the defining access for the MemoryDef we
+  // will place in the sink location block.
+  //
+  // After handling the above, we create a new MemoryDef with either the phi
+  // node, or the correct version, in the sink location block.
+
+  if (!MSSA)
+    return;
+
+  MemoryDef *MD0 = cast<MemoryDef>(MSSA->getMemoryAccess(S0));
+  MemoryDef *MD1 = cast<MemoryDef>(MSSA->getMemoryAccess(S1));
+
+  bool MayCreateDegeneratePhi = false;
+  MemoryAccess *NewDefiningAccess;
+  if (MD0->getDefiningAccess() == MD1->getDefiningAccess()) {
+    // Case 1
+    NewDefiningAccess = MD0->getDefiningAccess();
+    // This may create a phi node that has the same versions for everything
+    // after the replaceAllUsesWith below is done.
+    MayCreateDegeneratePhi = true;
+  } else if (MemoryAccess *MA = MSSA->getMemoryAccess(SinkLocation)) {
+    // Case 2
+    MemoryPhi *MP = cast<MemoryPhi>(MA);
+    for (Use &Op : MP->incoming_values()) {
+      if (Op.get() == MD0)
+        Op.set(MD0->getDefiningAccess());
+      else if (Op.get() == MD1)
+        Op.set(MD1->getDefiningAccess());
+    }
+    NewDefiningAccess = MP;
+  } else {
+    // Case 3
+    MemoryPhi *MP = MSSA->createMemoryPhi(SinkLocation);
+    // Being a diamond, the only incoming predecessors should be the blocks
+    // for the two stores.
+    MP->addIncoming(MD0->getDefiningAccess(), MD0->getBlock());
+    MP->addIncoming(MD1->getDefiningAccess(), MD1->getBlock());
+    NewDefiningAccess = MP;
+  }
+  // We may have stores after us in the same block that do not conflict with
+  // us. Since we've already proved they do not conflict with us, the correct
+  // thing to do is to reset their defining access to our defining access.
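+  // Only the uses that sit in the sunk store's own block need this
+  // rewriting; uses elsewhere are redirected by the replaceAllUsesWith on
+  // the new MemoryDef below.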
+
+  BasicBlock *MD0Block = MD0->getBlock();
+  for (auto UI = MD0->use_begin(), UE = MD0->use_end(); UI != UE;) {
+    // Unfortunately, the iterators get invalidated when we reset the use.
+    Use *U = &*UI;
+    ++UI;
+    if (cast<MemoryAccess>(U->getUser())->getBlock() == MD0Block)
+      U->set(MD0->getDefiningAccess());
+  }
+
+  BasicBlock *MD1Block = MD1->getBlock();
+  for (auto UI = MD1->use_begin(), UE = MD1->use_end(); UI != UE;) {
+    Use *U = &*UI;
+    ++UI;
+    if (cast<MemoryAccess>(U->getUser())->getBlock() == MD1Block)
+      U->set(MD1->getDefiningAccess());
+  }
+  // Create the new MemoryDef.
+  MemoryAccess *SNewMA = MSSA->createMemoryAccessInBB(
+      SNew, NewDefiningAccess, SinkLocation, MemorySSA::Beginning);
+  MD1->replaceAllUsesWith(SNewMA);
+  MD0->replaceAllUsesWith(SNewMA);
+  MSSA->removeMemoryAccess(MSSA->getMemoryAccess(S0));
+  // ilist's reverse-iterator invalidation semantics basically mean we need
+  // to wait until the loop in our caller is done before we kill these.
+  AccessesToDelete.insert(MSSA->getMemoryAccess(S1));
+  if (MayCreateDegeneratePhi) {
+    if (MemoryAccess *MA = MSSA->getMemoryAccess(SinkLocation)) {
+      MemoryPhi *MP = cast<MemoryPhi>(MA);
+      MemoryAccess *SamePhiOp = nullptr;
+      for (auto &Op : MP->incoming_values()) {
+        if (!SamePhiOp)
+          SamePhiOp = cast<MemoryAccess>(Op.get());
+        else if (SamePhiOp != Op.get()) {
+          SamePhiOp = nullptr;
+          break;
+        }
+      }
+      // If this is still non-null, it means all operands of the phi are now
+      // the same.
+      if (SamePhiOp) {
+        MP->replaceAllUsesWith(SamePhiOp);
+        MSSA->removeMemoryAccess(MP);
+      }
+    }
+  }
+}
Index: test/Transforms/InstMerge/exceptions.ll
===================================================================
--- test/Transforms/InstMerge/exceptions.ll
+++ test/Transforms/InstMerge/exceptions.ll
@@ -1,7 +1,8 @@
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
 ; RUN: opt -aa-pipeline=basic-aa -passes='require<memdep>',mldst-motion \
 ; RUN:     -S < %s | FileCheck %s
-
+; RUN: opt -basicaa -use-memoryssa-mldst -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -use-memoryssa-mldst -passes='mldst-motion,verify' -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
Index: test/Transforms/InstMerge/ld_hoist1.ll
===================================================================
--- test/Transforms/InstMerge/ld_hoist1.ll
+++ test/Transforms/InstMerge/ld_hoist1.ll
@@ -1,5 +1,7 @@
 ; Test load hoist
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa-mldst -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -use-memoryssa-mldst -passes='mldst-motion,verify' -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc_linux"
Index: test/Transforms/InstMerge/ld_hoist_st_sink.ll
===================================================================
--- test/Transforms/InstMerge/ld_hoist_st_sink.ll
+++ test/Transforms/InstMerge/ld_hoist_st_sink.ll
@@ -1,6 +1,8 @@
 ; Tests to make sure that loads and stores in a diamond get merged
 ; Loads are hoisted into the header. Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa-mldst -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -use-memoryssa-mldst -passes='mldst-motion,verify' -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i64, %struct.node*, %struct.node*, %struct.node*, i64, %struct.arc*, i64, i64, i64 }
Index: test/Transforms/InstMerge/st_sink_barrier_call.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_barrier_call.ll
+++ test/Transforms/InstMerge/st_sink_barrier_call.ll
@@ -1,6 +1,8 @@
 ; Test to make sure that a function call that needs to be a barrier to sinking stores is indeed a barrier.
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa-mldst -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -use-memoryssa-mldst -passes='mldst-motion,verify' -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_bugfix_22613.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_bugfix_22613.ll
+++ test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -3,6 +3,7 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 ; RUN: opt -O2 -S < %s | FileCheck %s
+; RUN: opt -O2 -basicaa -use-memoryssa-mldst -mldst-motion -S < %s | FileCheck %s
 
 ; CHECK-LABEL: main
 ; CHECK: if.end
Index: test/Transforms/InstMerge/st_sink_no_barrier_call.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_call.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_call.ll
@@ -1,6 +1,8 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier function call after the store instruction
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa-mldst -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -use-memoryssa-mldst -passes='mldst-motion,verify' -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_no_barrier_load.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_load.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_load.ll
@@ -1,7 +1,9 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier load after the store instruction
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+; RUN: opt -basicaa -use-memoryssa-mldst -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -use-memoryssa-mldst -passes='mldst-motion,verify' -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_no_barrier_store.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_store.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_store.ll
@@ -1,7 +1,9 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier store after the store instruction to be sunk
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+; RUN: opt -basicaa -use-memoryssa-mldst -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -use-memoryssa-mldst -passes='mldst-motion,verify' -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_two_stores.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_two_stores.ll
+++ test/Transforms/InstMerge/st_sink_two_stores.ll
@@ -1,6 +1,8 @@
 ; Test to make sure that stores in a diamond get merged
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa-mldst -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -use-memoryssa-mldst -passes='mldst-motion,verify' -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_with_barrier.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_with_barrier.ll
+++ test/Transforms/InstMerge/st_sink_with_barrier.ll
@@ -1,5 +1,7 @@
 ; Test to make sure that load from the same address as a store and appears after the store prevents the store from being sunk
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa-mldst -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -use-memoryssa-mldst -passes='mldst-motion,verify' -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }