Index: lib/Transforms/Scalar/MergedLoadStoreMotion.cpp =================================================================== --- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -73,6 +73,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -89,8 +90,10 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/MemorySSA.h" +#include #include +#include using namespace llvm; #define DEBUG_TYPE "mldst-motion" @@ -100,9 +103,54 @@ //===----------------------------------------------------------------------===// namespace { + +// This hash matches the most common things isSameOperationAs checks. It must +// always be a subset or equal to isSameOperationAs for everything to function +// properly. + +struct StoreInstHash { + size_t operator()(const StoreInst *A) const { + return hash_combine(A->getType(), A->getPointerOperand()->getType(), + A->getValueOperand()->getType(), A->isVolatile(), + A->getAlignment(), A->getOrdering(), + A->getSynchScope()); + } +}; + +struct StoreInstEq { + bool operator()(const StoreInst *A, const StoreInst *B) const { + return (A == B || A->isSameOperationAs(B)); + } +}; + +typedef std::pair LoadPair; + +// This hash matches the most common things isSameOperationAs checks. It must +// always be a subset or equal to isSameOperationAs for everything to function +// properly. 
+ +struct LoadPairHash { + size_t operator()(const LoadPair &LP) const { + const LoadInst *A = LP.first; + return hash_combine(A->getType(), A->getPointerOperand()->getType(), + A->isVolatile(), A->getAlignment(), A->getOrdering(), + A->getSynchScope(), LP.second); + } +}; + +struct LoadPairEq { + bool operator()(const LoadPair &A, const LoadPair &B) const { + return (A.second == B.second && + (A.first == B.first || A.first->isSameOperationAs(B.first))); + } +}; + class MergedLoadStoreMotion : public FunctionPass { AliasAnalysis *AA; MemoryDependenceAnalysis *MD; + MemorySSA *MSSA; + MemorySSAWalker *CachingWalker; + DominatorTree *DT; public: static char ID; // Pass identification, replacement for typeid @@ -117,9 +165,12 @@ // This transformation requires dominator postdominator info void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } // Helper routines @@ -132,28 +183,28 @@ BasicBlock *getDiamondTail(BasicBlock *BB); bool isDiamondHead(BasicBlock *BB); // Routines for hoisting loads - bool isLoadHoistBarrierInRange(const Instruction& Start, - const Instruction& End, - LoadInst* LI); - LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI); + void hoistInstruction(BasicBlock *BB, Instruction *HoistCand, Instruction *ElseInst); bool isSafeToHoist(Instruction *I) const; bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst); bool mergeLoads(BasicBlock *BB); // Routines for sinking stores - StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI); PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); - bool isStoreSinkBarrierInRange(const Instruction& Start, - const Instruction& End, - AliasAnalysis::Location Loc); + bool isStoreSinkBarrierInRange(AliasAnalysis::Location &Loc, + MemoryAccess *Start); bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst 
*ElseInst); bool mergeStores(BasicBlock *BB); + void computeDFSNumbers(); // The mergeLoad/Store algorithms could have Size0 * Size1 complexity, // where Size0 and Size1 are the #instructions on the two sides of // the diamond. The constant chosen here is arbitrary. Compiler Time // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl. const int MagicCompileTimeControl; + std::unordered_multiset LoadInfo; + std::unordered_multiset StoreInfo; + SmallPtrSet AccessesToDelete; + DenseMap> DFSDomMap; }; char MergedLoadStoreMotion::ID = 0; @@ -168,6 +219,8 @@ INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion", "MergedLoadStoreMotion", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSALazy) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -231,53 +284,6 @@ } /// -/// \brief True when instruction is a hoist barrier for a load -/// -/// Whenever an instruction could possibly modify the value -/// being loaded or protect against the load from happening -/// it is considered a hoist barrier. -/// - -bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start, - const Instruction& End, - LoadInst* LI) { - AliasAnalysis::Location Loc = AA->getLocation(LI); - return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod); -} - -/// -/// \brief Decide if a load can be hoisted -/// -/// When there is a load in \p BB to the same address as \p LI -/// and it can be hoisted from \p BB, return that load. -/// Otherwise return Null. 
-/// -LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1, - LoadInst *Load0) { - - for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE; - ++BBI) { - Instruction *Inst = BBI; - - // Only merge and hoist loads when their result in used only in BB - if (!isa(Inst) || Inst->isUsedOutsideOfBlock(BB1)) - continue; - - LoadInst *Load1 = dyn_cast(Inst); - BasicBlock *BB0 = Load0->getParent(); - - AliasAnalysis::Location Loc0 = AA->getLocation(Load0); - AliasAnalysis::Location Loc1 = AA->getLocation(Load1); - if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) && - !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) && - !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) { - return Load1; - } - } - return nullptr; -} - -/// /// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into /// \p BB /// @@ -309,6 +315,15 @@ // Hoist instruction. HoistedInst->insertBefore(HoistPt); + // We also hoist operands of loads using this function, so check to see if + // this is really a memory access before we try to update MemorySSA for it. + MemoryAccess *HoistCandAccess = MSSA->getMemoryAccess(HoistCand); + if (HoistCandAccess) { + MemoryAccess *NewAccess = MSSA->replaceMemoryAccessWithNewAccess( + HoistCandAccess, HoistedInst, MemorySSA::InsertionPlace::End); + MSSA->replaceMemoryAccess(MSSA->getMemoryAccess(ElseInst), NewAccess); + } + HoistCand->replaceAllUsesWith(HoistedInst); removeInstruction(HoistCand); // Replace the else block instruction. 
@@ -366,76 +381,113 @@ // #Instructions in Succ1 for Compile Time Control int Size1 = Succ1->size(); int NLoads = 0; - for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end(); - BBI != BBE;) { - Instruction *I = BBI; - ++BBI; + // Skip if we don't have memory accesses in both blocks + auto *Succ0Accesses = MSSA->getBlockAccesses(Succ0); + if (!Succ0Accesses) + return false; + auto *Succ1Accesses = MSSA->getBlockAccesses(Succ1); + if (!Succ1Accesses) + return false; + for (auto &Access : *Succ0Accesses) { + const MemoryUse *MU = dyn_cast(&Access); + if (!MU) + continue; + Instruction *I = MU->getMemoryInst(); // Only move non-simple (atomic, volatile) loads. - LoadInst *L0 = dyn_cast(I); - if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0)) + LoadInst *Load0 = dyn_cast(I); + if (!Load0 || !Load0->isSimple() || Load0->isUsedOutsideOfBlock(Succ0)) continue; + LoadInfo.insert({Load0, CachingWalker->getClobberingMemoryAccess(Load0)}); ++NLoads; if (NLoads * Size1 >= MagicCompileTimeControl) break; - if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) { - bool Res = hoistLoad(BB, L0, L1); + } + + for (auto AI = Succ1Accesses->begin(), AE = Succ1Accesses->end(); AI != AE;) { + auto CurrIter = AI; + ++AI; + const MemoryUse *MU = dyn_cast(&*CurrIter); + if (!MU) + continue; + LoadInst *Load1 = dyn_cast(MU->getMemoryInst()); + if (!Load1 || Load1->isUsedOutsideOfBlock(Succ1)) + continue; + + // We know that if the load has the same clobbering access as this one, + // they must not be killed until the same point + + auto LookupResult = LoadInfo.equal_range( + {Load1, CachingWalker->getClobberingMemoryAccess(Load1)}); + bool Res = false; + auto LookupIter = LookupResult.first; + while (!Res && LookupIter != LookupResult.second) { + LoadInst *Load0 = LookupIter->first; + auto OldIter = LookupIter; + ++LookupIter; + AliasAnalysis::Location Loc0 = AA->getLocation(Load0); + AliasAnalysis::Location Loc1 = AA->getLocation(Load1); + if (AA->isMustAlias(Loc0, Loc1)) 
+ Res = hoistLoad(BB, Load0, Load1); MergedLoads |= Res; // Don't attempt to hoist above loads that had not been hoisted. - if (!Res) - break; + if (Res) + LoadInfo.erase(OldIter); } } + LoadInfo.clear(); return MergedLoads; } /// -/// \brief True when instruction is a sink barrier for a store -/// located in Loc -/// -/// Whenever an instruction could possibly read or modify the -/// value being stored or protect against the store from -/// happening it is considered a sink barrier. -/// - -bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start, - const Instruction& End, - AliasAnalysis::Location - Loc) { - return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef); -} - -/// -/// \brief Check if \p BB contains a store to the same address as \p SI -/// -/// \return The store in \p when it is safe to sink. Otherwise return Null. -/// -StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1, - StoreInst *Store0) { - DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n"); - BasicBlock *BB0 = Store0->getParent(); - for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend(); - RBI != RBE; ++RBI) { - Instruction *Inst = &*RBI; - - if (!isa(Inst)) - continue; - - StoreInst *Store1 = cast(Inst); - - AliasAnalysis::Location Loc0 = AA->getLocation(Store0); - AliasAnalysis::Location Loc1 = AA->getLocation(Store1); - if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) && - !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))), - BB1->back(), Loc1) && - !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))), - BB0->back(), Loc0)) { - return Store1; +/// \brief True when the store is not downwards exposed to block \p End + +bool MergedLoadStoreMotion::isStoreSinkBarrierInRange( + AliasAnalysis::Location &Loc, MemoryAccess *Start) { + std::pair StartDFS = DFSDomMap.lookup(Start->getBlock()); + + // Seed with current uses that are in the block range. 
+ std::queue CurrentUses; + SmallPtrSet Visited; + for (auto U : Start->uses()) + CurrentUses.emplace(U); + + // Process all the uses and see if they are below end. + + while (!CurrentUses.empty()) { + MemoryAccess *MA = CurrentUses.front(); + CurrentUses.pop(); + + BasicBlock *BB = MA->getBlock(); + std::pair CurrDFS = DFSDomMap.lookup(BB); + // First see if it's outside the dominator tree range + // The only way it could affect us is if it's below us in the dominator tree. + // The start of the final sink point is not below us in a hammock's + // dominator tree. The dominator tree below us will end at whatever nodes are + // immediate predecessors to the sink point. + if (CurrDFS.first >= StartDFS.first && + CurrDFS.second <= StartDFS.second) { + // A phi node is not a memory access on its own, and we may have deleted + // this access already. Because we are walking backwards, we don't perform + // updates on the fly. + if (AccessesToDelete.count(MA) || isa(MA)) + continue; + Instruction *Use = MA->getMemoryInst(); + + // If it really conflicts, we have a barrier. + if (AA->getModRefInfo(Use, Loc) & AliasAnalysis::ModRef) + return true; + + // If not, add its immediate uses to keep walking + for (auto U : MA->uses()) + if (Visited.insert(U).second) + CurrentUses.emplace(U); + } } - return nullptr; + return false; } /// @@ -504,7 +556,14 @@ // New PHI operand? Use it. 
if (NewPN) SNew->setOperand(0, NewPN); + MemoryAccess *Phi = MSSA->getMemoryAccess(BB); + MSSA->replaceMemoryAccessWithNewAccess( + Phi, SNew, MemorySSA::InsertionPlace::Beginning); + MSSA->removeMemoryAccess(MSSA->getMemoryAccess(S0)); removeInstruction(S0); + // ilist's reverse iterator invalidation semantics basically mean we need to + // wait until the loop in our caller is dead before we kill these + AccessesToDelete.insert(MSSA->getMemoryAccess(S1)); removeInstruction(S1); A0->replaceAllUsesWith(ANew); removeInstruction(A0); @@ -515,11 +574,46 @@ return false; } +/// \brief Compute DFS in/out numbers for dominator tree +/// +/// Sadly, while the dominator tree has internal DFS numbers, we can't be +/// guaranteed they are up to date, as they are an implementation detail only +/// computed after a certain number of queries +void MergedLoadStoreMotion::computeDFSNumbers() { + unsigned DFSNum = 0; + SmallVector, 32> + WorkStack; + + WorkStack.emplace_back(DT->getRootNode(), DT->getRootNode()->begin()); + DFSDomMap[DT->getRoot()].first = DFSNum++; + + while (!WorkStack.empty()) { + const DomTreeNode *Node = WorkStack.back().first; + DomTreeNode::const_iterator ChildIt = WorkStack.back().second; + BasicBlock *BB = Node->getBlock(); + + // If we visited all of the children of this node, "recurse" back up the + // stack setting the DFOutNum. + if (ChildIt == Node->end()) { + DFSDomMap[BB].second = DFSNum++; + WorkStack.pop_back(); + } else { + // Otherwise, recursively visit this child. + const DomTreeNode *Child = *ChildIt; + ++WorkStack.back().second; + + WorkStack.emplace_back(Child, Child->begin()); + DFSDomMap[Child->getBlock()].first = DFSNum++; + } + } +} + /// /// \brief True when two stores are equivalent and can sink into the footer /// -/// Starting from a diamond tail block, iterate over the instructions in one -/// predecessor block and try to match a store in the second predecessor. 
+/// Starting from a diamond tail block, place all the stores in one predecessor +/// in a hash table, and try to match them against stores in the second +/// predecessor /// bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) { @@ -542,46 +636,92 @@ int Size1 = Pred1->size(); int NStores = 0; - for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend(); - RBI != RBE;) { + // Short circuit all of this if there is no memory phi in T + // It's not possible for both sides of the branch to have stores unless there + // is a memory phi in T merging the results + auto *TAccesses = MSSA->getBlockAccesses(T); + // If there is a phi, it will be listed as the memory access for the T basic + // block + if (!TAccesses || !MSSA->getMemoryAccess(T)) + return false; - Instruction *I = &*RBI; - ++RBI; + // Skip all this if we don't have any memory accesses to look at + auto *Pred0Accesses = MSSA->getBlockAccesses(Pred0); + if (!Pred0Accesses) + return false; + auto *Pred1Accesses = MSSA->getBlockAccesses(Pred1); + if (!Pred1Accesses) + return false; - // Sink move non-simple (atomic, volatile) stores - if (!isa(I)) + for (auto AI = Pred0Accesses->rbegin(), AE = Pred0Accesses->rend(); AI != AE; + ++AI) { + if (const MemoryDef *MD = dyn_cast(&*AI)) { + Instruction *I = MD->getMemoryInst(); + + // Sink move non-simple (atomic, volatile) stores + if (!isa(I)) + continue; + StoreInst *S0 = cast(I); + if (!S0->isSimple()) + continue; + StoreInfo.insert(S0); + + ++NStores; + if (NStores * Size1 >= MagicCompileTimeControl) + break; + } + } + + for (auto AI = Pred1Accesses->rbegin(), AE = Pred1Accesses->rend(); AI != AE; + ++AI) { + const MemoryDef *MD = dyn_cast(&*AI); + if (!MD) continue; - StoreInst *S0 = (StoreInst *)I; - if (!S0->isSimple()) + Instruction *Inst = MD->getMemoryInst(); + if (!isa(Inst)) continue; - ++NStores; - if (NStores * Size1 >= MagicCompileTimeControl) - break; - if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) { - bool Res = sinkStore(T, S0, 
S1); + StoreInst *Store1 = cast(Inst); + auto LookupResult = StoreInfo.equal_range(Store1); + bool Res = false; + auto LookupIter = LookupResult.first; + while (!Res && LookupIter != LookupResult.second) { + StoreInst *Store0 = *LookupIter; + auto OldIter = LookupIter; + ++LookupIter; + + if (DFSDomMap.empty()) + computeDFSNumbers(); + + AliasAnalysis::Location Loc0 = AA->getLocation(Store0); + AliasAnalysis::Location Loc1 = AA->getLocation(Store1); + if (!AA->isMustAlias(Loc0, Loc1) || + isStoreSinkBarrierInRange(Loc0, MSSA->getMemoryAccess(Store0)) || + isStoreSinkBarrierInRange(Loc1, MSSA->getMemoryAccess(Store1))) + continue; + Res = sinkStore(T, Store0, Store1); + if (Res) + StoreInfo.erase(OldIter); MergedStores |= Res; - // Don't attempt to sink below stores that had to stick around - // But after removal of a store and some of its feeding - // instruction search again from the beginning since the iterator - // is likely stale at this point. - if (!Res) - break; - else { - RBI = Pred0->rbegin(); - RBE = Pred0->rend(); - DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump()); - } } } + for (auto V : AccessesToDelete) { + MSSA->removeMemoryAccess(V); + } + AccessesToDelete.clear(); + StoreInfo.clear(); return MergedStores; } + /// /// \brief Run the transformation for each function /// bool MergedLoadStoreMotion::runOnFunction(Function &F) { - MD = &getAnalysis(); + DT = &getAnalysis().getDomTree(); + MD = getAnalysisIfAvailable(); AA = &getAnalysis(); + MSSA = &getAnalysis().getMSSA(); + CachingWalker = MSSA->buildMemorySSA(AA, DT); bool Changed = false; DEBUG(dbgs() << "Instruction Merger\n"); @@ -598,5 +738,7 @@ Changed |= mergeStores(getDiamondTail(BB)); } } + StoreInfo.clear(); + DFSDomMap.clear(); return Changed; }