Index: include/llvm/Transforms/Utils/MemorySSA.h
===================================================================
--- include/llvm/Transforms/Utils/MemorySSA.h
+++ include/llvm/Transforms/Utils/MemorySSA.h
@@ -518,6 +518,31 @@
     return It == PerBlockAccesses.end() ? nullptr : It->second.get();
   }
 
+  enum InsertionPlace { Beginning, End };
+
+  /// \brief Create a MemoryAccess in MemorySSA at a specified point in a
+  /// block, with a specified clobbering definition.
+  ///
+  /// This should be called when a memory instruction has been created to
+  /// replace an existing memory instruction. It will *not* create PHI nodes,
+  /// nor verify the clobbering definition. The insertion place is used solely
+  /// to determine where in the block's MemorySSA access list the new access
+  /// will be placed. Returns the new MemoryAccess.
+  MemoryAccess *createMemoryAccess(Instruction *I, MemoryAccess *Definition,
+                                   const BasicBlock *BB, InsertionPlace Point);
+
+  /// \brief Create a MemoryAccess in MemorySSA before or after an existing
+  /// MemoryAccess, with a specified clobbering definition.
+  ///
+  /// As above, this should be called when a memory instruction has been
+  /// created to replace an existing memory instruction. It will *not* create
+  /// PHI nodes, nor verify the clobbering definition. The new access is placed
+  /// immediately before (respectively after) \p InsertPt in that block's
+  /// access list. Returns the new MemoryAccess.
+  MemoryAccess *createMemoryAccessBefore(Instruction *I,
+                                         MemoryAccess *Definition,
+                                         MemoryAccess *InsertPt);
+  MemoryAccess *createMemoryAccessAfter(Instruction *I,
+                                        MemoryAccess *Definition,
+                                        MemoryAccess *InsertPt);
+
   /// \brief Remove a MemoryAccess from MemorySSA, including updating all
   /// definitions and uses.
   /// This should be called when a memory instruction that has a MemoryAccess
@@ -526,8 +551,6 @@
   /// on the MemoryAccess for that store/load.
   void removeMemoryAccess(MemoryAccess *);
 
-  enum InsertionPlace { Beginning, End };
-
   /// \brief Given two memory accesses in the same basic block, determine
   /// whether MemoryAccess \p A dominates MemoryAccess \p B.
   bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const;
@@ -554,13 +577,14 @@
   void markUnreachableAsLiveOnEntry(BasicBlock *BB);
   bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
   MemoryAccess *createNewAccess(Instruction *, bool ignoreNonMemory = false);
+  MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *);
   MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
   void removeFromLookups(MemoryAccess *);
   MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *);
   void renamePass(DomTreeNode *, MemoryAccess *IncomingVal,
                   SmallPtrSet<BasicBlock *, 16> &Visited);
-  AccessListType *getOrCreateAccessList(BasicBlock *);
+  AccessListType *getOrCreateAccessList(const BasicBlock *);
   AliasAnalysis *AA;
   DominatorTree *DT;
   Function &F;
Index: lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
===================================================================
--- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -90,8 +90,10 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
+#include <unordered_set>
 #include <vector>
+#include <utility>
 
 using namespace llvm;
 
@@ -102,14 +104,67 @@
 //===----------------------------------------------------------------------===//
 namespace {
 
+// This hash matches the most common things isSameOperationAs checks. It must
+// always check a subset of (or exactly) what isSameOperationAs checks, so that
+// stores the equality functor considers equal can never hash differently.
+
+struct StoreInstHash {
+  size_t operator()(const StoreInst *A) const {
+    return hash_combine(A->getType(), A->getPointerOperand()->getType(),
+                        A->getValueOperand()->getType(), A->isVolatile(),
+                        A->getAlignment(), A->getOrdering(),
+                        A->getSynchScope());
+  }
+};
+
+struct StoreInstEq {
+  bool operator()(const StoreInst *A, const StoreInst *B) const {
+    return (A == B || A->isSameOperationAs(B));
+  }
+};
+
+typedef std::pair<LoadInst *, MemoryAccess *> LoadPair;
+
+// This hash has two parts. The first is the instruction, the second the
+// clobbering MemorySSA access. For the instruction part, this hash matches the
+// most common things isSameOperationAs checks. It must always check a subset
+// of (or exactly) what isSameOperationAs checks, so that loads the equality
+// functor considers equal can never hash differently.
+
+struct LoadPairHash {
+  size_t operator()(const LoadPair &LP) const {
+    const LoadInst *A = LP.first;
+    return hash_combine(A->getType(), A->getPointerOperand()->getType(),
+                        A->isVolatile(), A->getAlignment(), A->getOrdering(),
+                        A->getSynchScope(), LP.second);
+  }
+};
+
+struct LoadPairEq {
+  bool operator()(const LoadPair &A, const LoadPair &B) const {
+    return (A.second == B.second &&
+            (A.first == B.first || A.first->isSameOperationAs(B.first)));
+  }
+};
+
 class MergedLoadStoreMotion : public FunctionPass {
   AliasAnalysis *AA;
   MemoryDependenceAnalysis *MD;
+  MemorySSA *MSSA;
+  MemorySSAWalker *CachingWalker;
+  DominatorTree *DT;
+  bool UseMemorySSA;
+  // The non-MemorySSA versions of mergeLoads/mergeStores could have Size0 *
+  // Size1 complexity, where Size0 and Size1 are the #instructions on the two
+  // sides of the diamond. The constant chosen here is arbitrary. Compile time
+  // is controlled by the check Size0 * Size1 < MagicCompileTimeControl.
+  const int MagicCompileTimeControl;
 
 public:
   static char ID; // Pass identification, replacement for typeid
   MergedLoadStoreMotion()
-      : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
+      : FunctionPass(ID), MD(nullptr), UseMemorySSA(true),
+        MagicCompileTimeControl(250) {
     initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
   }
 
@@ -120,8 +175,11 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<MemorySSALazy>();
     AU.addRequired<AAResultsWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<MemorySSALazy>();
     AU.addPreserved<MemoryDependenceAnalysis>();
   }
 
@@ -135,15 +193,15 @@
   BasicBlock *getDiamondTail(BasicBlock *BB);
   bool isDiamondHead(BasicBlock *BB);
   // Routines for hoisting loads
-  bool isLoadHoistBarrierInRange(const Instruction& Start,
-                                 const Instruction& End,
-                                 LoadInst* LI);
+  bool isLoadHoistBarrierInRange(const Instruction &Start,
+                                 const Instruction &End, LoadInst *LI);
   LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
   void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
                         Instruction *ElseInst);
   bool isSafeToHoist(Instruction *I) const;
   bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
   bool mergeLoads(BasicBlock *BB);
+  bool mergeLoadsMemorySSA(BasicBlock *BB);
   // Routines for sinking stores
   StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
   PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
@@ -151,11 +209,12 @@
                                   const Instruction &End, MemoryLocation Loc);
   bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
   bool mergeStores(BasicBlock *BB);
-  // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
-  // where Size0 and Size1 are the #instructions on the two sides of
-  // the diamond. The constant chosen here is arbitrary. Compiler Time
-  // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
-  const int MagicCompileTimeControl;
+  void computeDFSNumbers();
+
+  std::unordered_multiset<LoadPair, LoadPairHash, LoadPairEq> LoadInfo;
+  std::unordered_multiset<StoreInst *, StoreInstHash, StoreInstEq> StoreInfo;
+  SmallPtrSet<MemoryAccess *, 8> AccessesToDelete;
+  DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> DFSDomMap;
 };
 
 char MergedLoadStoreMotion::ID = 0;
@@ -176,6 +235,39 @@
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
                     "MergedLoadStoreMotion", false, false)
+
+/// \brief Compute DFS in/out numbers for the dominator tree.
+///
+/// Sadly, while the dominator tree keeps internal DFS numbers, we cannot rely
+/// on them being up to date: they are an implementation detail that is only
+/// recomputed after a certain number of queries.
+void MergedLoadStoreMotion::computeDFSNumbers() {
+  unsigned DFSNum = 0;
+  SmallVector<std::pair<const DomTreeNode *, DomTreeNode::const_iterator>, 32>
+      WorkStack;
+
+  WorkStack.emplace_back(DT->getRootNode(), DT->getRootNode()->begin());
+  DFSDomMap[DT->getRoot()].first = DFSNum++;
+
+  while (!WorkStack.empty()) {
+    const DomTreeNode *Node = WorkStack.back().first;
+    DomTreeNode::const_iterator ChildIt = WorkStack.back().second;
+    BasicBlock *BB = Node->getBlock();
+
+    // If we have visited all of the children of this node, "recurse" back up
+    // the stack, recording the DFS out-number.
+    if (ChildIt == Node->end()) {
+      DFSDomMap[BB].second = DFSNum++;
+      WorkStack.pop_back();
+    } else {
+      // Otherwise, recursively visit this child.
+      const DomTreeNode *Child = *ChildIt;
+      ++WorkStack.back().second;
+
+      WorkStack.emplace_back(Child, Child->begin());
+      DFSDomMap[Child->getBlock()].first = DFSNum++;
+    }
+  }
+}
 
 ///
 /// \brief Remove instruction from parent and update memory dependence analysis.
@@ -240,9 +332,9 @@
 /// being loaded or protect against the load from happening
 /// it is considered a hoist barrier.
 ///
-bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
-                                                      const Instruction& End,
-                                                      LoadInst* LI) {
+bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction &Start,
+                                                      const Instruction &End,
+                                                      LoadInst *LI) {
   MemoryLocation Loc = MemoryLocation::get(LI);
   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
 }
@@ -306,6 +398,18 @@
   // Hoist instruction.
   HoistedInst->insertBefore(HoistPt);
 
+  // We also hoist operands of loads using this function, so check to see if
+  // this is really a memory access before we try to update MemorySSA for it.
+  MemoryAccess *HoistCandAccess = MSSA->getMemoryAccess(HoistCand);
+  if (HoistCandAccess) {
+    MemoryUseOrDef *MUD = cast<MemoryUseOrDef>(HoistCandAccess);
+    // Create the access for the hoisted instruction, then remove the accesses
+    // of the two instructions it replaces.
+    MSSA->createMemoryAccess(HoistedInst, MUD->getDefiningAccess(), BB,
+                             MemorySSA::End);
+    MSSA->removeMemoryAccess(HoistCandAccess);
+    MSSA->removeMemoryAccess(MSSA->getMemoryAccess(ElseInst));
+  }
   HoistCand->replaceAllUsesWith(HoistedInst);
   removeInstruction(HoistCand);
@@ -348,6 +452,88 @@
   } else
     return false;
 }
+
+///
+/// \brief Try to hoist two loads to the same address into the diamond header
+///
+/// Starting from a diamond head block, put the loads of one successor block
+/// into a hash table. Then walk the loads of the other successor block and
+/// check whether each one matches an entry in the table and can be hoisted.
+///
+bool MergedLoadStoreMotion::mergeLoadsMemorySSA(BasicBlock *BB) {
+  bool MergedLoads = false;
+  assert(isDiamondHead(BB));
+  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+  BasicBlock *Succ0 = BI->getSuccessor(0);
+  BasicBlock *Succ1 = BI->getSuccessor(1);
+  // #Instructions in Succ1 for compile-time control.
+  int Size1 = Succ1->size();
+  int NLoads = 0;
+
+  // Skip if we don't have memory accesses in both blocks.
+  auto *Succ0Accesses = MSSA->getBlockAccesses(Succ0);
+  if (!Succ0Accesses)
+    return false;
+  auto *Succ1Accesses = MSSA->getBlockAccesses(Succ1);
+  if (!Succ1Accesses)
+    return false;
+
+  for (auto &Access : *Succ0Accesses) {
+    const MemoryUse *MU = dyn_cast<MemoryUse>(&Access);
+    if (!MU)
+      continue;
+    Instruction *I = MU->getMemoryInst();
+    // Only hoist simple (non-atomic, non-volatile) loads.
+    LoadInst *Load0 = dyn_cast<LoadInst>(I);
+    if (!Load0 || !Load0->isSimple() || Load0->isUsedOutsideOfBlock(Succ0))
+      continue;
+
+    LoadInfo.insert({Load0, CachingWalker->getClobberingMemoryAccess(Load0)});
+    ++NLoads;
+    if (NLoads * Size1 >= MagicCompileTimeControl)
+      break;
+  }
+
+  for (auto AI = Succ1Accesses->begin(), AE = Succ1Accesses->end(); AI != AE;) {
+    auto CurrIter = AI;
+    ++AI;
+    const MemoryUse *MU = dyn_cast<MemoryUse>(&*CurrIter);
+    if (!MU)
+      continue;
+    LoadInst *Load1 = dyn_cast<LoadInst>(MU->getMemoryInst());
+    if (!Load1 || Load1->isUsedOutsideOfBlock(Succ1))
+      continue;
+
+    // If a load in the table has the same clobbering access as this one, the
+    // two cannot be killed before the same point. That is, all loads that
+    // could possibly be merged must reach a common clobbering MemoryDef (or
+    // MemoryPhi); if they do not, something is in the way on one of the
+    // branches and we cannot merge them. If we *do* find a possible match,
+    // the only further check needed is that both are loads of the same
+    // pointer. We could simply compare the pointer operands, but isMustAlias
+    // can do a better job of it.
+
+    auto LookupResult = LoadInfo.equal_range(
+        {Load1, CachingWalker->getClobberingMemoryAccess(Load1)});
+    bool Res = false;
+    auto LookupIter = LookupResult.first;
+    while (!Res && LookupIter != LookupResult.second) {
+      LoadInst *Load0 = LookupIter->first;
+      auto OldIter = LookupIter;
+      ++LookupIter;
+      MemoryLocation Loc0 = MemoryLocation::get(Load0);
+      MemoryLocation Loc1 = MemoryLocation::get(Load1);
+      if (AA->isMustAlias(Loc0, Loc1))
+        Res = hoistLoad(BB, Load0, Load1);
+      MergedLoads |= Res;
+      // If the hoist succeeded, Load0 no longer exists in Succ0, so drop its
+      // now-stale table entry.
+      if (Res)
+        LoadInfo.erase(OldIter);
+    }
+  }
+  LoadInfo.clear();
+  return MergedLoads;
+}
 
 ///
 /// \brief Try to hoist two loads to same address into diamond header
@@ -416,17 +602,17 @@
     Instruction *Inst = &*RBI;
 
     if (!isa<StoreInst>(Inst))
-       continue;
+      continue;
 
     StoreInst *Store1 = cast<StoreInst>(Inst);
 
     MemoryLocation Loc0 = MemoryLocation::get(Store0);
     MemoryLocation Loc1 = MemoryLocation::get(Store1);
     if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
-      !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))),
-                                 BB1->back(), Loc1) &&
-      !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))),
-                                 BB0->back(), Loc0)) {
+        !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))),
+                                   BB1->back(), Loc1) &&
+        !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))),
+                                   BB0->back(), Loc0)) {
       return Store1;
     }
   }
@@ -567,20 +753,26 @@
 bool MergedLoadStoreMotion::runOnFunction(Function &F) {
   MD = getAnalysisIfAvailable<MemoryDependenceAnalysis>();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  if (UseMemorySSA) {
+    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    MSSA = &getAnalysis<MemorySSALazy>().getMSSA();
+    CachingWalker = MSSA->buildMemorySSA(AA, DT);
+  }
 
   bool Changed = false;
   DEBUG(dbgs() << "Instruction Merger\n");
 
   // Merge unconditional branches, allowing PRE to catch more
   // optimization opportunities.
-  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
-    BasicBlock *BB = &*FI++;
-
+  for (BasicBlock &BB : F) {
     // Hoist equivalent loads and sink stores
     // outside diamonds when possible
-    if (isDiamondHead(BB)) {
-      Changed |= mergeLoads(BB);
-      Changed |= mergeStores(getDiamondTail(BB));
+    if (isDiamondHead(&BB)) {
+      if (UseMemorySSA)
+        Changed |= mergeLoadsMemorySSA(&BB);
+      else
+        Changed |= mergeLoads(&BB);
+      Changed |= mergeStores(getDiamondTail(&BB));
     }
   }
   return Changed;
Index: lib/Transforms/Utils/MemorySSA.cpp
===================================================================
--- lib/Transforms/Utils/MemorySSA.cpp
+++ lib/Transforms/Utils/MemorySSA.cpp
@@ -215,7 +215,8 @@
     MA.dropAllReferences();
 }
 
-MemorySSA::AccessListType *MemorySSA::getOrCreateAccessList(BasicBlock *BB) {
+MemorySSA::AccessListType *
+MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
   auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
 
   if (Res.second)
@@ -356,6 +357,59 @@
   return Walker;
 }
 
+MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
+                                               MemoryAccess *Definition) {
+  MemoryAccess *NewAccess = createNewAccess(I, true);
+  assert(
+      NewAccess != nullptr &&
+      "Tried to create a memory access for a non-memory touching instruction");
+  MemoryUseOrDef *MUD = cast<MemoryUseOrDef>(NewAccess);
+  MUD->setDefiningAccess(Definition);
+  return MUD;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccess(Instruction *I,
+                                            MemoryAccess *Definition,
+                                            const BasicBlock *BB,
+                                            InsertionPlace Point) {
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(BB);
+  if (Point == Beginning) {
+    // It goes after any phi nodes
+    auto AI = Accesses->begin();
+    while (AI != Accesses->end()) {
+      if (!isa<MemoryPhi>(AI))
+        break;
+      ++AI;
+    }
+    Accesses->insert(AI, NewAccess);
+  } else {
+    Accesses->push_back(NewAccess);
+  }
+
+  return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccessBefore(Instruction *I,
+                                                  MemoryAccess *Definition,
+                                                  MemoryAccess *InsertPt) {
+  assert(I->getParent() == InsertPt->getBlock() &&
+         "New and old access must be in the same block");
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+  Accesses->insert(AccessListType::iterator(InsertPt), NewAccess);
+  return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccessAfter(Instruction *I,
+                                                 MemoryAccess *Definition,
+                                                 MemoryAccess *InsertPt) {
+  assert(I->getParent() == InsertPt->getBlock() &&
+         "New and old access must be in the same block");
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+  Accesses->insertAfter(AccessListType::iterator(InsertPt), NewAccess);
+  return NewAccess;
+}
+
 /// \brief Helper function to create new memory accesses
 MemoryAccess *MemorySSA::createNewAccess(Instruction *I,
                                          bool IgnoreNonMemory) {
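
A note on the intended use of the new MemorySSA hooks: the pattern in hoistInstruction() above, create the replacement's access from the retired access's defining access and then removeMemoryAccess() the accesses being retired, is the general recipe this API is designed for. The sketch below restates that recipe outside the pass. It is illustrative only: the helper name is invented, and it assumes NewInst has already been inserted into the same block as OldInst and touches memory in the same way.

#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/MemorySSA.h"
using namespace llvm;

static void replaceMemoryInst(MemorySSA *MSSA, Instruction *OldInst,
                              Instruction *NewInst) {
  // Only update MemorySSA if the old instruction actually had an access.
  if (MemoryAccess *OldAccess = MSSA->getMemoryAccess(OldInst)) {
    auto *OldUseOrDef = cast<MemoryUseOrDef>(OldAccess);
    // Give NewInst an access immediately before the old one, reusing the old
    // access's defining access as the (unverified) clobbering definition.
    // Per the header comment, no MemoryPhi nodes are created here.
    MSSA->createMemoryAccessBefore(NewInst, OldUseOrDef->getDefiningAccess(),
                                   OldAccess);
    // Retire the old access; removeMemoryAccess updates its defs and uses.
    MSSA->removeMemoryAccess(OldAccess);
  }
}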
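
The comments on StoreInstHash and LoadPairHash state the invariant that makes these functors safe to use with std::unordered_multiset: the hash may only examine fields that the equality functor (ultimately isSameOperationAs) also compares, so two elements the equality considers equal can never land in different buckets. LoadPairHash additionally folds in the clobbering MemoryAccess, which is what makes the equal_range() probe in mergeLoadsMemorySSA() sound: only loads that reach the same clobber can ever compare equal. The snippet below is a small, hypothetical consistency check of that invariant, written as if it lived next to the functors in MergedLoadStoreMotion.cpp; the function name is invented and it is not part of the patch.

static void checkHashEqConsistency(const StoreInst *A, const StoreInst *B) {
  StoreInstHash Hash;
  StoreInstEq Eq;
  // If the equality functor says the two stores are interchangeable, their
  // hashes must agree. The converse need not hold: hash collisions are fine
  // and are resolved by the equality functor.
  if (Eq(A, B))
    assert(Hash(A) == Hash(B) &&
           "hash examines a field that isSameOperationAs does not");
}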
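
computeDFSNumbers() records, for every block, the counter value when its dominator-tree node is pushed (the pair's first member) and popped (second) during the iterative preorder walk. The consumer of DFSDomMap is not visible in this excerpt, but the usual purpose of such in/out numbers is a constant-time dominance test: A dominates B exactly when B's interval nests inside A's. A minimal sketch of that test follows, assuming DFSDomMap has been filled in by computeDFSNumbers(); the helper name is illustrative and not part of the patch.

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
#include <utility>
using namespace llvm;

static bool dfsDominates(
    const DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> &DFSDomMap,
    const BasicBlock *A, const BasicBlock *B) {
  std::pair<unsigned, unsigned> NumA = DFSDomMap.lookup(A);
  std::pair<unsigned, unsigned> NumB = DFSDomMap.lookup(B);
  // A was pushed before B and popped after B, i.e. B's in/out interval is
  // contained in A's, exactly when A is an ancestor of B in the dominator
  // tree, which is the definition of "A dominates B".
  return NumA.first <= NumB.first && NumB.second <= NumA.second;
}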