Index: lib/Transforms/Scalar/MergedLoadStoreMotion.cpp =================================================================== --- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -73,6 +73,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -89,8 +90,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/MemorySSA.h" #include +#include using namespace llvm; #define DEBUG_TYPE "mldst-motion" @@ -100,9 +102,46 @@ //===----------------------------------------------------------------------===// namespace { + +struct StoreInstHash { + size_t operator()(const StoreInst *A) const { + return hash_combine(A->getType(), A->getPointerOperand()->getType(), + A->getValueOperand()->getType(), A->isVolatile(), + A->getAlignment(), A->getOrdering(), + A->getSynchScope()); + } +}; + +struct StoreInstEq { + bool operator()(const StoreInst *A, const StoreInst *B) const { + return (A == B || A->isSameOperationAs(B)); + } +}; + +typedef std::pair LoadPair; + +struct LoadPairHash { + size_t operator()(const LoadPair &LP) const { + const LoadInst *A = LP.first; + return hash_combine(A->getType(), A->getPointerOperand()->getType(), + A->isVolatile(), A->getAlignment(), A->getOrdering(), + A->getSynchScope(), LP.second); + } +}; + +struct LoadPairEq { + bool operator()(const LoadPair &A, const LoadPair &B) const { + return (A.second == B.second && + (A.first == B.first || A.first->isSameOperationAs(B.first))); + } +}; + class MergedLoadStoreMotion : public FunctionPass { AliasAnalysis *AA; MemoryDependenceAnalysis *MD; + MemorySSA *MSSA; + MemorySSAWalker *CachingWalker; + DominatorTree *DT; public: static char ID; // Pass identification, replacement for typeid @@ -117,9 +156,12 @@ // This transformation requires dominator postdominator info void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } // Helper routines @@ -132,20 +174,16 @@ BasicBlock *getDiamondTail(BasicBlock *BB); bool isDiamondHead(BasicBlock *BB); // Routines for hoisting loads - bool isLoadHoistBarrierInRange(const Instruction& Start, - const Instruction& End, - LoadInst* LI); - LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI); + void hoistInstruction(BasicBlock *BB, Instruction *HoistCand, Instruction *ElseInst); bool isSafeToHoist(Instruction *I) const; bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst); bool mergeLoads(BasicBlock *BB); // Routines for sinking stores - StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI); PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); - bool isStoreSinkBarrierInRange(const Instruction& Start, - const Instruction& End, + bool isStoreSinkBarrierInRange(const Instruction &Start, + const Instruction &End, AliasAnalysis::Location Loc); bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst); bool mergeStores(BasicBlock *BB); @@ -154,6 +192,9 @@ // the diamond. The constant chosen here is arbitrary. Compiler Time // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl. const int MagicCompileTimeControl; + std::unordered_multiset LoadInfo; + std::unordered_multiset StoreInfo; + SmallVector AccessesToDelete; }; char MergedLoadStoreMotion::ID = 0; @@ -168,6 +209,8 @@ INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion", "MergedLoadStoreMotion", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSALazy) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -231,53 +274,6 @@ } /// -/// \brief True when instruction is a hoist barrier for a load -/// -/// Whenever an instruction could possibly modify the value -/// being loaded or protect against the load from happening -/// it is considered a hoist barrier. -/// - -bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start, - const Instruction& End, - LoadInst* LI) { - AliasAnalysis::Location Loc = AA->getLocation(LI); - return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod); -} - -/// -/// \brief Decide if a load can be hoisted -/// -/// When there is a load in \p BB to the same address as \p LI -/// and it can be hoisted from \p BB, return that load. -/// Otherwise return Null. -/// -LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1, - LoadInst *Load0) { - - for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE; - ++BBI) { - Instruction *Inst = BBI; - - // Only merge and hoist loads when their result in used only in BB - if (!isa(Inst) || Inst->isUsedOutsideOfBlock(BB1)) - continue; - - LoadInst *Load1 = dyn_cast(Inst); - BasicBlock *BB0 = Load0->getParent(); - - AliasAnalysis::Location Loc0 = AA->getLocation(Load0); - AliasAnalysis::Location Loc1 = AA->getLocation(Load1); - if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) && - !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) && - !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) { - return Load1; - } - } - return nullptr; -} - -/// /// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into /// \p BB /// @@ -309,6 +305,15 @@ // Hoist instruction. HoistedInst->insertBefore(HoistPt); + // We also hoist operands, so see if this is even a memory access we want to + // fix up + MemoryAccess *HoistCandAccess = MSSA->getMemoryAccess(HoistCand); + if (HoistCandAccess) { + MemoryAccess *NewAccess = + MSSA->replaceMemoryAccessWithNewAccess(HoistCandAccess, HoistedInst, MemorySSA::InsertionPlace::End); + MSSA->replaceMemoryAccess(MSSA->getMemoryAccess(ElseInst), NewAccess); + } + HoistCand->replaceAllUsesWith(HoistedInst); removeInstruction(HoistCand); // Replace the else block instruction. @@ -366,28 +371,63 @@ // #Instructions in Succ1 for Compile Time Control int Size1 = Succ1->size(); int NLoads = 0; - for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end(); - BBI != BBE;) { - Instruction *I = BBI; - ++BBI; + // Skip if we don't have memory accesses in both blocks + auto *Succ0Accesses = MSSA->getBlockAccesses(Succ0); + if (!Succ0Accesses) + return false; + auto *Succ1Accesses = MSSA->getBlockAccesses(Succ1); + if (!Succ1Accesses) + return false; + for (auto &Access : *Succ0Accesses) { + const MemoryUse *MU = dyn_cast(&Access); + if (!MU) + continue; + Instruction *I = MU->getMemoryInst(); // Only move non-simple (atomic, volatile) loads. - LoadInst *L0 = dyn_cast(I); - if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0)) + LoadInst *Load0 = dyn_cast(I); + if (!Load0 || !Load0->isSimple() || Load0->isUsedOutsideOfBlock(Succ0)) continue; + LoadInfo.insert({Load0, CachingWalker->getClobberingMemoryAccess(Load0)}); ++NLoads; if (NLoads * Size1 >= MagicCompileTimeControl) break; - if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) { - bool Res = hoistLoad(BB, L0, L1); + } + + for (auto AI = Succ1Accesses->begin(), AE = Succ1Accesses->end(); AI != AE;) { + auto CurrIter = AI; + ++AI; + const MemoryUse *MU = dyn_cast(&*CurrIter); + if (!MU) + continue; + LoadInst *Load1 = dyn_cast(MU->getMemoryInst()); + if (!Load1 || Load1->isUsedOutsideOfBlock(Succ1)) + continue; + + // We know that if the load has the same clobbering access as this one,, + // they must not be killed until the same point + + auto LookupResult = LoadInfo.equal_range( + {Load1, CachingWalker->getClobberingMemoryAccess(Load1)}); + bool Res = false; + auto LookupIter = LookupResult.first; + while (!Res && LookupIter != LookupResult.second) { + LoadInst *Load0 = LookupIter->first; + auto OldIter = LookupIter; + ++LookupIter; + AliasAnalysis::Location Loc0 = AA->getLocation(Load0); + AliasAnalysis::Location Loc1 = AA->getLocation(Load1); + if (AA->isMustAlias(Loc0, Loc1)) + Res = hoistLoad(BB, Load0, Load1); MergedLoads |= Res; // Don't attempt to hoist above loads that had not been hoisted. - if (!Res) - break; + if (Res) + LoadInfo.erase(OldIter); } } + LoadInfo.clear(); return MergedLoads; } @@ -400,45 +440,13 @@ /// happening it is considered a sink barrier. /// -bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start, - const Instruction& End, - AliasAnalysis::Location - Loc) { +bool MergedLoadStoreMotion::isStoreSinkBarrierInRange( + const Instruction &Start, const Instruction &End, + AliasAnalysis::Location Loc) { return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef); } /// -/// \brief Check if \p BB contains a store to the same address as \p SI -/// -/// \return The store in \p when it is safe to sink. Otherwise return Null. -/// -StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1, - StoreInst *Store0) { - DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n"); - BasicBlock *BB0 = Store0->getParent(); - for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend(); - RBI != RBE; ++RBI) { - Instruction *Inst = &*RBI; - - if (!isa(Inst)) - continue; - - StoreInst *Store1 = cast(Inst); - - AliasAnalysis::Location Loc0 = AA->getLocation(Store0); - AliasAnalysis::Location Loc1 = AA->getLocation(Store1); - if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) && - !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))), - BB1->back(), Loc1) && - !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))), - BB0->back(), Loc0)) { - return Store1; - } - } - return nullptr; -} - -/// /// \brief Create a PHI node in BB for the operands of S0 and S1 /// PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0, @@ -504,7 +512,15 @@ // New PHI operand? Use it. if (NewPN) SNew->setOperand(0, NewPN); + MemoryAccess *Phi = MSSA->getMemoryAccess(BB); + MemorySSA::AccessListType::iterator Place(Phi); + ++Place; + MSSA->replaceMemoryAccessWithNewAccess(Phi, SNew, Place); + MSSA->removeMemoryAccess(MSSA->getMemoryAccess(S0)); removeInstruction(S0); + // ilist's reverse iterator invalidation semantics basically mean we need to + // wait until the loop in our caller is dead before we kill these + AccessesToDelete.push_back(MSSA->getMemoryAccess(S1)); removeInstruction(S1); A0->replaceAllUsesWith(ANew); removeInstruction(A0); @@ -518,8 +534,9 @@ /// /// \brief True when two stores are equivalent and can sink into the footer /// -/// Starting from a diamond tail block, iterate over the instructions in one -/// predecessor block and try to match a store in the second predecessor. +/// Starting from a diamond tail block, place all the stores in one predecessor +/// in a hash table, and try to match them against stores in the second +/// predecessor /// bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) { @@ -538,50 +555,95 @@ if (PI != E) return false; // No. More than 2 predecessors. + StoreInfo.reserve(Pred0->size()); + // #Instructions in Succ1 for Compile Time Control int Size1 = Pred1->size(); int NStores = 0; - for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend(); - RBI != RBE;) { + // Short circuit all of this if there is no memory phi in T + // It's not possible for both sides of the branch to have stores unless there + // is a memory phi in T merging the results + auto *TAccesses = MSSA->getBlockAccesses(T); + if (!TAccesses || !isa(&TAccesses->front())) + return false; - Instruction *I = &*RBI; - ++RBI; + // Skip all this if we don't have any memory accesses to look at + auto *Pred0Accesses = MSSA->getBlockAccesses(Pred0); + if (!Pred0Accesses) + return false; + auto *Pred1Accesses = MSSA->getBlockAccesses(Pred1); + if (!Pred1Accesses) + return false; - // Sink move non-simple (atomic, volatile) stores - if (!isa(I)) + for (auto AI = Pred0Accesses->rbegin(), AE = Pred0Accesses->rend(); AI != AE; + ++AI) { + if (const MemoryDef *MD = dyn_cast(&*AI)) { + Instruction *I = MD->getMemoryInst(); + + // Sink move non-simple (atomic, volatile) stores + if (!isa(I)) + continue; + StoreInst *S0 = cast(I); + if (!S0->isSimple()) + continue; + StoreInfo.insert(S0); + + ++NStores; + if (NStores * Size1 >= MagicCompileTimeControl) + break; + } + } + + for (auto AI = Pred1Accesses->rbegin(), AE = Pred1Accesses->rend(); AI != AE; + ++AI) { + const MemoryDef *MD = dyn_cast(&*AI); + if (!MD) continue; - StoreInst *S0 = (StoreInst *)I; - if (!S0->isSimple()) + Instruction *Inst = MD->getMemoryInst(); + if (!isa(Inst)) continue; - ++NStores; - if (NStores * Size1 >= MagicCompileTimeControl) - break; - if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) { - bool Res = sinkStore(T, S0, S1); + StoreInst *Store1 = cast(Inst); + auto LookupResult = StoreInfo.equal_range(Store1); + bool Res = false; + auto LookupIter = LookupResult.first; + while (!Res && LookupIter != LookupResult.second) { + StoreInst *Store0 = *LookupIter; + auto OldIter = LookupIter; + ++LookupIter; + + AliasAnalysis::Location Loc0 = AA->getLocation(Store0); + AliasAnalysis::Location Loc1 = AA->getLocation(Store1); + if (!AA->isMustAlias(Loc0, Loc1) || + isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))), + Pred1->back(), Loc1) || + isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))), + Pred0->back(), Loc0)) + continue; + Res = sinkStore(T, Store0, Store1); + if (Res) + StoreInfo.erase(OldIter); MergedStores |= Res; - // Don't attempt to sink below stores that had to stick around - // But after removal of a store and some of its feeding - // instruction search again from the beginning since the iterator - // is likely stale at this point. - if (!Res) - break; - else { - RBI = Pred0->rbegin(); - RBE = Pred0->rend(); - DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump()); - } } } + for (auto &V : AccessesToDelete) { + MSSA->removeMemoryAccess(V); + } + AccessesToDelete.clear(); + StoreInfo.clear(); return MergedStores; } + /// /// \brief Run the transformation for each function /// bool MergedLoadStoreMotion::runOnFunction(Function &F) { - MD = &getAnalysis(); + DT = &getAnalysis().getDomTree(); + MD = getAnalysisIfAvailable(); AA = &getAnalysis(); + MSSA = &getAnalysis().getMSSA(); + CachingWalker = MSSA->buildMemorySSA(AA, DT); bool Changed = false; DEBUG(dbgs() << "Instruction Merger\n"); @@ -598,5 +660,6 @@ Changed |= mergeStores(getDiamondTail(BB)); } } + StoreInfo.clear(); return Changed; }