Index: include/llvm/Transforms/Utils/MemorySSA.h
===================================================================
--- include/llvm/Transforms/Utils/MemorySSA.h
+++ include/llvm/Transforms/Utils/MemorySSA.h
@@ -536,6 +536,31 @@
     return It == PerBlockAccesses.end() ? nullptr : It->second.get();
   }
 
+  enum InsertionPlace { Beginning, End };
+
+  /// \brief Create a MemoryAccess in MemorySSA at a specified point in a block,
+  /// with a specified clobbering definition.
+  /// This should be called when a memory instruction is created that is being
+  /// used to replace an existing memory instruction. It will *not* create PHI
+  /// nodes, or verify the clobbering definition. The insertion place is used
+  /// solely to determine where in the memoryssa access lists the instruction
+  /// will be placed.  It will return the new MemoryAccess.
+  MemoryAccess *createMemoryAccess(Instruction *I, MemoryAccess *Definition,
+                                   const BasicBlock *BB, InsertionPlace Point);
+  /// \brief Create a MemoryAccess in MemorySSA before or after an existing
+  /// MemoryAccess with a specified clobbering definition.
+  /// This should be called when a memory instruction is created that is being
+  /// used to replace an existing memory instruction. It will *not* create PHI
+  /// nodes, or verify the clobbering definition. The insertion place is used
+  /// solely to determine where in the memoryssa access lists the instruction
+  /// will be placed.  It will return the new MemoryAccess.
+  MemoryAccess *createMemoryAccessBefore(Instruction *I,
+                                         MemoryAccess *Definition,
+                                         MemoryAccess *InsertPt);
+  MemoryAccess *createMemoryAccessAfter(Instruction *I,
+                                        MemoryAccess *Definition,
+                                        MemoryAccess *InsertPt);
+
   /// \brief Remove a MemoryAccess from MemorySSA, including updating all
   /// definitions and uses.
   /// This should be called when a memory instruction that has a MemoryAccess
@@ -544,8 +569,6 @@
   /// on the MemoryAccess for that store/load.
   void removeMemoryAccess(MemoryAccess *);
 
-  enum InsertionPlace { Beginning, End };
-
   /// \brief Given two memory accesses in the same basic block, determine
   /// whether MemoryAccess \p A dominates MemoryAccess \p B.
   bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const;
@@ -572,13 +595,14 @@
   void markUnreachableAsLiveOnEntry(BasicBlock *BB);
   bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
   MemoryUseOrDef *createNewAccess(Instruction *);
+  MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *);
   MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
   void removeFromLookups(MemoryAccess *);
 
   MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *);
   void renamePass(DomTreeNode *, MemoryAccess *IncomingVal,
                   SmallPtrSet<BasicBlock *, 16> &Visited);
-  AccessListType *getOrCreateAccessList(BasicBlock *);
+  AccessListType *getOrCreateAccessList(const BasicBlock *);
   AliasAnalysis *AA;
   DominatorTree *DT;
   Function &F;
Index: lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
===================================================================
--- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -86,25 +86,91 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
+#include <queue>
+#include <unordered_set>
+#include <vector>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "mldst-motion"
+static cl::opt<bool> UseMemorySSA("use-memoryssa", cl::Hidden,
+                                  cl::desc("Use MemorySSA for optimizations"));
 
 //===----------------------------------------------------------------------===//
 //                         MergedLoadStoreMotion Pass
 //===----------------------------------------------------------------------===//
 
 namespace {
+
+// This hash matches the most common things isSameOperationAs checks. It must
+// always be a subset or equal to isSameOperationAs for everything to function
+// properly.
+
+struct StoreInstHash {
+  size_t operator()(const StoreInst *A) const {
+    return hash_combine(A->getType(), A->getPointerOperand()->getType(),
+                        A->getValueOperand()->getType(), A->isVolatile(),
+                        A->getAlignment(), A->getOrdering(),
+                        A->getSynchScope());
+  }
+};
+
+struct StoreInstEq {
+  bool operator()(const StoreInst *A, const StoreInst *B) const {
+    return (A == B || A->isSameOperationAs(B));
+  }
+};
+
+typedef std::pair<LoadInst *, const MemoryAccess *> LoadPair;
+
+// This hash has two parts. The first is the instruction, the second, the
+// clobbering MemorySSA access.  For the instruction part, this hash matches the
+// most common things isSameOperationAs checks. It must always be a subset or
+// equal to isSameOperationAs for everything to function properly.
+
+struct LoadPairHash {
+  size_t operator()(const LoadPair &LP) const {
+    const LoadInst *A = LP.first;
+    return hash_combine(A->getType(), A->getPointerOperand()->getType(),
+                        A->isVolatile(), A->getAlignment(), A->getOrdering(),
+                        A->getSynchScope(), LP.second);
+  }
+};
+
+struct LoadPairEq {
+  bool operator()(const LoadPair &A, const LoadPair &B) const {
+    return (A.second == B.second &&
+            (A.first == B.first || A.first->isSameOperationAs(B.first)));
+  }
+};
+
 class MergedLoadStoreMotion : public FunctionPass {
   AliasAnalysis *AA;
   MemoryDependenceResults *MD;
+  MemorySSA *MSSA;
+  MemorySSAWalker *CachingWalker;
+  DominatorTree *DT;
+  // The non-MemorySSA versions of mergeLoad/Store algorithms could have Size0 *
+  // Size1 complexity, where Size0 and Size1 are the #instructions on the two
+  // sides of the diamond. The constant chosen here is arbitrary. Compiler Time
+  // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
+  const int MagicCompileTimeControl;
+  // We DFS number instructions and avoid hoisting or sinking things past may
+  // throw instructions and instructions not guaranteed to transfer execution to
+  // successors.
+  DenseMap<const Instruction *, unsigned> DFSNumberMap;
+  DenseMap<const BasicBlock *, unsigned> FirstHoistBarrier;
+  DenseMap<const BasicBlock *, unsigned> LastSinkBarrier;
+  SmallPtrSet<MemoryAccess *, 8> AccessesToDelete;
+  std::unordered_multiset<LoadPair, LoadPairHash, LoadPairEq> LoadInfo;
+  std::unordered_multiset<StoreInst *, StoreInstHash, StoreInstEq> StoreInfo;
 
 public:
   static char ID; // Pass identification, replacement for typeid
   MergedLoadStoreMotion()
       : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
+
     initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
   }
 
@@ -114,8 +180,12 @@
   // This transformation requires dominator postdominator info
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<MemorySSAWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<MemorySSAWrapperPass>();
     AU.addPreserved<MemoryDependenceWrapperPass>();
   }
 
@@ -130,26 +200,25 @@
   bool isDiamondHead(BasicBlock *BB);
   // Routines for hoisting loads
   bool isLoadHoistBarrierInRange(const Instruction &Start,
-                                 const Instruction &End, LoadInst *LI,
-                                 bool SafeToLoadUnconditionally);
+                                 const Instruction &End, LoadInst *LI);
   LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
   void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
                         Instruction *ElseInst);
   bool isSafeToHoist(Instruction *I) const;
   bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
   bool mergeLoads(BasicBlock *BB);
+  bool mergeLoadsMemorySSA(BasicBlock *BB);
   // Routines for sinking stores
   StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
   PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
   bool isStoreSinkBarrierInRange(const Instruction &Start,
                                  const Instruction &End, MemoryLocation Loc);
+  bool isStoreSinkBarrierInRangeMemorySSA(MemoryLocation &Loc,
+                                          MemoryAccess *Start);
   bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
   bool mergeStores(BasicBlock *BB);
-  // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
-  // where Size0 and Size1 are the #instructions on the two sides of
-  // the diamond. The constant chosen here is arbitrary. Compiler Time
-  // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
-  const int MagicCompileTimeControl;
+  bool mergeStoresMemorySSA(BasicBlock *BB);
+  void computeBarriers();
 };
 
 char MergedLoadStoreMotion::ID = 0;
@@ -227,14 +296,9 @@
 /// being loaded or protect against the load from happening
 /// it is considered a hoist barrier.
 ///
-bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(
-    const Instruction &Start, const Instruction &End, LoadInst *LI,
-    bool SafeToLoadUnconditionally) {
-  if (!SafeToLoadUnconditionally)
-    for (const Instruction &Inst :
-         make_range(Start.getIterator(), End.getIterator()))
-      if (!isGuaranteedToTransferExecutionToSuccessor(&Inst))
-        return true;
+bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction &Start,
+                                                      const Instruction &End,
+                                                      LoadInst *LI) {
   MemoryLocation Loc = MemoryLocation::get(LI);
   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
 }
@@ -265,11 +329,20 @@
 
     MemoryLocation Loc0 = MemoryLocation::get(Load0);
     MemoryLocation Loc1 = MemoryLocation::get(Load1);
+    if (!SafeToLoadUnconditionally) {
+      // If the first hoist barrier in the block is before the load, we
+      // can't hoist.
+      unsigned int BB0HoistBarrier = FirstHoistBarrier.lookup(BB0);
+      if (BB0HoistBarrier != 0 && DFSNumberMap.lookup(Load0) > BB0HoistBarrier)
+        continue;
+      unsigned int BB1HoistBarrier = FirstHoistBarrier.lookup(BB1);
+      if (BB1HoistBarrier != 0 && DFSNumberMap.lookup(Load1) > BB1HoistBarrier)
+        continue;
+    }
+
     if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) &&
-        !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1,
-                                   SafeToLoadUnconditionally) &&
-        !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0,
-                                   SafeToLoadUnconditionally)) {
+        !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) &&
+        !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) {
       return Load1;
     }
   }
@@ -303,6 +376,20 @@
 
   // Hoist instruction.
   HoistedInst->insertBefore(HoistPt);
+  if (UseMemorySSA) {
+    // We also hoist operands of loads using this function, so check to see if
+    // this is really a memory access before we try to update MemorySSA for it.
+    MemoryAccess *HoistCandAccess = MSSA->getMemoryAccess(HoistCand);
+    if (HoistCandAccess) {
+      MemoryUseOrDef *MUD = cast<MemoryUseOrDef>(HoistCandAccess);
+      // What is happening here is that we are creating the hoisted access
+      // and destroying the old accesses.
+      MSSA->createMemoryAccess(HoistedInst, MUD->getDefiningAccess(), BB,
+                               MemorySSA::End);
+      MSSA->removeMemoryAccess(HoistCandAccess);
+      MSSA->removeMemoryAccess(MSSA->getMemoryAccess(ElseInst));
+    }
+  }
 
   HoistCand->replaceAllUsesWith(HoistedInst);
   removeInstruction(HoistCand);
@@ -395,10 +482,6 @@
 bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
                                                       const Instruction &End,
                                                       MemoryLocation Loc) {
-  for (const Instruction &Inst :
-       make_range(Start.getIterator(), End.getIterator()))
-    if (Inst.mayThrow())
-      return true;
   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
 }
 
@@ -419,8 +502,17 @@
     if (!Store1)
       continue;
 
+    // If the last sink barrier in the block is after us, we can't sink out
+    // of the block.
+    unsigned int BB0SinkBarrier = LastSinkBarrier.lookup(BB0);
+    if (BB0SinkBarrier != 0 && DFSNumberMap.lookup(Store0) < BB0SinkBarrier)
+      continue;
+    unsigned int BB1SinkBarrier = LastSinkBarrier.lookup(BB1);
+    if (BB1SinkBarrier != 0 && DFSNumberMap.lookup(Store1) < BB1SinkBarrier)
+      continue;
     MemoryLocation Loc0 = MemoryLocation::get(Store0);
     MemoryLocation Loc1 = MemoryLocation::get(Store1);
+
     if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
         !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
         !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
@@ -480,6 +572,13 @@
 
     assert(S0->getParent() == A0->getParent());
     assert(S1->getParent() == A1->getParent());
+    if (UseMemorySSA) {
+      MSSA->removeMemoryAccess(MSSA->getMemoryAccess(S0));
+      // ilist's reverse iterator invalidation semantics basically mean we need
+      // to
+      // wait until the loop in our caller is dead before we kill these
+      AccessesToDelete.insert(MSSA->getMemoryAccess(S1));
+    }
 
     // New PHI operand? Use it.
     if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
@@ -553,6 +652,31 @@
   return MergedStores;
 }
 
+// \brief DFS Number instructions in blocks in dominator order, tracking
+void MergedLoadStoreMotion::computeBarriers() {
+  // This is 1 so the default constructed value of 0 can be used to say we
+  // didn't find anything.
+  unsigned DFSNum = 1;
+  for (auto DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode());
+       DI != DE; ++DI) {
+    BasicBlock *DomBlock = DI->getBlock();
+    bool FoundHoistBarrierInBlock = false;
+    for (const auto &Inst : *DomBlock) {
+      DFSNumberMap[&Inst] = DFSNum;
+
+      if (!FoundHoistBarrierInBlock &&
+          !isGuaranteedToTransferExecutionToSuccessor(&Inst)) {
+        FirstHoistBarrier[DomBlock] = DFSNum;
+        FoundHoistBarrierInBlock = true;
+      }
+      if (Inst.mayThrow()) {
+        LastSinkBarrier[DomBlock] = DFSNum;
+      }
+      ++DFSNum;
+    }
+  }
+}
+
 ///
 /// \brief Run the transformation for each function
 ///
@@ -563,21 +687,305 @@
   auto *MDWP = getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
   MD = MDWP ? &MDWP->getMemDep() : nullptr;
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  DT->updateDFSNumbers();
+  if (UseMemorySSA) {
+    MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+    CachingWalker = MSSA->getWalker();
+  }
 
   bool Changed = false;
   DEBUG(dbgs() << "Instruction Merger\n");
+  computeBarriers();
 
   // Merge unconditional branches, allowing PRE to catch more
   // optimization opportunities.
-  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
-    BasicBlock *BB = &*FI++;
-
+  for (BasicBlock &BB : F) {
     // Hoist equivalent loads and sink stores
     // outside diamonds when possible
-    if (isDiamondHead(BB)) {
-      Changed |= mergeLoads(BB);
-      Changed |= mergeStores(getDiamondTail(BB));
+    if (isDiamondHead(&BB)) {
+      if (UseMemorySSA) {
+        Changed |= mergeLoadsMemorySSA(&BB);
+        Changed |= mergeStoresMemorySSA(getDiamondTail(&BB));
+      } else {
+        Changed |= mergeLoads(&BB);
+        Changed |= mergeStores(getDiamondTail(&BB));
+      }
     }
   }
   return Changed;
 }
+
+/// This file contained a MemorySSA and non-MemorySSA version of the same
+/// optimization, and to keep it less confusing, the functions specific to the
+/// MemorySSA version are placed below this comment.
+
+///
+/// \brief Try to hoist two loads to same address into diamond header
+///
+/// Starting from a diamond head block, iterate over the loads in one
+/// successor block, put them in the hash table.
+/// Then walk through loads in the successor block, and see if they match any in
+/// the hash table and can be hoisted.
+///
+bool MergedLoadStoreMotion::mergeLoadsMemorySSA(BasicBlock *BB) {
+  bool MergedLoads = false;
+  assert(isDiamondHead(BB));
+  BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+  BasicBlock *Succ0 = BI->getSuccessor(0);
+  BasicBlock *Succ1 = BI->getSuccessor(1);
+  // #Instructions in Succ1 for Compile Time Control
+  int Size1 = Succ1->size();
+  int NLoads = 0;
+
+  // Skip if we don't have memory accesses in both blocks
+  auto *Succ0Accesses = MSSA->getBlockAccesses(Succ0);
+  if (!Succ0Accesses)
+    return false;
+  auto *Succ1Accesses = MSSA->getBlockAccesses(Succ1);
+  if (!Succ1Accesses)
+    return false;
+
+  // Walk all accesses in first block, but them into a hash table.
+  for (auto &Access : *Succ0Accesses) {
+    const MemoryUse *MU = dyn_cast<MemoryUse>(&Access);
+    if (!MU)
+      continue;
+    Instruction *I = MU->getMemoryInst();
+    // Only move non-simple (atomic, volatile) loads.
+    LoadInst *Load0 = dyn_cast<LoadInst>(I);
+    // FIXME: The isUsedOutsideofBlock is super-conservative, and used not to
+    // lengthen live ranges. There are better ways.
+    if (!Load0 || !Load0->isSimple() || Load0->isUsedOutsideOfBlock(Succ0))
+      continue;
+
+    LoadInfo.insert({Load0, CachingWalker->getClobberingMemoryAccess(Load0)});
+    ++NLoads;
+    if (NLoads * Size1 >= MagicCompileTimeControl)
+      break;
+  }
+  // Walk all accesses in the second block, see if we can match them against
+  // accesses in the first.
+  for (auto AI = Succ1Accesses->begin(), AE = Succ1Accesses->end(); AI != AE;) {
+    auto CurrIter = AI;
+    ++AI;
+    const MemoryUse *MU = dyn_cast<MemoryUse>(&*CurrIter);
+    if (!MU)
+      continue;
+    LoadInst *Load1 = dyn_cast<LoadInst>(MU->getMemoryInst());
+    // This isUsedOutsideOfBlock is also conservative
+    if (!Load1 || Load1->isUsedOutsideOfBlock(Succ1))
+      continue;
+    // We know that if the load has the same clobbering access as this one, they
+    // must not be killed until the same point.  That is, we are guaranteed that
+    // all the loads that could possibly be merged must have a common MemoryDef
+    // (or MemoryPhi) that they reach.  If not, then we can't merge them because
+    // something is in the way on one of the branches.  If we *do* find a
+    // possible match, the only further checking we need to do is to ensure they
+    // are loads of the same pointer.  We could simply check the pointer
+    // operands, but isMustAlias can do a better job of it.
+
+    auto LookupResult = LoadInfo.equal_range(
+        {Load1, CachingWalker->getClobberingMemoryAccess(Load1)});
+    bool Res = false;
+    auto LookupIter = LookupResult.first;
+    bool SafeToLoadUnconditionally =
+        (LookupResult.first != LookupResult.second) &&
+        isSafeToLoadUnconditionally(Load1->getPointerOperand(),
+                                    Load1->getAlignment(),
+                                    Load1->getModule()->getDataLayout(),
+                                    /*ScanFrom=*/BB->getTerminator());
+
+    while (!Res && LookupIter != LookupResult.second) {
+      LoadInst *Load0 = LookupIter->first;
+      auto OldIter = LookupIter;
+      ++LookupIter;
+      if (!SafeToLoadUnconditionally) {
+        // If the first hoist barrier in the block is before the load, we
+        // can't hoist.
+        unsigned int BB0HoistBarrier =
+            FirstHoistBarrier.lookup(Load0->getParent());
+        if (BB0HoistBarrier != 0 &&
+            DFSNumberMap.lookup(Load0) > BB0HoistBarrier)
+          continue;
+        unsigned int BB1HoistBarrier =
+            FirstHoistBarrier.lookup(Load1->getParent());
+        if (BB1HoistBarrier != 0 &&
+            DFSNumberMap.lookup(Load1) > BB1HoistBarrier)
+          continue;
+      }
+
+      MemoryLocation Loc0 = MemoryLocation::get(Load0);
+      MemoryLocation Loc1 = MemoryLocation::get(Load1);
+      if (AA->isMustAlias(Loc0, Loc1))
+        Res = hoistLoad(BB, Load0, Load1);
+      MergedLoads |= Res;
+      // Don't attempt to hoist above loads that had not been hoisted.
+      if (Res)
+        LoadInfo.erase(OldIter);
+    }
+  }
+  LoadInfo.clear();
+  return MergedLoads;
+}
+
+///
+/// \brief True when two stores are equivalent and can sink into the footer
+///
+/// Starting from a diamond tail block, place all the stores in one predecessor
+/// in a hash table, and try to match them against stores in the second
+/// predecessor
+///
+bool MergedLoadStoreMotion::mergeStoresMemorySSA(BasicBlock *T) {
+
+  bool MergedStores = false;
+  assert(T && "Footer of a diamond cannot be empty");
+
+  pred_iterator PI = pred_begin(T), E = pred_end(T);
+  assert(PI != E);
+  BasicBlock *Pred0 = *PI;
+  ++PI;
+  BasicBlock *Pred1 = *PI;
+  ++PI;
+  // tail block  of a diamond/hammock?
+  if (Pred0 == Pred1)
+    return false; // No.
+  if (PI != E)
+    return false; // No. More than 2 predecessors.
+
+  // #Instructions in Succ1 for Compile Time Control
+  int Size1 = Pred1->size();
+  int NStores = 0;
+
+  // Skip all this if we don't have any memory accesses to look at
+  auto *Pred0Accesses = MSSA->getBlockAccesses(Pred0);
+  if (!Pred0Accesses)
+    return false;
+  auto *Pred1Accesses = MSSA->getBlockAccesses(Pred1);
+  if (!Pred1Accesses)
+    return false;
+
+  for (auto AI = Pred0Accesses->rbegin(), AE = Pred0Accesses->rend(); AI != AE;
+       ++AI) {
+    if (const MemoryDef *MD = dyn_cast<MemoryDef>(&*AI)) {
+      Instruction *I = MD->getMemoryInst();
+
+      // Sink move non-simple (atomic, volatile) stores
+      if (!isa<StoreInst>(I))
+        continue;
+      StoreInst *S0 = cast<StoreInst>(I);
+      if (!S0->isSimple())
+        continue;
+      StoreInfo.insert(S0);
+
+      ++NStores;
+      if (NStores * Size1 >= MagicCompileTimeControl)
+        break;
+    }
+  }
+
+  for (auto AI = Pred1Accesses->rbegin(), AE = Pred1Accesses->rend(); AI != AE;
+       ++AI) {
+    const MemoryDef *MD = dyn_cast<MemoryDef>(&*AI);
+    if (!MD)
+      continue;
+    Instruction *Inst = MD->getMemoryInst();
+    if (!isa<StoreInst>(Inst))
+      continue;
+
+    StoreInst *Store1 = cast<StoreInst>(Inst);
+    auto LookupResult = StoreInfo.equal_range(Store1);
+    bool Res = false;
+    auto LookupIter = LookupResult.first;
+    while (!Res && LookupIter != LookupResult.second) {
+      StoreInst *Store0 = *LookupIter;
+      auto OldIter = LookupIter;
+      ++LookupIter;
+
+      // If the last sink barrier in the block is after us, we can't sink out
+      // of the block.
+      unsigned int BB0SinkBarrier = LastSinkBarrier.lookup(Store0->getParent());
+      if (BB0SinkBarrier != 0 && DFSNumberMap.lookup(Store0) < BB0SinkBarrier)
+        continue;
+      unsigned int BB1SinkBarrier = LastSinkBarrier.lookup(Store1->getParent());
+      if (BB1SinkBarrier != 0 && DFSNumberMap.lookup(Store1) < BB1SinkBarrier)
+        continue;
+
+      MemoryLocation Loc0 = MemoryLocation::get(Store0);
+      MemoryLocation Loc1 = MemoryLocation::get(Store1);
+      if (!AA->isMustAlias(Loc0, Loc1) ||
+          isStoreSinkBarrierInRangeMemorySSA(Loc0,
+                                             MSSA->getMemoryAccess(Store0)) ||
+          isStoreSinkBarrierInRangeMemorySSA(Loc1,
+                                             MSSA->getMemoryAccess(Store1)))
+        continue;
+      Res = sinkStore(T, Store0, Store1);
+      if (Res)
+        StoreInfo.erase(OldIter);
+      MergedStores |= Res;
+    }
+  }
+  for (auto V : AccessesToDelete) {
+    MSSA->removeMemoryAccess(V);
+  }
+  AccessesToDelete.clear();
+  StoreInfo.clear();
+  return MergedStores;
+}
+
+///
+/// \brief True when the store is not downwards explosed to block \p End
+
+bool MergedLoadStoreMotion::isStoreSinkBarrierInRangeMemorySSA(
+    MemoryLocation &Loc, MemoryAccess *Start) {
+  DomTreeNode *StartNode = DT->getNode(Start->getBlock());
+  std::pair<unsigned, unsigned> StartDFS = {StartNode->getDFSNumIn(),
+                                            StartNode->getDFSNumOut()};
+
+  // Seed with current users that are in the block range.
+  std::queue<MemoryAccess *> CurrentUses;
+  SmallPtrSet<const MemoryAccess *, 8> Visited;
+  for (auto U : Start->users())
+    CurrentUses.emplace(cast<MemoryAccess>(U));
+
+  // Process all the uses and see if they are below end.
+  while (!CurrentUses.empty()) {
+    MemoryAccess *MA = CurrentUses.front();
+    CurrentUses.pop();
+
+    BasicBlock *BB = MA->getBlock();
+    DomTreeNode *BBNode = DT->getNode(BB);
+    std::pair<unsigned, unsigned> CurrDFS = {BBNode->getDFSNumIn(),
+                                             BBNode->getDFSNumOut()};
+    // First see if it's outside the dominator tree range
+    // The only way it could affect us is if it's below us in the dominator
+    // tree.
+    // The start of the final sink point is not below us in a hammock's
+    // dominator tree, because in a hammock, the merge block must be a
+    // sibling of the split block in the dominator tree.
+    // Thus, the things below us in the dominator tree are all things
+    // that lead to the sink point.
+    if ((CurrDFS.first >= StartDFS.first && CurrDFS.first <= StartDFS.first) &&
+        CurrDFS.second >= StartDFS.second &&
+        CurrDFS.second <= StartDFS.second) {
+      // A phi node is not a memory access on its own, and we may have deleted
+      // this access already. Becasue we are walking backwards, we don't perform
+      // updates on the fly.
+      if (AccessesToDelete.count(MA) || isa<MemoryPhi>(MA))
+        continue;
+      Instruction *Use = cast<MemoryUseOrDef>(MA)->getMemoryInst();
+
+      // If it really conflicts, we have a barrier.
+      if (AA->getModRefInfo(Use, Loc) & MRI_ModRef)
+        return true;
+
+      // If not, add it's immediate uses to keep walking
+      for (auto U : Start->users()) {
+        MemoryAccess *MU = cast<MemoryAccess>(U);
+        if (Visited.insert(MU).second)
+          CurrentUses.emplace(MU);
+      }
+    }
+  }
+  return false;
+}
Index: lib/Transforms/Utils/MemorySSA.cpp
===================================================================
--- lib/Transforms/Utils/MemorySSA.cpp
+++ lib/Transforms/Utils/MemorySSA.cpp
@@ -225,7 +225,8 @@
       MA.dropAllReferences();
 }
 
-MemorySSA::AccessListType *MemorySSA::getOrCreateAccessList(BasicBlock *BB) {
+MemorySSA::AccessListType *
+MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
   auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
 
   if (Res.second)
@@ -356,6 +357,57 @@
 
   return Walker.get();
 }
+MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
+                                               MemoryAccess *Definition) {
+  assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
+  MemoryUseOrDef *NewAccess = createNewAccess(I);
+  assert(
+      NewAccess != nullptr &&
+      "Tried to create a memory access for a non-memory touching instruction");
+  NewAccess->setDefiningAccess(Definition);
+  return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccess(Instruction *I,
+                                            MemoryAccess *Definition,
+                                            const BasicBlock *BB,
+                                            InsertionPlace Point) {
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(BB);
+  if (Point == Beginning) {
+    // It goes after any phi nodes
+    auto AI = std::find_if(
+        Accesses->begin(), Accesses->end(),
+        [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); });
+
+    Accesses->insert(AI, NewAccess);
+  } else {
+    Accesses->push_back(NewAccess);
+  }
+
+  return NewAccess;
+}
+MemoryAccess *MemorySSA::createMemoryAccessBefore(Instruction *I,
+                                                  MemoryAccess *Definition,
+                                                  MemoryAccess *InsertPt) {
+  assert(I->getParent() == InsertPt->getBlock() &&
+         "New and old access must be in the same block");
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+  Accesses->insert(AccessListType::iterator(InsertPt), NewAccess);
+  return NewAccess;
+}
+
+MemoryAccess *MemorySSA::createMemoryAccessAfter(Instruction *I,
+                                                 MemoryAccess *Definition,
+                                                 MemoryAccess *InsertPt) {
+  assert(I->getParent() == InsertPt->getBlock() &&
+         "New and old access must be in the same block");
+  MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
+  auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
+  Accesses->insertAfter(AccessListType::iterator(InsertPt), NewAccess);
+  return NewAccess;
+}
 
 /// \brief Helper function to create new memory accesses
 MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
@@ -551,7 +603,8 @@
         continue;
 
       for (User *U : MD->users()) {
-        BasicBlock *UseBlock; (void)UseBlock;
+        BasicBlock *UseBlock;
+        (void)UseBlock;
         // Things are allowed to flow to phi nodes over their predecessor edge.
         if (auto *P = dyn_cast<MemoryPhi>(U)) {
           for (const auto &Arg : P->operands()) {
Index: test/Transforms/InstMerge/exceptions.ll
===================================================================
--- test/Transforms/InstMerge/exceptions.ll
+++ test/Transforms/InstMerge/exceptions.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
Index: test/Transforms/InstMerge/ld_hoist1.ll
===================================================================
--- test/Transforms/InstMerge/ld_hoist1.ll
+++ test/Transforms/InstMerge/ld_hoist1.ll
@@ -1,5 +1,6 @@
 ; Test load hoist
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc_linux"
 
Index: test/Transforms/InstMerge/ld_hoist_st_sink.ll
===================================================================
--- test/Transforms/InstMerge/ld_hoist_st_sink.ll
+++ test/Transforms/InstMerge/ld_hoist_st_sink.ll
@@ -1,6 +1,7 @@
 ; Tests to make sure that loads and stores in a diamond get merged
 ; Loads are hoisted into the header. Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i64, %struct.node*, %struct.node*, %struct.node*, i64, %struct.arc*, i64, i64, i64 }
Index: test/Transforms/InstMerge/st_sink_barrier_call.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_barrier_call.ll
+++ test/Transforms/InstMerge/st_sink_barrier_call.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that a function call that needs to be a barrier to sinking stores is indeed a barrier.
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_bugfix_22613.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_bugfix_22613.ll
+++ test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -3,6 +3,7 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 ; RUN: opt -O2 -S < %s | FileCheck %s
+; RUN: opt -O2 -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 
 ; CHECK-LABEL: main
 ; CHECK: if.end
Index: test/Transforms/InstMerge/st_sink_no_barrier_call.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_call.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_call.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier function call after the store instruction
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_no_barrier_load.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_load.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_load.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier load after the store instruction
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_no_barrier_store.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_no_barrier_store.ll
+++ test/Transforms/InstMerge/st_sink_no_barrier_store.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged with a non barrier store after the store instruction to be sunk
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_two_stores.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_two_stores.ll
+++ test/Transforms/InstMerge/st_sink_two_stores.ll
@@ -1,6 +1,7 @@
 ; Test to make sure that stores in a diamond get merged
 ; Stores sunks into the footer.
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
Index: test/Transforms/InstMerge/st_sink_with_barrier.ll
===================================================================
--- test/Transforms/InstMerge/st_sink_with_barrier.ll
+++ test/Transforms/InstMerge/st_sink_with_barrier.ll
@@ -1,5 +1,6 @@
 ; Test to make sure that load from the same address as a store and appears after the store prevents the store from being sunk
 ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -basicaa -use-memoryssa -mldst-motion -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 %struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }