Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -103,7 +103,8 @@
 void initializeDAEPass(PassRegistry&);
 void initializeDAHPass(PassRegistry&);
 void initializeDCELegacyPassPass(PassRegistry&);
-void initializeDSELegacyPassPass(PassRegistry&);
+void initializeDSELegacyMemDepPass(PassRegistry&);
+void initializeDSELegacyMSSAPass(PassRegistry&);
 void initializeDataFlowSanitizerPass(PassRegistry&);
 void initializeDeadInstEliminationPass(PassRegistry&);
 void initializeDeadMachineInstructionElimPass(PassRegistry&);
Index: include/llvm/Transforms/Scalar/DeadStoreElimination.h
===================================================================
--- include/llvm/Transforms/Scalar/DeadStoreElimination.h
+++ include/llvm/Transforms/Scalar/DeadStoreElimination.h
@@ -25,7 +25,7 @@
 
 /// This class implements a trivial dead store elimination. We consider
 /// only the redundant stores that are local to a single Basic Block.
-class DSEPass : public PassInfoMixin<DSEPass> {
+template <bool UseMSSA> class DSEPass : public PassInfoMixin<DSEPass<UseMSSA>> {
 public:
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
 };
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -364,7 +364,8 @@
   // redo DCE, etc.
   FPM.addPass(JumpThreadingPass());
   FPM.addPass(CorrelatedValuePropagationPass());
-  FPM.addPass(DSEPass());
+  FPM.addPass(DSEPass<false>());
+  FPM.addPass(DSEPass<true>());
   FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
 
   // Finally, do an expensive DCE pass to catch all the dead code exposed by
@@ -679,7 +680,8 @@
   MainFPM.addPass(MemCpyOptPass());
 
   // Nuke dead stores.
-  MainFPM.addPass(DSEPass());
+  MainFPM.addPass(DSEPass<false>());
+  MainFPM.addPass(DSEPass<true>());
 
   // FIXME: at this point, we run a bunch of loop passes:
   // indVarSimplify, loopDeletion, loopInterchange, loopUnrool,
Index: lib/Passes/PassRegistry.def
===================================================================
--- lib/Passes/PassRegistry.def
+++ lib/Passes/PassRegistry.def
@@ -141,7 +141,8 @@
 FUNCTION_PASS("consthoist", ConstantHoistingPass())
 FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
 FUNCTION_PASS("dce", DCEPass())
-FUNCTION_PASS("dse", DSEPass())
+FUNCTION_PASS("dse", DSEPass<false>())
+FUNCTION_PASS("dsem", DSEPass<true>())
 FUNCTION_PASS("dot-cfg", CFGPrinterPass())
 FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
 FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false))
Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -16,6 +16,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -24,6 +25,7 @@
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constants.h"
@@ -40,6 +42,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
 #include <map>
 using namespace llvm;
 
@@ -54,6 +57,9 @@
     "enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden,
     cl::desc("Enable partial-overwrite tracking in DSE"));
 
+static cl::opt<bool> EnableMSSA("use-dse-mssa", cl::init(false), cl::Hidden,
+                                cl::desc("Use the new MemorySSA-backed DSE."));
+
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
@@ -1185,10 +1191,425 @@
   return MadeChange;
 }
 
+struct WalkResult {
+  enum {
+    NextDef,
+    NextPhi,
+    KilledByUse,
+    SplitPoint,
+    ReachedEnd,
+  } State;
+  MemoryAccess *MA;
+};
+
+class DSEWalker {
+  Function *F;
+
+  unsigned InstNum;
+  DenseMap<const Value *, unsigned> InstNums;
+  // ^ Post-order numbering of MemoryPhis and instructions that a) throw, or b)
+  // are DSE candidates. Used to detect intervening MayThrows or loop latches.
+  SmallVector<unsigned, 32> MayThrows;
+  SmallPtrSet<const Value *, 16> Returns;
+  // ^ Values returned by F.
+  DenseSet<const Value *> NonEscapes;
+  // ^ Args and insts that don't escape on unwind of F.
+  InstOverlapIntervalsTy IOL;
+  // ^ isOverwrite needs this.
+public:
+  SmallVector<MemoryDef *, 32> Stores;
+  // ^ Stores to visit, post-ordered
+
+private:
+  AliasAnalysis *AA;
+  MemorySSA *MSSA;
+  const PostDominatorTree *PDT;
+  const TargetLibraryInfo *TLI;
+
+  unsigned addPO(Instruction &I) {
+    InstNums[&I] = InstNum++;
+    DEBUG(dbgs() << "PO: " << I << ", " << InstNums[&I] << "\n");
+    return InstNums[&I];
+  }
+
+  unsigned addPO(MemoryPhi &P) {
+    InstNums[&P] = InstNum++;
+    DEBUG(dbgs() << "PO: " << P << ", " << InstNums[&P] << "\n");
+    return InstNums[&P];
+  }
+
+  bool nonEscapingOnUnwind(Instruction &I) {
+    return isa<AllocaInst>(&I) ||
+           (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, false, true));
+  }
+
+  MemoryLocation getWriteLoc(Instruction *I) const {
+    MemoryLocation Loc = getLocForWrite(I, *AA);
+    if (!Loc.Ptr) {
+      if (auto CS = CallSite(I)) {
+        if (Function *F = CS.getCalledFunction()) {
+          StringRef FnName = F->getName();
+          if ((TLI->has(LibFunc_strcpy) &&
+               FnName == TLI->getName(LibFunc_strcpy)) ||
+              (TLI->has(LibFunc_strncpy) &&
+               FnName == TLI->getName(LibFunc_strncpy)) ||
+              (TLI->has(LibFunc_strcat) &&
+               FnName == TLI->getName(LibFunc_strcat)) ||
+              (TLI->has(LibFunc_strncat) &&
+               FnName == TLI->getName(LibFunc_strncat)))
+            return MemoryLocation::getForArgument(ImmutableCallSite(I), 0,
+                                                  *TLI);
+        }
+      }
+    }
+    return Loc;
+  }
+
+  // For free and lifetime_end, the size of MemLoc doesn't matter.
+  Value *getLifetimeEndish(Instruction &I) {
+    const DataLayout &DL = F->getParent()->getDataLayout();
+    if (isFreeCall(&I, TLI))
+      return GetUnderlyingObject(cast<CallInst>(&I)->getArgOperand(0), DL);
+    if (auto *II = dyn_cast<IntrinsicInst>(&I))
+      if (II->getIntrinsicID() == Intrinsic::lifetime_end)
+        // TODO: this isn't entirely correct, since lifetime_end contains a
+        // size param.
+        return GetUnderlyingObject(cast<CallInst>(&I)->getArgOperand(1), DL);
+
+    return nullptr;
+  }
+
+public:
+  // Represents a DSE candidate and its cached escape info.
+  struct Candidate {
+    MemoryDef *D;
+    MemoryLocation Loc;
+    const Value *Und;
+    bool Returned;
+    // ^ Is this candidate's memory location returned by the function?
+    bool Escapes;
+    // ^ If execution in the function unwinds after this candidate writes, will
+    // its memory location escapes?
+  };
+
+  void deleteDead(MemoryDef &D) {
+    Instruction &I = *D.getMemoryInst();
+    DEBUG(dbgs() << "DSE-ing:\n\t" << D << "\n\t" << I << "\n");
+
+    SmallVector<Value *, 32> DeadPool(I.value_op_begin(), I.value_op_end());
+    DenseSet<Instruction *> Deleted; // Prevents double deletion.
+    MSSA->removeMemoryAccess(&D);
+    I.eraseFromParent();
+    IOL.erase(&I);
+
+    while (!DeadPool.empty()) {
+      auto *Cand = dyn_cast<Instruction>(DeadPool.pop_back_val());
+      if (Cand && !Deleted.count(Cand) &&
+          // Check if safe to delete, accounting for possible volatile or
+          // atomic.
+          isInstructionTriviallyDead(Cand, TLI)) {
+        DeadPool.insert(DeadPool.end(), Cand->value_op_begin(),
+                        Cand->value_op_end());
+        if (MemoryAccess *MA = MSSA->getMemoryAccess(Cand))
+          MSSA->removeMemoryAccess(MA);
+        Cand->eraseFromParent();
+        IOL.erase(Cand);
+        Deleted.insert(Cand);
+        ++NumFastOther;
+      }
+    }
+  }
+
+  // TODO: this fails to catch the following case:
+  //
+  // p = load x;
+  // while (...) { store p, x; store _, x; ... }
+  //
+  // memoryIsNotModifiedBetween is able to detect this.
+  bool isNoop(const MemoryDef &D) {
+    if (auto *SI = dyn_cast<StoreInst>(D.getMemoryInst())) {
+      MemoryAccess *Clob = MSSA->getWalker()->getClobberingMemoryAccess(SI);
+      if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand())) {
+        if (auto *U = dyn_cast<MemoryUse>(MSSA->getMemoryAccess(LI)))
+          // If LI is a MemoryUse, then MSSA guarantees that its defining access
+          // is accurate.
+          return U->getDefiningAccess() == Clob &&
+                 AA->isMustAlias(MemoryLocation::get(LI),
+                                 MemoryLocation::get(SI));
+        // Otherwise, LI is an abnormal load, i.e. atomic and/or volatile.
+        return Clob == MSSA->getMemoryAccess(LI) &&
+               AA->isMustAlias(MemoryLocation::get(LI),
+                               MemoryLocation::get(SI));
+      } else if (auto *MUD = dyn_cast<MemoryUseOrDef>(Clob)) {
+        // Storing zero to calloc-ed memory counts as no-op.
+        if (!MSSA->isLiveOnEntryDef(MUD)) {
+          Constant *C;
+          return (C = dyn_cast<Constant>(SI->getValueOperand())) &&
+                 C->isNullValue() && isCallocLikeFn(MUD->getMemoryInst(), TLI);
+        }
+      }
+    }
+    // TODO: Can also handle memmove(a <- a) and memcpy(a <- a), though the
+    // latter is technically UB.
+    return false;
+  }
+
+  unsigned po(Instruction &I) {
+    assert(InstNums.find(&I) != InstNums.end() && "Unindexed instruction.");
+    return InstNums.find(&I)->second;
+  }
+
+  unsigned po(MemoryPhi &P) {
+    assert(InstNums.find(&P) != InstNums.end() && "Unindexed MemoryPhi.");
+    return InstNums.find(&P)->second;
+  }
+
+  bool throwInRange(unsigned Earlier, unsigned Later) const {
+    DEBUG(dbgs() << "checking for throws between " << Earlier << " and "
+                 << Later << "\n");
+    assert(Earlier >= Later &&
+           "Larger number == later in post-order == earlier in rpo.");
+    return std::upper_bound(MayThrows.begin(), MayThrows.end(), Earlier) !=
+           std::lower_bound(MayThrows.begin(), MayThrows.end(), Later);
+  }
+
+  // Could Def prevent the DSE of a candidate store to Loc (which necessarily
+  // isn't volatile or atomic)? Example reasons for a yes:
+  // - Def's atomicity is stronger than monotonic
+  // - Def itself uses Loc, e.g., memcpy(... <- Loc)
+  bool isDSEBarrier(MemoryDef &D, const Candidate &Cand) {
+    Instruction *I = D.getMemoryInst();
+
+    if (getLifetimeEndish(*I))
+      return false;
+
+    if (I->isAtomic()) {
+      auto F = [](AtomicOrdering A) {
+        return A == AtomicOrdering::Monotonic || A == AtomicOrdering::Unordered;
+      };
+      if (auto *LI = dyn_cast<LoadInst>(I))
+        // Compensate for AA's skittishness around atomics.
+        return !(F(LI->getOrdering()) &&
+                 AA->isNoAlias(MemoryLocation::get(LI), Cand.Loc));
+      else if (auto *SI = dyn_cast<StoreInst>(I))
+        return !F(SI->getOrdering());
+      else if (isa<FenceInst>(I) && !Cand.Escapes && !Cand.Returned)
+        // From the old DSE: "Fences only constraints the ordering of already
+        // visible stores, it does not make a store visible to other threads.
+        // So, skipping over a fence does not change a store from being dead."
+        return false;
+    }
+
+    return AA->getModRefInfo(I, Cand.Loc) & MRI_Ref;
+  }
+
+  // Given the current walk location DefOrPhi, attempt to move downwards to the
+  // next MemoryDef that could DSE Cand.
+  WalkResult walkNext(MemoryAccess *DefOrPhi, const Candidate &Cand) {
+    DEBUG(dbgs() << "descending past " << *DefOrPhi << "\n");
+    WalkResult Res = {WalkResult::ReachedEnd, nullptr};
+
+    // Ensure that the uselist 1) doesn't MRI_Ref Cand.Loc, and 2) contains at
+    // most one MemoryDef or MemoryPhi.
+    for (Use &U : DefOrPhi->uses()) {
+      if (auto *Phi = dyn_cast<MemoryPhi>(U.getUser())) {
+        unsigned EarlierNum;
+        if (auto *D = dyn_cast<MemoryDef>(DefOrPhi))
+          EarlierNum = InstNums.find(D->getMemoryInst())->second;
+        else
+          EarlierNum = InstNums.find(DefOrPhi)->second;
+
+        DEBUG(dbgs() << "found phi: " << *Phi << ", "
+                     << InstNums.find(Phi)->second << ", " << EarlierNum
+                     << "\n");
+
+        if (Res.MA || InstNums.find(Phi)->second > EarlierNum)
+          // More than one MemoryDef or phi in the uselist implies a split point
+          // in the MSSA graph. A Phi with a higher post-order number (i.e.,
+          // earlier in RPO) indicates that we've encountered a loop latch.
+          return {WalkResult::SplitPoint, nullptr};
+        Res = {WalkResult::NextPhi, Phi};
+      } else if (auto *Load = dyn_cast<MemoryUse>(U.getUser())) {
+        if (AA->getModRefInfo(Load->getMemoryInst(), Cand.Loc) & MRI_Ref) {
+          // For a pair of stores to DSE, there can't be any intervening uses of
+          // the stored-to memory.
+          DEBUG(dbgs() << "used by " << *Load << "\n");
+          return {WalkResult::KilledByUse, Load};
+        }
+      } else if (auto *D = dyn_cast<MemoryDef>(U.getUser())) {
+        if (isDSEBarrier(*D, Cand)) {
+          DEBUG(dbgs() << "used by " << *D << "\n");
+          return {WalkResult::KilledByUse, D};
+        } else if (Res.MA)
+          return {WalkResult::SplitPoint, nullptr};
+        Res = {WalkResult::NextDef, D};
+      } else
+        llvm_unreachable("Unexpected MemorySSA node type.");
+    }
+
+    if (Res.State == WalkResult::ReachedEnd)
+      Res.MA = DefOrPhi;
+    return Res;
+  }
+
+  Candidate makeCand(MemoryDef &D) const {
+    assert(hasMemoryWrite(D.getMemoryInst(), *TLI) &&
+           "DSE candidates must write to an analyzable memory location.");
+    const DataLayout &DL = F->getParent()->getDataLayout();
+    MemoryLocation Loc = getWriteLoc(D.getMemoryInst());
+    assert(Loc.Ptr && "Expected a Loc!");
+    const Value *Und = GetUnderlyingObject(Loc.Ptr, DL);
+    bool Returned = Returns.count(Und);
+    DEBUG(dbgs() << "is " << D << " returned? " << Returned << "\n");
+    bool Escapes =
+        !(NonEscapes.count(Und) || ([&]() {
+            SmallVector<Value *, 4> Unds;
+            GetUnderlyingObjects(const_cast<Value *>(Und), Unds, DL);
+            return all_of(Unds, [&](Value *V) { return NonEscapes.count(V); });
+          })());
+    DEBUG(dbgs() << "does " << D << " escape? " << Escapes << "\n");
+    return {&D, Loc, Und, Returned, Escapes};
+  }
+
+  bool canDSE(const Candidate &Earlier, const MemoryDef &Later,
+              bool NonLocal = false) {
+    DEBUG(dbgs() << "can dse " << Later << "?\n");
+    Instruction &EarlierI = *Earlier.D->getMemoryInst();
+    Instruction &LaterI = *Later.getMemoryInst();
+
+    const DataLayout &DL = F->getParent()->getDataLayout();
+    if (Value *Ptr = getLifetimeEndish(LaterI)) {
+      // TODO: Possibly avoid repeated calls to GetUnderlyingObject through
+      // getLifetimeEndish.
+      DEBUG(dbgs() << "checking alias with free:\n\t" << *Earlier.Und << "\n\t"
+                   << *Ptr << "\n");
+      return AA->isMustAlias(Earlier.Und, Ptr);
+    }
+
+    MemoryLocation LLoc = getWriteLoc(&LaterI);
+    if (!LLoc.Ptr ||
+        (NonLocal &&
+         !PDT->dominates(Later.getBlock(), Earlier.D->getBlock())) ||
+        (Earlier.Escapes && throwInRange(po(EarlierI), po(LaterI))))
+      return false;
+
+    int64_t EOff, LOff;
+    OverwriteResult O =
+        isOverwrite(LLoc, Earlier.Loc, DL, *TLI, EOff, LOff, &EarlierI, IOL);
+    DEBUG(dbgs() << "Overwrite: " << O << "\n");
+    return O == OverwriteComplete;
+  }
+
+  DSEWalker(Function &F_, AliasAnalysis &AA_, MemorySSA &MSSA_,
+            const PostDominatorTree &PDT_, const TargetLibraryInfo &TLI_)
+      : F(&F_), InstNum(0), AA(&AA_), MSSA(&MSSA_), PDT(&PDT_), TLI(&TLI_) {
+    // Record non-escaping args.
+    for (Argument &Arg : F->args())
+      if (Arg.hasByValOrInAllocaAttr())
+        NonEscapes.insert(&Arg);
+
+    // Number instructions of interest by post-order
+    for (BasicBlock *BB : post_order(F)) {
+      for (Instruction &I : reverse(*BB)) {
+        if (I.mayThrow()) {
+          MayThrows.push_back(addPO(I));
+        } else if (auto *Def =
+                       dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(&I))) {
+          addPO(I);
+          // Only consider true DSE candidates. Volatile/atomic stores don't
+          // qualify, for instance.
+          if (hasMemoryWrite(&I, *TLI) && isRemovable(&I)) {
+            DEBUG(dbgs() << "candidate: " << I << "\n");
+            Stores.push_back(Def);
+          }
+        }
+
+        if (nonEscapingOnUnwind(I)) {
+          DEBUG(dbgs() << "found non-escaping mem: " << I << "\n");
+          NonEscapes.insert(&I);
+        }
+      }
+
+      if (MemoryPhi *Phi = MSSA->getMemoryAccess(BB))
+        // Phi numbers are used to recognize loop latches.
+        addPO(*Phi);
+
+      if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+        const DataLayout &DL = F->getParent()->getDataLayout();
+        // TODO: Possibly ignore unreachable blocks, which would require DT.
+        if (Value *RetVal = RI->getReturnValue())
+          Returns.insert(GetUnderlyingObject(RetVal, DL));
+      }
+    }
+  }
+};
+
+static bool eliminateDeadStoresMSSA(Function &F, AliasAnalysis &AA,
+                                    MemorySSA &MSSA,
+                                    const PostDominatorTree &PDT,
+                                    const TargetLibraryInfo &TLI) {
+  DEBUG(MSSA.print(dbgs()));
+  DSEWalker Walker(F, AA, MSSA, PDT, TLI);
+
+  bool Changed = false;
+  for (MemoryDef *D : Walker.Stores) {
+    DEBUG(dbgs() << "inspecting " << *D->getMemoryInst() << "\n");
+
+    if (Walker.isNoop(*D)) {
+      DEBUG(dbgs() << "deleting no-op store " << *D << "\n");
+      Walker.deleteDead(*D);
+      Changed = true;
+      continue;
+    }
+
+    DSEWalker::Candidate Cand = Walker.makeCand(*D);
+    WalkResult Res;
+    // Attempt to DSE within the same block because post-dom checks are limited
+    // to BasicBlock granularity.
+    bool next = false;
+    for (Res = Walker.walkNext(D, Cand); Res.State == WalkResult::NextDef &&
+                                         Res.MA->getBlock() == D->getBlock();
+         Res = Walker.walkNext(Res.MA, Cand)) {
+      if (Walker.canDSE(Cand, *cast<MemoryDef>(Res.MA), /* NonLocal */ false)) {
+        Walker.deleteDead(*D);
+        Changed = true;
+        next = true;
+        break;
+      }
+    }
+
+    if (next)
+      continue;
+
+    // Non-local search.
+    for (; Res.State <= WalkResult::NextPhi;
+         Res = Walker.walkNext(Res.MA, Cand)) {
+      if (auto *Def = dyn_cast<MemoryDef>(Res.MA)) {
+        if (Walker.canDSE(Cand, *Def, /* NonLocal */ true)) {
+          Walker.deleteDead(*D);
+          Changed = true;
+          break;
+        }
+      }
+    }
+
+    DEBUG(dbgs() << "Finished walking. " << Res.State << "\n");
+    if (Res.State == WalkResult::ReachedEnd && !Cand.Escapes &&
+        !Cand.Returned) {
+      DEBUG(dbgs() << "Caught unused write to non-escaping memory.\n");
+      Walker.deleteDead(*D);
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
 //===----------------------------------------------------------------------===//
 // DSE Pass
 //===----------------------------------------------------------------------===//
-PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
+template <>
+PreservedAnalyses DSEPass<false>::run(Function &F,
+                                      FunctionAnalysisManager &AM) {
   AliasAnalysis *AA = &AM.getResult<AAManager>(F);
   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
   MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
@@ -1204,54 +1625,100 @@
   return PA;
 }
 
+template <>
+PreservedAnalyses DSEPass<true>::run(Function &F, FunctionAnalysisManager &AM) {
+  if (!eliminateDeadStoresMSSA(F, AM.getResult<AAManager>(F),
+                               AM.getResult<MemorySSAAnalysis>(F).getMSSA(),
+                               AM.getResult<PostDominatorTreeAnalysis>(F),
+                               AM.getResult<TargetLibraryAnalysis>(F)))
+    return PreservedAnalyses::all();
+  PreservedAnalyses PA;
+  PA.preserve<PostDominatorTreeAnalysis>();
+  PA.preserve<MemorySSAAnalysis>();
+  PA.preserve<GlobalsAA>();
+  return PA;
+}
+
 namespace {
 /// A legacy pass for the legacy pass manager that wraps \c DSEPass.
-class DSELegacyPass : public FunctionPass {
+template <bool UseMSSA> class DSELegacyPass : public FunctionPass {
 public:
   DSELegacyPass() : FunctionPass(ID) {
-    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
+    if (UseMSSA)
+      initializeDSELegacyMSSAPass(*PassRegistry::getPassRegistry());
+    else
+      initializeDSELegacyMemDepPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnFunction(Function &F) override {
     if (skipFunction(F))
       return false;
 
-    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-    MemoryDependenceResults *MD =
-        &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
     const TargetLibraryInfo *TLI =
         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
-    return eliminateDeadStores(F, AA, MD, DT, TLI);
+    if (UseMSSA) {
+      PostDominatorTree &PDT =
+          getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+      MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+      return eliminateDeadStoresMSSA(F, *AA, MSSA, PDT, *TLI);
+    } else {
+      DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+      MemoryDependenceResults *MD =
+          &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+      return eliminateDeadStores(F, AA, MD, DT, TLI);
+    }
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
-    AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
-    AU.addRequired<MemoryDependenceWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
-    AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
-    AU.addPreserved<MemoryDependenceWrapperPass>();
+
+    if (UseMSSA) {
+      AU.addRequired<PostDominatorTreeWrapperPass>();
+      AU.addRequired<MemorySSAWrapperPass>();
+      AU.addPreserved<PostDominatorTreeWrapperPass>();
+      AU.addPreserved<MemorySSAWrapperPass>();
+    } else {
+      AU.addRequired<DominatorTreeWrapperPass>();
+      AU.addRequired<MemoryDependenceWrapperPass>();
+      AU.addPreserved<DominatorTreeWrapperPass>();
+      AU.addPreserved<MemoryDependenceWrapperPass>();
+    }
   }
 
   static char ID; // Pass identification, replacement for typeid
 };
 } // end anonymous namespace
 
-char DSELegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
+using DSELegacyMemDep = DSELegacyPass<false>;
+template <> char DSELegacyPass<false>::ID = 0;
+INITIALIZE_PASS_BEGIN(DSELegacyMemDep, "dsemd", "Dead Store Elimination", false,
                       false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
+INITIALIZE_PASS_END(DSELegacyMemDep, "dsemd", "Dead Store Elimination", false,
                     false)
 
+using DSELegacyMSSA = DSELegacyPass<true>;
+template <> char DSELegacyPass<true>::ID = 0;
+INITIALIZE_PASS_BEGIN(DSELegacyMSSA, "dse",
+                      "Dead Store Elimination (Memory SSA)", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(DSELegacyMSSA, "dse", "Dead Store Elimination (Memory SSA)",
+                    false, false)
+
 FunctionPass *llvm::createDeadStoreEliminationPass() {
-  return new DSELegacyPass();
+  if (EnableMSSA)
+    return new DSELegacyPass<true>();
+  return new DSELegacyPass<false>();
 }
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -40,7 +40,8 @@
   initializeDCELegacyPassPass(Registry);
   initializeDeadInstEliminationPass(Registry);
   initializeScalarizerPass(Registry);
-  initializeDSELegacyPassPass(Registry);
+  initializeDSELegacyMemDepPass(Registry);
+  initializeDSELegacyMSSAPass(Registry);
   initializeGuardWideningLegacyPassPass(Registry);
   initializeGVNLegacyPassPass(Registry);
   initializeNewGVNPass(Registry);
Index: test/Transforms/DeadStoreElimination/global-dse.ll
===================================================================
--- /dev/null
+++ test/Transforms/DeadStoreElimination/global-dse.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+define void @simple0(i8* %a) {
+; CHECK-LABEL: @simple0(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    store i8 undef, i8* [[A:%.*]]
+; CHECK-NEXT:    ret void
+;
+bb0:
+  store i8 undef, i8* %a
+  br label %bb1
+bb1:
+  store i8 undef, i8* %a
+  ret void
+}
+
+define void @simple1_postdom(i8* %a) {
+; CHECK-LABEL: @simple1_postdom(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i8 undef, i8* [[A:%.*]]
+; CHECK-NEXT:    ret void
+;
+bb0:
+  br i1 undef, label %bb1, label %bb2
+bb1:
+  store i8 undef, i8* %a
+  br label %bb3
+bb2:
+  br label %bb3
+bb3:
+  store i8 undef, i8* %a
+  ret void
+}
+
+define void @simple2_killed_by_load(i8* %a, i8* %b) {
+; CHECK-LABEL: @simple2_killed_by_load(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    store i8 undef, i8* [[A:%.*]]
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP:%.*]] = load i8, i8* [[B:%.*]]
+; CHECK-NEXT:    store i8 undef, i8* [[A]]
+; CHECK-NEXT:    ret void
+;
+bb0:
+  store i8 undef, i8* %a
+  br label %bb1
+bb1:
+  %tmp = load i8, i8* %b
+  store i8 undef, i8* %a
+  ret void
+}
+
+declare void @might_throw()
+
+define void @escaping_dse_blocked_by_maythrow(i8* %a) {
+; CHECK-LABEL: @escaping_dse_blocked_by_maythrow(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    store i8 undef, i8* [[A:%.*]]
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    call void @might_throw()
+; CHECK-NEXT:    store i8 undef, i8* [[A]]
+; CHECK-NEXT:    ret void
+;
+bb0:
+  store i8 undef, i8* %a
+  br label %bb1
+bb1:
+  call void @might_throw()
+  store i8 undef, i8* %a
+  ret void
+}
+
+define i8 @but_nonescaping_can_dse() {
+; CHECK-LABEL: @but_nonescaping_can_dse(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[A:%.*]] = alloca i8
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    call void @might_throw()
+; CHECK-NEXT:    store i8 undef, i8* [[A]]
+; CHECK-NEXT:    [[RV:%.*]] = load i8, i8* [[A]]
+; CHECK-NEXT:    ret i8 [[RV]]
+;
+bb0:
+  %a = alloca i8
+  store i8 undef, i8* %a
+  br label %bb1
+bb1:
+  call void @might_throw()
+  store i8 undef, i8* %a
+  %rv = load i8, i8* %a
+  ret i8 %rv
+}
+
+define void @dse_nonescaping_nonreturned() {
+; CHECK-LABEL: @dse_nonescaping_nonreturned(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    call void @might_throw()
+; CHECK-NEXT:    ret void
+;
+bb0:
+  %a = alloca i8
+  store i8 undef, i8* %a
+  br label %bb1
+bb1:
+  call void @might_throw()
+  store i8 undef, i8* %a
+  ret void
+}