diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -38,6 +38,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -327,6 +328,7 @@
   OW_End,
   OW_PartialEarlierWithFullLater,
   OW_MaybePartial,
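+  // The accesses are known not to overlap.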
+  OW_None,
   OW_Unknown
 };
 
@@ -847,6 +849,95 @@
   return false;
 }
 
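+// A pointer undergoing PHI translation: the translated base value plus an
+// accumulated constant byte offset.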
+using PhiTransPtr = std::pair<WeakTrackingVH, int64_t>;
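+// A MemoryAccess paired with the PHI-translated pointer that is valid in the
+// access's block.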
+using ExMemoryAccess = std::pair<MemoryAccess *, PhiTransPtr>;
+
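+// Try to translate \p Ptr, valid in \p FromBB, into the equivalent
+// pointer/offset pair valid in \p ToBB by walking predecessor edges and
+// PHI-translating the base pointer across each one. Returns an empty pair if
+// an edge cannot be translated or if different paths disagree on the
+// translated address.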
+static PhiTransPtr phiTranslatePtr(const PhiTransPtr &Ptr, BasicBlock *FromBB,
+                                   BasicBlock *ToBB, const DataLayout &DL,
+                                   DominatorTree &DT, AssumptionCache *AC) {
+  PhiTransPtr ResPtr;
+  DenseMap<BasicBlock *, PhiTransPtr> Visited;
+  SmallVector<std::pair<BasicBlock *, const PhiTransPtr>, 16> WorkList;
+
+  if (FromBB == ToBB)
+    return Ptr;
+
+  WorkList.push_back({FromBB, Ptr});
+
+  while (!WorkList.empty()) {
+    auto &CurrNode = WorkList.back();
+    WorkList.pop_back();
+    BasicBlock *CurrBB = CurrNode.first;
+    const PhiTransPtr &CurrPtr = CurrNode.second;
+
+    for (BasicBlock *PredBB : predecessors(CurrBB)) {
+      int64_t Offset = 0;
+      Value *BasePtr =
+          GetPointerBaseWithConstantOffset(CurrPtr.first, Offset, DL);
+      Offset += CurrPtr.second;
+      PHITransAddr TransAddr{BasePtr, DL, AC};
+
+      // Only follow predecessors dominated by ToBB; the walk relies on every
+      // path it follows eventually reaching ToBB (see the assert below).
+      if (!DT.dominates(ToBB, PredBB))
+        continue;
+
+      if (TransAddr.NeedsPHITranslationFromBlock(CurrBB) &&
+          (!TransAddr.IsPotentiallyPHITranslatable() ||
+           TransAddr.PHITranslateValue(CurrBB, PredBB, &DT, false)))
+        return PhiTransPtr{};
+
+      auto Inserted = Visited.try_emplace(PredBB, TransAddr.getAddr(), Offset);
+      auto &TransPtr = Inserted.first->second;
+      if (!Inserted.second) {
+        // We already visited this block. If we reached it now with a
+        // different address, bail out.
+        if (TransAddr.getAddr() != TransPtr.first || Offset != TransPtr.second)
+          return PhiTransPtr{};
+        continue;
+      }
+
+      if (PredBB == ToBB) {
+        ResPtr = TransPtr;
+        continue;
+      }
+
+      WorkList.push_back({PredBB, TransPtr});
+    }
+  }
+
+  assert(ResPtr.first.pointsToAliveValue() &&
+         "PHI translation is expected to complete successfully");
+  return ResPtr;
+}
+
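+// Re-express the pointer tracked by \p FromAccess in the block of
+// \p ToAccess. Returns \p ToAccess paired with the translated pointer, or
+// with an empty pointer if \p FromAccess was not tracking one.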
+static ExMemoryAccess
+phiTransFromMemoryAccessTo(const ExMemoryAccess &FromAccess,
+                           MemoryAccess *ToAccess, const DataLayout &DL,
+                           DominatorTree &DT, AssumptionCache *AC) {
+  PhiTransPtr ResAddr;
+
+  if (FromAccess.second.first.pointsToAliveValue())
+    ResAddr = phiTranslatePtr(FromAccess.second, FromAccess.first->getBlock(),
+                              ToAccess->getBlock(), DL, DT, AC);
+
+  return std::make_pair(ToAccess, ResAddr);
+}
+
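+// Step from \p CurrAccess to its defining access, PHI-translating the
+// tracked pointer into the defining access's block.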
+static ExMemoryAccess getExDefiningAccess(const ExMemoryAccess &CurrAccess,
+                                          const DataLayout &DL, DominatorTree &DT,
+                                          AssumptionCache *AC) {
+  assert(isa<MemoryUseOrDef>(CurrAccess.first) && "TODO");
+  MemoryAccess *Next =
+      cast<MemoryUseOrDef>(CurrAccess.first)->getDefiningAccess();
+  return phiTransFromMemoryAccessTo(CurrAccess, Next, DL, DT, AC);
+}
+
 struct DSEState {
   Function &F;
   AliasAnalysis &AA;
@@ -866,6 +957,7 @@
   const TargetLibraryInfo &TLI;
   const DataLayout &DL;
   const LoopInfo &LI;
+  AssumptionCache *AC;
 
   // Whether the function contains any irreducible control flow, useful for
   // being accurately able to detect loops.
@@ -894,14 +986,16 @@
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
-           const LoopInfo &LI)
+           const LoopInfo &LI, AssumptionCache *AC)
       : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
-        DL(F.getParent()->getDataLayout()), LI(LI) {}
+        DL(F.getParent()->getDataLayout()), LI(LI), AC(AC),
+        ContainsIrreducibleLoops(false) {}
 
   static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                       DominatorTree &DT, PostDominatorTree &PDT,
-                      const TargetLibraryInfo &TLI, const LoopInfo &LI) {
-    DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+                      const TargetLibraryInfo &TLI, const LoopInfo &LI,
+                      AssumptionCache *AC) {
+    DSEState State(F, AA, MSSA, DT, PDT, TLI, LI, AC);
     // Collect blocks with throwing instructions not modeled in MemorySSA and
     // alloc-like objects.
     unsigned PO = 0;
@@ -981,7 +1075,8 @@
     // the later store was larger than the earlier store.
     if (AAR == AliasResult::MustAlias) {
-      // Make sure that the Later size is >= the Earlier size.
-      if (LaterSize >= EarlierSize)
+      // Make sure that the later access completely covers the earlier one:
+      // the earlier range [EarlierOff, EarlierOff + EarlierSize) must lie
+      // within [LaterOff, LaterOff + LaterSize).
+      if (EarlierOff >= LaterOff && LaterSize >= EarlierSize &&
+          uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
         return OW_Complete;
     }
 
@@ -1013,10 +1108,12 @@
     // Okay, we have stores to two completely different pointers.  Try to
     // decompose the pointer into a "base + constant_offset" form.  If the base
     // pointers are equal, then we can reason about the two stores.
-    EarlierOff = 0;
-    LaterOff = 0;
-    const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
-    const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
+    int64_t ExtraEarlierOff = 0;
+    int64_t ExtraLaterOff = 0;
+    const Value *BP1 = GetPointerBaseWithConstantOffset(P1, ExtraEarlierOff, DL);
+    const Value *BP2 = GetPointerBaseWithConstantOffset(P2, ExtraLaterOff, DL);
+    EarlierOff += ExtraEarlierOff;
+    LaterOff += ExtraLaterOff;
 
     // If the base pointers still differ, we have two completely different stores.
     if (BP1 != BP2)
@@ -1055,7 +1152,7 @@
 
-    // Can reach here only if accesses are known not to overlap. There is no
-    // dedicated code to indicate no overlap so signal "unknown".
-    return OW_Unknown;
+    // Can reach here only if the accesses are known not to overlap.
+    return OW_None;
   }
 
   bool isInvisibleToCallerAfterRet(const Value *V) {
@@ -1143,7 +1240,8 @@
       if (CB->onlyAccessesInaccessibleMemory())
         return false;
 
-    int64_t InstWriteOffset, DepWriteOffset;
+    int64_t InstWriteOffset = 0;
+    int64_t DepWriteOffset = 0;
     if (auto CC = getLocForWriteEx(UseInst))
       return isOverwrite(UseInst, DefInst, *CC, DefLoc, DepWriteOffset,
                          InstWriteOffset) == OW_Complete;
@@ -1156,7 +1254,8 @@
                       << *Def->getMemoryInst()
                       << ") is at the end the function \n");
 
-    auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst());
+    Instruction *DefInst = Def->getMemoryInst();
+    auto MaybeLoc = getLocForWriteEx(DefInst);
     if (!MaybeLoc) {
       LLVM_DEBUG(dbgs() << "  ... could not get location for write.\n");
       return false;
@@ -1188,7 +1287,7 @@
       // TODO: Checking for aliasing is expensive. Consider reducing the amount
       // of times this is called and/or caching it.
       Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
-      if (isReadClobber(*MaybeLoc, UseInst)) {
+      if (isReadClobber(DefInst, *MaybeLoc, 0, UseInst)) {
         LLVM_DEBUG(dbgs() << "  ... hit read clobber " << *UseInst << ".\n");
         return false;
       }
@@ -1248,13 +1347,17 @@
       const Value *LocUO = getUnderlyingObject(Loc.Ptr);
       return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
     }
-    int64_t InstWriteOffset, DepWriteOffset;
+    int64_t InstWriteOffset = 0;
+    int64_t DepWriteOffset = 0;
     return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DepWriteOffset,
                        InstWriteOffset) == OW_Complete;
   }
 
   // Returns true if \p Use may read from \p DefLoc.
-  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
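+  // \p DefOffset and \p UseOffset are constant byte adjustments, produced by
+  // PHI translation, that apply to \p DefLoc and \p UseLoc respectively.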
+  bool isReadClobber(Instruction *DefInst, const MemoryLocation &DefLoc,
+                     int64_t DefOffset, Instruction *UseInst,
+                     Optional<MemoryLocation> UseLoc = None,
+                     int64_t UseOffset = 0) {
     if (isNoopIntrinsic(UseInst))
       return false;
 
@@ -1270,11 +1373,25 @@
       if (CB->onlyAccessesInaccessibleMemory())
         return false;
 
+    if (!UseLoc) {
+      UseLoc = getLocForWriteEx(UseInst);
+      if (!UseLoc)
+        UseLoc = MemoryLocation::getOrNone(UseInst);
+    }
+
+    if (UseLoc &&
+        isOverwrite(DefInst, UseInst, DefLoc, *UseLoc, UseOffset, DefOffset) ==
+            OW_None)
+      return false;
+
     // NOTE: For calls, the number of stores removed could be slightly improved
     // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to
     // be expensive compared to the benefits in practice. For now, avoid more
     // expensive analysis to limit compile-time.
-    return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
+    if (DefOffset == 0 && UseOffset == 0)
+      return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
+
+    return true;
   }
 
   /// Returns true if a dependency between \p Current and \p KillingDef is
@@ -1335,8 +1452,8 @@
   // such MemoryDef, return None. The returned value may not (completely)
   // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing
   // MemoryUse (read).
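+  // The returned access is paired with the PHI-translated pointer that is
+  // valid in its block.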
-  Optional<MemoryAccess *>
-  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
+  Optional<ExMemoryAccess>
+  getDomMemoryDef(MemoryDef *KillingDef, const ExMemoryAccess &ExStartAccess,
                   const MemoryLocation &DefLoc, const Value *DefUO,
                   unsigned &ScanLimit, unsigned &WalkerStepLimit,
                   bool IsMemTerm, unsigned &PartialLimit) {
@@ -1345,13 +1462,24 @@
       return None;
     }
 
-    MemoryAccess *Current = StartAccess;
+    MemoryAccess *const StartAccess = ExStartAccess.first;
     Instruction *KillingI = KillingDef->getMemoryInst();
     LLVM_DEBUG(dbgs() << "  trying to get dominating access\n");
 
+    ExMemoryAccess CurrentAccess = ExStartAccess;
     // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
     Optional<MemoryLocation> CurrentLoc;
-    for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
+    for (;; CurrentAccess = getExDefiningAccess(CurrentAccess, DL, DT, AC)) {
+      MemoryAccess *Current = CurrentAccess.first;
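+      // If a PHI-translated pointer is tracked for this access, substitute it
+      // (and its accumulated offset) for DefLoc's original pointer.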
+      int64_t DefOffset = 0;
+      const Value *ResDefPtr = DefLoc.Ptr;
+
+      if (CurrentAccess.second.first.pointsToAliveValue()) {
+        ResDefPtr = CurrentAccess.second.first;
+        DefOffset = CurrentAccess.second.second;
+      }
+
+      MemoryLocation ResDefLoc(ResDefPtr, DefLoc.Size, DefLoc.AATags);
       LLVM_DEBUG({
         dbgs() << "   visiting " << *Current;
         if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
@@ -1381,7 +1509,7 @@
       // caller is responsible for traversing them.
       if (isa<MemoryPhi>(Current)) {
         LLVM_DEBUG(dbgs() << "   ...  found MemoryPhi\n");
-        return Current;
+        return CurrentAccess;
       }
 
       // Below, check if CurrentDef is a valid candidate to be eliminated by
@@ -1406,18 +1534,39 @@
         return None;
       }
 
+      int64_t CurrOffset = 0;
+      // If Current does not have an analyzable write location, skip it.
+      CurrentLoc = getLocForWriteEx(CurrentI);
+      if (!CurrentLoc)
+        continue;
+
       // If Current is known to be on path that reads DefLoc or is a read
       // clobber, bail out, as the path is not profitable. We skip this check
       // for intrinsic calls, because the code knows how to handle memcpy
       // intrinsics.
-      if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
+      if (!isa<IntrinsicInst>(CurrentI) &&
+          isReadClobber(KillingI, ResDefLoc, DefOffset, CurrentI, CurrentLoc,
+                        CurrOffset))
         return None;
 
       // Quick check if there are direct uses that are read-clobbers.
-      if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
-            if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
-              return !MSSA.dominates(StartAccess, UseOrDef) &&
-                     isReadClobber(DefLoc, UseOrDef->getMemoryInst());
+      if (any_of(Current->uses(), [this, KillingI, CurrentI, &DefLoc,
+                                   &ResDefLoc, DefOffset, StartAccess](Use &U) {
+            if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser())) {
+              auto *UseInst = UseOrDef->getMemoryInst();
+              if (MSSA.dominates(StartAccess, UseOrDef))
+                return false;
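+              // The translated location is only meaningful in CurrentI's
+              // block; elsewhere check against the untranslated DefLoc.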
+              if (UseInst->getParent() == CurrentI->getParent()) {
+                if (!isReadClobber(KillingI, ResDefLoc, DefOffset, UseInst)) {
+                  return false;
+                }
+              } else {
+                if (!isReadClobber(KillingI, DefLoc, 0, UseInst)) {
+                  return false;
+                }
+              }
+              return true;
+            }
             return false;
           })) {
         LLVM_DEBUG(dbgs() << "   ...  found a read clobber\n");
@@ -1429,11 +1578,6 @@
       if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI))
         continue;
 
-      // If Current does not have an analyzable write location, skip it
-      CurrentLoc = getLocForWriteEx(CurrentI);
-      if (!CurrentLoc)
-        continue;
-
       // AliasAnalysis does not account for loops. Limit elimination to
       // candidates for which we can guarantee they always store to the same
       // memory location and not located in different loops.
@@ -1450,12 +1594,13 @@
         if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI))
           continue;
       } else {
-        int64_t InstWriteOffset, DepWriteOffset;
+        int64_t InstWriteOffset = 0;
+        int64_t DepWriteOffset = 0;
         auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc,
                               DepWriteOffset, InstWriteOffset);
         // If Current does not write to the same object as KillingDef, check
         // the next candidate.
-        if (OR == OW_Unknown)
+        if (OR == OW_Unknown || OR == OW_None)
           continue;
         else if (OR == OW_MaybePartial) {
           // If KillingDef only partially overwrites Current, check the next
@@ -1464,6 +1609,7 @@
           // which are less likely to be removable in the end.
           if (PartialLimit <= 1) {
             WalkerStepLimit -= 1;
+            LLVM_DEBUG(dbgs() << "   ... reached partial limit ... continue with next access\n");
             continue;
           }
           PartialLimit -= 1;
@@ -1478,7 +1624,8 @@
     // they cover all paths from EarlierAccess to any function exit.
     SmallPtrSet<Instruction *, 16> KillingDefs;
     KillingDefs.insert(KillingDef->getMemoryInst());
-    MemoryAccess *EarlierAccess = Current;
+    ExMemoryAccess &ExEarlierAccess = CurrentAccess;
+    MemoryAccess *EarlierAccess = CurrentAccess.first;
     Instruction *EarlierMemInst =
         cast<MemoryDef>(EarlierAccess)->getMemoryInst();
     LLVM_DEBUG(dbgs() << "  Checking for reads of " << *EarlierAccess << " ("
@@ -1555,9 +1702,25 @@
 
       // Uses which may read the original MemoryDef mean we cannot eliminate the
       // original MD. Stop walk.
-      if (isReadClobber(*CurrentLoc, UseInst)) {
-        LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
-        return None;
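+      // The tracked PHI-translated pointer is only valid in EarlierAccess's
+      // own block; elsewhere check against EarlierAccess's location.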
+      if (UseInst->getParent() == EarlierMemInst->getParent()) {
+        int64_t DefOffset = 0;
+        const Value *ResDefPtr = DefLoc.Ptr;
+
+        if (CurrentAccess.second.first.pointsToAliveValue()) {
+          ResDefPtr = CurrentAccess.second.first;
+          DefOffset = CurrentAccess.second.second;
+        }
+
+        MemoryLocation ResDefLoc(ResDefPtr, DefLoc.Size, DefLoc.AATags);
+        if (isReadClobber(KillingI, ResDefLoc, DefOffset, UseInst)) {
+          LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
+          return None;
+        }
+      } else if (isReadClobber(EarlierMemInst, *CurrentLoc, 0, UseInst)) {
+        LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
+        return None;
       }
 
       // If this worklist walks back to the original memory access (and the
@@ -1630,7 +1793,7 @@
       // post-dominates EarlierAccess.
       if (KillingBlocks.count(CommonPred)) {
         if (PDT.dominates(CommonPred, EarlierAccess->getBlock()))
-          return {EarlierAccess};
+          return {ExEarlierAccess};
         return None;
       }
 
@@ -1671,14 +1834,14 @@
             return None;
         }
         NumCFGSuccess++;
-        return {EarlierAccess};
+        return {ExEarlierAccess};
       }
       return None;
     }
 
     // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
     // potentially dead.
-    return {EarlierAccess};
+    return {ExEarlierAccess};
   }
 
   // Delete dead memory defs
@@ -1880,10 +2043,11 @@
 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                                 DominatorTree &DT, PostDominatorTree &PDT,
                                 const TargetLibraryInfo &TLI,
-                                const LoopInfo &LI) {
+                                const LoopInfo &LI, AssumptionCache *AC) {
   bool MadeChange = false;
 
-  DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI);
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI, AC);
   // For each store:
   for (unsigned I = 0; I < State.MemDefs.size(); I++) {
     MemoryDef *KillingDef = State.MemDefs[I];
@@ -1903,41 +2067,45 @@
                         << *SI << "\n");
       continue;
     }
-    MemoryLocation SILoc = *MaybeSILoc;
+    const MemoryLocation SILoc = *MaybeSILoc;
     assert(SILoc.Ptr && "SILoc should not be null");
     const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr);
 
-    MemoryAccess *Current = KillingDef;
     LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
-                      << *Current << " (" << *SI << ")\n");
+                      << *KillingDef << " (" << *SI << ")\n");
 
     unsigned ScanLimit = MemorySSAScanLimit;
     unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
     unsigned PartialLimit = MemorySSAPartialStoreLimit;
     // Worklist of MemoryAccesses that may be killed by KillingDef.
-    SetVector<MemoryAccess *> ToCheck;
+    SmallVector<ExMemoryAccess> ToCheck;
 
-    if (SILocUnd)
-      ToCheck.insert(KillingDef->getDefiningAccess());
+    if (SILocUnd) {
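+      // Seed the walk with KillingDef's defining access, tracking SILoc.Ptr
+      // so it can be PHI-translated as the walk crosses blocks.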
+      ExMemoryAccess ExKillingDef{KillingDef,
+                                  {const_cast<Value *>(SILoc.Ptr), 0}};
+      ToCheck.push_back(getExDefiningAccess(ExKillingDef, DL, DT, AC));
+    }
 
     bool Shortend = false;
     bool IsMemTerm = State.isMemTerminatorInst(SI);
     // Check if MemoryAccesses in the worklist are killed by KillingDef.
-    for (unsigned I = 0; I < ToCheck.size(); I++) {
-      Current = ToCheck[I];
+    while (!ToCheck.empty()) {
+      const ExMemoryAccess CurrentAccess = ToCheck.pop_back_val();
+      MemoryAccess *Current = CurrentAccess.first;
       if (State.SkipStores.count(Current))
         continue;
 
-      Optional<MemoryAccess *> Next = State.getDomMemoryDef(
-          KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
-          IsMemTerm, PartialLimit);
+      Optional<ExMemoryAccess> Next = State.getDomMemoryDef(
+          KillingDef, CurrentAccess, SILoc, SILocUnd, ScanLimit,
+          WalkerStepLimit, IsMemTerm, PartialLimit);
 
       if (!Next) {
         LLVM_DEBUG(dbgs() << "  finished walk\n");
         continue;
       }
 
-      MemoryAccess *EarlierAccess = *Next;
+      ExMemoryAccess &ExEarlierAccess = *Next;
+      MemoryAccess *EarlierAccess = ExEarlierAccess.first;
       LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess);
       if (isa<MemoryPhi>(EarlierAccess)) {
         LLVM_DEBUG(dbgs() << "\n  ... adding incoming values to worklist\n");
@@ -1951,14 +2119,15 @@
           // strictly dominate our starting def.
           if (State.PostOrderNumbers[IncomingBlock] >
               State.PostOrderNumbers[PhiBlock])
-            ToCheck.insert(IncomingAccess);
+            ToCheck.push_back(phiTransFromMemoryAccessTo(
+                ExEarlierAccess, IncomingAccess, DL, DT, AC));
         }
         continue;
       }
       auto *NextDef = cast<MemoryDef>(EarlierAccess);
       Instruction *NI = NextDef->getMemoryInst();
       LLVM_DEBUG(dbgs() << " (" << *NI << ")\n");
-      ToCheck.insert(NextDef->getDefiningAccess());
+      ToCheck.push_back(getExDefiningAccess(ExEarlierAccess, DL, DT, AC));
       NumGetDomMemoryDefPassed++;
 
       if (!DebugCounter::shouldExecute(MemorySSACounter))
@@ -1966,9 +2135,22 @@
 
       MemoryLocation NILoc = *State.getLocForWriteEx(NI);
 
+      // Apply any PHI translation recorded for CurrentAccess to SI's
+      // location; the resulting pointer and offsets feed the checks below.
+      int64_t ResOffset = 0;
+      int64_t CurOffset = 0;
+      const Value *ResSIPtr = SILoc.Ptr;
+
+      if (CurrentAccess.second.first.pointsToAliveValue()) {
+        ResSIPtr = CurrentAccess.second.first;
+        ResOffset = CurrentAccess.second.second;
+      }
+
+      MemoryLocation ResSILoc(ResSIPtr, SILoc.Size, SILoc.AATags);
+      const Value *ResSILocUnd = getUnderlyingObject(ResSIPtr);
+
       if (IsMemTerm) {
         const Value *NIUnd = getUnderlyingObject(NILoc.Ptr);
-        if (SILocUnd != NIUnd)
+        if (ResSILocUnd != NIUnd)
           continue;
         LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *NI
                           << "\n  KILLER: " << *SI << '\n');
@@ -1977,16 +2159,15 @@
         MadeChange = true;
       } else {
         // Check if NI overwrites SI.
-        int64_t InstWriteOffset, DepWriteOffset;
-        OverwriteResult OR = State.isOverwrite(SI, NI, SILoc, NILoc,
-                                               DepWriteOffset, InstWriteOffset);
+        OverwriteResult OR =
+            State.isOverwrite(SI, NI, SILoc, NILoc, CurOffset, ResOffset);
         if (OR == OW_MaybePartial) {
           auto Iter = State.IOLs.insert(
               std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
                   NI->getParent(), InstOverlapIntervalsTy()));
           auto &IOL = Iter.first->second;
-          OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset,
-                                  NI, IOL);
+          OR = isPartialOverwrite(ResSILoc, NILoc, CurOffset, ResOffset, NI,
+                                  IOL);
         }
 
         if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
@@ -1997,7 +2178,7 @@
           // TODO: implement tryToMergeParialOverlappingStores using MemorySSA.
           if (Earlier && Later && DT.dominates(Earlier, Later)) {
             if (Constant *Merged = tryToMergePartialOverlappingStores(
-                    Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL,
+                    Earlier, Later, ResOffset, CurOffset, State.DL,
                     State.BatchAA, &DT)) {
 
               // Update stored value of earlier store to merged constant.
@@ -2057,8 +2238,9 @@
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
 
-  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI, &AC);
 
 #ifdef LLVM_ENABLE_STATS
   if (AreStatisticsEnabled())
@@ -2099,8 +2281,10 @@
     PostDominatorTree &PDT =
         getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    AssumptionCache &AC =
+        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
 
-    bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+    bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI, &AC);
 
 #ifdef LLVM_ENABLE_STATS
     if (AreStatisticsEnabled())
@@ -2124,6 +2308,8 @@
     AU.addPreserved<MemorySSAWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
     AU.addPreserved<LoopInfoWrapperPass>();
+    AU.addRequired<AssumptionCacheTracker>();
+    AU.addPreserved<AssumptionCacheTracker>();
   }
 };