diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -617,23 +617,39 @@
 /// instruction.
 static bool memoryIsNotModifiedBetween(Instruction *FirstI,
                                        Instruction *SecondI,
-                                       AliasAnalysis *AA) {
-  SmallVector<BasicBlock *, 16> WorkList;
-  SmallPtrSet<BasicBlock *, 8> Visited;
+                                       AliasAnalysis *AA,
+                                       const DataLayout &DL,
+                                       DominatorTree *DT) {
+  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
+  // instructions which can modify the memory location accessed by SecondI.
+  //
+  // While doing the walk, keep track of the address to check. It may differ
+  // between basic blocks due to PHI translation.
+  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
+  SmallVector<BlockAddressPair, 16> WorkList;
+  // Keep track of the address we visited each block with. Bail out if we
+  // visit the same block with a different address.
+  DenseMap<BasicBlock *, Value *> Visited;
+
   BasicBlock::iterator FirstBBI(FirstI);
   ++FirstBBI;
   BasicBlock::iterator SecondBBI(SecondI);
   BasicBlock *FirstBB = FirstI->getParent();
   BasicBlock *SecondBB = SecondI->getParent();
   MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
 
   // Start checking the SecondBB.
-  WorkList.push_back(SecondBB);
+  WorkList.push_back(
+      std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
   bool isFirstBlock = true;
 
   // Check all blocks going backward until we reach the FirstBB.
   while (!WorkList.empty()) {
-    BasicBlock *B = WorkList.pop_back_val();
+    BlockAddressPair Current = WorkList.pop_back_val();
+    BasicBlock *B = Current.first;
+    PHITransAddr &Addr = Current.second;
+    Value *Ptr = Addr.getAddr();
 
     // Ignore instructions before FirstI if this is the FirstBB.
     BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
@@ -652,16 +668,31 @@
     for (; BI != EI; ++BI) {
       Instruction *I = &*BI;
       if (I->mayWriteToMemory() && I != SecondI)
-        if (isModSet(AA->getModRefInfo(I, MemLoc)))
+        if (isModSet(AA->getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
           return false;
     }
     if (B != FirstBB) {
       assert(B != &FirstBB->getParent()->getEntryBlock() &&
              "Should not hit the entry block because SI must be dominated by LI");
       for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
-        if (!Visited.insert(*PredI).second)
+        PHITransAddr PredAddr = Addr;
+        if (PredAddr.NeedsPHITranslationFromBlock(B)) {
+          if (!PredAddr.IsPotentiallyPHITranslatable())
+            return false;
+          if (PredAddr.PHITranslateValue(B, *PredI, DT, false))
+            return false;
+        }
+        Value *TranslatedPtr = PredAddr.getAddr();
+        auto Inserted = Visited.insert(std::make_pair(*PredI, TranslatedPtr));
+        if (!Inserted.second) {
+          // We already visited this block before. If it was with a different
+          // address - bail out!
+          if (TranslatedPtr != Inserted.first->second)
+            return false;
+          // ... otherwise just skip it.
           continue;
-        WorkList.push_back(*PredI);
+        }
+        WorkList.push_back(std::make_pair(*PredI, PredAddr));
       }
     }
   }
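
Note (illustration, not part of the patch): for readers unfamiliar with the PHITransAddr API, the per-predecessor step added above boils down to the following sketch. translateAddrIntoPred is a hypothetical helper name; the address is deliberately taken by value, mirroring the `PHITransAddr PredAddr = Addr;` copy in the hunk, because translation mutates it.

static Value *translateAddrIntoPred(PHITransAddr Addr, BasicBlock *B,
                                    BasicBlock *Pred, DominatorTree *DT) {
  if (Addr.NeedsPHITranslationFromBlock(B)) {
    // The address involves a value defined in B (e.g. a PHI); try to
    // rewrite it in terms of values available in Pred.
    if (!Addr.IsPotentiallyPHITranslatable())
      return nullptr;
    if (Addr.PHITranslateValue(B, Pred, DT, /*MustDominate=*/false))
      return nullptr; // PHITranslateValue() returns true on *failure*.
  }
  return Addr.getAddr(); // The address as seen from Pred.
}
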
@@ -1063,7 +1094,8 @@
                                const DataLayout &DL,
                                const TargetLibraryInfo *TLI,
                                InstOverlapIntervalsTy &IOL,
-                               MapVector<Instruction *, bool> &ThrowableInst) {
+                               MapVector<Instruction *, bool> &ThrowableInst,
+                               DominatorTree *DT) {
   // Must be a store instruction.
   StoreInst *SI = dyn_cast<StoreInst>(Inst);
   if (!SI)
     return false;
@@ -1073,7 +1105,8 @@
   // then the store can be removed.
   if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
     if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
-        isRemovable(SI) && memoryIsNotModifiedBetween(DepLoad, SI, AA)) {
+        isRemovable(SI) &&
+        memoryIsNotModifiedBetween(DepLoad, SI, AA, DL, DT)) {
 
       LLVM_DEBUG(
           dbgs() << "DSE: Remove Store Of Load from same pointer:\n  LOAD: "
@@ -1092,7 +1125,7 @@
       dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL));
 
   if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
-      memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) {
+      memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA, DL, DT)) {
     LLVM_DEBUG(
         dbgs() << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
                << *Inst << "\n  OBJECT: " << *UnderlyingPointer << '\n');
@@ -1140,7 +1173,7 @@
 
     // eliminateNoopStore will update in iterator, if necessary.
     if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL,
-                           ThrowableInst)) {
+                           ThrowableInst, DT)) {
       MadeChange = true;
       continue;
     }
@@ -1258,7 +1291,7 @@
         Later && isa<ConstantInt>(Later->getValueOperand()) &&
         DL.typeSizeEqualsStoreSize(
             Later->getValueOperand()->getType()) &&
-        memoryIsNotModifiedBetween(Earlier, Later, AA)) {
+        memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) {
       // If the store we find is:
       //   a) partially overwritten by the store to 'Loc'
       //   b) the later store is fully contained in the earlier one and
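
Note (illustration, not part of the patch): in source-level terms, the two patterns eliminateNoopStore recognizes look roughly like this. f and g are made-up functions, and both marked stores are only removable when memoryIsNotModifiedBetween proves there is no intervening write -- which is what the DL/DT plumbing above now extends across PHIs.

#include <cstdlib>

int f(int *p) {
  int v = *p; // Load a value ...
  // ... no writes to *p in between ...
  *p = v;     // ... then store it back to the same pointer: a no-op store.
  return v;
}

char *g() {
  char *m = static_cast<char *>(calloc(9, 20)); // calloc zero-initializes.
  m[3] = 0; // Storing zero over still-zero memory: a no-op store.
  return m;
}
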
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -893,5 +893,299 @@
   ret void
 }
 
+define i32 @test40() {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+  store i8 1, i8* %p.next
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+return:
+  ret i32 0
+}
+
+define i32 @test41() {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT:    br label [[CONT]]
+; CHECK:       cont:
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+  store i8 1, i8* %p.next
+  br label %cont
+
+cont:
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+
+return:
+  ret i32 0
+}
+
+; The store is redundant here, but currently we fail to eliminate it.
+; We walk from the store up to the calloc, translating PHIs as needed.
+; In this case we fail to translate %p while going over the backedge, so
+; we conservatively assume that the zero-initialized memory is clobbered.
+define i32 @test42() {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    br label [[CONT]]
+; CHECK:       cont:
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %cont
+
+cont:
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+
+return:
+  ret i32 0
+}
+
+define i32 @test43() {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT_2:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT:    br label [[CONT:%.*]]
+; CHECK:       cont:
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    br label [[CONT_2]]
+; CHECK:       cont.2:
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont.2 ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+  store i8 1, i8* %p.next
+  br label %cont
+
+cont:
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  br label %cont.2
+
+cont.2:
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+
+return:
+  ret i32 0
+}
+
+define i32 @test44() {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT_2:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    br label [[CONT:%.*]]
+; CHECK:       cont:
+; CHECK-NEXT:    store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT:    br label [[CONT_2]]
+; CHECK:       cont.2:
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br label %loop
+loop:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont.2 ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+  %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+  store i8 0, i8* %p
+  br label %cont
+
+cont:
+  store i8 1, i8* %p.next
+  br label %cont.2
+
+cont.2:
+  %continue = icmp ugt i64 %indvars.iv, 15
+  br i1 %continue, label %loop, label %return
+
+return:
+  ret i32 0
+}
+
+; This is an example that could potentially benefit from PHI translation.
+; The current implementation doesn't handle it, though, because we never
+; visit the same block with two different addresses while looking for
+; clobbering instructions: here %entry is reached with the translated
+; offsets 2 (via %true) and 1 (via %false), so the walk bails out.
+define i32 @test45(i1 %c) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK:       true:
+; CHECK-NEXT:    [[P_1:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 1
+; CHECK-NEXT:    store i8 1, i8* [[P_1]]
+; CHECK-NEXT:    br label [[CONT:%.*]]
+; CHECK:       false:
+; CHECK-NEXT:    [[P_2:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 2
+; CHECK-NEXT:    store i8 1, i8* [[P_2]]
+; CHECK-NEXT:    br label [[CONT]]
+; CHECK:       cont:
+; CHECK-NEXT:    [[OFFSET:%.*]] = phi i64 [ 2, [[TRUE]] ], [ 1, [[FALSE]] ]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[OFFSET]]
+; CHECK-NEXT:    store i8 0, i8* [[P]]
+; CHECK-NEXT:    br label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  br i1 %c, label %true, label %false
+
+true:
+  %p.1 = getelementptr inbounds i8, i8* %m, i64 1
+  store i8 1, i8* %p.1
+  br label %cont
+
+false:
+  %p.2 = getelementptr inbounds i8, i8* %m, i64 2
+  store i8 1, i8* %p.2
+  br label %cont
+
+cont:
+  %offset = phi i64 [ 2, %true ], [ 1, %false ]
+  %p = getelementptr inbounds i8, i8* %m, i64 %offset
+  store i8 0, i8* %p
+  br label %return
+
+return:
+  ret i32 0
+}
+
+; This is test45, modified to demonstrate PHI translation improving the
+; accuracy of the analysis (albeit on a slightly convoluted case): both
+; incoming values of %offset are 2, so every predecessor translates %p to
+; the same address and the store of zero is proven dead.
+define i32 @test46(i1 %c) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT:    [[P_1:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 1
+; CHECK-NEXT:    [[P_2:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 2
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK:       true:
+; CHECK-NEXT:    store i8 1, i8* [[P_1]]
+; CHECK-NEXT:    br label [[CONT:%.*]]
+; CHECK:       false:
+; CHECK-NEXT:    store i8 1, i8* [[P_1]]
+; CHECK-NEXT:    br label [[CONT]]
+; CHECK:       cont:
+; CHECK-NEXT:    br label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %m = call i8* @calloc(i32 9, i32 20)
+  %p.1 = getelementptr inbounds i8, i8* %m, i64 1
+  %p.2 = getelementptr inbounds i8, i8* %m, i64 2
+  br i1 %c, label %true, label %false
+
+true:
+  store i8 1, i8* %p.1
+  br label %cont
+
+false:
+  store i8 1, i8* %p.1
+  br label %cont
+
+cont:
+  %offset = phi i64 [ 2, %true ], [ 2, %false ]
+  %p = getelementptr inbounds i8, i8* %m, i64 %offset
+  store i8 0, i8* %p
+  br label %return
+
+return:
+  ret i32 0
+}
+
 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32)
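
Note (illustration, not part of the patch): the following small self-contained toy replays the backward walk over the diamond CFG shared by test45 and test46. The Graph type and block names are invented; addresses are modeled as constant byte offsets from the calloc'ed base, and translating the address from "cont" into a predecessor substitutes that predecessor's incoming PHI constant. It shows why test45 bails out (entry is reached with two different translated offsets) while test46 proves the store dead.

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <utility>
#include <vector>

struct Graph {
  std::map<std::string, std::vector<std::string>> Preds; // block -> preds
  std::map<std::string, int> PhiIncoming; // pred of "cont" -> incoming const
  std::map<std::string, std::optional<int>> StoreOffset; // block -> store
};

// Returns true if the store in "cont" is provably not clobbered between the
// calloc in "entry" and "cont"; false means a clobber was found or the walk
// gave up. (The real code also scans "cont" itself; omitted for brevity.)
static bool memoryNotModified(const Graph &G) {
  std::map<std::string, int> Visited; // block -> offset it was reached with
  std::vector<std::pair<std::string, int>> WorkList;
  for (const std::string &Pred : G.Preds.at("cont"))
    WorkList.push_back({Pred, G.PhiIncoming.at(Pred)}); // "PHI translation"
  while (!WorkList.empty()) {
    auto [B, Off] = WorkList.back();
    WorkList.pop_back();
    if (G.StoreOffset.at(B) == Off)
      return false; // a store in B clobbers the translated address
    if (B == "entry")
      continue; // reached the calloc; this path is fine
    for (const std::string &Pred : G.Preds.at(B)) {
      auto [It, Inserted] = Visited.insert({Pred, Off});
      if (!Inserted) {
        if (It->second != Off)
          return false; // same block, different address: bail out
        continue;
      }
      WorkList.push_back({Pred, Off});
    }
  }
  return true;
}

int main() {
  Graph G;
  G.Preds = {{"cont", {"true", "false"}},
             {"true", {"entry"}},
             {"false", {"entry"}}};

  // test45 shape: %offset = phi [ 2, %true ], [ 1, %false ]; the arms store
  // to offsets 1 and 2, and "entry" is reached with offsets 2 and 1.
  G.PhiIncoming = {{"true", 2}, {"false", 1}};
  G.StoreOffset = {{"entry", std::nullopt}, {"true", 1}, {"false", 2}};
  std::cout << "test45: " << (memoryNotModified(G) ? "dead" : "kept") << "\n";

  // test46 shape: both incoming values are 2 and both arms store to offset
  // 1, so every path agrees and the store of zero is dead.
  G.PhiIncoming = {{"true", 2}, {"false", 2}};
  G.StoreOffset = {{"entry", std::nullopt}, {"true", 1}, {"false", 1}};
  std::cout << "test46: " << (memoryNotModified(G) ? "dead" : "kept") << "\n";
}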