diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17492,6 +17492,10 @@
         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
           TryToAddCandidate(I2);
       }
+      // Check stores that depend on the root (e.g. Store 3 in the chart above).
+      if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
+        TryToAddCandidate(I);
+      }
     }
   } else {
     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
diff --git a/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll b/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll
--- a/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll
+++ b/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon --combiner-store-merging=false < %s | FileCheck %s
 ; CHECK-NOT: memh
 ; Check that store widening does not merge the two stores.
 
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -562,69 +562,59 @@
 ; LE-PWR9-LABEL: testUnalignedLdStPair:
 ; LE-PWR9:       # %bb.0: # %entry
 ; LE-PWR9-NEXT:    addis r3, r2, g@toc@ha
-; LE-PWR9-NEXT:    li r6, 19
 ; LE-PWR9-NEXT:    li r4, 11
-; LE-PWR9-NEXT:    li r5, 35
-; LE-PWR9-NEXT:    li r7, 27
 ; LE-PWR9-NEXT:    addi r3, r3, g@toc@l
-; LE-PWR9-NEXT:    lxvx vs0, r3, r6
-; LE-PWR9-NEXT:    ldx r4, r3, r4
-; LE-PWR9-NEXT:    ldx r5, r3, r5
-; LE-PWR9-NEXT:    stdx r4, r3, r6
-; LE-PWR9-NEXT:    stxvx vs0, r3, r7
-; LE-PWR9-NEXT:    li r7, 43
-; LE-PWR9-NEXT:    stdx r5, r3, r7
+; LE-PWR9-NEXT:    lxvx vs0, r3, r4
+; LE-PWR9-NEXT:    li r4, 27
+; LE-PWR9-NEXT:    lxvx vs1, r3, r4
+; LE-PWR9-NEXT:    li r4, 35
+; LE-PWR9-NEXT:    stxvx vs1, r3, r4
+; LE-PWR9-NEXT:    li r4, 19
+; LE-PWR9-NEXT:    stxvx vs0, r3, r4
 ; LE-PWR9-NEXT:    blr
 ;
 ; LE-PWR8-LABEL: testUnalignedLdStPair:
 ; LE-PWR8:       # %bb.0: # %entry
 ; LE-PWR8-NEXT:    addis r3, r2, g@toc@ha
-; LE-PWR8-NEXT:    li r4, 19
+; LE-PWR8-NEXT:    li r4, 27
 ; LE-PWR8-NEXT:    li r5, 11
-; LE-PWR8-NEXT:    li r6, 35
-; LE-PWR8-NEXT:    li r7, 43
-; LE-PWR8-NEXT:    li r8, 27
+; LE-PWR8-NEXT:    li r6, 19
+; LE-PWR8-NEXT:    li r8, 35
 ; LE-PWR8-NEXT:    addi r3, r3, g@toc@l
 ; LE-PWR8-NEXT:    lxvd2x vs0, r3, r4
 ; LE-PWR8-NEXT:    ldx r5, r3, r5
-; LE-PWR8-NEXT:    ldx r6, r3, r6
-; LE-PWR8-NEXT:    stdx r6, r3, r7
-; LE-PWR8-NEXT:    stdx r5, r3, r4
+; LE-PWR8-NEXT:    ldx r7, r3, r6
+; LE-PWR8-NEXT:    stdx r7, r3, r4
+; LE-PWR8-NEXT:    stdx r5, r3, r6
 ; LE-PWR8-NEXT:    stxvd2x vs0, r3, r8
 ; LE-PWR8-NEXT:    blr
 ;
 ; BE-PWR9-LABEL: testUnalignedLdStPair:
 ; BE-PWR9:       # %bb.0: # %entry
 ; BE-PWR9-NEXT:    addis r3, r2, g@toc@ha
-; BE-PWR9-NEXT:    li r6, 19
 ; BE-PWR9-NEXT:    li r4, 11
-; BE-PWR9-NEXT:    li r5, 35
-; BE-PWR9-NEXT:    li r7, 27
 ; BE-PWR9-NEXT:    addi r3, r3, g@toc@l
-; BE-PWR9-NEXT:    lxvx vs0, r3, r6
-; BE-PWR9-NEXT:    ldx r4, r3, r4
-; BE-PWR9-NEXT:    ldx r5, r3, r5
-; BE-PWR9-NEXT:    stdx r4, r3, r6
-; BE-PWR9-NEXT:    stxvx vs0, r3, r7
-; BE-PWR9-NEXT:    li r7, 43
-; BE-PWR9-NEXT:    stdx r5, r3, r7
+; BE-PWR9-NEXT:    lxvx vs0, r3, r4
+; BE-PWR9-NEXT:    li r4, 27
+; BE-PWR9-NEXT:    lxvx vs1, r3, r4
+; BE-PWR9-NEXT:    li r4, 35
+; BE-PWR9-NEXT:    stxvx vs1, r3, r4
+; BE-PWR9-NEXT:    li r4, 19
+; BE-PWR9-NEXT:    stxvx vs0, r3, r4
 ; BE-PWR9-NEXT:    blr
 ;
 ; BE-PWR8-LABEL: testUnalignedLdStPair:
 ; BE-PWR8:       # %bb.0: # %entry
 ; BE-PWR8-NEXT:    addis r3, r2, g@toc@ha
-; BE-PWR8-NEXT:    li r4, 19
-; BE-PWR8-NEXT:    li r5, 11
-; BE-PWR8-NEXT:    li r6, 35
-; BE-PWR8-NEXT:    li r7, 27
+; BE-PWR8-NEXT:    li r4, 11
+; BE-PWR8-NEXT:    li r5, 27
 ; BE-PWR8-NEXT:    addi r3, r3, g@toc@l
 ; BE-PWR8-NEXT:    lxvd2x vs0, r3, r4
-; BE-PWR8-NEXT:    ldx r5, r3, r5
-; BE-PWR8-NEXT:    ldx r6, r3, r6
-; BE-PWR8-NEXT:    stxvd2x vs0, r3, r7
-; BE-PWR8-NEXT:    li r7, 43
-; BE-PWR8-NEXT:    stdx r5, r3, r4
-; BE-PWR8-NEXT:    stdx r6, r3, r7
+; BE-PWR8-NEXT:    lxvd2x vs1, r3, r5
+; BE-PWR8-NEXT:    li r4, 35
+; BE-PWR8-NEXT:    li r5, 19
+; BE-PWR8-NEXT:    stxvd2x vs1, r3, r4
+; BE-PWR8-NEXT:    stxvd2x vs0, r3, r5
 ; BE-PWR8-NEXT:    blr
 entry:
   %0 = bitcast <256 x i1>* @g to i8*
diff --git a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
--- a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -920,3 +920,31 @@
   ret void
 }
 
+define i32 @merge_store_load_store_seq(i32* %buff) {
+entry:
+; CHECK-LABEL: merge_store_load_store_seq:
+; CHECK:      movl 4(%rdi), %eax
+; CHECK-NEXT: movq $0, (%rdi)
+; CHECK-NEXT: retq
+; Expect the load of buff[1] first, then one 8-byte store zeroing buff[0..1].
+  store i32 0, i32* %buff, align 4  ; buff[0] = 0
+  %arrayidx1 = getelementptr inbounds i32, i32* %buff, i64 1  ; &buff[1]
+  %0 = load i32, i32* %arrayidx1, align 4  ; read buff[1] before it is zeroed
+  store i32 0, i32* %arrayidx1, align 4  ; buff[1] = 0, adjacent to first store
+  ret i32 %0  ; return the value loaded from buff[1]
+}
+
+define i32 @merge_store_alias(i32* %buff, i32* %other) {
+entry:
+; CHECK-LABEL: merge_store_alias:
+; CHECK:  movl $0, (%rdi)
+; CHECK-NEXT:  movl (%rsi), %eax
+; CHECK-NEXT:  movl $0, 4(%rdi)
+; CHECK-NEXT:  retq
+; Load is via %other, not %buff: expect two separate 4-byte stores around it.
+  store i32 0, i32* %buff, align 4  ; buff[0] = 0
+  %arrayidx1 = getelementptr inbounds i32, i32* %buff, i64 1  ; &buff[1]
+  %0 = load i32, i32* %other, align 4  ; NOTE(review): presumably may alias %buff
+  store i32 0, i32* %arrayidx1, align 4  ; buff[1] = 0
+  ret i32 %0  ; return the value loaded through %other
+}