diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17492,6 +17492,10 @@ for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2) TryToAddCandidate(I2); } + // Check stores that depend on the root (e.g. Store 3 in the chart above). + if (I.getOperandNo() == 0 && isa(*I)) { + TryToAddCandidate(I); + } } } else { for (auto I = RootNode->use_begin(), E = RootNode->use_end(); diff --git a/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll b/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll --- a/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll +++ b/llvm/test/CodeGen/Hexagon/store-widen-aliased-load.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon --combiner-store-merging=false < %s | FileCheck %s ; CHECK-NOT: memh ; Check that store widening does not merge the two stores. diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll --- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll @@ -562,69 +562,59 @@ ; LE-PWR9-LABEL: testUnalignedLdStPair: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r3, r2, g@toc@ha -; LE-PWR9-NEXT: li r6, 19 ; LE-PWR9-NEXT: li r4, 11 -; LE-PWR9-NEXT: li r5, 35 -; LE-PWR9-NEXT: li r7, 27 ; LE-PWR9-NEXT: addi r3, r3, g@toc@l -; LE-PWR9-NEXT: lxvx vs0, r3, r6 -; LE-PWR9-NEXT: ldx r4, r3, r4 -; LE-PWR9-NEXT: ldx r5, r3, r5 -; LE-PWR9-NEXT: stdx r4, r3, r6 -; LE-PWR9-NEXT: stxvx vs0, r3, r7 -; LE-PWR9-NEXT: li r7, 43 -; LE-PWR9-NEXT: stdx r5, r3, r7 +; LE-PWR9-NEXT: lxvx vs0, r3, r4 +; LE-PWR9-NEXT: li r4, 27 +; LE-PWR9-NEXT: lxvx vs1, r3, r4 +; LE-PWR9-NEXT: li r4, 35 +; LE-PWR9-NEXT: stxvx vs1, r3, r4 +; LE-PWR9-NEXT: li r4, 19 +; LE-PWR9-NEXT: stxvx vs0, r3, r4 ; LE-PWR9-NEXT: blr ; ; LE-PWR8-LABEL: testUnalignedLdStPair: ; LE-PWR8: # %bb.0: # %entry ; LE-PWR8-NEXT: addis r3, r2, g@toc@ha -; LE-PWR8-NEXT: li r4, 19 +; LE-PWR8-NEXT: li r4, 27 ; LE-PWR8-NEXT: li r5, 11 -; LE-PWR8-NEXT: li r6, 35 -; LE-PWR8-NEXT: li r7, 43 -; LE-PWR8-NEXT: li r8, 27 +; LE-PWR8-NEXT: li r6, 19 +; LE-PWR8-NEXT: li r8, 35 ; LE-PWR8-NEXT: addi r3, r3, g@toc@l ; LE-PWR8-NEXT: lxvd2x vs0, r3, r4 ; LE-PWR8-NEXT: ldx r5, r3, r5 -; LE-PWR8-NEXT: ldx r6, r3, r6 -; LE-PWR8-NEXT: stdx r6, r3, r7 -; LE-PWR8-NEXT: stdx r5, r3, r4 +; LE-PWR8-NEXT: ldx r7, r3, r6 +; LE-PWR8-NEXT: stdx r7, r3, r4 +; LE-PWR8-NEXT: stdx r5, r3, r6 ; LE-PWR8-NEXT: stxvd2x vs0, r3, r8 ; LE-PWR8-NEXT: blr ; ; BE-PWR9-LABEL: testUnalignedLdStPair: ; BE-PWR9: # %bb.0: # %entry ; BE-PWR9-NEXT: addis r3, r2, g@toc@ha -; BE-PWR9-NEXT: li r6, 19 ; BE-PWR9-NEXT: li r4, 11 -; BE-PWR9-NEXT: li r5, 35 -; BE-PWR9-NEXT: li r7, 27 ; BE-PWR9-NEXT: addi r3, r3, g@toc@l -; BE-PWR9-NEXT: lxvx vs0, r3, r6 -; BE-PWR9-NEXT: ldx r4, r3, r4 -; BE-PWR9-NEXT: ldx r5, r3, r5 -; BE-PWR9-NEXT: stdx r4, r3, r6 -; BE-PWR9-NEXT: stxvx vs0, r3, r7 -; BE-PWR9-NEXT: li r7, 43 -; BE-PWR9-NEXT: stdx r5, r3, r7 +; BE-PWR9-NEXT: lxvx vs0, r3, r4 +; BE-PWR9-NEXT: li r4, 27 +; BE-PWR9-NEXT: lxvx vs1, r3, r4 +; BE-PWR9-NEXT: li r4, 35 +; BE-PWR9-NEXT: stxvx vs1, r3, r4 +; BE-PWR9-NEXT: li r4, 19 +; BE-PWR9-NEXT: stxvx vs0, r3, r4 ; BE-PWR9-NEXT: blr ; ; BE-PWR8-LABEL: testUnalignedLdStPair: ; BE-PWR8: # %bb.0: # %entry ; BE-PWR8-NEXT: addis r3, r2, g@toc@ha -; BE-PWR8-NEXT: li r4, 19 -; BE-PWR8-NEXT: li r5, 11 -; BE-PWR8-NEXT: li r6, 35 -; BE-PWR8-NEXT: li r7, 27 +; BE-PWR8-NEXT: li r4, 11 +; BE-PWR8-NEXT: li r5, 27 ; BE-PWR8-NEXT: addi r3, r3, g@toc@l ; BE-PWR8-NEXT: lxvd2x vs0, r3, r4 -; BE-PWR8-NEXT: ldx r5, r3, r5 -; BE-PWR8-NEXT: ldx r6, r3, r6 -; BE-PWR8-NEXT: stxvd2x vs0, r3, r7 -; BE-PWR8-NEXT: li r7, 43 -; BE-PWR8-NEXT: stdx r5, r3, r4 -; BE-PWR8-NEXT: stdx r6, r3, r7 +; BE-PWR8-NEXT: lxvd2x vs1, r3, r5 +; BE-PWR8-NEXT: li r4, 35 +; BE-PWR8-NEXT: li r5, 19 +; BE-PWR8-NEXT: stxvd2x vs1, r3, r4 +; BE-PWR8-NEXT: stxvd2x vs0, r3, r5 ; BE-PWR8-NEXT: blr entry: %0 = bitcast <256 x i1>* @g to i8* diff --git a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll --- a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll +++ b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -920,3 +920,31 @@ ret void } +define i32 @merge_store_load_store_seq(i32* %buff) { +entry: +; CHECK-LABEL: merge_store_load_store_seq: +; CHECK: movl 4(%rdi), %eax +; CHECK-NEXT: movq $0, (%rdi) +; CHECK-NEXT: retq + + store i32 0, i32* %buff, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %buff, i64 1 + %0 = load i32, i32* %arrayidx1, align 4 + store i32 0, i32* %arrayidx1, align 4 + ret i32 %0 +} + +define i32 @merge_store_alias(i32* %buff, i32* %other) { +entry: +; CHECK-LABEL: merge_store_alias: +; CHECK: movl $0, (%rdi) +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movl $0, 4(%rdi) +; CHECK-NEXT: retq + + store i32 0, i32* %buff, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %buff, i64 1 + %0 = load i32, i32* %other, align 4 + store i32 0, i32* %arrayidx1, align 4 + ret i32 %0 +}