diff --git a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h --- a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -63,7 +63,13 @@ // Returns true if `Other` (with size `OtherSize`) can be proven to be fully // contained in `*this` (with size `Size`). bool contains(int64_t Size, const BaseIndexOffset &Other, int64_t OtherSize, - const SelectionDAG &DAG) const; + const SelectionDAG &DAG) const { + int64_t Offset = 0; + return contains(Size, Other, OtherSize, DAG, Offset); + } + + bool contains(int64_t Size, const BaseIndexOffset &Other, int64_t OtherSize, + const SelectionDAG &DAG, int64_t &Offset) const; // Returns true `BasePtr0` and `BasePtr1` can be proven to alias/not alias, in // which case `IsAlias` is set to true/false. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15436,6 +15436,32 @@ CombineTo(ST1, ST1->getChain()); return SDValue(); } + + // If ST stores to a subset of preceding store's value, we may be able + // to fold ST's value into the preceding stored value. As we know the + // other uses of ST1's chain are unconcerned with ST, this folding will + // not affect those nodes. + // FIXME: We should be able to do this for Big Endian as well. 
+ int64_t Offset; + if (!DAG.getDataLayout().isBigEndian() && + ChainBase.contains(ChainByteSize, STBase, STByteSize, DAG, + Offset)) { + SDValue ChainValue = ST1->getValue(); + if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) { + if (auto *C = dyn_cast<ConstantSDNode>(Value)) { + APInt Val = C1->getAPIntValue(); + APInt InsertVal = C->getAPIntValue().zextOrTrunc(STByteSize * 8); + Val.insertBits(InsertVal, Offset * 8); + SDValue NewSDVal = + DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(), + C1->isTargetOpcode(), C1->isOpaque()); + SDNode *NewST1 = DAG.UpdateNodeOperands( + ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2), + ST1->getOperand(3)); + return CombineTo(ST, SDValue(NewST1, 0)); + } + } + } // End ST subset of ST1 case. } } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -136,9 +136,8 @@ } bool BaseIndexOffset::contains(int64_t Size, const BaseIndexOffset &Other, - int64_t OtherSize, - const SelectionDAG &DAG) const { - int64_t Offset; + int64_t OtherSize, const SelectionDAG &DAG, + int64_t &Offset) const { if (!equalBaseIndex(Other, DAG, Offset)) return false; if (Offset >= 0) { diff --git a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll --- a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll +++ b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll @@ -11,8 +11,8 @@ ; Make sure the stores happen in the correct order (the exact instructions could change). 
; CHECK-LABEL: main: -; CHECK: str xzr, [sp, #80] -; CHECK: str w9, [sp, #80] +; CHECK: orr w9, wzr, #0x1 +; CHECK: str x9, [sp, #80] ; CHECK: stp q0, q0, [sp, #48] ; CHECK: ldr w8, [sp, #48] diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll --- a/llvm/test/CodeGen/X86/stores-merging.ll +++ b/llvm/test/CodeGen/X86/stores-merging.ll @@ -26,9 +26,8 @@ define void @redundant_stores_merging_reverse() { ; CHECK-LABEL: redundant_stores_merging_reverse: ; CHECK: # %bb.0: -; CHECK-NEXT: movabsq $528280977409, %rax # imm = 0x7B00000001 +; CHECK-NEXT: movabsq $1958505086977, %rax # imm = 0x1C800000001 ; CHECK-NEXT: movq %rax, e+{{.*}}(%rip) -; CHECK-NEXT: movl $456, e+{{.*}}(%rip) # imm = 0x1C8 ; CHECK-NEXT: retq store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4 store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4