diff --git a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h --- a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -63,7 +63,13 @@ // Returns true if `Other` (with size `OtherSize`) can be proven to be fully // contained in `*this` (with size `Size`). bool contains(int64_t Size, const BaseIndexOffset &Other, int64_t OtherSize, - const SelectionDAG &DAG) const; + const SelectionDAG &DAG) const { + int64_t Offset; + return contains(Size, Other, OtherSize, DAG, Offset); + } + + bool contains(int64_t Size, const BaseIndexOffset &Other, int64_t OtherSize, + const SelectionDAG &DAG, int64_t &Offset) const; // Returns true `BasePtr0` and `BasePtr1` can be proven to alias/not alias, in // which case `IsAlias` is set to true/false. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15437,6 +15437,32 @@ CombineTo(ST1, ST1->getChain()); return SDValue(); } + + // If ST stores to a subset of preceding store's write set, we may be + // able to fold ST's value into the preceding stored value. As we know + // the other uses of ST1's chain are unconcerned with ST, this folding + // will not affect those nodes. 
+ int64_t Offset; + if (ChainBase.contains(ChainByteSize, STBase, STByteSize, DAG, + Offset)) { + SDValue ChainValue = ST1->getValue(); + if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) { + if (auto *C = dyn_cast<ConstantSDNode>(Value)) { + APInt Val = C1->getAPIntValue(); + APInt InsertVal = C->getAPIntValue().zextOrTrunc(STByteSize * 8); + if (DAG.getDataLayout().isBigEndian()) + Offset = ChainByteSize - STByteSize - Offset; + Val.insertBits(InsertVal, Offset * 8); + SDValue NewSDVal = + DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(), + C1->isTargetOpcode(), C1->isOpaque()); + SDNode *NewST1 = DAG.UpdateNodeOperands( + ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2), + ST1->getOperand(3)); + return CombineTo(ST, SDValue(NewST1, 0)); + } + } + } // End ST subset of ST1 case. } } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -136,9 +136,8 @@ } bool BaseIndexOffset::contains(int64_t Size, const BaseIndexOffset &Other, - int64_t OtherSize, - const SelectionDAG &DAG) const { - int64_t Offset; + int64_t OtherSize, const SelectionDAG &DAG, + int64_t &Offset) const { if (!equalBaseIndex(Other, DAG, Offset)) return false; if (Offset >= 0) { diff --git a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll --- a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll +++ b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll @@ -11,8 +11,8 @@ ; Make sure the stores happen in the correct order (the exact instructions could change). 
; CHECK-LABEL: main: -; CHECK: str xzr, [sp, #80] -; CHECK: str w9, [sp, #80] +; CHECK: orr w9, wzr, #0x1 +; CHECK: str x9, [sp, #80] ; CHECK: stp q0, q0, [sp, #48] ; CHECK: ldr w8, [sp, #48] diff --git a/llvm/test/CodeGen/PowerPC/constant-combines.ll b/llvm/test/CodeGen/PowerPC/constant-combines.ll --- a/llvm/test/CodeGen/PowerPC/constant-combines.ll +++ b/llvm/test/CodeGen/PowerPC/constant-combines.ll @@ -5,18 +5,15 @@ define void @fold_constant_stores_loaddr(i8* %i8_ptr) { ; BE-LABEL: fold_constant_stores_loaddr: ; BE: # %bb.0: # %entry -; BE-NEXT: li 4, 0 +; BE-NEXT: li 4, 85 +; BE-NEXT: sldi 4, 4, 57 ; BE-NEXT: std 4, 0(3) -; BE-NEXT: li 4, -86 -; BE-NEXT: stb 4, 0(3) ; BE-NEXT: blr ; ; LE-LABEL: fold_constant_stores_loaddr: ; LE: # %bb.0: # %entry -; LE-NEXT: li 4, 0 -; LE-NEXT: li 5, -86 +; LE-NEXT: li 4, 170 ; LE-NEXT: std 4, 0(3) -; LE-NEXT: stb 5, 0(3) ; LE-NEXT: blr entry: %i64_ptr = bitcast i8* %i8_ptr to i64* @@ -29,18 +26,15 @@ define void @fold_constant_stores_hiaddr(i8* %i8_ptr) { ; BE-LABEL: fold_constant_stores_hiaddr: ; BE: # %bb.0: # %entry -; BE-NEXT: li 4, 0 +; BE-NEXT: li 4, 85 +; BE-NEXT: sldi 4, 4, 57 ; BE-NEXT: std 4, 0(3) -; BE-NEXT: li 4, -86 -; BE-NEXT: stb 4, 0(3) ; BE-NEXT: blr ; ; LE-LABEL: fold_constant_stores_hiaddr: ; LE: # %bb.0: # %entry -; LE-NEXT: li 4, 0 -; LE-NEXT: li 5, -86 +; LE-NEXT: li 4, 170 ; LE-NEXT: std 4, 0(3) -; LE-NEXT: stb 5, 0(3) ; LE-NEXT: blr entry: %i64_ptr = bitcast i8* %i8_ptr to i64* diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll --- a/llvm/test/CodeGen/X86/stores-merging.ll +++ b/llvm/test/CodeGen/X86/stores-merging.ll @@ -26,9 +26,8 @@ define void @redundant_stores_merging_reverse() { ; CHECK-LABEL: redundant_stores_merging_reverse: ; CHECK: # %bb.0: -; CHECK-NEXT: movabsq $528280977409, %rax # imm = 0x7B00000001 +; CHECK-NEXT: movabsq $1958505086977, %rax # imm = 0x1C800000001 ; CHECK-NEXT: movq %rax, e+{{.*}}(%rip) -; CHECK-NEXT: movl $456, e+{{.*}}(%rip) 
# imm = 0x1C8 ; CHECK-NEXT: retq store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4 store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4