diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1207,16 +1207,25 @@ MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize, InstWriteOffset, LaterSize, IsOverwriteEnd); } else if (EnablePartialStoreMerging && - OR == OW_PartialEarlierWithFullLater) { + OR == OW_PartialEarlierWithFullLater && + isa(DepWrite) && + isa(Inst)) { auto *Earlier = dyn_cast(DepWrite); auto *Later = dyn_cast(Inst); + auto *EarlierVT = Earlier->getValueOperand()->getType(); + auto *LaterVT = Later->getValueOperand()->getType(); if (Earlier && isa(Earlier->getValueOperand()) && + (DL.getTypeStoreSizeInBits(EarlierVT) == + DL.getTypeSizeInBits(EarlierVT)) && Later && isa(Later->getValueOperand()) && + (DL.getTypeStoreSizeInBits(LaterVT) == + DL.getTypeSizeInBits(LaterVT)) && memoryIsNotModifiedBetween(Earlier, Later, AA)) { // If the store we find is: // a) partially overwritten by the store to 'Loc' // b) the later store is fully contained in the earlier one and // c) they both have a constant value + // d) no padding is needed for either of the stores // Merge the two stores, replacing the earlier store's value with a // merge of both values. // TODO: Deal with other constant types (vectors, etc), and probably @@ -1248,7 +1257,7 @@ << "\n Merged Value: " << Merged << '\n'); auto *SI = new StoreInst( - ConstantInt::get(Earlier->getValueOperand()->getType(), Merged), + ConstantInt::get(EarlierVT, Merged), Earlier->getPointerOperand(), false, Earlier->getAlignment(), Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); diff --git a/llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll b/llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s --data-layout "e" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s +; RUN: opt < %s --data-layout "E" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s + +; This test used to hit an assertion (see PR41949). +; +; Better safe than sorry, do not assume anything about the padding for the +; i28 store that has 32 bits as store size. +define void @test1(i32* %p) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[A:%.*]] = alloca i32 +; CHECK-NEXT: [[B:%.*]] = bitcast i32* [[A]] to i28* +; CHECK-NEXT: [[C:%.*]] = bitcast i32* [[A]] to { i16, i16 }* +; CHECK-NEXT: [[C1:%.*]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* [[C]], i32 0, i32 1 +; CHECK-NEXT: store i28 10, i28* [[B]] +; CHECK-NEXT: store i16 20, i16* [[C1]] +; CHECK-NEXT: call void @test1(i32* [[A]]) +; CHECK-NEXT: ret void +; + %a = alloca i32 + %b = bitcast i32* %a to i28* + %c = bitcast i32* %a to { i16, i16 }* + %c1 = getelementptr inbounds { i16, i16 }, { i16, i16 }* %c, i32 0, i32 1 + store i28 10, i28* %b + store i16 20, i16* %c1 + + call void @test1(i32* %a) + ret void +} + + +; This test used to mis-compile (see PR41949). +; +; Better safe than sorry, do not assume anything about the padding for the +; i12 store that has 16 bits as store size. +define void @test2(i32* %p) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[U:%.*]] = alloca i32 +; CHECK-NEXT: [[A:%.*]] = bitcast i32* [[U]] to i32* +; CHECK-NEXT: [[B:%.*]] = bitcast i32* [[U]] to i12* +; CHECK-NEXT: store i32 -1, i32* [[A]] +; CHECK-NEXT: store i12 20, i12* [[B]] +; CHECK-NEXT: call void @test2(i32* [[U]]) +; CHECK-NEXT: ret void +; + %u = alloca i32 + %a = bitcast i32* %u to i32* + %b = bitcast i32* %u to i12* + store i32 -1, i32* %a + store i12 20, i12* %b + + call void @test2(i32* %u) + ret void +} +