Index: llvm/trunk/include/llvm/IR/DataLayout.h
===================================================================
--- llvm/trunk/include/llvm/IR/DataLayout.h
+++ llvm/trunk/include/llvm/IR/DataLayout.h
@@ -453,6 +453,14 @@
     return 8 * getTypeStoreSize(Ty);
   }
 
+  /// Returns true if no extra padding bits are needed when storing the
+  /// specified type.
+  ///
+  /// For example, returns false for i19 that has a 24-bit store size.
+  bool typeSizeEqualsStoreSize(Type *Ty) const {
+    return getTypeSizeInBits(Ty) == getTypeStoreSizeInBits(Ty);
+  }
+
   /// Returns the offset in bytes between successive objects of the
   /// specified type, including alignment padding.
   ///
Index: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1211,12 +1211,17 @@
           auto *Earlier = dyn_cast<StoreInst>(DepWrite);
           auto *Later = dyn_cast<StoreInst>(Inst);
           if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
+              DL.typeSizeEqualsStoreSize(
+                  Earlier->getValueOperand()->getType()) &&
               Later && isa<ConstantInt>(Later->getValueOperand()) &&
+              DL.typeSizeEqualsStoreSize(
+                  Later->getValueOperand()->getType()) &&
               memoryIsNotModifiedBetween(Earlier, Later, AA)) {
             // If the store we find is:
             //   a) partially overwritten by the store to 'Loc'
             //   b) the later store is fully contained in the earlier one and
             //   c) they both have a constant value
+            //   d) neither of the two stores needs padding
             // Merge the two stores, replacing the earlier store's value with a
             // merge of both values.
             // TODO: Deal with other constant types (vectors, etc), and probably
Index: llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore2.ll
===================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore2.ll
+++ llvm/trunk/test/Transforms/DeadStoreElimination/PartialStore2.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s --data-layout "e" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s
+; RUN: opt < %s --data-layout "E" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s
+
+; This test used to hit an assertion (see PR41949).
+;
+; Better safe than sorry, do not assume anything about the padding for the
+; i28 store that has 32 bits as store size.
+define void @test1(i32* %p) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    [[B:%.*]] = bitcast i32* [[A]] to i28*
+; CHECK-NEXT:    [[C:%.*]] = bitcast i32* [[A]] to { i16, i16 }*
+; CHECK-NEXT:    [[C1:%.*]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* [[C]], i32 0, i32 1
+; CHECK-NEXT:    store i28 10, i28* [[B]]
+; CHECK-NEXT:    store i16 20, i16* [[C1]]
+; CHECK-NEXT:    call void @test1(i32* [[A]])
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i32
+  %b = bitcast i32* %a to i28*
+  %c = bitcast i32* %a to { i16, i16 }*
+  %c1 = getelementptr inbounds { i16, i16 }, { i16, i16 }* %c, i32 0, i32 1
+  store i28 10, i28* %b
+  store i16 20, i16* %c1
+
+  call void @test1(i32* %a)
+  ret void
+}
+
+
+; This test used to mis-compile (see PR41949).
+;
+; Better safe than sorry, do not assume anything about the padding for the
+; i12 store that has 16 bits as store size.
+define void @test2(i32* %p) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[U:%.*]] = alloca i32
+; CHECK-NEXT:    [[A:%.*]] = bitcast i32* [[U]] to i32*
+; CHECK-NEXT:    [[B:%.*]] = bitcast i32* [[U]] to i12*
+; CHECK-NEXT:    store i32 -1, i32* [[A]]
+; CHECK-NEXT:    store i12 20, i12* [[B]]
+; CHECK-NEXT:    call void @test2(i32* [[U]])
+; CHECK-NEXT:    ret void
+;
+  %u = alloca i32
+  %a = bitcast i32* %u to i32*
+  %b = bitcast i32* %u to i12*
+  store i32 -1, i32* %a
+  store i12 20, i12* %b
+
+  call void @test2(i32* %u)
+  ret void
+}
+