diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1864,8 +1864,25 @@
         // We are searching for the definition of the store's destination.
         // So, if that is the same definition as the load, then this is a
         // noop. Otherwise, fail.
-        if (LoadAccess != Current)
+        if (LoadAccess != Current) {
+          // This is a potentially clobbering store, but it writes the same
+          // value, so we can safely ignore it if alignment is as expected.
+          if (auto *CurrentDef = dyn_cast<MemoryDef>(Current))
+            // getMemoryInst() may be null (e.g. for the live-on-entry def),
+            // so use the null-tolerant cast.
+            if (auto *CurrentStoreI =
+                    dyn_cast_or_null<StoreInst>(CurrentDef->getMemoryInst()))
+              if (CurrentStoreI->getValueOperand() == LoadI) {
+                // Both accesses must be aligned to at least the full store
+                // size; otherwise they could partially overlap.
+                TypeSize StoreSize = DL.getTypeStoreSize(LoadI->getType());
+                if (!StoreSize.isScalable() &&
+                    LoadI->getAlign() >= StoreSize &&
+                    CurrentStoreI->getAlign() >= StoreSize)
+                  continue;
+              }
           return false;
+        }
       }
       return true;
     }
diff --git a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
--- a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
@@ -612,16 +612,28 @@
 ; CHECK-LABEL: @pr49927(
 ; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    store i32 [[V]], i32* [[Q:%.*]], align 4
-; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %v = load i32, i32* %p, align 4
   store i32 %v, i32* %q, align 4
-  ; FIXME: this store can be eliminated
   store i32 %v, i32* %p, align 4
   ret void
 }
+
+define void @pr49927_unalign(i32* %q, i32* %p) {
+; CHECK-LABEL: @pr49927_unalign(
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 1
+; CHECK-NEXT:    store i32 [[V]], i32* [[Q:%.*]], align 1
+; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 1
+; CHECK-NEXT:    ret void
+;
+  %v = load i32, i32* %p, align 1
+  store i32 %v, i32* %q, align 1
+  store i32 %v, i32* %p, align 1
+  ret void
+}
+
 
 define void @pr50339(i8* nocapture readonly %0) {
 ; CHECK-LABEL: @pr50339(
 ; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 16 dereferenceable(16) getelementptr inbounds ([32 x i8], [32 x i8]* @a, i64 0, i64 0), i8* noundef nonnull align 1 dereferenceable(16) [[TMP0:%.*]], i64 16, i1 false)