Index: lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- lib/Transforms/Scalar/DeadStoreElimination.cpp +++ lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -527,27 +527,14 @@ // Self reads can only happen for instructions that read memory. Get the // location read. MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); - if (!InstReadLoc.Ptr) return false; // Not a reading instruction. - - // If the read and written loc obviously don't alias, it isn't a read. - if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; - - // Okay, 'Inst' may copy over itself. However, we can still remove a the - // DepWrite instruction if we can prove that it reads from the same location - // as Inst. This handles useful cases like: - // memcpy(A <- B) - // memcpy(A <- B) - // Here we don't know if A/B may alias, but we do know that B/B are must - // aliases, so removing the first memcpy is safe (assuming it writes <= # - // bytes as the second one. - MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); - - if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) - return false; - - // If DepWrite doesn't read memory or if we can't prove it is a must alias, - // then it can't be considered dead. - return true; + if (!InstReadLoc.Ptr) + return false; // Not a reading instruction. + + // If the read and written loc obviously don't alias, it isn't a self-read. + // llvm.memcpy has undefined behavior when the source and the destination + // overlap so if Inst is a memcpy InstReadLoc and InstStoreLoc don't overlap + // by definition. 
+ return !isa<MemCpyInst>(Inst) && !AA.isNoAlias(InstReadLoc, InstStoreLoc); } /// Returns true if the memory which is accessed by the second instruction is not Index: test/Transforms/DeadStoreElimination/simple.ll =================================================================== --- test/Transforms/DeadStoreElimination/simple.ll +++ test/Transforms/DeadStoreElimination/simple.ll @@ -248,17 +248,19 @@ ; CHECK-NEXT: ret } -; PR8728 -; Do not delete instruction where possible situation is: -; A = B -; A = A define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) ret void + +; We can delete the first memcpy here even though we have no information about +; %Q and %R. The only way the writes to %P...%P+12 due to the first memcpy can +; be live is if the second memcpy reads those writes. However, the second +; memcpy also writes to %P...%P+12 and therefore could not have read from those +; locations without invoking undefined behavior. + ; CHECK-LABEL: @test18( -; CHECK-NEXT: call void @llvm.memcpy -; CHECK-NEXT: call void @llvm.memcpy +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) ; CHECK-NEXT: ret } @@ -521,3 +523,49 @@ store i32 0, i32* %p ret void } + +; We cannot optimize away the first memmove since %P could overlap with %Q. 
+define void @test36(i8* %P, i8* %Q) { +; CHECK-LABEL: @test36( +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK-NEXT: ret + + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + ret void +} + +define void @test37(i8* %P, i8* %Q, i8* %R) { +; CHECK-LABEL: @test37( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK-NEXT: ret + + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) + ret void +} + +define void @test38(i8* %P, i8* %Q, i8* %R) { +; CHECK-LABEL: @test38( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK-NEXT: ret + + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) + ret void +} + +define void @test39(i8* %P, i8* %Q, i8* %R) { +; CHECK-LABEL: @test39( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false) +; CHECK-NEXT: ret + + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false) + ret void +} + +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)