Index: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -510,8 +510,8 @@
 /// memory region into an identical pointer) then it doesn't actually make its
 /// input dead in the traditional sense. Consider this case:
 ///
-///   memcpy(A <- B)
-///   memcpy(A <- A)
+///   memmove(A <- B)
+///   memmove(A <- A)
 ///
 /// In this case, the second store to A does not make the first store to A dead.
 /// The usual situation isn't an explicit A<-A store like this (which can be
@@ -527,23 +527,34 @@
   // Self reads can only happen for instructions that read memory. Get the
   // location read.
   MemoryLocation InstReadLoc = getLocForRead(Inst, TLI);
-  if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
+  if (!InstReadLoc.Ptr)
+    return false; // Not a reading instruction.
 
   // If the read and written loc obviously don't alias, it isn't a read.
-  if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
+  if (AA.isNoAlias(InstReadLoc, InstStoreLoc))
+    return false;
 
-  // Okay, 'Inst' may copy over itself. However, we can still remove a the
-  // DepWrite instruction if we can prove that it reads from the same location
-  // as Inst. This handles useful cases like:
-  //   memcpy(A <- B)
-  //   memcpy(A <- B)
-  // Here we don't know if A/B may alias, but we do know that B/B are must
-  // aliases, so removing the first memcpy is safe (assuming it writes <= #
-  // bytes as the second one.
-  MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
+  if (isa<MemCpyInst>(Inst)) {
+    // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763)
+    // but in practice memcpy(A <- B) either means that A and B are disjoint or
+    // are equal (i.e. there are not partial overlaps). Given that, if we have:
+    //
+    //   memcpy/memmove(A <- B)  // DepWrite
+    //   memcpy(A <- B)  // Inst
+    //
+    // with Inst reading/writing a >= size than DepWrite, we can reason as
+    // follows:
+    //
+    //   - If A == B then both the copies are no-ops, so the DepWrite can be
+    //     removed.
+    //   - If A != B then A and B are disjoint locations in Inst. Since
+    //     Inst.size >= DepWrite.size A and B are disjoint in DepWrite too.
+    //     Therefore DepWrite can be removed.
+    MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);
 
-  if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
-    return false;
+    if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
+      return false;
+  }
 
   // If DepWrite doesn't read memory or if we can't prove it is a must alias,
   // then it can't be considered dead.
Index: llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll
===================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll
+++ llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll
@@ -252,6 +252,9 @@
 ; Do not delete instruction where possible situation is:
 ; A = B
 ; A = A
+;
+; NB! See PR11763 - currently LLVM allows memcpy's source and destination to be
+; equal (but not inequal and overlapping).
 define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
@@ -521,3 +524,51 @@
   store i32 0, i32* %p
   ret void
 }
+
+; We cannot optimize away the first memmove since %P could overlap with %Q.
+define void @test36(i8* %P, i8* %Q) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: ret
+
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  ret void
+}
+
+define void @test37(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+; CHECK-NEXT: ret
+
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+  ret void
+}
+
+; Same caveat about memcpy as in @test18 applies here.
+define void @test38(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+; CHECK-NEXT: ret
+
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
+  ret void
+}
+
+define void @test39(i8* %P, i8* %Q, i8* %R) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
+; CHECK-NEXT: ret
+
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
+  ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)