Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -884,22 +884,43 @@
   //   c) memcpy from freshly alloca'd space or space that has just started its
   //      lifetime copies undefined data, and we can therefore eliminate the
   //      memcpy in favor of the data that was already at the destination.
-  MemDepResult DepInfo = MD->getDependency(M);
+  AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
+  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
+                                                         M, M->getParent());
+  AliasAnalysis::Location DstLoc = AliasAnalysis::getLocationForDest(M);
+  MemDepResult DstDepInfo = MD->getPointerDependencyFrom(DstLoc, true,
+                                                         M, M->getParent());
+
+  // If the destination just started its lifetime, pretend that the
+  // lifetime.start isn't present and try the call slot optimization against
+  // the source's dependency; if it succeeds, extend the lifetime accordingly.
+  IntrinsicInst *LT = dyn_cast_or_null<IntrinsicInst>(DstDepInfo.getInst());
+  if (LT && LT->getIntrinsicID() != Intrinsic::lifetime_start)
+    LT = nullptr;
+
+  MemDepResult DepInfo = LT ? SrcDepInfo : MD->getDependency(M);
   if (DepInfo.isClobber()) {
     if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
       if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
                                CopySize->getZExtValue(), M->getAlignment(),
                                C)) {
+        if (LT) {
+          // Re-emit the destination's lifetime.start before the instruction
+          // the source depends on (DepInfo is SrcDepInfo on this path), with
+          // the original marker's size operand, then drop the original.
+          IRBuilder<> Builder(SrcDepInfo.getInst());
+          Builder.CreateLifetimeStart(M->getDest(),
+                                      cast<ConstantInt>(LT->getArgOperand(0)));
+          MD->removeInstruction(LT);
+          LT->eraseFromParent();
+        }
+
         MD->removeInstruction(M);
         M->eraseFromParent();
         return true;
       }
     }
   }
-
-  AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
-  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
-                                                         M, M->getParent());
   if (SrcDepInfo.isClobber()) {
     if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
       return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
Index: test/Transforms/MemCpyOpt/extend-lifetime.ll
===================================================================
--- /dev/null
+++ test/Transforms/MemCpyOpt/extend-lifetime.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s
+; %dst's lifetime starts right before the copy that fills it from %src.
+; The FileCheck directives at the end require that no copy intrinsic precedes
+; the lifetime marker in the output and that the fill call follows the marker.
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+declare void @foo([100000 x i32]*)
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+
+; Function Attrs: nounwind uwtable
+define void @testfunc() {
+  %src = alloca [100000 x i32], align 4
+  %dst = alloca [100000 x i32], align 4
+  %1 = bitcast [100000 x i32]* %src to i8*
+  call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 400000, i32 4, i1 false)
+  %2 = bitcast [100000 x i32]* %dst to i8*
+  call void @llvm.lifetime.start(i64 400000, i8* %2)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %1, i64 400000, i32 4, i1 false)
+  call void @foo([100000 x i32]* %dst)
+  call void @llvm.lifetime.end(i64 400000, i8* %2)
+  ret void
+; CHECK-LABEL: @testfunc
+; CHECK-NOT: memcpy
+; CHECK: lifetime.start
+; CHECK: memset
+}