Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -519,19 +519,33 @@ // We use alias analysis to check if an instruction may store to // the memory we load from in between the load and the store. If - // such an instruction is found, we store it in AI. - Instruction *AI = nullptr; + // such an instruction is found, we try to promote there instead + // of at the store position. + Instruction *P = SI; for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator(); I != E; ++I) { - if (AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod) { - AI = &*I; - break; + if (!(AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod)) + continue; + + // We found an instruction that may write to the loaded memory. + // We can try to promote at this position instead of the store + // position if nothing alias the store memory after this. + P = &*I; + for (; I != E; ++I) { + MemoryLocation StoreLoc = MemoryLocation::get(SI); + if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) { + DEBUG(dbgs() << "Alias " << *I << "\n"); + P = nullptr; + break; + } } + + break; } - // If no aliasing instruction is found, then we can promote the - // load/store pair to a memcpy at the store loaction. - if (!AI) { + // If a valid insertion position is found, then we can promote + // the load/store pair to a memcpy. + if (P) { // If we load from memory that may alias the memory we store to, // memmove must be used to preserve semantic. If not, memcpy can // be used. @@ -542,7 +556,7 @@ unsigned Align = findCommonAlignment(DL, SI, LI); uint64_t Size = DL.getTypeStoreSize(T); - IRBuilder<> Builder(SI); + IRBuilder<> Builder(P); Instruction *M; if (UseMemMove) M = Builder.CreateMemMove(SI->getPointerOperand(), Index: test/Transforms/MemCpyOpt/fca2memcpy.ll =================================================================== --- test/Transforms/MemCpyOpt/fca2memcpy.ll +++ test/Transforms/MemCpyOpt/fca2memcpy.ll @@ -7,6 +7,7 @@ define void @copy(%S* %src, %S* %dst) { ; CHECK-LABEL: copy +; CHECK-NOT: load ; CHECK: call void @llvm.memmove.p0i8.p0i8.i64 ; CHECK-NEXT: ret void %1 = load %S, %S* %src @@ -16,6 +17,7 @@ define void @noaliassrc(%S* noalias %src, %S* %dst) { ; CHECK-LABEL: noaliassrc +; CHECK-NOT: load ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 ; CHECK-NEXT: ret void %1 = load %S, %S* %src @@ -25,9 +27,32 @@ define void @noaliasdst(%S* %src, %S* noalias %dst) { ; CHECK-LABEL: noaliasdst +; CHECK-NOT: load +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NEXT: ret void + %1 = load %S, %S* %src + store %S %1, %S* %dst + ret void +} + +define void @destroysrc(%S* %src, %S* %dst) { +; CHECK-LABEL: destroysrc +; CHECK-NOT: call +; CHECK: ret void + %1 = load %S, %S* %src + store %S zeroinitializer, %S* %src + store %S %1, %S* %dst + ret void +} + +define void @destroynoaliassrc(%S* noalias %src, %S* %dst) { +; CHECK-LABEL: destroynoaliassrc +; CHECK-NOT: load ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NEXT: store %S zeroinitializer, %S* %src ; CHECK-NEXT: ret void %1 = load %S, %S* %src + store %S zeroinitializer, %S* %src store %S %1, %S* %dst ret void }