diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1152,8 +1152,14 @@
   // still want to eliminate the intermediate value, but we have to generate a
   // memmove instead of memcpy.
   bool UseMemMove = false;
-  if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep))))
+  if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
+    // Don't convert llvm.memcpy.inline into memmove because memmove can be
+    // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
+    // there is no inline version of llvm.memmove).
+    if (isa<MemCpyInlineInst>(M))
+      return false;
     UseMemMove = true;
+  }
 
   // If all checks passed, then we can transform M.
   LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -125,6 +125,20 @@
 }
 
 
+; Similar to test5_memcpy, but without noalias; check that memcpy.inline is not folded into memmove.
+define void @test6_memcpy(ptr %src, ptr %dest) nounwind {
+; CHECK-LABEL: @test6_memcpy(
+; CHECK-NEXT:    [[TMP:%.*]] = alloca [16 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[TMP]], ptr align 1 [[DEST:%.*]], i32 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 [[DEST]], ptr align 1 [[TMP]], i32 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %tmp = alloca [16 x i8], align 1
+  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %tmp, ptr align 1 %dest, i32 16, i1 false)
+  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 1 %dest, ptr align 1 %tmp, i32 16, i1 false)
+  ret void
+}
+
 @x = external global %0
 
 