diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1422,19 +1422,19 @@
     // d) memcpy from a just-memset'd source can be turned into memset.
     if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
       if (Instruction *MI = MD->getMemoryInst()) {
-        if (auto *C = dyn_cast<CallInst>(MI)) {
-          // The memcpy must post-dom the call. Limit to the same block for now.
-          // Additionally, we need to ensure that there are no accesses to dest
-          // between the call and the memcpy. Accesses to src will be checked
-          // by performCallSlotOptzn().
-          // TODO: Support non-local call-slot optimization?
-          if (C->getParent() == M->getParent() &&
-              !accessedBetween(*AA, DestLoc, MD, MA)) {
-            // FIXME: Can we pass in either of dest/src alignment here instead
-            // of conservatively taking the minimum?
-            Align Alignment = std::min(M->getDestAlign().valueOrOne(),
-                                       M->getSourceAlign().valueOrOne());
-            if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+        if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+          if (auto *C = dyn_cast<CallInst>(MI)) {
+            // The memcpy must post-dom the call. Limit to the same block for
+            // now. Additionally, we need to ensure that there are no accesses
+            // to dest between the call and the memcpy. Accesses to src will be
+            // checked by performCallSlotOptzn().
+            // TODO: Support non-local call-slot optimization?
+            if (C->getParent() == M->getParent() &&
+                !accessedBetween(*AA, DestLoc, MD, MA)) {
+              // FIXME: Can we pass in either of dest/src alignment here instead
+              // of conservatively taking the minimum?
+              Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+                                         M->getSourceAlign().valueOrOne());
               if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
                                        CopySize->getZExtValue(), Alignment,
                                        C)) {
@@ -1484,13 +1484,13 @@
   // its lifetime copies undefined data, and we can therefore eliminate
   // the memcpy in favor of the data that was already at the destination.
   // d) memcpy from a just-memset'd source can be turned into memset.
-  if (DepInfo.isClobber()) {
-    if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
-      // FIXME: Can we pass in either of dest/src alignment here instead
-      // of conservatively taking the minimum?
-      Align Alignment = std::min(M->getDestAlign().valueOrOne(),
-                                 M->getSourceAlign().valueOrOne());
-      if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+  if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+    if (DepInfo.isClobber()) {
+      if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+        // FIXME: Can we pass in either of dest/src alignment here instead
+        // of conservatively taking the minimum?
+        Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+                                   M->getSourceAlign().valueOrOne());
         if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
                                  CopySize->getZExtValue(), Alignment, C)) {
           eraseInstruction(M);
diff --git a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
--- a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
@@ -3,8 +3,6 @@
 ; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-%T = type { i64, i64 }
-
 define void @test(i8* %src, i64 %size) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
@@ -21,4 +19,21 @@
   ret void
 }
 
+; Differing sizes, so left as it is.
+define void @negative_test(i8* %src, i64 %size1, i64 %size2) {
+; CHECK-LABEL: @negative_test(
+; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP]], i8* align 8 [[SRC:%.*]], i64 [[SIZE1]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[TMP]], i64 [[SIZE2]], i1 false)
+; CHECK-NEXT: ret void
+;
+  %tmp = alloca i8, i64 %size1
+  %dst = alloca i8, i64 %size2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 %size1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 %size2, i1 false)
+
+  ret void
+}
+
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
diff --git a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/variable-sized-mempcy-uninit.ll
copy from llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
copy to llvm/test/Transforms/MemCpyOpt/variable-sized-mempcy-uninit.ll
--- a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/variable-sized-mempcy-uninit.ll
@@ -3,20 +3,28 @@
 ; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-%T = type { i64, i64 }
-
-define void @test(i8* %src, i64 %size) {
+define void @test(i64 %size) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
+; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
 ; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE]], align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP]], i8* align 8 [[SRC:%.*]], i64 [[SIZE]], i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[SRC]], i64 [[SIZE]], i1 false)
 ; CHECK-NEXT: ret void
 ;
-  %tmp = alloca i8, i64 %size
+  %src = alloca i8, i64 %size
   %dst = alloca i8, i64 %size
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 %size, i1 false)
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 %size, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 %size, i1 false)
+
+  ret void
+}
+
+define void @test2(i64 %size1, i64 %size2, i64 %cpy_size) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[SRC:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
+; CHECK-NEXT: ret void
+;
+  %src = alloca i8, i64 %size1
+  %dst = alloca i8, i64 %size2
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 %cpy_size, i1 false)
 
   ret void
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll
--- a/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll
@@ -3,8 +3,6 @@
 ; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-%T = type { i64, i64 }
-
 define void @test(i8* %src, i8 %c, i64 %size) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT: [[DST1:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
@@ -21,5 +19,22 @@
   ret void
 }
 
+; Differing sizes, so left as it is.
+define void @negative_test(i8* %src, i8 %c, i64 %size1, i64 %size2) {
+; CHECK-LABEL: @negative_test(
+; CHECK-NEXT: [[DST1:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT: [[DST2:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE1]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST2]], i8* align 8 [[DST1]], i64 [[SIZE2]], i1 false)
+; CHECK-NEXT: ret void
+;
+  %dst1 = alloca i8, i64 %size1
+  %dst2 = alloca i8, i64 %size2
+  call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 %size1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 %size2, i1 false)
+
+  ret void
+}
+
 declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)