[MemCpyOpt] Use only the destination alignment in call slot optimization

performCallSlotOptzn() compares the alignment it is given only against the
alignment of the source alloca, to decide whether the destination is at least
as aligned as the source. The memcpy caller used to pass
min(dest align, source align); it now passes the memcpy's destination
alignment directly, and the parameter is renamed from cpyAlign to
cpyDestAlign to say so.

The new test @source_alignment covers a memcpy whose destination is
"align 4" while its source operand carries no alignment: the CHECK lines
expect @accept_ptr to be called on the destination and the memcpy to be gone.

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -873,7 +873,7 @@
 bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
                                          Instruction *cpyStore, Value *cpyDest,
                                          Value *cpySrc, TypeSize cpySize,
-                                         Align cpyAlign, BatchAAResults &BAA,
+                                         Align cpyDestAlign, BatchAAResults &BAA,
                                          std::function<CallInst *()> GetC) {
   // The general transformation to keep in mind is
   //
@@ -978,7 +978,7 @@
 
   // Check that dest points to memory that is at least as aligned as src.
   Align srcAlign = srcAlloca->getAlign();
-  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
+  bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
   // If dest is not aligned enough and we can't increase its alignment then
   // bail out.
   if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
@@ -1501,13 +1501,9 @@
     if (Instruction *MI = MD->getMemoryInst()) {
       if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
         if (auto *C = dyn_cast<CallInst>(MI)) {
-          // FIXME: Can we pass in either of dest/src alignment here instead
-          // of conservatively taking the minimum?
-          Align Alignment = std::min(M->getDestAlign().valueOrOne(),
-                                     M->getSourceAlign().valueOrOne());
           if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
                                    TypeSize::getFixed(CopySize->getZExtValue()),
-                                   Alignment, BAA,
+                                   M->getDestAlign().valueOrOne(), BAA,
                                    [C]() -> CallInst * { return C; })) {
             LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
                               << " call: " << *C << "\n"
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll
--- a/llvm/test/Transforms/MemCpyOpt/callslot.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll
@@ -211,6 +211,18 @@
   ret void
 }
 
+define void @source_alignment(ptr noalias dereferenceable(128) %dst) {
+; CHECK-LABEL: @source_alignment(
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [128 x i8], align 4
+; CHECK-NEXT:    call void @accept_ptr(ptr nocapture [[DST:%.*]]) #[[ATTR3]]
+; CHECK-NEXT:    ret void
+;
+  %src = alloca [128 x i8], align 4
+  call void @accept_ptr(ptr nocapture %src) nounwind
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr %src, i64 128, i1 false)
+  ret void
+}
+
 declare void @may_throw()
 declare void @accept_ptr(ptr)
 declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)