diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/MemorySSA.h"
@@ -788,40 +789,9 @@
   // Check that accessing the first srcSize bytes of dest will not cause a
   // trap. Otherwise the transform is invalid since it might cause a trap
   // to occur earlier than it otherwise would.
-  // TODO: Use isDereferenceablePointer() API instead.
-  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
-    // The destination is an alloca. Check it is larger than srcSize.
-    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
-    if (!destArraySize)
-      return false;
-
-    uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
-                        destArraySize->getZExtValue();
-
-    if (destSize < srcSize)
-      return false;
-  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
-    if (A->getDereferenceableBytes() < srcSize) {
-      // If the destination is an sret parameter then only accesses that are
-      // outside of the returned struct type can trap.
-      if (!A->hasStructRetAttr())
-        return false;
-
-      Type *StructTy = A->getParamStructRetType();
-      if (!StructTy->isSized()) {
-        // The call may never return and hence the copy-instruction may never
-        // be executed, and therefore it's not safe to say "the destination
-        // has at least <cpyLen> bytes, as implied by the copy-instruction",
-        return false;
-      }
-
-      uint64_t destSize = DL.getTypeAllocSize(StructTy);
-      if (destSize < srcSize)
-        return false;
-    }
-  } else {
+  if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
+                                          DL, C, DT))
     return false;
-  }
 
   // Make sure that nothing can observe cpyDest being written early. There are
   // a number of cases to consider:
@@ -837,6 +807,7 @@
   //    guaranteed to be executed if C is. As it is a non-atomic access, it
   //    renders accesses from other threads undefined.
   //    TODO: This is currently not checked.
+  // TODO: Check underlying object, so we can look through GEPs.
   if (!isa<AllocaInst>(cpyDest)) {
     assert(C->getParent() == cpyStore->getParent() &&
            "call and copy must be in the same block");
@@ -893,6 +864,7 @@
 
   // Since we're changing the parameter to the callsite, we need to make sure
   // that what would be the new parameter dominates the callsite.
+  // TODO: Support moving instructions like GEPs upwards.
   if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
     if (!DT->dominates(cpyDestInst, C))
       return false;
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll
--- a/llvm/test/Transforms/MemCpyOpt/callslot.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll
@@ -110,8 +110,9 @@
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [8 x i8], align 1
 ; CHECK-NEXT:    [[SRC_I8:%.*]] = bitcast [8 x i8]* [[SRC]] to i8*
 ; CHECK-NEXT:    [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8
-; CHECK-NEXT:    call void @accept_ptr(i8* [[SRC_I8]]) [[ATTR3:#.*]]
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 8, i1 false)
+; CHECK-NEXT:    [[DEST_I81:%.*]] = bitcast i8* [[DEST_I8]] to [8 x i8]*
+; CHECK-NEXT:    [[DEST_I812:%.*]] = bitcast [8 x i8]* [[DEST_I81]] to i8*
+; CHECK-NEXT:    call void @accept_ptr(i8* [[DEST_I812]]) [[ATTR3:#.*]]
 ; CHECK-NEXT:    ret void
 ;
   %dest = alloca [16 x i8]