Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -121,10 +121,11 @@ return MI; } - // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with - // load/store. + // If MemCpyInst length is a constant and fits in a legal integer then + // replace memcpy with load/store. ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength()); - if (!MemOpLength) return nullptr; + if (!MemOpLength) + return nullptr; // Source and destination pointer types are always "i8*" for intrinsic. See // if the size is something we can handle with a single primitive load/store. @@ -133,8 +134,12 @@ uint64_t Size = MemOpLength->getLimitedValue(); assert(Size && "0-sized memory transferring should be removed already."); - if (Size > 8 || (Size&(Size-1))) - return nullptr; // If not 1/2/4/8 bytes, exit. + // Check whether this fits within a legal integer. When the length is + // a constant and doesn't require large copies, it is more optimizable to + // represent this in terms of integer loads and stores. + // FIXME: Consider letting TTI set the upper bound. + if (!DL.fitsInLegalInteger(Size)) + return nullptr; // Use an integer load+store unless we can find something better. unsigned SrcAddrSp = @@ -166,7 +171,8 @@ Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); LoadInst *L = Builder.CreateLoad(Src); - // Alignment from the mem intrinsic will be better, so use it. + // Alignment from the mem intrinsic is necessary -- it may be less aligned + // than this integer size. 
L->setAlignment(CopySrcAlign); if (CopyMD) L->setMetadata(LLVMContext::MD_tbaa, CopyMD); @@ -176,7 +182,8 @@ L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD); StoreInst *S = Builder.CreateStore(L, Dest); - // Alignment from the mem intrinsic will be better, so use it. + // Alignment from the mem intrinsic is necessary -- it may be less aligned + // than this integer size. S->setAlignment(CopyDstAlign); if (CopyMD) S->setMetadata(LLVMContext::MD_tbaa, CopyMD);