Index: llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -167,6 +167,7 @@
   Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B);
   Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeMemPCpy(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
   Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
   Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
Index: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -998,6 +998,21 @@
   return CI->getArgOperand(0);
 }
 
+/// Fold mempcpy(x, y, n) into an llvm.memcpy of n bytes followed by a GEP
+/// computing x + n, which replaces the value of the original call.
+Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) {
+  Value *Dst = CI->getArgOperand(0);
+  Value *N = CI->getArgOperand(2);
+  // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
+  CallInst *NewCI = B.CreateMemCpy(Dst, 1, CI->getArgOperand(1), 1, N);
+  // Carry over the call-site attributes, minus the return attributes: the
+  // memcpy intrinsic returns void, so attributes written for mempcpy's pointer
+  // result (e.g. nonnull, noalias) would make the new call invalid IR.
+  NewCI->setAttributes(CI->getAttributes().removeAttributes(
+      CI->getContext(), AttributeList::ReturnIndex));
+  return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
+}
+
 Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B, bool isIntrinsic) {
   Value *Size = CI->getArgOperand(2);
   if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
@@ -2624,6 +2639,8 @@
       return optimizeMemCmp(CI, Builder);
     case LibFunc_memcpy:
       return optimizeMemCpy(CI, Builder);
+    case LibFunc_mempcpy:
+      return optimizeMemPCpy(CI, Builder);
     case LibFunc_memmove:
       return optimizeMemMove(CI, Builder);
     case LibFunc_memset:
Index: llvm/trunk/test/Transforms/InstCombine/mempcpy.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/mempcpy.ll
+++ llvm/trunk/test/Transforms/InstCombine/mempcpy.ll
@@ -3,17 +3,41 @@
 
 define i8* @memcpy_nonconst_n(i8* %d, i8* nocapture readonly %s, i64 %n) {
 ; CHECK-LABEL: @memcpy_nonconst_n(
-; CHECK-NEXT:    [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 [[N:%.*]])
-; CHECK-NEXT:    ret i8* [[R]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
+; CHECK-NEXT:    ret i8* [[TMP1]]
 ;
   %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
   ret i8* %r
 }
 
+define i8* @memcpy_nonconst_n_copy_attrs(i8* %d, i8* nocapture readonly %s, i64 %n) {
+; CHECK-LABEL: @memcpy_nonconst_n_copy_attrs(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(16) [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
+; CHECK-NEXT:    ret i8* [[TMP1]]
+;
+  %r = tail call i8* @mempcpy(i8* dereferenceable(16) %d, i8* %s, i64 %n)
+  ret i8* %r
+}
+
+define void @memcpy_nonconst_n_unused_retval(i8* %d, i8* nocapture readonly %s, i64 %n) {
+; CHECK-LABEL: @memcpy_nonconst_n_unused_retval(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
+  ret void
+}
+
 define i8* @memcpy_small_const_n(i8* %d, i8* nocapture readonly %s) {
 ; CHECK-LABEL: @memcpy_small_const_n(
-; CHECK-NEXT:    [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 8)
-; CHECK-NEXT:    ret i8* [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i64*
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; CHECK-NEXT:    store i64 [[TMP3]], i64* [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 8
+; CHECK-NEXT:    ret i8* [[TMP4]]
 ;
   %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 8)
   ret i8* %r
@@ -21,8 +45,9 @@
 
 define i8* @memcpy_big_const_n(i8* %d, i8* nocapture readonly %s) {
 ; CHECK-LABEL: @memcpy_big_const_n(
-; CHECK-NEXT:    [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 1024)
-; CHECK-NEXT:    ret i8* [[R]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(1024) [[D:%.*]], i8* align 1 dereferenceable(1024) [[S:%.*]], i64 1024, i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 1024
+; CHECK-NEXT:    ret i8* [[TMP1]]
 ;
   %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 1024)
   ret i8* %r