[SimplifyLibCalls] Fold mempcpy(x, y, n) into llvm.memcpy(x, y, n) and return x + n.

Declares and defines LibCallSimplifier::optimizeMemPCpy, dispatches to it for
LibFunc_mempcpy, and updates the CHECK lines in
test/Transforms/InstCombine/mempcpy.ll to expect the memcpy intrinsic (or the
load/store pair for the 8-byte case) followed by an inbounds GEP.

NOTE(review): this copy of the patch had lost its line breaks and indentation;
they were rebuilt from the hunk line counts and the usual two-space
indentation. Spacing inside lines is kept as found and may differ from the
tree (for example after "CHECK-NEXT:"), so confirm that the context and
removed lines match before applying.

Index: include/llvm/Transforms/Utils/SimplifyLibCalls.h
===================================================================
--- include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -168,6 +168,7 @@
   Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeMemPCpy(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B);
   Value *optimizeRealloc(CallInst *CI, IRBuilder<> &B);
Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -962,6 +962,14 @@
   return CI->getArgOperand(0);
 }
 
+Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) {
+  Value *Dst = CI->getArgOperand(0);
+  Value *N = CI->getArgOperand(2);
+  // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
+  B.CreateMemCpy(Dst, 1, CI->getArgOperand(1), 1, N);
+  return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
+}
+
 Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
   // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
   B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
@@ -2579,6 +2587,8 @@
       return optimizeMemCmp(CI, Builder);
     case LibFunc_memcpy:
       return optimizeMemCpy(CI, Builder);
+    case LibFunc_mempcpy:
+      return optimizeMemPCpy(CI, Builder);
     case LibFunc_memmove:
       return optimizeMemMove(CI, Builder);
     case LibFunc_memset:
Index: test/Transforms/InstCombine/mempcpy.ll
===================================================================
--- test/Transforms/InstCombine/mempcpy.ll
+++ test/Transforms/InstCombine/mempcpy.ll
@@ -3,8 +3,9 @@
 
 define i8* @memcpy_nonconst_n(i8* %d, i8* nocapture readonly %s, i64 %n) {
 ; CHECK-LABEL: @memcpy_nonconst_n(
-; CHECK-NEXT: [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 [[N:%.*]])
-; CHECK-NEXT: ret i8* [[R]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 [[N:%.*]], i1 false)
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 [[N]]
+; CHECK-NEXT: ret i8* [[TMP1]]
 ;
   %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 %n)
   ret i8* %r
@@ -12,8 +13,12 @@
 
 define i8* @memcpy_small_const_n(i8* %d, i8* nocapture readonly %s) {
 ; CHECK-LABEL: @memcpy_small_const_n(
-; CHECK-NEXT: [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 8)
-; CHECK-NEXT: ret i8* [[R]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i64*
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i64*
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
+; CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP2]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 8
+; CHECK-NEXT: ret i8* [[TMP4]]
 ;
   %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 8)
   ret i8* %r
@@ -21,8 +26,9 @@
 
 define i8* @memcpy_big_const_n(i8* %d, i8* nocapture readonly %s) {
 ; CHECK-LABEL: @memcpy_big_const_n(
-; CHECK-NEXT: [[R:%.*]] = tail call i8* @mempcpy(i8* [[D:%.*]], i8* [[S:%.*]], i64 1024)
-; CHECK-NEXT: ret i8* [[R]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[D:%.*]], i8* align 1 [[S:%.*]], i64 1024, i1 false)
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[D]], i64 1024
+; CHECK-NEXT: ret i8* [[TMP1]]
 ;
   %r = tail call i8* @mempcpy(i8* %d, i8* %s, i64 1024)
   ret i8* %r