# Patch summary (free-form preamble; patch tools skip text before the first
# "Index:" / "---" header).
#
# lib/IR/IRBuilder.cpp
#   Six hunks each add `CI->setTailCall(true);` immediately after the
#   `createCallHelper(TheFn, Ops, this)` call that builds the intrinsic call
#   for: memset, memset_element_unordered_atomic, memcpy,
#   memcpy_element_unordered_atomic, memmove, memmove_element_unordered_atomic.
#   The flag is set unconditionally in every hunk; no hunk inspects the
#   pointer operands before setting it.
#
# test/CodeGen/X86/memset-nonzero.ll
#   SSE expectations for memset_256_nonzero_bytes change from a
#   pushq / callq memset / popq / retq sequence to a single `jmp memset`.
#
# test/Transforms/MemCpyOpt/enable-tail-call.ll (new file)
#   Runs `opt -memcpyopt -S` over three functions (copy, noaliassrc,
#   destroysrc) and checks that the resulting llvm.memmove / llvm.memcpy /
#   llvm.memset calls are emitted as `tail call`.
#
# NOTE(review): per the LLVM Language Reference, the `tail` marker implies the
#   callee does not access allocas in the caller. None of the tests added here
#   pass a pointer derived from a caller alloca, so that case is not covered;
#   confirm that marking these calls `tail` unconditionally is sound for
#   callers that memset/memcpy/memmove stack objects.
# NOTE(review): this copy of the patch appears to have lost its line breaks
#   (hunk headers and hunk bodies share a line) and its angle-bracketed
#   template arguments (e.g. `cast(CI)`); it presumably will not apply as-is.
#   Regenerate it from version control before use.
Index: lib/IR/IRBuilder.cpp =================================================================== --- lib/IR/IRBuilder.cpp +++ lib/IR/IRBuilder.cpp @@ -107,6 +107,7 @@ Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); + CI->setTailCall(true); if (Align > 0) cast(CI)->setDestAlignment(Align); @@ -138,6 +139,7 @@ M, Intrinsic::memset_element_unordered_atomic, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); + CI->setTailCall(true); cast(CI)->setDestAlignment(Align); @@ -169,6 +171,7 @@ Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); + CI->setTailCall(true); auto* MCI = cast(CI); if (DstAlign > 0) @@ -211,6 +214,7 @@ M, Intrinsic::memcpy_element_unordered_atomic, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); + CI->setTailCall(true); // Set the alignment of the pointer args. auto *AMCI = cast(CI); @@ -249,6 +253,7 @@ Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); + CI->setTailCall(true); auto *MMI = cast(CI); if (DstAlign > 0) @@ -287,6 +292,7 @@ M, Intrinsic::memmove_element_unordered_atomic, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); + CI->setTailCall(true); // Set the alignment of the pointer args. 
CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), DstAlign)); Index: test/CodeGen/X86/memset-nonzero.ll =================================================================== --- test/CodeGen/X86/memset-nonzero.ll +++ test/CodeGen/X86/memset-nonzero.ll @@ -142,14 +142,9 @@ define void @memset_256_nonzero_bytes(i8* %x) { ; SSE-LABEL: memset_256_nonzero_bytes: ; SSE: # %bb.0: -; SSE-NEXT: pushq %rax -; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movl $256, %edx # imm = 0x100 ; SSE-NEXT: movl $42, %esi -; SSE-NEXT: callq memset -; SSE-NEXT: popq %rax -; SSE-NEXT: .cfi_def_cfa_offset 8 -; SSE-NEXT: retq +; SSE-NEXT: jmp memset ; ; SSE2FAST-LABEL: memset_256_nonzero_bytes: ; SSE2FAST: # %bb.0: Index: test/Transforms/MemCpyOpt/enable-tail-call.ll =================================================================== --- /dev/null +++ test/Transforms/MemCpyOpt/enable-tail-call.ll @@ -0,0 +1,40 @@ +; RUN: opt -memcpyopt -S < %s | FileCheck %s + +target datalayout = "e-i64:64-f80:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%S = type { i8*, i8, i32 } + +define void @copy(%S* %src, %S* %dst) { +; CHECK-LABEL: copy +; CHECK-NOT: load +; CHECK: tail call void @llvm.memmove.p0i8.p0i8.i64 +; CHECK-NEXT: ret void + %1 = load %S, %S* %src + store %S %1, %S* %dst + ret void +} + +define void @noaliassrc(%S* noalias %src, %S* %dst) { +; CHECK-LABEL: noaliassrc +; CHECK-NOT: load +; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NEXT: ret void + %1 = load %S, %S* %src + store %S %1, %S* %dst + ret void +} + + +define void @destroysrc(%S* %src, %S* %dst) { +; CHECK-LABEL: destroysrc +; CHECK: load %S, %S* %src +; CHECK: tail call void @llvm.memset.p0i8.i64 +; CHECK-NEXT: store %S %1, %S* %dst +; CHECK-NEXT: ret void + %1 = load %S, %S* %src + store %S zeroinitializer, %S* %src + store %S %1, %S* %dst + ret void +} +