diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -56,7 +56,7 @@
     SRCS ${ADD_IMPL_SRCS}
     HDRS ${ADD_IMPL_HDRS}
     DEPENDS ${ADD_IMPL_DEPENDS}
-    COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS} ${flags}
+    COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS} ${flags} -O2
   )
   get_fq_target_name(${impl_name} fq_target_name)
   set_target_properties(${fq_target_name} PROPERTIES REQUIRE_CPU_FEATURES "${ADD_IMPL_REQUIRE}")
@@ -70,13 +70,17 @@
 # include the relevant architecture specific implementations
 if(${LIBC_TARGET_MACHINE} STREQUAL "x86_64")
   set(LIBC_STRING_TARGET_ARCH "x86")
+  # x86_64 builds compile the memcpy source kept under src/string/x86.
+  set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/x86/memcpy.cpp)
 else()
   set(LIBC_STRING_TARGET_ARCH ${LIBC_TARGET_MACHINE})
+  # Every other target compiles src/string/memcpy.cpp.
+  set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp)
 endif()
 
 function(add_memcpy memcpy_name)
   add_implementation(memcpy ${memcpy_name}
-    SRCS ${LIBC_SOURCE_DIR}/src/string/${LIBC_STRING_TARGET_ARCH}/memcpy.cpp
+    SRCS ${MEMCPY_SRC}
     HDRS ${LIBC_SOURCE_DIR}/src/string/memcpy.h
     DEPENDS
       .memory_utils.memory_utils
@@ -87,7 +91,12 @@
   )
 endfunction()
 
-add_memcpy(memcpy MARCH native)
+# Only the x86 configuration passes MARCH native; other targets omit MARCH.
+if("${LIBC_STRING_TARGET_ARCH}" STREQUAL "x86")
+  add_memcpy(memcpy MARCH native)
+else()
+  add_memcpy(memcpy)
+endif()
 
 # ------------------------------------------------------------------------------
 # memset
@@ -106,7 +115,12 @@
   )
 endfunction()
 
-add_memset(memset MARCH native)
+# Only the x86 configuration passes MARCH native; other targets omit MARCH.
+if("${LIBC_STRING_TARGET_ARCH}" STREQUAL "x86")
+  add_memset(memset MARCH native)
+else()
+  add_memset(memset)
+endif()
 
 # ------------------------------------------------------------------------------
 # bzero
@@ -126,7 +140,12 @@
   )
 endfunction()
 
-add_bzero(bzero MARCH native)
+# Only the x86 configuration passes MARCH native; other targets omit MARCH.
+if("${LIBC_STRING_TARGET_ARCH}" STREQUAL "x86")
+  add_bzero(bzero MARCH native)
+else()
+  add_bzero(bzero)
+endif()
 
 # ------------------------------------------------------------------------------
 # Add all other relevant implementations for the native target.
diff --git a/libc/src/string/memcpy.cpp b/libc/src/string/memcpy.cpp new file mode 100644 --- /dev/null +++ b/libc/src/string/memcpy.cpp @@ -0,0 +1,69 @@ +//===-- Implementation of memcpy ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memcpy.h" +#include "src/__support/common.h" +#include "src/string/memory_utils/memcpy_utils.h" + +namespace __llvm_libc { + +// Design rationale +// ================ +// +// Using a profiler to observe size distributions for calls into libc +// functions, it was found most operations act on a small number of bytes. +// This makes it important to favor small sizes. +// +// The tests for `count` are in ascending order so the cost of branching is +// proportional to the cost of copying. +// +// The function is written in C++ for several reasons: +// - The compiler can __see__ the code; this is useful when performing Profile +// Guided Optimization as the optimized code can take advantage of branching +// probabilities. +// - It also allows for easier customization and favors testing multiple +// implementation parameters. +// - As compilers and processors get better, the generated code is improved +// with little change on the code side. 
+static void memcpy_impl(char *__restrict dst, const char *__restrict src,
+                        size_t count) {
+  if (count == 0)
+    return;
+  if (count == 1)
+    return CopyBlock<1>(dst, src);
+  if (count == 2)
+    return CopyBlock<2>(dst, src);
+  if (count == 3)
+    return CopyBlock<3>(dst, src);
+  if (count == 4)
+    return CopyBlock<4>(dst, src);
+  if (count < 8) // count is 5, 6 or 7 here.
+    return CopyBlockOverlap<4>(dst, src, count);
+  if (count == 8)
+    return CopyBlock<8>(dst, src);
+  if (count < 16) // count is in [9, 15] here.
+    return CopyBlockOverlap<8>(dst, src, count);
+  if (count == 16)
+    return CopyBlock<16>(dst, src);
+  if (count < 32) // count is in [17, 31] here.
+    return CopyBlockOverlap<16>(dst, src, count);
+  if (count < 64) // count is in [32, 63] here.
+    return CopyBlockOverlap<32>(dst, src, count);
+  if (count < 128) // count is in [64, 127] here.
+    return CopyBlockOverlap<64>(dst, src, count);
+  return CopyAlignedBlocks<32>(dst, src, count); // count >= 128.
+}
+
+void *LLVM_LIBC_ENTRYPOINT(memcpy)(void *__restrict dst,
+                                   const void *__restrict src, size_t size) {
+  memcpy_impl(reinterpret_cast<char *>(dst), // Cast to memcpy_impl's types.
+              reinterpret_cast<const char *>(src), size);
+  return dst;
+}
+
+} // namespace __llvm_libc