Add memmove to the libc memory-function benchmarks.

Registers a memmove multi-implementation benchmark and Google Benchmark
entry, introduces MemmoveFunction/MemmoveConfiguration and the MoveSetup
parameter batch, makes the automemcpy code generator emit an empty
getMemmoveConfigurations(), and replaces the memmove entrypoint object in
libc/src/string/CMakeLists.txt with per-architecture add_memmove()
implementations (including memmove_opt_host).

diff --git a/libc/benchmarks/CMakeLists.txt b/libc/benchmarks/CMakeLists.txt
--- a/libc/benchmarks/CMakeLists.txt
+++ b/libc/benchmarks/CMakeLists.txt
@@ -172,11 +172,12 @@
   endforeach()
 endfunction()
 
-add_libc_multi_impl_benchmark(memcpy)
-add_libc_multi_impl_benchmark(memset)
+add_libc_multi_impl_benchmark(bcmp)
 add_libc_multi_impl_benchmark(bzero)
 add_libc_multi_impl_benchmark(memcmp)
-add_libc_multi_impl_benchmark(bcmp)
+add_libc_multi_impl_benchmark(memcpy)
+add_libc_multi_impl_benchmark(memmove)
+add_libc_multi_impl_benchmark(memset)
 
 #==============================================================================
 # Google Benchmarking tool
@@ -199,6 +200,7 @@
   libc.src.string.memcpy_opt_host
   libc.src.string.memset_opt_host
   libc.src.string.bzero_opt_host
+  libc.src.string.memmove_opt_host
   benchmark_main
 )
 
diff --git a/libc/benchmarks/LibcDefaultImplementations.cpp b/libc/benchmarks/LibcDefaultImplementations.cpp
--- a/libc/benchmarks/LibcDefaultImplementations.cpp
+++ b/libc/benchmarks/LibcDefaultImplementations.cpp
@@ -5,6 +5,7 @@
 namespace __llvm_libc {
 
 extern void *memcpy(void *__restrict, const void *__restrict, size_t);
+extern void *memmove(void *, const void *, size_t);
 extern void *memset(void *, int, size_t);
 extern void bzero(void *, size_t);
 extern int memcmp(const void *, const void *, size_t);
@@ -17,6 +18,7 @@
 using llvm::libc_benchmarks::BzeroConfiguration;
 using llvm::libc_benchmarks::MemcmpOrBcmpConfiguration;
 using llvm::libc_benchmarks::MemcpyConfiguration;
+using llvm::libc_benchmarks::MemmoveConfiguration;
 using llvm::libc_benchmarks::MemsetConfiguration;
 
 llvm::ArrayRef<MemcpyConfiguration> getMemcpyConfigurations() {
@@ -24,6 +26,11 @@
       {__llvm_libc::memcpy, "__llvm_libc::memcpy"}};
   return llvm::makeArrayRef(kMemcpyConfigurations);
 }
+llvm::ArrayRef<MemmoveConfiguration> getMemmoveConfigurations() {
+  static constexpr MemmoveConfiguration kMemmoveConfigurations[] = {
+      {__llvm_libc::memmove, "__llvm_libc::memmove"}};
+  return llvm::makeArrayRef(kMemmoveConfigurations);
+}
 llvm::ArrayRef<MemcmpOrBcmpConfiguration> getMemcmpConfigurations() {
   static constexpr MemcmpOrBcmpConfiguration kMemcmpConfiguration[] = {
       {__llvm_libc::memcmp, "__llvm_libc::memcmp"}};
diff --git a/libc/benchmarks/LibcFunctionPrototypes.h b/libc/benchmarks/LibcFunctionPrototypes.h
--- a/libc/benchmarks/LibcFunctionPrototypes.h
+++ b/libc/benchmarks/LibcFunctionPrototypes.h
@@ -14,6 +14,12 @@
   llvm::StringRef Name;
 };
 
+using MemmoveFunction = void *(*)(void *, const void *, size_t);
+struct MemmoveConfiguration {
+  MemmoveFunction Function;
+  llvm::StringRef Name;
+};
+
 using MemsetFunction = void *(*)(void *, int, size_t);
 struct MemsetConfiguration {
   MemsetFunction Function;
diff --git a/libc/benchmarks/LibcMemoryBenchmark.h b/libc/benchmarks/LibcMemoryBenchmark.h
--- a/libc/benchmarks/LibcMemoryBenchmark.h
+++ b/libc/benchmarks/LibcMemoryBenchmark.h
@@ -206,6 +206,24 @@
   AlignedBuffer DstBuffer;
 };
 
+/// Provides source and destination buffers for the Move operation as well as
+/// the associated size distributions.
+struct MoveSetup : public ParameterBatch {
+  MoveSetup();
+
+  inline static const ArrayRef<MemorySizeDistribution> getDistributions() {
+    return getMemmoveSizeDistributions();
+  }
+
+  inline void *Call(ParameterType Parameter, MemmoveFunction Memmove) {
+    return Memmove(Buffer + ParameterBatch::BufferSize / 3,
+                   Buffer + Parameter.OffsetBytes, Parameter.SizeBytes);
+  }
+
+private:
+  AlignedBuffer Buffer;
+};
+
 /// Provides destination buffer for the Set operation as well as the associated
 /// size distributions.
 struct SetSetup : public ParameterBatch {
diff --git a/libc/benchmarks/LibcMemoryBenchmark.cpp b/libc/benchmarks/LibcMemoryBenchmark.cpp
--- a/libc/benchmarks/LibcMemoryBenchmark.cpp
+++ b/libc/benchmarks/LibcMemoryBenchmark.cpp
@@ -108,6 +108,9 @@
     : ParameterBatch(2), SrcBuffer(ParameterBatch::BufferSize),
       DstBuffer(ParameterBatch::BufferSize) {}
 
+MoveSetup::MoveSetup()
+    : ParameterBatch(3), Buffer(ParameterBatch::BufferSize * 3) {}
+
 ComparisonSetup::ComparisonSetup()
     : ParameterBatch(2), LhsBuffer(ParameterBatch::BufferSize),
       RhsBuffer(ParameterBatch::BufferSize) {
diff --git a/libc/benchmarks/LibcMemoryBenchmarkMain.cpp b/libc/benchmarks/LibcMemoryBenchmarkMain.cpp
--- a/libc/benchmarks/LibcMemoryBenchmarkMain.cpp
+++ b/libc/benchmarks/LibcMemoryBenchmarkMain.cpp
@@ -24,6 +24,7 @@
 namespace __llvm_libc {
 
 extern void *memcpy(void *__restrict, const void *__restrict, size_t);
+extern void *memmove(void *, const void *, size_t);
 extern void *memset(void *, int, size_t);
 extern void bzero(void *, size_t);
 extern int memcmp(const void *, const void *, size_t);
@@ -68,6 +69,9 @@
 #if defined(LIBC_BENCHMARK_FUNCTION_MEMCPY)
 #define LIBC_BENCHMARK_FUNCTION LIBC_BENCHMARK_FUNCTION_MEMCPY
 using BenchmarkSetup = CopySetup;
+#elif defined(LIBC_BENCHMARK_FUNCTION_MEMMOVE)
+#define LIBC_BENCHMARK_FUNCTION LIBC_BENCHMARK_FUNCTION_MEMMOVE
+using BenchmarkSetup = MoveSetup;
 #elif defined(LIBC_BENCHMARK_FUNCTION_MEMSET)
 #define LIBC_BENCHMARK_FUNCTION LIBC_BENCHMARK_FUNCTION_MEMSET
 using BenchmarkSetup = SetSetup;
diff --git a/libc/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp b/libc/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp
--- a/libc/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp
+++ b/libc/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp
@@ -17,8 +17,10 @@
 using llvm::libc_benchmarks::CopySetup;
 using llvm::libc_benchmarks::MemcmpOrBcmpConfiguration;
 using llvm::libc_benchmarks::MemcpyConfiguration;
+using llvm::libc_benchmarks::MemmoveConfiguration;
 using llvm::libc_benchmarks::MemorySizeDistribution;
 using llvm::libc_benchmarks::MemsetConfiguration;
+using llvm::libc_benchmarks::MoveSetup;
 using llvm::libc_benchmarks::OffsetDistribution;
 using llvm::libc_benchmarks::SetSetup;
 
@@ -94,6 +96,10 @@
 BENCHMARK_MEMORY_FUNCTION(BM_Memcpy, CopySetup, MemcpyConfiguration,
                           getMemcpyConfigurations());
 
+extern llvm::ArrayRef<MemmoveConfiguration> getMemmoveConfigurations();
+BENCHMARK_MEMORY_FUNCTION(BM_Memmove, MoveSetup, MemmoveConfiguration,
+                          getMemmoveConfigurations());
+
 extern llvm::ArrayRef<MemcmpOrBcmpConfiguration> getMemcmpConfigurations();
 BENCHMARK_MEMORY_FUNCTION(BM_Memcmp, ComparisonSetup, MemcmpOrBcmpConfiguration,
                           getMemcmpConfigurations());
diff --git a/libc/benchmarks/automemcpy/lib/CodeGen.cpp b/libc/benchmarks/automemcpy/lib/CodeGen.cpp
--- a/libc/benchmarks/automemcpy/lib/CodeGen.cpp
+++ b/libc/benchmarks/automemcpy/lib/CodeGen.cpp
@@ -548,6 +548,7 @@
   Stream << "using llvm::libc_benchmarks::BzeroConfiguration;\n";
   Stream << "using llvm::libc_benchmarks::MemcmpOrBcmpConfiguration;\n";
   Stream << "using llvm::libc_benchmarks::MemcpyConfiguration;\n";
+  Stream << "using llvm::libc_benchmarks::MemmoveConfiguration;\n";
   Stream << "using llvm::libc_benchmarks::MemsetConfiguration;\n";
   Stream << "\n";
   Stream << "namespace __llvm_libc {\n";
@@ -599,6 +600,11 @@
 }
 )";
   codegen::configurations::Serialize(Stream, FunctionType::BZERO, Descriptors);
+  Stream << R"(
+llvm::ArrayRef<MemmoveConfiguration> getMemmoveConfigurations() {
+  return {};
+}
+)";
   Stream << "// Functions : " << Descriptors.size() << "\n";
 }
 
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -37,17 +37,6 @@
     .string_utils
 )
 
-add_entrypoint_object(
-  memmove
-  SRCS
-    memmove.cpp
-  HDRS
-    memmove.h
-  DEPENDS
-    libc.src.__support.integer_operations
-    .memory_utils.memcpy_implementation
-)
-
 add_entrypoint_object(
   memrchr
   SRCS
@@ -404,6 +393,42 @@
   add_memcpy(memcpy)
 endif()
 
+# ------------------------------------------------------------------------------
+# memmove
+# ------------------------------------------------------------------------------
+
+function(add_memmove memmove_name)
+  add_implementation(memmove ${memmove_name}
+    SRCS ${LIBC_SOURCE_DIR}/src/string/memmove.cpp
+    HDRS ${LIBC_SOURCE_DIR}/src/string/memmove.h
+    DEPENDS
+      .memory_utils.memory_utils
+      .memory_utils.memcpy_implementation
+      libc.include.string
+    COMPILE_OPTIONS
+      -fno-builtin
+      ${ARGN}
+  )
+endfunction()
+
+if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
+  add_memmove(memmove_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
+  add_memmove(memmove_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
+  add_memmove(memmove_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
+  add_memmove(memmove_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
+  add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
+  add_memmove(memmove)
+elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
+  # Disable tail merging as it leads to lower performance.
+  # Note that '-mllvm' needs to be prefixed with 'SHELL:' to prevent CMake flag deduplication.
+  add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
+                               COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
+  add_memmove(memmove COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
+else()
+  add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
+  add_memmove(memmove)
+endif()
+
 # ------------------------------------------------------------------------------
 # memset
 # ------------------------------------------------------------------------------