diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt
--- a/libc/lib/CMakeLists.txt
+++ b/libc/lib/CMakeLists.txt
@@ -23,9 +23,11 @@
     libc.src.stdlib.abort
 
     # string.h entrypoints
+    libc.src.string.bzero
     libc.src.string.memcpy
-    libc.src.string.strcpy
+    libc.src.string.memset
     libc.src.string.strcat
+    libc.src.string.strcpy
     libc.src.string.strlen
 
     # sys/mman.h entrypoints
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -34,15 +34,43 @@
     libc.include.string
 )
 
+# Helper to define a function with multiple implementations
+# - Computes flags to satisfy required/rejected features and arch,
+# - Declares an entry point,
+# - Attaches the REQUIRE_CPU_FEATURES property to the target,
+# - Adds the fully qualified target to `${name}_implementations` global property for tests.
+function(add_implementation name impl_name)
+  cmake_parse_arguments(
+    "ADD_IMPL"
+    "" # Optional arguments
+    "MARCH" # Single value arguments
+    "REQUIRE;REJECT;SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi value arguments
+    ${ARGN})
+  compute_flags(flags
+    MARCH ${ADD_IMPL_MARCH}
+    REQUIRE ${ADD_IMPL_REQUIRE}
+    REJECT ${ADD_IMPL_REJECT}
+  )
+  add_entrypoint_object(${impl_name}
+    SRCS ${ADD_IMPL_SRCS}
+    HDRS ${ADD_IMPL_HDRS}
+    DEPENDS ${ADD_IMPL_DEPENDS}
+    COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS} ${flags}
+  )
+  get_fq_target_name(${impl_name} fq_target_name)
+  set_target_properties(${fq_target_name} PROPERTIES REQUIRE_CPU_FEATURES "${ADD_IMPL_REQUIRE}")
+  set_property(GLOBAL APPEND PROPERTY "${name}_implementations" "${fq_target_name}")
+endfunction()
+
 # ------------------------------------------------------------------------------
 # memcpy
 # ------------------------------------------------------------------------------
 
 # include the relevant architecture specific implementations
 if(${LIBC_TARGET_MACHINE} STREQUAL "x86_64")
-  set(LIBC_MEMCPY_IMPL_FOLDER "x86")
+  set(LIBC_STRING_TARGET_FOLDER "x86")
 else()
-  set(LIBC_MEMCPY_IMPL_FOLDER ${LIBC_TARGET_MACHINE})
+  set(LIBC_STRING_TARGET_FOLDER ${LIBC_TARGET_MACHINE})
 endif()
 
 add_gen_header(
@@ -52,30 +80,13 @@
   GEN_HDR
     memcpy_arch_specific.h
   PARAMS
-    memcpy_arch_specific=${LIBC_MEMCPY_IMPL_FOLDER}/memcpy_arch_specific.h.inc
+    memcpy_arch_specific=${LIBC_STRING_TARGET_FOLDER}/memcpy_arch_specific.h.inc
   DATA_FILES
-    ${LIBC_MEMCPY_IMPL_FOLDER}/memcpy_arch_specific.h.inc
+    ${LIBC_STRING_TARGET_FOLDER}/memcpy_arch_specific.h.inc
 )
 
-# Helper to define an implementation of memcpy.
-# - Computes flags to satisfy required/rejected features and arch,
-# - Declares an entry point,
-# - Attach the REQUIRE_CPU_FEATURES property to the target,
-# - Add the target to `memcpy_implementations` global property for tests.
 function(add_memcpy memcpy_name)
-  cmake_parse_arguments(
-    "ADD_MEMCPY"
-    "" # Optional arguments
-    "MARCH" # Single value arguments
-    "REQUIRE;REJECT" # Multi value arguments
-    ${ARGN})
-  compute_flags(flags
-    MARCH ${ADD_MEMCPY_MARCH}
-    REQUIRE ${ADD_MEMCPY_REQUIRE}
-    REJECT ${ADD_MEMCPY_REJECT}
-  )
-  add_entrypoint_object(
-    ${memcpy_name}
+  add_implementation(memcpy ${memcpy_name}
     SRCS ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp
     HDRS ${LIBC_SOURCE_DIR}/src/string/memcpy.h
     DEPENDS
@@ -84,14 +95,53 @@
       libc.include.string
     COMPILE_OPTIONS
       -fno-builtin-memcpy
-      ${flags}
+      ${ARGN}
   )
-  get_fq_target_name(${memcpy_name} fq_target_name)
-  set_target_properties(${fq_target_name} PROPERTIES REQUIRE_CPU_FEATURES "${ADD_MEMCPY_REQUIRE}")
-  get_property(all GLOBAL PROPERTY memcpy_implementations)
-  list(APPEND all ${memcpy_name})
-  set_property(GLOBAL PROPERTY memcpy_implementations "${all}")
 endfunction()
 
-include(${LIBC_MEMCPY_IMPL_FOLDER}/CMakeLists.txt)
 add_memcpy(memcpy MARCH native)
+
+# ------------------------------------------------------------------------------
+# memset
+# ------------------------------------------------------------------------------
+
+function(add_memset memset_name)
+  add_implementation(memset ${memset_name}
+    SRCS ${LIBC_SOURCE_DIR}/src/string/memset.cpp
+    HDRS ${LIBC_SOURCE_DIR}/src/string/memset.h
+    DEPENDS
+      .memory_utils.memory_utils
+      libc.include.string
+    COMPILE_OPTIONS
+      -fno-builtin-memset
+      ${ARGN}
+  )
+endfunction()
+
+add_memset(memset MARCH native)
+
+# ------------------------------------------------------------------------------
+# bzero
+# ------------------------------------------------------------------------------
+
+function(add_bzero bzero_name)
+  add_implementation(bzero ${bzero_name}
+    SRCS ${LIBC_SOURCE_DIR}/src/string/bzero.cpp
+    HDRS ${LIBC_SOURCE_DIR}/src/string/bzero.h
+    DEPENDS
+      .memory_utils.memory_utils
+      libc.include.string
+    COMPILE_OPTIONS
+      -fno-builtin-memset
+      -fno-builtin-bzero
+      ${ARGN}
+  )
+endfunction()
+
+add_bzero(bzero MARCH native)
+
+# ------------------------------------------------------------------------------
+# Add all other relevant implementations for the native target.
+# ------------------------------------------------------------------------------
+
+include(${LIBC_STRING_TARGET_FOLDER}/CMakeLists.txt)
diff --git a/libc/src/string/bzero.h b/libc/src/string/bzero.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/bzero.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for bzero -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_BZERO_H
+#define LLVM_LIBC_SRC_STRING_BZERO_H
+
+#include "include/string.h"
+
+namespace __llvm_libc {
+
+void bzero(void *ptr, size_t count);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_BZERO_H
diff --git a/libc/src/string/bzero.cpp b/libc/src/string/bzero.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/string/bzero.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of bzero -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/bzero.h"
+#include "src/__support/common.h"
+#include "src/string/memory_utils/memset_utils.h"
+
+namespace __llvm_libc {
+
+void LLVM_LIBC_ENTRYPOINT(bzero)(void *ptr, size_t count) {
+  GeneralPurposeMemset(reinterpret_cast<char *>(ptr), 0, count);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -15,6 +15,7 @@
   HDRS
     utils.h
     memcpy_utils.h
+    memset_utils.h
   DEPENDS
     .cacheline_size
 )
diff --git a/libc/src/string/memory_utils/memset_utils.h b/libc/src/string/memory_utils/memset_utils.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/memset_utils.h
@@ -0,0 +1,100 @@
+//===-- Memset utils --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MEMORY_UTILS_MEMSET_UTILS_H
+#define LLVM_LIBC_SRC_MEMORY_UTILS_MEMSET_UTILS_H
+
+#include "src/string/memory_utils/utils.h"
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+// Sets `kBlockSize` bytes starting from `dst` to `value`.
+template <size_t kBlockSize> static void SetBlock(char *dst, unsigned value) {
+  __builtin_memset(dst, value, kBlockSize);
+}
+
+// Sets `kBlockSize` bytes from `dst + count - kBlockSize` to `value`.
+// Precondition: `count >= kBlockSize`.
+template <size_t kBlockSize>
+static void SetLastBlock(char *dst, unsigned value, size_t count) {
+  const size_t offset = count - kBlockSize;
+  SetBlock<kBlockSize>(dst + offset, value);
+}
+
+// Sets `kBlockSize` bytes twice with an overlap between the two.
+//
+// [1234567812345678123]
+// [__XXXXXXXXXXXXXX___]
+// [__XXXXXXXX_________]
+// [________XXXXXXXX___]
+//
+// Precondition: `count >= kBlockSize && count <= 2 * kBlockSize`.
+template <size_t kBlockSize>
+static void SetBlockOverlap(char *dst, unsigned value, size_t count) {
+  SetBlock<kBlockSize>(dst, value);
+  SetLastBlock<kBlockSize>(dst, value, count);
+}
+
+// Sets `count` bytes by blocks of `kBlockSize` bytes.
+// Sets at the start and end of the buffer are unaligned.
+// Sets in the middle of the buffer are aligned to `kBlockSize`.
+//
+// e.g. with
+// [12345678123456781234567812345678]
+// [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___]
+// [__XXXXXXXX______________________]
+// [________XXXXXXXX________________]
+// [________________XXXXXXXX________]
+// [_____________________XXXXXXXX___]
+//
+// Precondition: `count > 2 * kBlockSize` for efficiency.
+//               `count >= kBlockSize` for correctness.
+template <size_t kBlockSize>
+static void SetAlignedBlocks(char *dst, unsigned value, size_t count) {
+  SetBlock<kBlockSize>(dst, value); // Set first block
+
+  // Set aligned blocks
+  size_t offset = kBlockSize - offset_from_last_aligned<kBlockSize>(dst);
+  for (; offset + kBlockSize < count; offset += kBlockSize)
+    SetBlock<kBlockSize>(dst + offset, value);
+
+  SetLastBlock<kBlockSize>(dst, value, count); // Set last block
+}
+
+// A general purpose implementation assuming cheap unaligned writes for sizes:
+// 1, 2, 4, 8, 16, 32 and 64 Bytes.
+inline static void GeneralPurposeMemset(char *dst, unsigned char value,
+                                        size_t count) {
+  if (count == 0)
+    return;
+  if (count == 1)
+    return SetBlock<1>(dst, value);
+  if (count == 2)
+    return SetBlock<2>(dst, value);
+  if (count == 3)
+    return SetBlock<3>(dst, value);
+  if (count == 4)
+    return SetBlock<4>(dst, value);
+  if (count <= 8)
+    return SetBlockOverlap<4>(dst, value, count);
+  if (count <= 16)
+    return SetBlockOverlap<8>(dst, value, count);
+  if (count <= 32)
+    return SetBlockOverlap<16>(dst, value, count);
+  if (count <= 64)
+    return SetBlockOverlap<32>(dst, value, count);
+  if (count <= 128)
+    return SetBlockOverlap<64>(dst, value, count);
+  return SetAlignedBlocks<32>(dst, value, count);
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MEMORY_UTILS_MEMSET_UTILS_H
diff --git a/libc/src/string/memset.h b/libc/src/string/memset.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memset.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for memset ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMSET_H
+#define LLVM_LIBC_SRC_STRING_MEMSET_H
+
+#include "include/string.h"
+
+namespace __llvm_libc {
+
+void *memset(void *ptr, int value, size_t count);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMSET_H
diff --git a/libc/src/string/memset.cpp b/libc/src/string/memset.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memset.cpp
@@ -0,0 +1,23 @@
+//===-- Implementation of memset ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/memset.h"
+#include "src/__support/common.h"
+#include "src/string/memory_utils/memset_utils.h"
+
+namespace __llvm_libc {
+
+void *LLVM_LIBC_ENTRYPOINT(memset)(void *dst, int value, size_t count) {
+  // The value is passed as an int, but the function fills the block of memory
+  // using the `unsigned char` conversion of this value.
+  GeneralPurposeMemset(reinterpret_cast<char *>(dst),
+                       static_cast<unsigned char>(value), count);
+  return dst;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/string/x86/CMakeLists.txt b/libc/src/string/x86/CMakeLists.txt
--- a/libc/src/string/x86/CMakeLists.txt
+++ b/libc/src/string/x86/CMakeLists.txt
@@ -2,3 +2,13 @@
 add_memcpy("memcpy_${LIBC_TARGET_MACHINE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
 add_memcpy("memcpy_${LIBC_TARGET_MACHINE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
 add_memcpy("memcpy_${LIBC_TARGET_MACHINE}_opt_avx512f" REQUIRE "AVX512F")
+
+add_memset("memset_${LIBC_TARGET_MACHINE}_opt_none" REJECT "${ALL_CPU_FEATURES}")
+add_memset("memset_${LIBC_TARGET_MACHINE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
+add_memset("memset_${LIBC_TARGET_MACHINE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
+add_memset("memset_${LIBC_TARGET_MACHINE}_opt_avx512f" REQUIRE "AVX512F")
+
+add_bzero("bzero_${LIBC_TARGET_MACHINE}_opt_none" REJECT "${ALL_CPU_FEATURES}")
+add_bzero("bzero_${LIBC_TARGET_MACHINE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
+add_bzero("bzero_${LIBC_TARGET_MACHINE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
+add_bzero("bzero_${LIBC_TARGET_MACHINE}_opt_avx512f" REQUIRE "AVX512F")
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -32,23 +32,28 @@
     libc.src.string.strlen
 )
 
-# Tests all implementations of memcpy that can run on the host.
-get_property(memcpy_implementations GLOBAL PROPERTY memcpy_implementations)
-foreach(memcpy_config_name IN LISTS memcpy_implementations)
-  get_target_property(require_cpu_features libc.src.string.${memcpy_config_name} REQUIRE_CPU_FEATURES)
-  host_supports(can_run "${require_cpu_features}")
-  if(can_run)
-    add_libc_unittest(
-      ${memcpy_config_name}_test
-      SUITE
-        libc_string_unittests
-      SRCS
-        memcpy_test.cpp
-      DEPENDS
-        libc.src.string.${memcpy_config_name}
-    )
-  else()
-    message(STATUS "Skipping test for '${memcpy_config_name}' insufficient host cpu features")
-  endif()
-endforeach()
+# Tests all implementations that can run on the host.
+function(add_libc_multi_impl_test name)
+  get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)
+  foreach(fq_config_name IN LISTS fq_implementations)
+    get_target_property(required_cpu_features ${fq_config_name} REQUIRE_CPU_FEATURES)
+    host_supports(can_run "${required_cpu_features}")
+    if(can_run)
+      add_libc_unittest(
+        ${fq_config_name}_test
+        SUITE
+          libc_string_unittests
+        DEPENDS
+          ${fq_config_name}
+        ${ARGN}
+      )
+    else()
+      message(STATUS "Skipping test for '${fq_config_name}' insufficient host cpu features '${required_cpu_features}'")
+    endif()
+  endforeach()
+endfunction()
+
+add_libc_multi_impl_test(memcpy SRCS memcpy_test.cpp)
+add_libc_multi_impl_test(memset SRCS memset_test.cpp)
+add_libc_multi_impl_test(bzero SRCS bzero_test.cpp)
diff --git a/libc/test/src/string/memcpy_test.cpp b/libc/test/src/string/bzero_test.cpp
copy from libc/test/src/string/memcpy_test.cpp
copy to libc/test/src/string/bzero_test.cpp
--- a/libc/test/src/string/memcpy_test.cpp
+++ b/libc/test/src/string/bzero_test.cpp
@@ -1,4 +1,4 @@
-//===----------------------- Unittests for memcpy -------------------------===//
+//===----------------------- Unittests for bzero --------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,16 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/string/bzero.h"
 #include "utils/CPP/ArrayRef.h"
 #include "utils/UnitTest/Test.h"
-#include "src/string/memcpy.h"
 
 using __llvm_libc::cpp::Array;
 using __llvm_libc::cpp::ArrayRef;
-using __llvm_libc::cpp::MutableArrayRef;
 using Data = Array;
 
-static const ArrayRef kNumbers("0123456789", 10);
 static const ArrayRef kDeadcode("DEADC0DE", 8);
 
 // Returns a Data object filled with a repetition of `filler`.
@@ -26,21 +24,19 @@
   return out;
 }
 
-TEST(MemcpyTest, Thorough) {
-  const Data groundtruth = getData(kNumbers);
+TEST(BzeroTest, Thorough) {
   const Data dirty = getData(kDeadcode);
   for (size_t count = 0; count < 1024; ++count) {
     for (size_t align = 0; align < 64; ++align) {
       auto buffer = dirty;
-      const char *const src = groundtruth.data();
       char *const dst = &buffer[align];
-      __llvm_libc::memcpy(dst, src, count);
+      __llvm_libc::bzero(dst, count);
       // Everything before copy is untouched.
       for (size_t i = 0; i < align; ++i)
         ASSERT_EQ(buffer[i], dirty[i]);
       // Everything in between is copied.
       for (size_t i = 0; i < count; ++i)
-        ASSERT_EQ(buffer[align + i], groundtruth[i]);
+        ASSERT_EQ(buffer[align + i], char(0));
       // Everything after copy is untouched.
       for (size_t i = align + count; i < dirty.size(); ++i)
         ASSERT_EQ(buffer[i], dirty[i]);
diff --git a/libc/test/src/string/memcpy_test.cpp b/libc/test/src/string/memcpy_test.cpp
--- a/libc/test/src/string/memcpy_test.cpp
+++ b/libc/test/src/string/memcpy_test.cpp
@@ -6,13 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/string/memcpy.h"
 #include "utils/CPP/ArrayRef.h"
 #include "utils/UnitTest/Test.h"
-#include "src/string/memcpy.h"
 
 using __llvm_libc::cpp::Array;
 using __llvm_libc::cpp::ArrayRef;
-using __llvm_libc::cpp::MutableArrayRef;
 using Data = Array;
 
 static const ArrayRef kNumbers("0123456789", 10);
@@ -34,7 +33,9 @@
       auto buffer = dirty;
       const char *const src = groundtruth.data();
       char *const dst = &buffer[align];
-      __llvm_libc::memcpy(dst, src, count);
+      const void *const ret = __llvm_libc::memcpy(dst, src, count);
+      // Return value is `dst`.
+      ASSERT_EQ(ret, (const void *)dst);
       // Everything before copy is untouched.
       for (size_t i = 0; i < align; ++i)
         ASSERT_EQ(buffer[i], dirty[i]);
diff --git a/libc/test/src/string/memcpy_test.cpp b/libc/test/src/string/memset_test.cpp
copy from libc/test/src/string/memcpy_test.cpp
copy to libc/test/src/string/memset_test.cpp
--- a/libc/test/src/string/memcpy_test.cpp
+++ b/libc/test/src/string/memset_test.cpp
@@ -1,4 +1,4 @@
-//===----------------------- Unittests for memcpy -------------------------===//
+//===----------------------- Unittests for memset -------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,16 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/string/memset.h"
 #include "utils/CPP/ArrayRef.h"
 #include "utils/UnitTest/Test.h"
-#include "src/string/memcpy.h"
 
 using __llvm_libc::cpp::Array;
 using __llvm_libc::cpp::ArrayRef;
-using __llvm_libc::cpp::MutableArrayRef;
 using Data = Array;
 
-static const ArrayRef kNumbers("0123456789", 10);
 static const ArrayRef kDeadcode("DEADC0DE", 8);
 
 // Returns a Data object filled with a repetition of `filler`.
@@ -26,24 +24,26 @@
   return out;
 }
 
-TEST(MemcpyTest, Thorough) {
-  const Data groundtruth = getData(kNumbers);
+TEST(MemsetTest, Thorough) {
   const Data dirty = getData(kDeadcode);
-  for (size_t count = 0; count < 1024; ++count) {
-    for (size_t align = 0; align < 64; ++align) {
-      auto buffer = dirty;
-      const char *const src = groundtruth.data();
-      char *const dst = &buffer[align];
-      __llvm_libc::memcpy(dst, src, count);
-      // Everything before copy is untouched.
-      for (size_t i = 0; i < align; ++i)
-        ASSERT_EQ(buffer[i], dirty[i]);
-      // Everything in between is copied.
-      for (size_t i = 0; i < count; ++i)
-        ASSERT_EQ(buffer[align + i], groundtruth[i]);
-      // Everything after copy is untouched.
-      for (size_t i = align + count; i < dirty.size(); ++i)
-        ASSERT_EQ(buffer[i], dirty[i]);
+  for (int value = -1; value <= 1; ++value) {
+    for (size_t count = 0; count < 1024; ++count) {
+      for (size_t align = 0; align < 64; ++align) {
+        auto buffer = dirty;
+        char *const dst = &buffer[align];
+        const void *const ret = __llvm_libc::memset(dst, value, count);
+        // Return value is `dst`.
+        ASSERT_EQ(ret, (const void *)dst);
+        // Everything before the filled region is untouched.
+        for (size_t i = 0; i < align; ++i)
+          ASSERT_EQ(buffer[i], dirty[i]);
+        // Everything in between is set to `value`.
+        for (size_t i = 0; i < count; ++i)
+          ASSERT_EQ(buffer[align + i], (char)value);
+        // Everything after the filled region is untouched.
+        for (size_t i = align + count; i < dirty.size(); ++i)
+          ASSERT_EQ(buffer[i], dirty[i]);
+      }
     }
   }
 }