diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -6,6 +6,7 @@
     aarch64/memcpy_implementations.h
     bcmp_implementations.h
     bzero_implementations.h
+    generic/byte_per_byte.h
     memcmp_implementations.h
     memcpy_implementations.h
     memmove_implementations.h
diff --git a/libc/src/string/memory_utils/bcmp_implementations.h b/libc/src/string/memory_utils/bcmp_implementations.h
--- a/libc/src/string/memory_utils/bcmp_implementations.h
+++ b/libc/src/string/memory_utils/bcmp_implementations.h
@@ -12,6 +12,7 @@
 #include "src/__support/common.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY LIBC_LOOP_NOUNROLL
 #include "src/__support/macros/properties/architectures.h"
+#include "src/string/memory_utils/generic/byte_per_byte.h"
 #include "src/string/memory_utils/op_aarch64.h"
 #include "src/string/memory_utils/op_builtin.h"
 #include "src/string/memory_utils/op_generic.h"
@@ -22,11 +23,6 @@
 
 namespace __llvm_libc {
 
-[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_byte_per_byte(CPtr p1, CPtr p2, size_t count, size_t offset = 0) {
-  return generic::Bcmp<uint8_t>::loop_and_tail_offset(p1, p2, count, offset);
-}
-
 [[maybe_unused]] LIBC_INLINE BcmpReturnType
 inline_bcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) {
   constexpr size_t kAlign = sizeof(uint64_t);
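Note on the new header that follows: it exists purely for code size, and every loop in it is wrapped in `LIBC_LOOP_NOUNROLL` so that even at `-Os`/`-Oz` the compiler keeps a single byte-wide loop instead of unrolling it. As a rough, self-contained approximation of what such a macro does (the real definition lives in `src/__support/macros/optimization.h` and is not part of this patch; the exact pragmas there may differ):

```cpp
// Approximation of a no-unroll loop hint; the in-tree LIBC_LOOP_NOUNROLL
// dispatches on the detected compiler and may differ in detail.
#if defined(__clang__)
#define LOOP_NOUNROLL _Pragma("nounroll")
#elif defined(__GNUC__)
#define LOOP_NOUNROLL _Pragma("GCC unroll 0")
#else
#define LOOP_NOUNROLL
#endif

// The hint applies to the loop that immediately follows it.
void copy(unsigned char *dst, const unsigned char *src, unsigned long count) {
  LOOP_NOUNROLL
  for (unsigned long i = 0; i < count; ++i)
    dst[i] = src[i];
}
```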
diff --git a/libc/src/string/memory_utils/generic/byte_per_byte.h b/libc/src/string/memory_utils/generic/byte_per_byte.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/generic/byte_per_byte.h
@@ -0,0 +1,78 @@
+//===-- Trivial byte per byte implementations ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Straightforward implementations targeting the smallest code size possible.
+// This needs to be compiled with '-Os' or '-Oz'.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_BYTE_PER_BYTE_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_BYTE_PER_BYTE_H
+
+#include "src/__support/macros/config.h"       // LIBC_INLINE
+#include "src/__support/macros/optimization.h" // LIBC_LOOP_NOUNROLL
+#include "src/string/memory_utils/utils.h"     // Ptr, CPtr
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+[[maybe_unused]] LIBC_INLINE void
+inline_memcpy_byte_per_byte(Ptr dst, CPtr src, size_t count,
+                            size_t offset = 0) {
+  LIBC_LOOP_NOUNROLL
+  for (; offset < count; ++offset)
+    dst[offset] = src[offset];
+}
+
+[[maybe_unused]] LIBC_INLINE void
+inline_memmove_byte_per_byte(Ptr dst, CPtr src, size_t count) {
+  if (count == 0 || dst == src)
+    return;
+  if (dst < src) {
+    LIBC_LOOP_NOUNROLL
+    for (size_t offset = 0; offset < count; ++offset)
+      dst[offset] = src[offset];
+  } else {
+    LIBC_LOOP_NOUNROLL
+    for (ptrdiff_t offset = count - 1; offset >= 0; --offset)
+      dst[offset] = src[offset];
+  }
+}
+
+[[maybe_unused]] LIBC_INLINE static void
+inline_memset_byte_per_byte(Ptr dst, uint8_t value, size_t count,
+                            size_t offset = 0) {
+  LIBC_LOOP_NOUNROLL
+  for (; offset < count; ++offset)
+    dst[offset] = static_cast<cpp::byte>(value);
+}
+
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_byte_per_byte(CPtr p1, CPtr p2, size_t count, size_t offset = 0) {
+  LIBC_LOOP_NOUNROLL
+  for (; offset < count; ++offset)
+    if (p1[offset] != p2[offset])
+      return BcmpReturnType::NONZERO();
+  return BcmpReturnType::ZERO();
+}
+
+[[maybe_unused]] LIBC_INLINE MemcmpReturnType
+inline_memcmp_byte_per_byte(CPtr p1, CPtr p2, size_t count, size_t offset = 0) {
+  LIBC_LOOP_NOUNROLL
+  for (; offset < count; ++offset) {
+    const int32_t a = static_cast<int32_t>(p1[offset]);
+    const int32_t b = static_cast<int32_t>(p2[offset]);
+    const int32_t diff = a - b;
+    if (diff)
+      return diff;
+  }
+  return MemcmpReturnType::ZERO();
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_BYTE_PER_BYTE_H
diff --git a/libc/src/string/memory_utils/memcmp_implementations.h b/libc/src/string/memory_utils/memcmp_implementations.h
--- a/libc/src/string/memory_utils/memcmp_implementations.h
+++ b/libc/src/string/memory_utils/memcmp_implementations.h
@@ -12,6 +12,7 @@
 #include "src/__support/common.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY LIBC_LOOP_NOUNROLL
 #include "src/__support/macros/properties/architectures.h"
+#include "src/string/memory_utils/generic/byte_per_byte.h"
 #include "src/string/memory_utils/op_generic.h"
 #include "src/string/memory_utils/op_riscv.h"
 #include "src/string/memory_utils/utils.h" // CPtr MemcmpReturnType
@@ -26,11 +27,6 @@
 
 namespace __llvm_libc {
 
-[[maybe_unused]] LIBC_INLINE MemcmpReturnType
-inline_memcmp_byte_per_byte(CPtr p1, CPtr p2, size_t count, size_t offset = 0) {
-  return generic::Memcmp<uint8_t>::loop_and_tail_offset(p1, p2, count, offset);
-}
-
 [[maybe_unused]] LIBC_INLINE MemcmpReturnType
 inline_memcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) {
   constexpr size_t kAlign = sizeof(uint64_t);
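A note on the two comparison helpers above: `bcmp` only has to answer equal vs. not-equal, while `memcmp` must also order the buffers, which is why the new `inline_memcmp_byte_per_byte` widens each byte to `int32_t` and returns the signed difference of the first mismatch. A standalone sketch of the two contracts, with plain `int` standing in for the patch's `BcmpReturnType`/`MemcmpReturnType` wrappers:

```cpp
#include <stddef.h>
#include <stdint.h>

// bcmp contract: any nonzero result means "different"; no ordering implied,
// so the loop can stop at the first mismatch with a constant.
static int bcmp_byte_per_byte(const unsigned char *p1, const unsigned char *p2,
                              size_t count) {
  for (size_t offset = 0; offset < count; ++offset)
    if (p1[offset] != p2[offset])
      return 1; // the patch returns BcmpReturnType::NONZERO() here
  return 0;
}

// memcmp contract: the sign of the result orders the buffers, so the first
// differing bytes are widened to int32_t and subtracted; the difference of
// two values in [0, 255] always fits.
static int memcmp_byte_per_byte(const unsigned char *p1,
                                const unsigned char *p2, size_t count) {
  for (size_t offset = 0; offset < count; ++offset) {
    const int32_t diff =
        static_cast<int32_t>(p1[offset]) - static_cast<int32_t>(p2[offset]);
    if (diff)
      return diff;
  }
  return 0;
}
```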
"src/__support/macros/properties/architectures.h" +#include "src/string/memory_utils/generic/byte_per_byte.h" #include "src/string/memory_utils/op_builtin.h" #include "src/string/memory_utils/utils.h" @@ -25,21 +26,14 @@ namespace __llvm_libc { -[[maybe_unused]] LIBC_INLINE void -inline_memcpy_byte_per_byte(Ptr dst, CPtr src, size_t offset, size_t count) { - LIBC_LOOP_NOUNROLL - for (; offset < count; ++offset) - dst[offset] = src[offset]; -} - [[maybe_unused]] LIBC_INLINE void inline_memcpy_aligned_access_32bit(Ptr __restrict dst, CPtr __restrict src, size_t count) { constexpr size_t kAlign = sizeof(uint32_t); if (count <= 2 * kAlign) - return inline_memcpy_byte_per_byte(dst, src, 0, count); + return inline_memcpy_byte_per_byte(dst, src, count); size_t bytes_to_dst_align = distance_to_align_up(dst); - inline_memcpy_byte_per_byte(dst, src, 0, bytes_to_dst_align); + inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align); size_t offset = bytes_to_dst_align; size_t src_alignment = distance_to_align_down(src + offset); for (; offset < count - kAlign; offset += kAlign) { @@ -53,7 +47,7 @@ store32_aligned(value, dst, offset); } // remainder - inline_memcpy_byte_per_byte(dst, src, offset, count); + inline_memcpy_byte_per_byte(dst, src, count, offset); } [[maybe_unused]] LIBC_INLINE void @@ -61,9 +55,9 @@ size_t count) { constexpr size_t kAlign = sizeof(uint64_t); if (count <= 2 * kAlign) - return inline_memcpy_byte_per_byte(dst, src, 0, count); + return inline_memcpy_byte_per_byte(dst, src, count); size_t bytes_to_dst_align = distance_to_align_up(dst); - inline_memcpy_byte_per_byte(dst, src, 0, bytes_to_dst_align); + inline_memcpy_byte_per_byte(dst, src, bytes_to_dst_align); size_t offset = bytes_to_dst_align; size_t src_alignment = distance_to_align_down(src + offset); for (; offset < count - kAlign; offset += kAlign) { @@ -81,14 +75,14 @@ store64_aligned(value, dst, offset); } // remainder - inline_memcpy_byte_per_byte(dst, src, offset, count); + inline_memcpy_byte_per_byte(dst, src, count, offset); } LIBC_INLINE void inline_memcpy(Ptr __restrict dst, CPtr __restrict src, size_t count) { using namespace __llvm_libc::builtin; #if defined(LIBC_COPT_MEMCPY_USE_EMBEDDED_TINY) - return inline_memcpy_byte_per_byte(dst, src, 0, count); + return inline_memcpy_byte_per_byte(dst, src, count); #elif defined(LIBC_TARGET_ARCH_IS_X86) return inline_memcpy_x86_maybe_interpose_repmovsb(dst, src, count); #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) @@ -98,7 +92,7 @@ #elif defined(LIBC_TARGET_ARCH_IS_RISCV32) return inline_memcpy_aligned_access_32bit(dst, src, count); #else - return inline_memcpy_byte_per_byte(dst, src, 0, count); + return inline_memcpy_byte_per_byte(dst, src, count); #endif } diff --git a/libc/src/string/memory_utils/memmove_implementations.h b/libc/src/string/memory_utils/memmove_implementations.h --- a/libc/src/string/memory_utils/memmove_implementations.h +++ b/libc/src/string/memory_utils/memmove_implementations.h @@ -11,6 +11,7 @@ #include "src/__support/common.h" #include "src/__support/macros/optimization.h" +#include "src/string/memory_utils/generic/byte_per_byte.h" #include "src/string/memory_utils/op_aarch64.h" #include "src/string/memory_utils/op_builtin.h" #include "src/string/memory_utils/op_generic.h" @@ -19,21 +20,6 @@ namespace __llvm_libc { -[[maybe_unused]] LIBC_INLINE void -inline_memmove_embedded_tiny(Ptr dst, CPtr src, size_t count) { - if ((count == 0) || (dst == src)) - return; - if (dst < src) { - LIBC_LOOP_NOUNROLL - for (size_t offset = 0; offset < count; ++offset) - 
builtin::Memcpy<1>::block(dst + offset, src + offset); - } else { - LIBC_LOOP_NOUNROLL - for (ptrdiff_t offset = count - 1; offset >= 0; --offset) - builtin::Memcpy<1>::block(dst + offset, src + offset); - } -} - LIBC_INLINE void inline_memmove(Ptr dst, CPtr src, size_t count) { #if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64) #if defined(LIBC_TARGET_ARCH_IS_X86) @@ -84,7 +70,7 @@ return generic::Memmove::loop_and_tail_backward(dst, src, count); } #else - return inline_memmove_embedded_tiny(dst, src, count); + return inline_memmove_byte_per_byte(dst, src, count); #endif } diff --git a/libc/src/string/memory_utils/memset_implementations.h b/libc/src/string/memory_utils/memset_implementations.h --- a/libc/src/string/memory_utils/memset_implementations.h +++ b/libc/src/string/memory_utils/memset_implementations.h @@ -12,6 +12,7 @@ #include "src/__support/common.h" #include "src/__support/macros/optimization.h" #include "src/__support/macros/properties/architectures.h" +#include "src/string/memory_utils/generic/byte_per_byte.h" #include "src/string/memory_utils/op_aarch64.h" #include "src/string/memory_utils/op_builtin.h" #include "src/string/memory_utils/op_generic.h" @@ -22,38 +23,30 @@ namespace __llvm_libc { -[[maybe_unused]] LIBC_INLINE static void -inline_memset_byte_per_byte(Ptr dst, size_t offset, uint8_t value, - size_t count) { - LIBC_LOOP_NOUNROLL - for (; offset < count; ++offset) - generic::Memset::block(dst + offset, value); -} - [[maybe_unused]] LIBC_INLINE static void inline_memset_aligned_access_32bit(Ptr dst, uint8_t value, size_t count) { constexpr size_t kAlign = sizeof(uint32_t); if (count <= 2 * kAlign) - return inline_memset_byte_per_byte(dst, 0, value, count); + return inline_memset_byte_per_byte(dst, value, count); size_t bytes_to_dst_align = distance_to_align_up(dst); - inline_memset_byte_per_byte(dst, 0, value, bytes_to_dst_align); + inline_memset_byte_per_byte(dst, value, bytes_to_dst_align); size_t offset = bytes_to_dst_align; for (; offset < count - kAlign; offset += kAlign) store32_aligned(generic::splat(value), dst, offset); - inline_memset_byte_per_byte(dst, offset, value, count); + inline_memset_byte_per_byte(dst, value, count, offset); } [[maybe_unused]] LIBC_INLINE static void inline_memset_aligned_access_64bit(Ptr dst, uint8_t value, size_t count) { constexpr size_t kAlign = sizeof(uint64_t); if (count <= 2 * kAlign) - return inline_memset_byte_per_byte(dst, 0, value, count); + return inline_memset_byte_per_byte(dst, value, count); size_t bytes_to_dst_align = distance_to_align_up(dst); - inline_memset_byte_per_byte(dst, 0, value, bytes_to_dst_align); + inline_memset_byte_per_byte(dst, value, bytes_to_dst_align); size_t offset = bytes_to_dst_align; for (; offset < count - kAlign; offset += kAlign) store64_aligned(generic::splat(value), dst, offset); - inline_memset_byte_per_byte(dst, offset, value, count); + inline_memset_byte_per_byte(dst, value, count, offset); } #if defined(LIBC_TARGET_ARCH_IS_X86) @@ -153,7 +146,7 @@ #elif defined(LIBC_TARGET_ARCH_IS_RISCV32) return inline_memset_aligned_access_32bit(dst, value, count); #else - return inline_memset_byte_per_byte(dst, 0, value, count); + return inline_memset_byte_per_byte(dst, value, count); #endif } diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -2013,13 +2013,14 @@ 
"src/string/memory_utils/aarch64/memcpy_implementations.h", "src/string/memory_utils/bcmp_implementations.h", "src/string/memory_utils/bzero_implementations.h", + "src/string/memory_utils/generic/byte_per_byte.h", "src/string/memory_utils/memcmp_implementations.h", "src/string/memory_utils/memcpy_implementations.h", + "src/string/memory_utils/memmem_implementations.h", "src/string/memory_utils/memmove_implementations.h", "src/string/memory_utils/memset_implementations.h", "src/string/memory_utils/strcmp_implementations.h", "src/string/memory_utils/strstr_implementations.h", - "src/string/memory_utils/memmem_implementations.h", "src/string/memory_utils/x86_64/memcmp_implementations.h", "src/string/memory_utils/x86_64/memcpy_implementations.h", ],