diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -21,6 +21,7 @@ include(CMakeParseArguments) include(LLVMLibCRules) +include(LLVMLibCCheckCpuFeatures) add_subdirectory(src) add_subdirectory(config) diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake --- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake +++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake @@ -69,15 +69,19 @@ _generate_flags_for_up_to(${feature} ${flag_name}) set(${flag_name} ${${flag_name}} PARENT_SCOPE) endforeach() + # Populates the CPU_FEATURE_OPT_LEVELS with all opt levels. + list(TRANSFORM opt_levels PREPEND "CPU_FEATURE_OPT_") + list(TRANSFORM opt_levels TOUPPER) + set(CPU_FEATURE_OPT_LEVELS ${opt_levels} PARENT_SCOPE) endfunction() _generate_opt_levels() #------------------------------------------------------------------------------ # Host cpu feature introspection -# -# Populates a HOST_CPU_FEATURES list containing the available CPU_FEATURE. #------------------------------------------------------------------------------ + +# Populates a HOST_CPU_FEATURES list containing the available CPU_FEATURE. function(_check_host_cpu_feature feature) string(TOLOWER ${feature} lowercase_feature) try_run( @@ -97,3 +101,33 @@ foreach(feature IN LISTS ALL_CPU_FEATURES) _check_host_cpu_feature(${feature}) endforeach() + +# Populates a HOST_CPU_FEATURE_OPT_LEVEL and HOST_CPU_FEATURE_OPT_FLAGS. +function(_check_host_opt_level) + if(HOST_CPU_FEATURES) + list(GET HOST_CPU_FEATURES -1 selected_feature) + else() + set(selected_feature NONE) + endif() + set(HOST_CPU_FEATURE_OPT_LEVEL "CPU_FEATURE_OPT_${selected_feature}" PARENT_SCOPE) + set(HOST_CPU_FEATURE_OPT_FLAGS "${CPU_FEATURE_OPT_${selected_feature}_FLAGS}" PARENT_SCOPE) +endfunction() + +_check_host_opt_level() + +# Convenient function to generate unique name for a particular function, architecture +# and optimization level. +function(generate_function_name function arch opt_level output) + if(NOT function) + message(FATAL_ERROR "function name must not be empty") + endif() + if(NOT arch) + message(FATAL_ERROR "architecture name must not be empty") + endif() + if(NOT ${opt_level} IN_LIST CPU_FEATURE_OPT_LEVELS) + message(FATAL_ERROR "Invalid opt_level, should be one of ${CPU_FEATURE_OPT_LEVELS} was ${opt_level}") + endif() + string(REPLACE "CPU_FEATURE_" "" short_opt ${opt_level}) + string(TOLOWER ${short_opt} lower_short_opt) + set(${output} ${function}_${arch}_${lower_short_opt} PARENT_SCOPE) +endfunction() diff --git a/libc/cmake/modules/LLVMLibCRules.cmake b/libc/cmake/modules/LLVMLibCRules.cmake --- a/libc/cmake/modules/LLVMLibCRules.cmake +++ b/libc/cmake/modules/LLVMLibCRules.cmake @@ -308,6 +308,7 @@ # SRCS # HDRS # DEPENDS +# COMPILE_OPTIONS # ) function(add_libc_unittest target_name) if(NOT LLVM_INCLUDE_TESTS) @@ -318,7 +319,7 @@ "LIBC_UNITTEST" "" # No optional arguments "SUITE" # Single value arguments - "SRCS;HDRS;DEPENDS" # Multi-value arguments + "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi-value arguments ${ARGN} ) if(NOT LIBC_UNITTEST_SRCS) @@ -356,6 +357,12 @@ ${LIBC_BUILD_DIR} ${LIBC_BUILD_DIR}/include ) + if(LIBC_UNITTEST_COMPILE_OPTIONS) + target_compile_options( + ${target_name} + PRIVATE ${LIBC_UNITTEST_COMPILE_OPTIONS} + ) + endif() if(library_deps) target_link_libraries(${target_name} PRIVATE ${library_deps}) diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt --- a/libc/lib/CMakeLists.txt +++ b/libc/lib/CMakeLists.txt @@ -11,6 +11,7 @@ # string.h entrypoints strcpy strcat + memcpy # sys/mman.h entrypoints mmap diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(memory_utils) + add_entrypoint_object( strcat SRCS @@ -19,4 +21,52 @@ string_h ) -add_subdirectory(memory_utils) +# include the relevant architecture specific implementations +if(${LIBC_TARGET_MACHINE} STREQUAL "x86") + set(LIBC_MEMCPY_IMPL_FOLDER "x86_64") +else() + set(LIBC_MEMCPY_IMPL_FOLDER ${LIBC_TARGET_MACHINE}) +endif() + +add_subdirectory(${LIBC_MEMCPY_IMPL_FOLDER}) + +add_gen_header( + memcpy_arch_specific + DEF_FILE + memcpy_arch_specific.h.def + GEN_HDR + memcpy_arch_specific.h + PARAMS + memcpy_arch_specific=${LIBC_MEMCPY_IMPL_FOLDER}/memcpy_arch_specific.h.inc + DATA_FILES + ${LIBC_MEMCPY_IMPL_FOLDER}/memcpy_arch_specific.h.inc +) + +foreach(opt_level ${LIBC_MEMCPY_OPT_LEVELS}) + generate_function_name(memcpy ${LIBC_TARGET_MACHINE} ${opt_level} memcpy_name) + add_entrypoint_object( + ${memcpy_name} + SRCS memcpy.cpp + HDRS memcpy.h + DEPENDS + string_h + memory_utils + memcpy_arch_specific + COMPILE_OPTIONS + -fno-builtin-memcpy + ${${opt_level}_FLAGS} + ) +endforeach() + +add_entrypoint_object( + memcpy + SRCS memcpy.cpp + HDRS memcpy.h + DEPENDS + string_h + memory_utils + memcpy_arch_specific + COMPILE_OPTIONS + -fno-builtin-memcpy + ${HOST_CPU_FEATURE_OPT_FLAGS} +) diff --git a/libc/src/string/memcpy.h b/libc/src/string/memcpy.h new file mode 100644 --- /dev/null +++ b/libc/src/string/memcpy.h @@ -0,0 +1,21 @@ +//===----------------- Implementation header for memcpy -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMCPY_H +#define LLVM_LIBC_SRC_STRING_MEMCPY_H + +#include "include/string.h" +#include // size_t + +namespace __llvm_libc { + +void *memcpy(void *__restrict, const void *__restrict, size_t); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMCPY_H diff --git a/libc/src/string/memcpy.cpp b/libc/src/string/memcpy.cpp new file mode 100644 --- /dev/null +++ b/libc/src/string/memcpy.cpp @@ -0,0 +1,22 @@ +//===--------------------- Implementation of memcpy -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memcpy.h" +#include "src/__support/common.h" +#include "src/string/memcpy_arch_specific.h" + +namespace __llvm_libc { + +void *LLVM_LIBC_ENTRYPOINT(memcpy)(void *__restrict dst, + const void *__restrict src, size_t size) { + memcpy_no_return(reinterpret_cast(dst), + reinterpret_cast(src), size); + return dst; +} + +} // namespace __llvm_libc diff --git a/libc/src/string/memcpy_arch_specific.h.def b/libc/src/string/memcpy_arch_specific.h.def new file mode 100644 --- /dev/null +++ b/libc/src/string/memcpy_arch_specific.h.def @@ -0,0 +1,65 @@ +//===-------------- Implementation of arch specific memcpy ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_ARCH_H +#define LLVM_LIBC_SRC_STRING_MEMORY_ARCH_H + +%%include_file(${memcpy_arch_specific}) + +namespace __llvm_libc { + +// Design rationale +// ================ +// +// Using a profiler to observe size distributions for calls into libc +// functions, it was found most operations act on a small number of bytes. +// This makes it important to favor small sizes. +// +// The tests for `count` are in ascending order so the cost of branching is +// proportional to the cost of copying. +// +// The function is written in C++ for several reasons: +// - The compiler can __see__ the code, this is useful when performing Profile +// Guided Optimization as the optimized code can take advantage of branching +// probabilities. +// - It also allows for easier customization and favors testing multiple +// implementation parameters. +// - As compilers and processors get better, the generated code is improved +// with little change on the code side. +static void memcpy_no_return(char *__restrict dst, const char *__restrict src, + size_t count) { + if (count == 0) + return; + if (count == 1) + return Copy<1>(dst, src); + if (count == 2) + return Copy<2>(dst, src); + if (count == 3) + return Copy<3>(dst, src); + if (count == 4) + return Copy<4>(dst, src); + if (count < 8) + return CopyOverlap<4>(dst, src, count); + if (count == 8) + return Copy<8>(dst, src); + if (count < 16) + return CopyOverlap<8>(dst, src, count); + if (count == 16) + return Copy<16>(dst, src); + if (count < 32) + return CopyOverlap<16>(dst, src, count); + if (count < 64) + return CopyOverlap<32>(dst, src, count); + if (count < 128) + return CopyOverlap<64>(dst, src, count); + CopyGE128(dst, src, count); +} + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_ARCH_H diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt --- a/libc/src/string/memory_utils/CMakeLists.txt +++ b/libc/src/string/memory_utils/CMakeLists.txt @@ -12,6 +12,9 @@ add_header_library( memory_utils - HDRS utils.h - DEPENDS cacheline_size + HDRS + utils.h + memcpy_utils.h + DEPENDS + cacheline_size ) diff --git a/libc/src/string/memory_utils/memcpy_utils.h b/libc/src/string/memory_utils/memcpy_utils.h new file mode 100644 --- /dev/null +++ b/libc/src/string/memory_utils/memcpy_utils.h @@ -0,0 +1,100 @@ +//===---------------------------- Memcpy utils ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MEMORY_UTILS_MEMCPY_UTILS_H +#define LLVM_LIBC_SRC_MEMORY_UTILS_MEMCPY_UTILS_H + +#include "src/string/memory_utils/utils.h" +#include // size_t + +// __builtin_memcpy_inline guarantees to never call external functions. +// Unfortunately it is not widely available. +#if defined(__clang__) && __has_builtin(__builtin_memcpy_inline) +#define USE_BUILTIN_MEMCPY_INLINE +#elif defined(__GNUC__) +#define USE_BUILTIN_MEMCPY +#endif + +// This is useful for testing. +#if defined(LLVM_LIBC_MEMCPY_MONITOR) +extern "C" void LLVM_LIBC_MEMCPY_MONITOR(char *__restrict, + const char *__restrict, size_t); +#endif + +namespace __llvm_libc { + +// Copies `kBlockSize` bytes from `src` to `dst`. +template +static void Copy(char *__restrict dst, const char *__restrict src) { +#if defined(LLVM_LIBC_MEMCPY_MONITOR) + LLVM_LIBC_MEMCPY_MONITOR(dst, src, kBlockSize); +#elif defined(USE_BUILTIN_MEMCPY_INLINE) + __builtin_memcpy_inline(dst, src, kBlockSize); +#elif defined(USE_BUILTIN_MEMCPY) + __builtin_memcpy(dst, src, kBlockSize); +#else + for (size_t i = 0; i < kBlockSize; ++i) + dst[i] = src[i]; +#endif +} + +// Copies `kBlockSize` bytes from `src + count - kBlockSize` to +// `dst + count - kBlockSize`. +// Precondition: `count >= kBlockSize`. +template +static void CopyLastBlock(char *__restrict dst, const char *__restrict src, + size_t count) { + const size_t offset = count - kBlockSize; + Copy(dst + offset, src + offset); +} + +// Copies `kBlockSize` bytes twice with an overlap between the two. +// +// [1234567812345678123] +// [__XXXXXXXXXXXXXX___] +// [__XXXXXXXX_________] +// [________XXXXXXXX___] +// +// Precondition: `count >= kBlockSize && count <= kBlockSize`. +template +static void CopyOverlap(char *__restrict dst, const char *__restrict src, + size_t count) { + Copy(dst, src); + CopyLastBlock(dst, src, count); +} + +// Copies `count` bytes by blocks of `kBlockSize` bytes. +// Copies at the start and end of the buffer are unaligned. +// Copies in the middle of the buffer are aligned to `kBlockSize`. +// +// e.g. with +// [12345678123456781234567812345678] +// [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___] +// [__XXXXXXXX______________________] +// [________XXXXXXXX________________] +// [________________XXXXXXXX________] +// [_____________________XXXXXXXX___] +// +// Precondition: `count > 2 * kBlockSize` for efficiency. +// `count >= kBlockSize` for correctness. +template +static void CopyAligned(char *__restrict dst, const char *__restrict src, + size_t count) { + Copy(dst, src); // Copy first block + + // Copy aligned blocks + size_t offset = kBlockSize - offset_from_last_aligned(dst); + for (; offset + kBlockSize < count; offset += kBlockSize) + Copy(dst + offset, src + offset); + + CopyLastBlock(dst, src, count); // Copy last block +} + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MEMORY_UTILS_MEMCPY_UTILS_H diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h --- a/libc/src/string/memory_utils/utils.h +++ b/libc/src/string/memory_utils/utils.h @@ -43,6 +43,11 @@ return is_power2_or_zero(value) ? value : 1ULL << (log2(value) + 1); } +template intptr_t offset_from_last_aligned(const void *ptr) { + static_assert(is_power2(alignment), "alignment must be a power of 2"); + return reinterpret_cast(ptr) & (alignment - 1U); +} + template intptr_t offset_to_next_aligned(const void *ptr) { static_assert(is_power2(alignment), "alignment must be a power of 2"); // The logic is not straightforward and involves unsigned modulo arithmetic @@ -51,7 +56,7 @@ } // Returns the offset from `ptr` to the next cache line. -static intptr_t offset_to_next_cache_line(const void *ptr) { +static inline intptr_t offset_to_next_cache_line(const void *ptr) { return offset_to_next_aligned(ptr); } diff --git a/libc/src/string/x86_64/CMakeLists.txt b/libc/src/string/x86_64/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/libc/src/string/x86_64/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LIBC_MEMCPY_OPT_LEVELS + CPU_FEATURE_OPT_NONE + CPU_FEATURE_OPT_SSE + CPU_FEATURE_OPT_AVX + CPU_FEATURE_OPT_AVX512F + CACHE INTERNAL LIBC_MEMCPY_OPT_LEVELS +) diff --git a/libc/src/string/x86_64/memcpy_arch_specific.h.inc b/libc/src/string/x86_64/memcpy_arch_specific.h.inc new file mode 100644 --- /dev/null +++ b/libc/src/string/x86_64/memcpy_arch_specific.h.inc @@ -0,0 +1,35 @@ +#include "src/string/memory_utils/memcpy_utils.h" + +namespace __llvm_libc { + +static void CopyRepMovsb(char *__restrict dst, const char *__restrict src, + size_t count) { + // FIXME: Add MSVC suppport with + // #include + // __movsb(reinterpret_cast(dst), + // reinterpret_cast(src), count); + asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory"); +} + +#if defined(__AVX__) +#define BEST_SIZE 64 +#else +#define BEST_SIZE 32 +#endif + +static void CopyGE128(char *__restrict dst, const char *__restrict src, + size_t count) { +#if defined(__AVX__) + if (count < 256) + return CopyOverlap<128>(dst, src, count); +#endif + // kRepMovsBSize == -1 : Only CopyAligned is used. + // kRepMovsBSize == 0 : Only RepMovsb is used. + // else CopyAligned is used to to kRepMovsBSize and then RepMovsb. + constexpr size_t kRepMovsBSize = -1; + if (count <= kRepMovsBSize) + return CopyAligned(dst, src, count); + CopyRepMovsb(dst, src, count); +} + +} // namespace __llvm_libc diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -22,3 +22,22 @@ DEPENDS strcpy ) + +# Tests all implementations of memcpy. +foreach(opt_level ${LIBC_MEMCPY_OPT_LEVELS}) + # FIXME: remove implementations that can't be tested on the host. + # This needs a notion of opt_level compatibility. + generate_function_name(memcpy ${LIBC_TARGET_MACHINE} ${opt_level} memcpy_config_name) + add_libc_unittest( + ${memcpy_config_name}_test + SUITE + libc_string_unittests + SRCS + memcpy_test.cpp + DEPENDS + ${memcpy_config_name} + COMPILE_OPTIONS + ${HOST_CPU_FEATURE_OPT_FLAGS} + ) +endforeach() + diff --git a/libc/test/src/string/memcpy_test.cpp b/libc/test/src/string/memcpy_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/memcpy_test.cpp @@ -0,0 +1,53 @@ +//===----------------------- Unittests for memcpy -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "utils/CPP/ArrayRef.h" +#include "utils/UnitTest/Test.h" +#include "src/string/memcpy.h" + +using __llvm_libc::cpp::Array; +using __llvm_libc::cpp::ArrayRef; +using __llvm_libc::cpp::MutableArrayRef; +using Data = Array; + +static const ArrayRef kNumbers("0123456789", 10); +static const ArrayRef kDeadcode("DEADC0DE", 8); + +// Returns a Data object filled with a repetition of `filler`. +Data getData(ArrayRef filler) { + Data out; + for (size_t i = 0; i < out.size(); ++i) + out[i] = filler[i % filler.size()]; + return out; +} + +TEST(MemcpyTest, Thorough) { + const Data groundtruth = getData(kNumbers); + const Data dirty = getData(kDeadcode); + for (size_t count = 0; count < 1024; ++count) { + for (size_t align = 0; align < 64; ++align) { + auto buffer = dirty; + const char *const src = groundtruth.data(); + char *const dst = &buffer[align]; + __llvm_libc::memcpy(dst, src, count); + // Everything before copy is untouched. + for (size_t i = 0; i < align; ++i) + ASSERT_EQ(buffer[i], dirty[i]); + // Everything in between is copied. + for (size_t i = 0; i < count; ++i) + ASSERT_EQ(buffer[align + i], groundtruth[i]); + // Everything after copy is untouched. + for (size_t i = align + count; i < dirty.size(); ++i) + ASSERT_EQ(buffer[i], dirty[i]); + } + } +} + +// FIXME: Add tests with reads and writes on the boundary of a read/write +// protected page to check we're not reading nor writing prior/past the allowed +// regions. diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt --- a/libc/test/src/string/memory_utils/CMakeLists.txt +++ b/libc/test/src/string/memory_utils/CMakeLists.txt @@ -4,7 +4,14 @@ libc_string_unittests SRCS utils_test.cpp + memcpy_utils_test.cpp DEPENDS memory_utils standalone_cpp ) + +target_compile_definitions( + utils_test + PRIVATE + LLVM_LIBC_MEMCPY_MONITOR=memcpy_monitor +) diff --git a/libc/test/src/string/memory_utils/memcpy_utils_test.cpp b/libc/test/src/string/memory_utils/memcpy_utils_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/memory_utils/memcpy_utils_test.cpp @@ -0,0 +1,208 @@ +//===-------------------- Unittests for memory_utils ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memory_utils/memcpy_utils.h" +#include "utils/CPP/Array.h" +#include "utils/UnitTest/Test.h" + +#include +#include // uintptr_t + +#ifndef LLVM_LIBC_MEMCPY_MONITOR +#error LLVM_LIBC_MEMCPY_MONITOR must be defined for this test. +#endif + +namespace __llvm_libc { + +struct Buffer { + static constexpr size_t kMaxBuffer = 1024; + char buffer[kMaxBuffer + 1]; + size_t last = 0; + + void Clear() { + last = 0; + for (size_t i = 0; i < kMaxBuffer; ++i) + buffer[i] = '0'; + buffer[kMaxBuffer] = '\0'; + } + + void Increment(const void *ptr) { + const auto offset = reinterpret_cast(ptr); + assert(offset < kMaxBuffer); + ++buffer[offset]; + if (offset > last) + last = offset; + } + + char *Finish() { + assert(last < kMaxBuffer); + buffer[last + 1] = '\0'; + return buffer; + } +}; + +struct Trace { + Buffer read; + Buffer write; + + void Add(char *__restrict dst, const char *__restrict src, size_t count) { + for (size_t i = 0; i < count; ++i) + read.Increment(src + i); + for (size_t i = 0; i < count; ++i) + write.Increment(dst + i); + } + + void Clear() { + read.Clear(); + write.Clear(); + } + + char *Read() { return read.Finish(); } + char *Write() { return write.Finish(); } +}; + +static Trace &GetTrace() { + static thread_local Trace events; + return events; +} + +extern "C" void LLVM_LIBC_MEMCPY_MONITOR(char *__restrict dst, + const char *__restrict src, + size_t count) { + GetTrace().Add(dst, src, count); +} + +char *I(uintptr_t offset) { return reinterpret_cast(offset); } + +TEST(MemcpyUtilsTest, CopyTrivial) { + auto &trace = GetTrace(); + + trace.Clear(); + Copy<1>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "1"); + EXPECT_STREQ(trace.Read(), "1"); + + trace.Clear(); + Copy<2>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "11"); + EXPECT_STREQ(trace.Read(), "11"); + + trace.Clear(); + Copy<4>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "1111"); + EXPECT_STREQ(trace.Read(), "1111"); + + trace.Clear(); + Copy<8>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "11111111"); + EXPECT_STREQ(trace.Read(), "11111111"); + + trace.Clear(); + Copy<16>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "1111111111111111"); + EXPECT_STREQ(trace.Read(), "1111111111111111"); + + trace.Clear(); + Copy<32>(I(0), I(0)); + EXPECT_STREQ(trace.Write(), "11111111111111111111111111111111"); + EXPECT_STREQ(trace.Read(), "11111111111111111111111111111111"); + + trace.Clear(); + Copy<64>(I(0), I(0)); + EXPECT_STREQ( + trace.Write(), + "1111111111111111111111111111111111111111111111111111111111111111"); + EXPECT_STREQ( + trace.Read(), + "1111111111111111111111111111111111111111111111111111111111111111"); +} + +TEST(MemcpyUtilsTest, CopyOffset) { + auto &trace = GetTrace(); + + trace.Clear(); + Copy<1>(I(3), I(1)); + EXPECT_STREQ(trace.Write(), "0001"); + EXPECT_STREQ(trace.Read(), "01"); + + trace.Clear(); + Copy<1>(I(2), I(1)); + EXPECT_STREQ(trace.Write(), "001"); + EXPECT_STREQ(trace.Read(), "01"); +} + +TEST(MemcpyUtilsTest, CopyOverlap) { + auto &trace = GetTrace(); + + trace.Clear(); + CopyOverlap<2>(I(0), I(0), 2); + EXPECT_STREQ(trace.Write(), "22"); + EXPECT_STREQ(trace.Read(), "22"); + + trace.Clear(); + CopyOverlap<2>(I(0), I(0), 3); + EXPECT_STREQ(trace.Write(), "121"); + EXPECT_STREQ(trace.Read(), "121"); + + trace.Clear(); + CopyOverlap<2>(I(0), I(0), 4); + EXPECT_STREQ(trace.Write(), "1111"); + EXPECT_STREQ(trace.Read(), "1111"); + + trace.Clear(); + CopyOverlap<4>(I(2), I(1), 7); + EXPECT_STREQ(trace.Write(), "001112111"); + EXPECT_STREQ(trace.Read(), "01112111"); +} + +TEST(MemcpyUtilsTest, CopyAligned) { + auto &trace = GetTrace(); + // Destination is aligned already. + // "1111000000000" + // + "0000111100000" + // + "0000000011110" + // + "0000000001111" + // = "1111111112221" + trace.Clear(); + CopyAligned<4>(I(0), I(0), 13); + EXPECT_STREQ(trace.Write(), "1111111112221"); + EXPECT_STREQ(trace.Read(), "1111111112221"); + + // Misaligned destination + // "01111000000000" + // + "00001111000000" + // + "00000000111100" + // + "00000000001111" + // = "01112111112211" + trace.Clear(); + CopyAligned<4>(I(1), I(0), 13); + EXPECT_STREQ(trace.Write(), "01112111112211"); + EXPECT_STREQ(trace.Read(), "1112111112211"); +} + +TEST(MemcpyUtilsTest, MaxReloads) { + auto &trace = GetTrace(); + for (size_t alignment = 0; alignment < 32; ++alignment) { + for (size_t count = 64; count < 768; ++count) { + trace.Clear(); + // We should never reload more than twice when copying from count = 2x32. + CopyAligned<32>(I(alignment), I(0), count); + const char *const written = trace.Write(); + // First bytes are untouched. + for (size_t i = 0; i < alignment; ++i) + EXPECT_EQ(written[i], '0'); + // Next bytes are loaded once or twice but no more. + for (size_t i = alignment; i < count; ++i) { + EXPECT_GE(written[i], '1'); + EXPECT_LE(written[i], '2'); + } + } + } +} + +} // namespace __llvm_libc diff --git a/libc/test/src/string/memory_utils/utils_test.cpp b/libc/test/src/string/memory_utils/utils_test.cpp --- a/libc/test/src/string/memory_utils/utils_test.cpp +++ b/libc/test/src/string/memory_utils/utils_test.cpp @@ -87,6 +87,14 @@ EXPECT_EQ(offset_to_next_aligned<32>(forge(16)), I(16)); } +TEST(UtilsTest, OffsetFromLastAligned) { + EXPECT_EQ(offset_from_last_aligned<16>(forge(0)), I(0)); + EXPECT_EQ(offset_from_last_aligned<16>(forge(1)), I(1)); + EXPECT_EQ(offset_from_last_aligned<16>(forge(16)), I(0)); + EXPECT_EQ(offset_from_last_aligned<16>(forge(15)), I(15)); + EXPECT_EQ(offset_from_last_aligned<32>(forge(16)), I(16)); +} + TEST(UtilsTest, OffsetToNextCacheLine) { EXPECT_GT(LLVM_LIBC_CACHELINE_SIZE, 0); EXPECT_EQ(offset_to_next_cache_line(forge(0)), I(0));