diff --git a/libc/src/__support/CPP/ArrayRef.h b/libc/src/__support/CPP/ArrayRef.h --- a/libc/src/__support/CPP/ArrayRef.h +++ b/libc/src/__support/CPP/ArrayRef.h @@ -131,6 +131,10 @@ public: // From Array. template MutableArrayRef(Array &Arr) : Impl(Arr.Data, N) {} + + operator ArrayRef() const { + return ArrayRef(this->data(), this->size()); + } }; } // namespace cpp diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -37,17 +37,6 @@ .string_utils ) -add_entrypoint_object( - memmove - SRCS - memmove.cpp - HDRS - memmove.h - DEPENDS - libc.src.__support.integer_operations - libc.src.string.memcpy -) - add_entrypoint_object( memrchr SRCS @@ -404,6 +393,41 @@ add_memcpy(memcpy) endif() +# ------------------------------------------------------------------------------ +# memmove +# ------------------------------------------------------------------------------ + +function(add_memmove memmove_name) + add_implementation(memmove ${memmove_name} + SRCS ${LIBC_SOURCE_DIR}/src/string/memmove.cpp + HDRS ${LIBC_SOURCE_DIR}/src/string/memmove.h + DEPENDS + .memory_utils.memory_utils + libc.include.string + COMPILE_OPTIONS + -fno-builtin + ${ARGN} + ) +endfunction() + +if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) + add_memmove(memmove_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2) + add_memmove(memmove_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2) + add_memmove(memmove_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2) + add_memmove(memmove_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F) + add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) + add_memmove(memmove) +elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) + # Disable tail merging as it leads to lower performance. + # Note that '-mllvm' needs to be prefixed with 'SHELL:' to prevent CMake flag deduplication. + add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE} + COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0") + add_memmove(memmove COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0") +else() + add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) + add_memmove(memmove) +endif() + # ------------------------------------------------------------------------------ # memset # ------------------------------------------------------------------------------ diff --git a/libc/src/string/memmove.cpp b/libc/src/string/memmove.cpp --- a/libc/src/string/memmove.cpp +++ b/libc/src/string/memmove.cpp @@ -11,55 +11,43 @@ #include "src/__support/common.h" #include "src/__support/integer_operations.h" #include "src/string/memcpy.h" +#include "src/string/memory_utils/elements.h" #include // size_t, ptrdiff_t namespace __llvm_libc { -static inline void move_byte_forward(char *dest_m, const char *src_m, - size_t count) { - for (size_t offset = 0; count; --count, ++offset) - dest_m[offset] = src_m[offset]; -} - -static inline void move_byte_backward(char *dest_m, const char *src_m, - size_t count) { - for (size_t offset = count - 1; count; --count, --offset) - dest_m[offset] = src_m[offset]; +static inline void inline_memmove(char *dst, const char *src, size_t count) { + using namespace __llvm_libc::scalar; + if (count == 0) + return; + if (count == 1) + return Move<_1>(dst, src); + if (count < 4) + return Move>(dst, src, count); + if (count == 4) + return Move<_4>(dst, src); + if (count < 8) + return Move>(dst, src, count); + if (count <= 16) + return Move>(dst, src, count); + if (count <= 32) + return Move>(dst, src, count); + if (count <= 64) + return Move>(dst, src, count); + if (count <= 128) + return Move>(dst, src, count); + + using AlignedMoveLoop = Align<_32, Arg::Src>::Then>; + if (dst < src) + return Move(dst, src, count); + else if (dst > src) + return MoveBackward(dst, src, count); } LLVM_LIBC_FUNCTION(void *, memmove, (void *dst, const void *src, size_t count)) { - char *dest_c = reinterpret_cast(dst); - const char *src_c = reinterpret_cast(src); - - // If the distance between `src_c` and `dest_c` is equal to or greater - // than `count` (integerAbs(src_c - dest_c) >= count), they would not overlap. - // e.g. greater equal overlapping - // [12345678] [12345678] [12345678] - // src_c: [_ab_____] [_ab_____] [_ab_____] - // dest_c:[_____yz_] [___yz___] [__yz____] - - // Call `memcpy` if `src_c` and `dest_c` do not overlap. - if (__llvm_libc::integerAbs(src_c - dest_c) >= static_cast(count)) - return __llvm_libc::memcpy(dest_c, src_c, count); - - // Overlapping cases. - // If `dest_c` starts before `src_c` (dest_c < src_c), copy - // forward(pointer add 1) from beginning to end. - // If `dest_c` starts after `src_c` (dest_c > src_c), copy - // backward(pointer add -1) from end to beginning. - // If `dest_c` and `src_c` start at the same address (dest_c == src_c), - // just return dest. - // e.g. forward backward - // *-> <-* - // src_c : [___abcde_] [_abcde___] - // dest_c: [_abc--___] [___--cde_] - - // TODO: Optimize `move_byte_xxx(...)` functions. - if (dest_c < src_c) - move_byte_forward(dest_c, src_c, count); - if (dest_c > src_c) - move_byte_backward(dest_c, src_c, count); + inline_memmove(reinterpret_cast(dst), + reinterpret_cast(src), count); return dst; } diff --git a/libc/src/string/memory_utils/elements.h b/libc/src/string/memory_utils/elements.h --- a/libc/src/string/memory_utils/elements.h +++ b/libc/src/string/memory_utils/elements.h @@ -43,6 +43,11 @@ template void Move(char *dst, const char *src, size_t size) { Element::Move(dst, src, size); } +// Runtime-size move from 'src' to 'dst'. +template +void MoveBackward(char *dst, const char *src, size_t size) { + Element::MoveBackward(dst, src, size); +} // Fixed-size equality between 'lhs' and 'rhs'. template bool Equals(const char *lhs, const char *rhs) { @@ -340,6 +345,55 @@ Tail::Copy(dst, src, size); } + // Move forward suitable when dst < src. We load the tail bytes before + // handling the loop. + // + // e.g. Moving two bytes + // [ | | | | |] + // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] + // [_________________________LLLLLLLL___] + // [___LLLLLLLL_________________________] + // [_SSSSSSSS___________________________] + // [___________LLLLLLLL_________________] + // [_________SSSSSSSS___________________] + // [___________________LLLLLLLL_________] + // [_________________SSSSSSSS___________] + // [_______________________SSSSSSSS_____] + static void Move(char *dst, const char *src, size_t size) { + const size_t tail_offset = Tail::offset(size); + const auto tail_value = TailT::Load(src + tail_offset); + size_t offset = 0; + do { + T::Move(dst + offset, src + offset); + offset += T::kSize; + } while (offset < size - T::kSize); + TailT::Store(dst + tail_offset, tail_value); + } + + // Move forward suitable when dst > src. We load the head bytes before + // handling the loop. + // + // e.g. Moving two bytes + // [ | | | | |] + // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] + // [___LLLLLLLL_________________________] + // [_________________________LLLLLLLL___] + // [___________________________SSSSSSSS_] + // [_________________LLLLLLLL___________] + // [___________________SSSSSSSS_________] + // [_________LLLLLLLL___________________] + // [___________SSSSSSSS_________________] + // [_____SSSSSSSS_______________________] + static void MoveBackward(char *dst, const char *src, size_t size) { + const auto head_value = TailT::Load(src); + ptrdiff_t offset = size - T::kSize; + do { + T::Move(dst + offset, src + offset); + offset -= T::kSize; + } while (offset >= 0); + TailT::Store(dst, head_value); + } + static bool Equals(const char *lhs, const char *rhs, size_t size) { size_t offset = 0; do { @@ -374,30 +428,38 @@ namespace internal { -// Provides a specialized Bump function that adjusts pointers and size so first -// argument (resp. second argument) gets aligned to Alignment. -// We make sure the compiler knows about the adjusted pointer alignment. -template struct AlignHelper {}; +template struct ArgSelector {}; -template struct AlignHelper { +template <> struct ArgSelector { template - static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) { - const intptr_t offset = offset_to_next_aligned(p1ref); - p1ref += offset; - p2ref += offset; - size -= offset; - p1ref = assume_aligned(p1ref); + static T1 *__restrict &Select(T1 *__restrict &p1ref, T2 *__restrict &p2ref) { + return p1ref; + } +}; + +template <> struct ArgSelector { + template + static T2 *__restrict &Select(T1 *__restrict &p1ref, T2 *__restrict &p2ref) { + return p2ref; } }; -template struct AlignHelper { +// Provides a specialized Bump function that adjusts pointers and size so first +// argument (resp. second argument) gets aligned to Alignment. +// We make sure the compiler knows about the adjusted pointer alignment. +// The 'additional_bumps' parameter allows to reach previous / next aligned +// pointers. +template struct Align { template - static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) { - const intptr_t offset = offset_to_next_aligned(p2ref); + static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size, + int additional_bumps = 0) { + auto &aligned_ptr = ArgSelector::Select(p1ref, p2ref); + auto offset = offset_to_next_aligned(aligned_ptr); + offset += additional_bumps * Alignment; p1ref += offset; p2ref += offset; size -= offset; - p2ref = assume_aligned(p2ref); + aligned_ptr = assume_aligned(aligned_ptr); } }; @@ -422,28 +484,84 @@ static void Copy(char *__restrict dst, const char *__restrict src, size_t size) { AlignmentT::Copy(dst, src); - internal::AlignHelper::Bump(dst, src, size); + internal::Align::Bump(dst, src, size); NextT::Copy(dst, src, size); } + // Move forward suitable when dst < src. The alignment is performed with an + // HeadTail operation of size ∈ [Alignment, 2 x Alignment]. + // + // e.g. Moving two bytes and making sure src is then aligned. + // [ | | | | ] + // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] + // [____LLLLLLLL_____________________] + // [___________LLLLLLLL______________] + // [_SSSSSSSS________________________] + // [________SSSSSSSS_________________] + // + // e.g. Moving two bytes and making sure dst is then aligned. + // [ | | | | ] + // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] + // [____LLLLLLLL_____________________] + // [______LLLLLLLL___________________] + // [_SSSSSSSS________________________] + // [___SSSSSSSS______________________] + static void Move(char *dst, const char *src, size_t size) { + char *next_dst = dst; + const char *next_src = src; + size_t next_size = size; + internal::Align::Bump(next_dst, next_src, next_size, + 1); + HeadTail::Move(dst, src, size - next_size); + NextT::Move(next_dst, next_src, next_size); + } + + // Move backward suitable when dst > src. The alignment is performed with an + // HeadTail operation of size ∈ [Alignment, 2 x Alignment]. + // + // e.g. Moving two bytes backward and making sure src is then aligned. + // [ | | | | ] + // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] + // [ _________________LLLLLLLL_______] + // [ ___________________LLLLLLLL_____] + // [____________________SSSSSSSS_____] + // [______________________SSSSSSSS___] + // + // e.g. Moving two bytes and making sure dst is then aligned. + // [ | | | | ] + // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] + // [ _______________LLLLLLLL_________] + // [ ___________________LLLLLLLL_____] + // [__________________SSSSSSSS_______] + // [______________________SSSSSSSS___] + static void MoveBackward(char *dst, const char *src, size_t size) { + char *headtail_dst = dst + size; + const char *headtail_src = src + size; + size_t headtail_size = 0; + internal::Align::Bump(headtail_dst, headtail_src, + headtail_size, -2); + HeadTail::Move(headtail_dst, headtail_src, headtail_size); + NextT::MoveBackward(dst, src, size - headtail_size); + } + static bool Equals(const char *lhs, const char *rhs, size_t size) { if (!AlignmentT::Equals(lhs, rhs)) return false; - internal::AlignHelper::Bump(lhs, rhs, size); + internal::Align::Bump(lhs, rhs, size); return NextT::Equals(lhs, rhs, size); } static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) { if (!AlignmentT::Equals(lhs, rhs)) return AlignmentT::ThreeWayCompare(lhs, rhs); - internal::AlignHelper::Bump(lhs, rhs, size); + internal::Align::Bump(lhs, rhs, size); return NextT::ThreeWayCompare(lhs, rhs, size); } static void SplatSet(char *dst, const unsigned char value, size_t size) { AlignmentT::SplatSet(dst, value); char *dummy = nullptr; - internal::AlignHelper::Bump(dst, dummy, size); + internal::Align::Bump(dst, dummy, size); NextT::SplatSet(dst, value, size); } }; diff --git a/libc/test/src/string/memmove_test.cpp b/libc/test/src/string/memmove_test.cpp --- a/libc/test/src/string/memmove_test.cpp +++ b/libc/test/src/string/memmove_test.cpp @@ -10,48 +10,90 @@ #include "src/string/memmove.h" #include "utils/UnitTest/Test.h" +#include +#include + +using __llvm_libc::cpp::Array; +using __llvm_libc::cpp::ArrayRef; +using __llvm_libc::cpp::MutableArrayRef; + +static void Print(const char *input) { fputs(input, stdout); } + +static void Display(ArrayRef buffer) { + Print("["); + for (char c : buffer) + printf("%02X ", (unsigned char)c); + Print("]\n"); +} + +static void Display(size_t size, std::function foo, + const char *true_string, const char *false_string) { + Print("["); + for (size_t i = 0; i < size; ++i) { + if (foo(i)) + Print(true_string); + else + Print(false_string); + } + Print("]\n"); +} + class LlvmLibcMemmoveTest : public __llvm_libc::testing::Test { public: - void check_memmove(void *dst, const void *src, size_t count, - const unsigned char *str, - const __llvm_libc::cpp::ArrayRef expected) { - void *result = __llvm_libc::memmove(dst, src, count); - // Making sure the pointer returned is same with `dst`. - EXPECT_EQ(result, dst); - // `expected` is designed according to `str`. - // `dst` and `src` might be part of `str`. - // Making sure `str` is same with `expected`. - for (size_t i = 0; i < expected.size(); ++i) - EXPECT_EQ(str[i], expected[i]); + bool check_memmove(MutableArrayRef buffer, size_t dst_offset, + size_t src_offset, size_t count, ArrayRef expected) { + char *ptr = buffer.data(); + void *result = + __llvm_libc::memmove(ptr + dst_offset, ptr + src_offset, count); + if (!expected.equals(buffer)) { + printf("Move %zu bytes from offset %zu to %zu\nDisplaying alignment, " + "source, destination, expected and actual buffers\n", + count, src_offset, dst_offset); + const size_t size = expected.size(); + Display( + size, + [&](size_t i) { return (intptr_t)(buffer.data() + i) % 32 == 0; }, + "|||", " "); + Display( + size, + [=](size_t i) { return i >= dst_offset && i < dst_offset + count; }, + "St ", " "); + Display( + size, + [=](size_t i) { return i >= src_offset && i < src_offset + count; }, + "Ld ", " "); + Display(expected); + Display(buffer); + return false; + } + return result == ptr + dst_offset; } }; TEST_F(LlvmLibcMemmoveTest, MoveZeroByte) { - unsigned char dst[] = {'a', 'b'}; - const unsigned char src[] = {'y', 'z'}; - const unsigned char expected[] = {'a', 'b'}; - check_memmove(dst, src, 0, dst, expected); + char buffer[] = {'a', 'b', 'y', 'z'}; + const char expected[] = {'a', 'b', 'y', 'z'}; + ASSERT_TRUE(check_memmove(buffer, 0, 2, 0, expected)); } -TEST_F(LlvmLibcMemmoveTest, OverlapThatDstAndSrcPointToSameAddress) { - unsigned char str[] = {'a', 'b'}; - const unsigned char expected[] = {'a', 'b'}; - check_memmove(str, str, 1, str, expected); +TEST_F(LlvmLibcMemmoveTest, DstAndSrcPointToSameAddress) { + char buffer[] = {'a', 'b'}; + const char expected[] = {'a', 'b'}; + ASSERT_TRUE(check_memmove(buffer, 0, 0, 1, expected)); } -TEST_F(LlvmLibcMemmoveTest, OverlapThatDstStartsBeforeSrc) { +TEST_F(LlvmLibcMemmoveTest, DstStartsBeforeSrc) { // Set boundary at beginning and end for not overstepping when // copy forward or backward. - unsigned char str[] = {'z', 'a', 'b', 'c', 'z'}; - const unsigned char expected[] = {'z', 'b', 'c', 'c', 'z'}; - // `dst` is `&str[1]`. - check_memmove(&str[1], &str[2], 2, str, expected); + char buffer[] = {'z', 'a', 'b', 'c', 'z'}; + const char expected[] = {'z', 'b', 'c', 'c', 'z'}; + ASSERT_TRUE(check_memmove(buffer, 1, 2, 2, expected)); } -TEST_F(LlvmLibcMemmoveTest, OverlapThatDstStartsAfterSrc) { - unsigned char str[] = {'z', 'a', 'b', 'c', 'z'}; - const unsigned char expected[] = {'z', 'a', 'a', 'b', 'z'}; - check_memmove(&str[2], &str[1], 2, str, expected); +TEST_F(LlvmLibcMemmoveTest, DstStartsAfterSrc) { + char buffer[] = {'z', 'a', 'b', 'c', 'z'}; + const char expected[] = {'z', 'a', 'a', 'b', 'z'}; + ASSERT_TRUE(check_memmove(buffer, 2, 1, 2, expected)); } // e.g. `dst` follow `src`. @@ -59,13 +101,46 @@ // [__src_____] // [_____dst__] TEST_F(LlvmLibcMemmoveTest, SrcFollowDst) { - unsigned char str[] = {'z', 'a', 'b', 'z'}; - const unsigned char expected[] = {'z', 'b', 'b', 'z'}; - check_memmove(&str[1], &str[2], 1, str, expected); + char buffer[] = {'z', 'a', 'b', 'z'}; + const char expected[] = {'z', 'b', 'b', 'z'}; + ASSERT_TRUE(check_memmove(buffer, 1, 2, 1, expected)); } TEST_F(LlvmLibcMemmoveTest, DstFollowSrc) { - unsigned char str[] = {'z', 'a', 'b', 'z'}; - const unsigned char expected[] = {'z', 'a', 'a', 'z'}; - check_memmove(&str[2], &str[1], 1, str, expected); + char buffer[] = {'z', 'a', 'b', 'z'}; + const char expected[] = {'z', 'a', 'a', 'z'}; + ASSERT_TRUE(check_memmove(buffer, 2, 1, 1, expected)); +} + +static constexpr int kMaxSize = 512; + +char GetRandomChar() { + static constexpr const uint64_t a = 1103515245; + static constexpr const uint64_t c = 12345; + static constexpr const uint64_t m = 1ULL << 31; + static uint64_t seed = 123456789; + seed = (a * seed + c) % m; + return seed; +} + +void Randomize(MutableArrayRef buffer) { + for (auto ¤t : buffer) + current = GetRandomChar(); +} + +TEST_F(LlvmLibcMemmoveTest, Thorough) { + using LargeBuffer = Array; + LargeBuffer GroundTruth; + Randomize(GroundTruth); + for (int Size = 0; Size < kMaxSize; ++Size) { + for (int Offset = -Size; Offset < Size; ++Offset) { + LargeBuffer Buffer = GroundTruth; + LargeBuffer Expected = GroundTruth; + size_t DstOffset = kMaxSize; + size_t SrcOffset = kMaxSize + Offset; + for (int I = 0; I < Size; ++I) + Expected[DstOffset + I] = GroundTruth[SrcOffset + I]; + ASSERT_TRUE(check_memmove(Buffer, DstOffset, SrcOffset, Size, Expected)); + } + } }