diff --git a/libc/src/string/aarch64/memcmp.cpp b/libc/src/string/aarch64/memcmp.cpp
--- a/libc/src/string/aarch64/memcmp.cpp
+++ b/libc/src/string/aarch64/memcmp.cpp
@@ -12,48 +12,41 @@
 #include <stddef.h> // size_t
 
 namespace __llvm_libc {
-namespace aarch64 {
 
-static int memcmp_impl(const char *lhs, const char *rhs, size_t count) {
-  if (count == 0)
+static int memcmp_aarch64(const char *lhs, const char *rhs, size_t count) {
+  // Use aarch64 strategies (_1, _2, _3 ...)
+  using namespace __llvm_libc::aarch64;
+
+  if (count == 0) // [0, 0]
     return 0;
-  if (count == 1)
+  if (count == 1) // [1, 1]
     return ThreeWayCompare<_1>(lhs, rhs);
-  else if (count == 2)
+  if (count == 2) // [2, 2]
     return ThreeWayCompare<_2>(lhs, rhs);
-  else if (count == 3)
+  if (count == 3) // [3, 3]
     return ThreeWayCompare<_3>(lhs, rhs);
-  else if (count < 8)
+  if (count < 8) // [4, 7]
     return ThreeWayCompare<HeadTail<_4>>(lhs, rhs, count);
-  else if (count < 16)
+  if (count < 16) // [8, 15]
     return ThreeWayCompare<HeadTail<_8>>(lhs, rhs, count);
-  else if (count < 128) {
-    if (Equals<_16>(lhs, rhs)) {
-      if (count < 32)
-        return ThreeWayCompare<Tail<_16>>(lhs, rhs, count);
-      else {
-        if (Equals<_16>(lhs + 16, rhs + 16)) {
-          if (count < 64)
-            return ThreeWayCompare<Tail<_32>>(lhs, rhs, count);
-          if (count < 128)
-            return ThreeWayCompare<Loop<_16>>(lhs + 32, rhs + 32, count - 32);
-        } else
-          return ThreeWayCompare<_16>(lhs + count - 32, rhs + count - 32);
-      }
-    }
+  if (unlikely(count >= 128)) // [128, ∞]
+    return ThreeWayCompare<Align<_16>::Then<Loop<_32>>>(lhs, rhs, count);
+  if (!Equals<_16>(lhs, rhs)) // [16, 16]
     return ThreeWayCompare<_16>(lhs, rhs);
-  } else
-    return ThreeWayCompare<Align<_16, Arg::_1>::Then<Loop<_32>>>(lhs, rhs,
-                                                                 count);
+  if (count < 32) // [17, 31]
+    return ThreeWayCompare<Tail<_16>>(lhs, rhs, count);
+  if (!Equals<Skip<16>::Then<_16>>(lhs, rhs)) // [32, 32]
+    return ThreeWayCompare<Skip<16>::Then<_16>>(lhs, rhs);
+  if (count < 64) // [33, 63]
+    return ThreeWayCompare<Tail<_32>>(lhs, rhs, count);
+  // [64, 127]
+  return ThreeWayCompare<Skip<32>::Then<Loop<_16>>>(lhs, rhs, count);
 }
-} // namespace aarch64
 
 LLVM_LIBC_FUNCTION(int, memcmp,
                    (const void *lhs, const void *rhs, size_t count)) {
-
-  const char *_lhs = reinterpret_cast<const char *>(lhs);
-  const char *_rhs = reinterpret_cast<const char *>(rhs);
-  return aarch64::memcmp_impl(_lhs, _rhs, count);
+  return memcmp_aarch64(reinterpret_cast<const char *>(lhs),
+                        reinterpret_cast<const char *>(rhs), count);
 }
 
 } // namespace __llvm_libc
diff --git a/libc/src/string/memory_utils/elements.h b/libc/src/string/memory_utils/elements.h
--- a/libc/src/string/memory_utils/elements.h
+++ b/libc/src/string/memory_utils/elements.h
@@ -304,7 +304,7 @@
 //
 // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
 // Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count);
-template <typename AlignmentT, Arg AlignOn> struct Align {
+template <typename AlignmentT, Arg AlignOn = Arg::_1> struct Align {
 private:
   static constexpr size_t Alignment = AlignmentT::kSize;
   static_assert(Alignment > 1, "Alignment must be more than 1");
@@ -342,6 +342,44 @@
   };
 };
 
+// An operation that allows to skip the specified amount of bytes.
+template <ptrdiff_t Bytes> struct Skip {
+  template <typename NextT> struct Then {
+    static void Copy(char *__restrict dst, const char *__restrict src,
+                     size_t size) {
+      NextT::Copy(dst + Bytes, src + Bytes, size - Bytes);
+    }
+
+    static void Copy(char *__restrict dst, const char *__restrict src) {
+      NextT::Copy(dst + Bytes, src + Bytes);
+    }
+
+    static bool Equals(const char *lhs, const char *rhs, size_t size) {
+      return NextT::Equals(lhs + Bytes, rhs + Bytes, size - Bytes);
+    }
+
+    static bool Equals(const char *lhs, const char *rhs) {
+      return NextT::Equals(lhs + Bytes, rhs + Bytes);
+    }
+
+    static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
+      return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes, size - Bytes);
+    }
+
+    static int ThreeWayCompare(const char *lhs, const char *rhs) {
+      return NextT::ThreeWayCompare(lhs + Bytes, rhs + Bytes);
+    }
+
+    static void SplatSet(char *dst, const unsigned char value, size_t size) {
+      NextT::SplatSet(dst + Bytes, value, size - Bytes);
+    }
+
+    static void SplatSet(char *dst, const unsigned char value) {
+      NextT::SplatSet(dst + Bytes, value);
+    }
+  };
+};
+
 // Fixed-size Builtin Operations
 // -----------------------------
 // Note: Do not use 'builtin' right now as it requires the implementation of the