diff --git a/libc/src/string/memory_utils/elements.h b/libc/src/string/memory_utils/elements.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/elements.h
@@ -0,0 +1,507 @@
+//===-- Elementary operations to compose memory primitives ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
+
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
+
+#include "src/__support/endian.h"
+#include "src/string/memory_utils/utils.h"
+
+namespace __llvm_libc {
+
+// Elementary Operations
+// --------------------------------
+// We define abstract elementary operations acting on fixed chunks of memory.
+// These are low level building blocks that are meant to be assembled to compose
+// higher order abstractions. Each function is defined twice: once with
+// fixed-size operations, and once with runtime-size operations.
+
+// Fixed-size copies from 'src' to 'dst'.
+template <typename Element>
+void Copy(char *__restrict dst, const char *__restrict src) {
+  Element::Copy(dst, src);
+}
+// Runtime-size copies from 'src' to 'dst'.
+template <typename Element>
+void Copy(char *__restrict dst, const char *__restrict src, size_t size) {
+  Element::Copy(dst, src, size);
+}
+
+// Fixed-size equality between 'lhs' and 'rhs'.
+template <typename Element> bool Equals(const char *lhs, const char *rhs) {
+  return Element::Equals(lhs, rhs);
+}
+// Runtime-size equality between 'lhs' and 'rhs'.
+template <typename Element>
+bool Equals(const char *lhs, const char *rhs, size_t size) {
+  return Element::Equals(lhs, rhs, size);
+}
+
+// Fixed-size three-way comparison between 'lhs' and 'rhs'.
+template <typename Element>
+int ThreeWayCompare(const char *lhs, const char *rhs) {
+  return Element::ThreeWayCompare(lhs, rhs);
+}
+// Runtime-size three-way comparison between 'lhs' and 'rhs'.
+template <typename Element>
+int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
+  return Element::ThreeWayCompare(lhs, rhs, size);
+}
+
+// Fixed-size initialization.
+template <typename Element>
+void SplatSet(char *dst, const unsigned char value) {
+  Element::SplatSet(dst, value);
+}
+// Runtime-size initialization.
+template <typename Element>
+void SplatSet(char *dst, const unsigned char value, size_t size) {
+  Element::SplatSet(dst, value, size);
+}
+
+// Fixed-size Higher-Order Operations
+// ----------------------------------
+// - Repeated: Repeat the operation several times in a row.
+// - Chained: Chain the operation of several types.
+
+// Repeat the operation several times in a row.
+template <typename Element, size_t ElementCount> struct Repeated {
+  static constexpr size_t kSize = ElementCount * Element::kSize;
+
+  static void Copy(char *__restrict dst, const char *__restrict src) {
+    for (size_t i = 0; i < ElementCount; ++i) {
+      const size_t offset = i * Element::kSize;
+      Element::Copy(dst + offset, src + offset);
+    }
+  }
+
+  static bool Equals(const char *lhs, const char *rhs) {
+    for (size_t i = 0; i < ElementCount; ++i) {
+      const size_t offset = i * Element::kSize;
+      if (!Element::Equals(lhs + offset, rhs + offset))
+        return false;
+    }
+    return true;
+  }
+
+  static int ThreeWayCompare(const char *lhs, const char *rhs) {
+    for (size_t i = 0; i < ElementCount; ++i) {
+      const size_t offset = i * Element::kSize;
+      // We make the assumption that 'Equals' is cheaper than 'ThreeWayCompare'.
+      if (Element::Equals(lhs + offset, rhs + offset))
+        continue;
+      return Element::ThreeWayCompare(lhs + offset, rhs + offset);
+    }
+    return 0;
+  }
+
+  static void SplatSet(char *dst, const unsigned char value) {
+    for (size_t i = 0; i < ElementCount; ++i) {
+      const size_t offset = i * Element::kSize;
+      Element::SplatSet(dst + offset, value);
+    }
+  }
+};
+
+// Chain the operation of several types.
+// For instance, to handle a 3 bytes operation, one can use:
+// Chained<_2, _1>::Operation();
+template <typename... Types> struct Chained;
+
+template <typename Head, typename... Tail> struct Chained<Head, Tail...> {
+  static constexpr size_t kSize = Head::kSize + Chained<Tail...>::kSize;
+
+  static void Copy(char *__restrict dst, const char *__restrict src) {
+    Chained<Tail...>::Copy(dst + Head::kSize, src + Head::kSize);
+    __llvm_libc::Copy<Head>(dst, src);
+  }
+
+  static bool Equals(const char *lhs, const char *rhs) {
+    if (!__llvm_libc::Equals<Head>(lhs, rhs))
+      return false;
+    return Chained<Tail...>::Equals(lhs + Head::kSize, rhs + Head::kSize);
+  }
+
+  static int ThreeWayCompare(const char *lhs, const char *rhs) {
+    if (__llvm_libc::Equals<Head>(lhs, rhs))
+      return Chained<Tail...>::ThreeWayCompare(lhs + Head::kSize,
+                                               rhs + Head::kSize);
+    return __llvm_libc::ThreeWayCompare<Head>(lhs, rhs);
+  }
+
+  static void SplatSet(char *dst, const unsigned char value) {
+    Chained<Tail...>::SplatSet(dst + Head::kSize, value);
+    __llvm_libc::SplatSet<Head>(dst, value);
+  }
+};
+
+template <> struct Chained<> {
+  static constexpr size_t kSize = 0;
+  static void Copy(char *__restrict dst, const char *__restrict src) {}
+  static bool Equals(const char *lhs, const char *rhs) { return true; }
+  static int ThreeWayCompare(const char *lhs, const char *rhs) { return 0; }
+  static void SplatSet(char *dst, const unsigned char value) {}
+};
+
+// Runtime-size Higher-Order Operations
+// ------------------------------------
+// - Tail: Perform the operation on the last 'T::kSize' bytes of the buffer.
+// - HeadTail: Perform the operation on the first and last 'T::kSize' bytes
+// of the buffer.
+// - Loop: Perform a loop of fixed-sized operations.
+
+// Perform the operation on the last 'T::kSize' bytes of the buffer.
+//
+// e.g. with
+// [1234567812345678123]
+// [__XXXXXXXXXXXXXX___]
+// [________XXXXXXXX___]
+//
+// Precondition: `size >= T::kSize`.
+template <typename T> struct Tail {
+  static void Copy(char *__restrict dst, const char *__restrict src,
+                   size_t size) {
+    return T::Copy(dst + offset(size), src + offset(size));
+  }
+
+  static bool Equals(const char *lhs, const char *rhs, size_t size) {
+    return T::Equals(lhs + offset(size), rhs + offset(size));
+  }
+
+  static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
+    return T::ThreeWayCompare(lhs + offset(size), rhs + offset(size));
+  }
+
+  static void SplatSet(char *dst, const unsigned char value, size_t size) {
+    return T::SplatSet(dst + offset(size), value);
+  }
+
+  static size_t offset(size_t size) { return size - T::kSize; }
+};
+
+// Perform the operation on the first and last 'T::kSize' bytes of the buffer.
+// This is useful for overlapping operations.
+//
+// e.g. with
+// [1234567812345678123]
+// [__XXXXXXXXXXXXXX___]
+// [__XXXXXXXX_________]
+// [________XXXXXXXX___]
+//
+// Precondition: `size >= T::kSize && size <= 2 x T::kSize`.
+template <typename T> struct HeadTail {
+  static void Copy(char *__restrict dst, const char *__restrict src,
+                   size_t size) {
+    T::Copy(dst, src);
+    Tail<T>::Copy(dst, src, size);
+  }
+
+  static bool Equals(const char *lhs, const char *rhs, size_t size) {
+    if (!T::Equals(lhs, rhs))
+      return false;
+    return Tail<T>::Equals(lhs, rhs, size);
+  }
+
+  static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
+    if (const int result = T::ThreeWayCompare(lhs, rhs))
+      return result;
+    return Tail<T>::ThreeWayCompare(lhs, rhs, size);
+  }
+
+  static void SplatSet(char *dst, const unsigned char value, size_t size) {
+    T::SplatSet(dst, value);
+    Tail<T>::SplatSet(dst, value, size);
+  }
+};
+
+// Simple loop ending with a Tail operation.
+//
+// e.g. with
+// [12345678123456781234567812345678]
+// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
+// [__XXXXXXXX_______________________]
+// [__________XXXXXXXX_______________]
+// [__________________XXXXXXXX_______]
+// [______________________XXXXXXXX___]
+//
+// Precondition:
+// - size >= T::kSize
+template <typename T> struct Loop {
+  static void Copy(char *__restrict dst, const char *__restrict src,
+                   size_t size) {
+    for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
+      T::Copy(dst + offset, src + offset);
+    Tail<T>::Copy(dst, src, size);
+  }
+
+  static bool Equals(const char *lhs, const char *rhs, size_t size) {
+    for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
+      if (!T::Equals(lhs + offset, rhs + offset))
+        return false;
+    return Tail<T>::Equals(lhs, rhs, size);
+  }
+
+  static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
+    for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
+      if (const int result = T::ThreeWayCompare(lhs + offset, rhs + offset))
+        return result;
+    return Tail<T>::ThreeWayCompare(lhs, rhs, size);
+  }
+
+  static void SplatSet(char *dst, const unsigned char value, size_t size) {
+    for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
+      T::SplatSet(dst + offset, value);
+    Tail<T>::SplatSet(dst, value, size);
+  }
+};
+
+enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
+
+namespace internal {
+
+// Provides a specialized Bump function that adjusts pointers and size so first
+// argument (resp. second argument) gets aligned to Alignment.
+// We make sure the compiler knows about the adjusted pointer alignment.
+template <Arg arg, size_t Alignment> struct AlignHelper {};
+
+template <size_t Alignment> struct AlignHelper<Arg::_1, Alignment> {
+  template <typename T1, typename T2>
+  static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
+    const intptr_t offset = offset_to_next_aligned<Alignment>(p1ref);
+    p1ref += offset;
+    p2ref += offset;
+    size -= offset;
+    p1ref = assume_aligned<Alignment>(p1ref);
+  }
+};
+
+template <size_t Alignment> struct AlignHelper<Arg::_2, Alignment> {
+  template <typename T1, typename T2>
+  static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
+    const intptr_t offset = offset_to_next_aligned<Alignment>(p2ref);
+    p1ref += offset;
+    p2ref += offset;
+    size -= offset;
+    p2ref = assume_aligned<Alignment>(p2ref);
+  }
+};
+
+} // namespace internal
+
+// An alignment operation that:
+// - executes the 'AlignmentT' operation
+// - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected
+// pointer gets aligned, size is decreased accordingly.
+// - calls the 'NextT' operation.
+//
+// e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
+// Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count);
+template <typename AlignmentT, Arg AlignOn = Arg::_1> struct Align {
+private:
+  static constexpr size_t Alignment = AlignmentT::kSize;
+  static_assert(Alignment > 1, "Alignment must be more than 1");
+  static_assert(is_power2(Alignment), "Alignment must be a power of 2");
+
+public:
+  template <typename NextT> struct Then {
+    static void Copy(char *__restrict dst, const char *__restrict src,
+                     size_t size) {
+      AlignmentT::Copy(dst, src);
+      internal::AlignHelper<AlignOn, Alignment>::Bump(dst, src, size);
+      NextT::Copy(dst, src, size);
+    }
+
+    static bool Equals(const char *lhs, const char *rhs, size_t size) {
+      if (!AlignmentT::Equals(lhs, rhs))
+        return false;
+      internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
+      return NextT::Equals(lhs, rhs, size);
+    }
+
+    static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
+      if (const int result = AlignmentT::ThreeWayCompare(lhs, rhs))
+        return result;
+      internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
+      return NextT::ThreeWayCompare(lhs, rhs, size);
+    }
+
+    static void SplatSet(char *dst, const unsigned char value, size_t size) {
+      AlignmentT::SplatSet(dst, value);
+      char *dummy = nullptr;
+      internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size);
+      NextT::SplatSet(dst, value, size);
+    }
+  };
+};
+
+// Fixed-size Builtin Operations
+// -----------------------------
+// Note: Do not use 'builtin' right now as it requires the implementation of the
+// `_inline` versions of all the builtins. Theoretically, Clang can still turn
+// them into calls to the C library leading to reentrancy problems.
+namespace builtin {
+
+// __builtin_memcpy_inline guarantees to never call external functions.
+// Unfortunately it is not widely available.
+#ifdef __clang__
+#if __has_builtin(__builtin_memcpy_inline)
+#define USE_BUILTIN_MEMCPY_INLINE
+#endif
+#elif defined(__GNUC__)
+#define USE_BUILTIN_MEMCPY
+#endif
+
+template <size_t Size> struct Builtin {
+  static constexpr size_t kSize = Size;
+
+  static void Copy(char *__restrict dst, const char *__restrict src) {
+#if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER
+    ForLoopCopy(dst, src);
+#elif defined(USE_BUILTIN_MEMCPY_INLINE)
+    __builtin_memcpy_inline(dst, src, kSize);
+#elif defined(USE_BUILTIN_MEMCPY)
+    __builtin_memcpy(dst, src, kSize);
+#else
+    ForLoopCopy(dst, src);
+#endif
+  }
+
+  static bool Equals(const char *lhs, const char *rhs) {
+    return __builtin_memcmp(lhs, rhs, kSize) == 0;
+  }
+
+  static int ThreeWayCompare(const char *lhs, const char *rhs) {
+    return __builtin_memcmp(lhs, rhs, kSize);
+  }
+
+  static void SplatSet(char *dst, const unsigned char value) {
+    __builtin_memset(dst, value, kSize);
+  }
+
+private:
+  // Copies `kBlockSize` bytes from `src` to `dst` using a for loop.
+  // This code requires the use of `-fno-builtin-memcpy` to prevent the compiler
+  // from turning the for-loop back into `__builtin_memcpy`.
+  template <size_t kBlockSize = kSize>
+  static void ForLoopCopy(char *__restrict dst, const char *__restrict src) {
+    for (size_t i = 0; i < kBlockSize; ++i)
+      dst[i] = src[i];
+  }
+};
+
+#undef USE_BUILTIN_MEMCPY_INLINE
+#undef USE_BUILTIN_MEMCPY
+
+using _1 = Builtin<1>;
+using _2 = Builtin<2>;
+using _3 = Builtin<3>;
+using _4 = Builtin<4>;
+using _8 = Builtin<8>;
+using _16 = Builtin<16>;
+using _32 = Builtin<32>;
+using _64 = Builtin<64>;
+using _128 = Builtin<128>;
+
+} // namespace builtin
+
+// Fixed-size Scalar Operations
+// ----------------------------
+namespace scalar {
+
+// The Scalar type makes use of simple sized integers.
+template <typename T> struct Scalar {
+  static constexpr size_t kSize = sizeof(T);
+
+  static void Copy(char *__restrict dst, const char *__restrict src) {
+    Store(dst, Load(src));
+  }
+
+  static bool Equals(const char *lhs, const char *rhs) {
+    return Load(lhs) == Load(rhs);
+  }
+
+  static int ThreeWayCompare(const char *lhs, const char *rhs) {
+    return ScalarThreeWayCompare(Load(lhs), Load(rhs));
+  }
+
+  static void SplatSet(char *dst, const unsigned char value) {
+    Store(dst, GetSplattedValue(value));
+  }
+
+private:
+  static T Load(const char *ptr) {
+    T value;
+    builtin::Builtin<kSize>::Copy(reinterpret_cast<char *>(&value), ptr);
+    return value;
+  }
+  static void Store(char *ptr, T value) {
+    builtin::Builtin<kSize>::Copy(ptr, reinterpret_cast<const char *>(&value));
+  }
+  static T GetSplattedValue(const unsigned char value) {
+    return T(~0) / T(0xFF) * T(value);
+  }
+  static int ScalarThreeWayCompare(T a, T b);
+};
+
+template <>
+inline int Scalar<uint8_t>::ScalarThreeWayCompare(uint8_t a, uint8_t b) {
+  const int16_t la = Endian::ToBigEndian(a);
+  const int16_t lb = Endian::ToBigEndian(b);
+  return la - lb;
+}
+template <>
+inline int Scalar<uint16_t>::ScalarThreeWayCompare(uint16_t a, uint16_t b) {
+  const int32_t la = Endian::ToBigEndian(a);
+  const int32_t lb = Endian::ToBigEndian(b);
+  return la - lb;
+}
+template <>
+inline int Scalar<uint32_t>::ScalarThreeWayCompare(uint32_t a, uint32_t b) {
+  const int64_t la = Endian::ToBigEndian(a);
+  const int64_t lb = Endian::ToBigEndian(b);
+  if (la < lb)
+    return -1;
+  if (la > lb)
+    return 1;
+  return 0;
+}
+template <>
+inline int Scalar<uint64_t>::ScalarThreeWayCompare(uint64_t a, uint64_t b) {
+  const __int128_t la = Endian::ToBigEndian(a);
+  const __int128_t lb = Endian::ToBigEndian(b);
+  if (la < lb)
+    return -1;
+  if (la > lb)
+    return 1;
+  return 0;
+}
+
+using UINT8 = Scalar<uint8_t>;   // 1 Byte
+using UINT16 = Scalar<uint16_t>; // 2 Bytes
+using UINT32 = Scalar<uint32_t>; // 4 Bytes
+using UINT64 = Scalar<uint64_t>; // 8 Bytes
+
+using _1 = UINT8;
+using _2 = UINT16;
+using _3 = Chained<UINT16, UINT8>;
+using _4 = UINT32;
+using _8 = UINT64;
+using _16 = Repeated<_8, 2>;
+using _32 = Repeated<_8, 4>;
+using _64 = Repeated<_8, 8>;
+using _128 = Repeated<_8, 16>;
+
+} // namespace scalar
+} // namespace __llvm_libc
+
+#include <src/string/memory_utils/elements_x86.h>
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt
--- a/libc/test/src/string/memory_utils/CMakeLists.txt
+++ b/libc/test/src/string/memory_utils/CMakeLists.txt
@@ -9,9 +9,3 @@
     libc.src.string.memory_utils.memory_utils
     libc.utils.CPP.standalone_cpp
   )
-
-target_compile_definitions(
-  libc.test.src.string.memory_utils.utils_test
-  PRIVATE
-  LLVM_LIBC_MEMCPY_MONITOR=memcpy_monitor
-)