diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt --- a/libc/src/string/memory_utils/CMakeLists.txt +++ b/libc/src/string/memory_utils/CMakeLists.txt @@ -2,6 +2,7 @@ memory_utils HDRS utils.h + elements.h memcpy_utils.h memset_utils.h ) diff --git a/libc/src/string/memory_utils/elements.h b/libc/src/string/memory_utils/elements.h new file mode 100644 --- /dev/null +++ b/libc/src/string/memory_utils/elements.h @@ -0,0 +1,287 @@ +//===-- Elementary operations to compose memory primitives ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H +#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H + +#include // size_t +#include // uint8_t, uint16_t, uint32_t, uint64_t +#include // is_same_v + +#ifdef __SSE2__ +#include +#endif // __SSE2__ + +#include "src/__support/endian.h" + +namespace __llvm_libc { + +// Elementary Operations +// --------------------- +// We define abstract elementary operations acting on fixed chunks of memory. +// These are low level building blocks that are meant to be assembled to compose +// higher order abstractions. + +// Copies 'Element::kSize' bytes from 'src' to 'dst'. +template +void Copy(char *__restrict dst, const char *__restrict src) { + Element::Copy(dst, src); +} + +// Checks whether the 'Element::kSize' first bytes of 'lhs' and 'rhs' compare +// equal. +template bool Equals(const char *lhs, const char *rhs) { + return Element::Equals(lhs, rhs); +} + +// Computes the three-way comparison of the 'Element::kSize' first bytes +// between 'lhs' and 'rhs'. +template +int ThreeWayCompare(const char *lhs, const char *rhs) { + return Element::ThreeWayCompare(lhs, rhs); +} + +// Sets the 'Element::kSize' first bytes of 'dst' to 'value'. +template +void SplatSet(char *dst, const unsigned char value) { + Element::SplatSet(dst, value); +} + +// Elements +// -------- +// We define three types of elements: +// - Scalar : makes use of basic integer types, +// - Vector : makes use of vector extensions where available, +// - Repeated : is a meta type that repeats another type several times. + +// The Repeated type simply delegates its operations to the underlying type. +template struct Repeated { + static constexpr size_t kSize = Elements * Element::kSize; + + static void Copy(char *__restrict dst, const char *__restrict src) { + for (size_t i = 0; i < Elements; ++i) { + const size_t offset = i * Element::kSize; + Element::Copy(dst + offset, src + offset); + } + } + + static bool Equals(const char *lhs, const char *rhs) { + for (size_t i = 0; i < Elements; ++i) { + const size_t offset = i * Element::kSize; + if (!Element::Equals(lhs + offset, rhs + offset)) + return false; + } + return true; + } + + static int ThreeWayCompare(const char *lhs, const char *rhs) { + for (size_t i = 0; i < Elements; ++i) { + const size_t offset = i * Element::kSize; + // We make the assumption that 'Equals' si cheaper than 'ThreeWayCompare'. + if (Element::Equals(lhs + offset, rhs + offset)) + continue; + return Element::ThreeWayCompare(lhs + offset, rhs + offset); + } + return 0; + } + + static void SplatSet(char *dst, const unsigned char value) { + for (size_t i = 0; i < Elements; ++i) { + const size_t offset = i * Element::kSize; + Element::SplatSet(dst + offset, value); + } + } +}; + +// The Scalar type makes use of simple sized integers. +template struct Scalar { + static_assert(std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value, + "Should be one of uint8_t, uint16_t, uint32_t or uint64_t"); + static constexpr size_t kSize = sizeof(T); + + static void Copy(char *__restrict dst, const char *__restrict src) { + Store(dst, Load(src)); + } + + static bool Equals(const char *lhs, const char *rhs) { + return Load(lhs) == Load(rhs); + } + + static int ThreeWayCompare(const char *lhs, const char *rhs) { + return ScalarThreeWayCompare(Load(lhs), Load(rhs)); + } + + static void SplatSet(char *dst, const unsigned char value) { + Store(dst, GetSplattedValue(value)); + } + +private: + static T Load(const char *ptr) { + T value; + __builtin_memcpy_inline(&value, ptr, kSize); + return value; + } + static void Store(char *ptr, T value) { + __builtin_memcpy_inline(ptr, &value, kSize); + } + static T GetSplattedValue(const unsigned char value) { + return T(~0) / T(0xFF) * T(value); + } + static int ScalarThreeWayCompare(T a, T b); +}; + +template <> +inline int Scalar::ScalarThreeWayCompare(uint8_t a, uint8_t b) { + const int16_t la = Endian::ToBigEndian(a); + const int16_t lb = Endian::ToBigEndian(b); + return la - lb; +} +template <> +inline int Scalar::ScalarThreeWayCompare(uint16_t a, uint16_t b) { + const int32_t la = Endian::ToBigEndian(a); + const int32_t lb = Endian::ToBigEndian(b); + return la - lb; +} +template <> +inline int Scalar::ScalarThreeWayCompare(uint32_t a, uint32_t b) { + const int64_t la = Endian::ToBigEndian(a); + const int64_t lb = Endian::ToBigEndian(b); + const int64_t diff = la - lb; + return diff ? (diff < 0 ? -1 : 1) : 0; +} +template <> +inline int Scalar::ScalarThreeWayCompare(uint64_t a, uint64_t b) { + const __int128_t la = Endian::ToBigEndian(a); + const __int128_t lb = Endian::ToBigEndian(b); + const __int128_t diff = la - lb; + return diff ? (diff < 0 ? -1 : 1) : 0; +} + +using UINT8 = Scalar; // 1 Byte +using UINT16 = Scalar; // 2 Bytes +using UINT32 = Scalar; // 4 Bytes +using UINT64 = Scalar; // 8 Bytes +using UINT64x2 = Repeated; // 16 Bytes +using UINT64x4 = Repeated; // 32 Bytes +using UINT64x8 = Repeated; // 64 Bytes +using UINT64x16 = Repeated; // 128 Bytes +using UINT64x32 = Repeated; // 256 Bytes + +#ifdef __SSE2__ + +template struct X86Vector : public Base { + static void Copy(char *dst, const char *src) { + Base::Store(dst, Base::Load(src)); + } + + static bool Equals(const char *a, const char *b) { + return Base::NotEqualMask(Base::Load(a), Base::Load(b)) == 0; + } + + static int ThreeWayCompare(const char *a, const char *b) { + const auto mask = Base::NotEqualMask(Base::Load(a), Base::Load(b)); + if (!mask) + return 0; + return CharDiff(a, b, mask); + } + + static void SplatSet(char *dst, const unsigned char value) { + Base::Store(dst, Base::GetSplattedValue(value)); + } + + static int CharDiff(const char *a, const char *b, uint64_t mask) { + const size_t diff_index = __builtin_ctzl(mask); + const int ca = (unsigned char)a[diff_index]; + const int cb = (unsigned char)b[diff_index]; + return ca - cb; + } +}; + +struct X86_Base_128 { + static constexpr size_t kSize = 16; + using T = char __attribute__((__vector_size__(kSize))); + static uint16_t mask(T value) { return _mm_movemask_epi8(value); } + static uint16_t NotEqualMask(T a, T b) { return mask(a != b); } + static T Load(const char *ptr) { + return _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr)); + } + static void Store(char *ptr, T value) { + return _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr), value); + } + static T GetSplattedValue(const unsigned char v) { + const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; + return splatted; + } +}; + +using X86Vector128 = X86Vector; // 16 Bytes +using X86Vector128x2 = Repeated; // 32 Bytes +using X86Vector128x4 = Repeated; // 64 Bytes +using X86Vector128x8 = Repeated; // 128 Bytes +using X86Vector128x16 = Repeated; // 256 Bytes + +#ifdef __AVX2__ + +struct X86_Base_256 { + static constexpr size_t kSize = 32; + using T = char __attribute__((__vector_size__(kSize))); + static uint32_t mask(T value) { return _mm256_movemask_epi8(value); } + static uint32_t NotEqualMask(T a, T b) { return mask(a != b); } + static T Load(const char *ptr) { + return _mm256_loadu_si256(reinterpret_cast<__m256i const *>(ptr)); + } + static void Store(char *ptr, T value) { + return _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), value); + } + static T GetSplattedValue(const unsigned char v) { + const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; + return splatted; + } +}; +using X86Vector256 = X86Vector; // 32 Bytes +using X86Vector256x2 = Repeated; // 64 Bytes +using X86Vector256x4 = Repeated; // 128 Bytes +using X86Vector256x8 = Repeated; // 256 Bytes + +#if defined(__AVX512F__) and defined(__AVX512BW__) + +struct X86_Base_512 { + static constexpr size_t kSize = 64; + using T = char __attribute__((__vector_size__(kSize))); + static uint64_t NotEqualMask(T a, T b) { + return _mm512_cmpneq_epi8_mask(a, b); + } + static T Load(const char *ptr) { return _mm512_loadu_epi8(ptr); } + static void Store(char *ptr, T value) { + return _mm512_storeu_epi8(ptr, value); + } + static T GetSplattedValue(const unsigned char v) { + const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v}; + return splatted; + } +}; +using X86Vector512 = X86Vector; +using X86Vector512x2 = Repeated; +using X86Vector512x4 = Repeated; + +#endif // defined(__AVX512F__) and defined(__AVX512BW__) + +#endif // __AVX2__ + +#endif // __SSE2__ + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt --- a/libc/test/src/string/memory_utils/CMakeLists.txt +++ b/libc/test/src/string/memory_utils/CMakeLists.txt @@ -3,11 +3,14 @@ SUITE libc_string_unittests SRCS + elements_test.cpp utils_test.cpp memcpy_utils_test.cpp DEPENDS libc.src.string.memory_utils.memory_utils libc.utils.CPP.standalone_cpp + COMPILE_OPTIONS + -march=native ) target_compile_definitions( diff --git a/libc/test/src/string/memory_utils/elements_test.cpp b/libc/test/src/string/memory_utils/elements_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/memory_utils/elements_test.cpp @@ -0,0 +1,141 @@ +//===-- Unittests for memory_utils ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memory_utils/elements.h" +#include "utils/CPP/Array.h" +#include "utils/UnitTest/Test.h" +#include + +namespace __llvm_libc { + +char GetRandomChar() { + static constexpr const int a = 1103515245; + static constexpr const int c = 12345; + static constexpr const int m = 1LL ^ 31; + + static int seed = 123456789; + seed = (a * seed + c) % m; + return seed; +} + +template using Buffer = cpp::Array; +template Buffer GetRandomBuffer() { + Buffer buffer; + for (auto ¤t : buffer) + current = GetRandomChar(); + return buffer; +} +template class LlvmLibcMyTest : public testing::Test {}; + +REGISTER_TYPE_NAME(char) +REGISTER_TYPE_NAME(int) +REGISTER_TYPE_NAME(long) + +using AnotherTypeList = testing::TypeList; + +TYPED_TEST_F(LlvmLibcMyTest, A, AnotherTypeList) { + EXPECT_LT(sizeof(ParamType), 8UL); +} + +REGISTER_TYPE_NAME(UINT8) +REGISTER_TYPE_NAME(UINT16) +REGISTER_TYPE_NAME(UINT32) +REGISTER_TYPE_NAME(UINT64) +REGISTER_TYPE_NAME(UINT64x2) +REGISTER_TYPE_NAME(UINT64x4) +REGISTER_TYPE_NAME(UINT64x8) +REGISTER_TYPE_NAME(UINT64x16) +REGISTER_TYPE_NAME(UINT64x32) +#ifdef __SSE2__ +REGISTER_TYPE_NAME(X86Vector128) +REGISTER_TYPE_NAME(X86Vector128x2) +REGISTER_TYPE_NAME(X86Vector128x4) +REGISTER_TYPE_NAME(X86Vector128x8) +REGISTER_TYPE_NAME(X86Vector128x16) +#endif // __SSE2__ +#ifdef __AVX2__ +REGISTER_TYPE_NAME(X86Vector256) +REGISTER_TYPE_NAME(X86Vector256x2) +REGISTER_TYPE_NAME(X86Vector256x4) +REGISTER_TYPE_NAME(X86Vector256x8) +#endif // __AVX2__ +#if defined(__AVX512F__) and defined(__AVX512BW__) +REGISTER_TYPE_NAME(X86Vector512) +REGISTER_TYPE_NAME(X86Vector512x2) +REGISTER_TYPE_NAME(X86Vector512x4) +#endif // defined(__AVX512F__) and defined(__AVX512BW__) + +using Types = testing::TypeList< +#ifdef __SSE2__ + X86Vector128, // + X86Vector128x2, // + X86Vector128x4, // + X86Vector128x8, // + X86Vector128x16, // +#endif // __SSE2__ +#ifdef __AVX2__ + X86Vector256, // + X86Vector256x2, // + X86Vector256x4, // + X86Vector256x8, // +#endif // __AVX2__ +#if defined(__AVX512F__) and defined(__AVX512BW__) + X86Vector512, // + X86Vector512x2, // + X86Vector512x4, // +#endif // defined(__AVX512F__) and defined(__AVX512BW__) + UINT8, // + UINT16, // + UINT32, // + UINT64, // + UINT64x2, // + UINT64x4, // + UINT64x8, // + UINT64x16, // + UINT64x32>; + +TYPED_TEST(LlvmLibcMemoryElements, Equals, Types) { + const auto buffer = GetRandomBuffer(); + EXPECT_TRUE(Equals(buffer.data(), buffer.data())); +} + +TYPED_TEST(LlvmLibcMemoryElements, Copy, Types) { + Buffer Dst; + const auto buffer = GetRandomBuffer(); + Copy(Dst.data(), buffer.data()); + for (size_t i = 0; i < ParamType::kSize; ++i) + EXPECT_EQ(Dst[i], buffer[i]); +} + +TYPED_TEST(LlvmLibcMemoryElements, Splat, Types) { + Buffer Dst; + const auto value = GetRandomChar(); + SplatSet(Dst.data(), value); + for (size_t i = 0; i < ParamType::kSize; ++i) + EXPECT_EQ(Dst[i], value); +} + +TYPED_TEST(LlvmLibcMemoryElements, ThreeWayCompare, Types) { + const auto buffer_a = GetRandomBuffer(); + const auto buffer_b = GetRandomBuffer(); + const auto *ptr_a = buffer_a.data(); + const auto *ptr_b = buffer_b.data(); + const int expected = std::memcmp(ptr_a, ptr_b, ParamType::kSize); + if (expected == 0) + EXPECT_EQ(ThreeWayCompare(ptr_a, ptr_b), 0); + if (expected < 0) { + EXPECT_LT(ThreeWayCompare(ptr_a, ptr_b), 0); + EXPECT_GT(ThreeWayCompare(ptr_b, ptr_a), 0); + } + if (expected > 0) { + EXPECT_GT(ThreeWayCompare(ptr_a, ptr_b), 0); + EXPECT_LT(ThreeWayCompare(ptr_b, ptr_a), 0); + } +} + +} // namespace __llvm_libc