diff --git a/libc/src/string/memory_utils/backend_aarch64.h b/libc/src/string/memory_utils/backend_aarch64.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/backend_aarch64.h
@@ -0,0 +1,63 @@
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
+
+#if defined(LLVM_LIBC_ARCH_AARCH64)
+#include "src/string/memory_utils/backend_scalar.h"
+
+#ifdef __ARM_NEON
+#include <arm_neon.h>
+#endif
+
+namespace __llvm_libc {
+
+// Specific implementation of the SizedOp abstraction for the set operation
+// using `dc zva`. A runtime check with `hasZva` is necessary before using
+// this operation.
+struct Zva64 {
+  static constexpr size_t SIZE = 64;
+
+  template <typename DstAddrT>
+  static inline void set(DstAddrT dst, ubyte value) {
+    static_assert(DstAddrT::ALIGNMENT >= 64);
+    void *const dst_ptr = as<void>(dst);
+#if __SIZEOF_POINTER__ == 4
+    asm("dc zva, %w[dst]" : : [dst] "r"(dst_ptr) : "memory");
+#else
+    asm("dc zva, %[dst]" : : [dst] "r"(dst_ptr) : "memory");
+#endif
+  }
+};
+
+inline static bool hasZva() {
+  uint64_t zva_val;
+  asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val));
+  // DC ZVA is permitted if DZP, bit [4], is zero.
+  // BS, bits [3:0], is the log2 of the block size in words.
+  // The next lines check that the instruction is permitted and that the block
+  // size is 16 words (i.e. 64 bytes).
+  constexpr uint64_t dzp_mask = 0b1'0000;
+  constexpr uint64_t dzp_permitted = 0b0'0000;
+  constexpr uint64_t bs_mask = 0b1111;
+  constexpr uint64_t bs_is_64b = 0b100;
+  const uint64_t dzp_value = zva_val & dzp_mask;
+  const uint64_t bs_value = zva_val & bs_mask;
+  return (dzp_value == dzp_permitted) && (bs_value == bs_is_64b);
+}
+
+namespace aarch64 {
+using _1 = SizedOp<Scalar64BitBackend, 1>;
+using _2 = SizedOp<Scalar64BitBackend, 2>;
+using _3 = SizedOp<Scalar64BitBackend, 3>;
+using _4 = SizedOp<Scalar64BitBackend, 4>;
+using _8 = SizedOp<Scalar64BitBackend, 8>;
+using _16 = SizedOp<Scalar64BitBackend, 16>;
+using _32 = SizedOp<Scalar64BitBackend, 32>;
+using _64 = SizedOp<Scalar64BitBackend, 64>;
+using _128 = SizedOp<Scalar64BitBackend, 128>;
+} // namespace aarch64
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_ARCH_AARCH64
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
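For context on the file above: `dc zva` unconditionally writes a whole cache block of zeros, so `Zva64` is only applicable to zero fills and must be gated on `hasZva()`. A minimal sketch of how a caller might combine the two follows; it is illustrative only, and the `DstAddr<64>` address wrapper and `ubyte(0)` construction are assumed spellings from address.h, not taken from this patch.

```cpp
// Hedged sketch: zero one 64-byte, 64-byte-aligned block, preferring `dc zva`
// when the runtime check allows it. DstAddr<64> is an assumed address.h API.
static inline void zero_one_cache_line(char *dst) {
  if (hasZva())
    Zva64::set(DstAddr<64>(dst), ubyte(0));        // single `dc zva`
  else
    aarch64::_64::set(DstAddr<64>(dst), ubyte(0)); // plain stores
}
```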
diff --git a/libc/src/string/memory_utils/backend_scalar.h b/libc/src/string/memory_utils/backend_scalar.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/backend_scalar.h
@@ -0,0 +1,97 @@
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
+
+#include "src/__support/CPP/TypeTraits.h" // ConditionalType, EnableIfType
+#include "src/__support/endian.h"
+
+namespace __llvm_libc {
+
+struct Scalar64BitBackend {
+  static constexpr bool IS_BACKEND_TYPE = true;
+
+  template <typename T>
+  static constexpr bool IsScalarType =
+      cpp::IsSameV<T, uint8_t> || cpp::IsSameV<T, uint16_t> ||
+      cpp::IsSameV<T, uint32_t> || cpp::IsSameV<T, uint64_t>;
+
+  template <typename T, Temporality TS, Aligned AS>
+  static inline T load(const T *src) {
+    static_assert(IsScalarType<T>);
+    static_assert(TS == Temporality::TEMPORAL,
+                  "Scalar load does not support non-temporal access");
+    return *src;
+  }
+
+  template <typename T, Temporality TS, Aligned AS>
+  static inline void store(T *dst, T value) {
+    static_assert(IsScalarType<T>);
+    static_assert(TS == Temporality::TEMPORAL,
+                  "Scalar store does not support non-temporal access");
+    *dst = value;
+  }
+
+  template <typename T> static inline T splat(ubyte value) {
+    static_assert(IsScalarType<T>);
+    return (T(~0ULL) / T(0xFF)) * T(value);
+  }
+
+  template <typename T> static inline uint64_t not_equals(T v1, T v2) {
+    static_assert(IsScalarType<T>);
+    return v1 ^ v2;
+  }
+
+  template <typename T> static inline int32_t three_way_cmp(T v1, T v2) {
+    DeferredStaticAssert("not implemented");
+  }
+
+  template <size_t Size>
+  using getNextType = cpp::ConditionalType<
+      Size >= 8, uint64_t,
+      cpp::ConditionalType<Size >= 4, uint32_t,
+                           cpp::ConditionalType<Size >= 2, uint16_t, uint8_t>>>;
+};
+
+template <>
+int32_t inline Scalar64BitBackend::three_way_cmp<uint8_t>(uint8_t a,
+                                                          uint8_t b) {
+  const int16_t la = Endian::to_big_endian(a);
+  const int16_t lb = Endian::to_big_endian(b);
+  return la - lb;
+}
+template <>
+int32_t inline Scalar64BitBackend::three_way_cmp<uint16_t>(uint16_t a,
+                                                           uint16_t b) {
+  const int32_t la = Endian::to_big_endian(a);
+  const int32_t lb = Endian::to_big_endian(b);
+  return la - lb;
+}
+template <>
+int32_t inline Scalar64BitBackend::three_way_cmp<uint32_t>(uint32_t a,
+                                                           uint32_t b) {
+  const uint32_t la = Endian::to_big_endian(a);
+  const uint32_t lb = Endian::to_big_endian(b);
+  return la > lb ? 1 : la < lb ? -1 : 0;
+}
+template <>
+int32_t inline Scalar64BitBackend::three_way_cmp<uint64_t>(uint64_t a,
+                                                           uint64_t b) {
+  const uint64_t la = Endian::to_big_endian(a);
+  const uint64_t lb = Endian::to_big_endian(b);
+  return la > lb ? 1 : la < lb ? -1 : 0;
+}
+
+namespace scalar {
+using _1 = SizedOp<Scalar64BitBackend, 1>;
+using _2 = SizedOp<Scalar64BitBackend, 2>;
+using _3 = SizedOp<Scalar64BitBackend, 3>;
+using _4 = SizedOp<Scalar64BitBackend, 4>;
+using _8 = SizedOp<Scalar64BitBackend, 8>;
+using _16 = SizedOp<Scalar64BitBackend, 16>;
+using _32 = SizedOp<Scalar64BitBackend, 32>;
+using _64 = SizedOp<Scalar64BitBackend, 64>;
+using _128 = SizedOp<Scalar64BitBackend, 128>;
+} // namespace scalar
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
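A quick sanity check on the `splat` arithmetic above, purely for illustration: dividing the all-ones pattern by 0xFF yields the 0x0101...01 constant, and multiplying by the fill byte replicates it into every byte of T.

```cpp
static_assert(uint32_t(~0ULL) / uint32_t(0xFF) == 0x01010101u, "");
static_assert((uint32_t(~0ULL) / uint32_t(0xFF)) * uint32_t(0xAB) == 0xABABABABu, "");
static_assert((uint64_t(~0ULL) / uint64_t(0xFF)) * uint64_t(0x42) ==
                  0x4242424242424242ull,
              "");
```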
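Similarly, a note on the `to_big_endian` conversions in the `three_way_cmp` specializations above: memcmp order is lexicographic byte order, which coincides with numeric order only when the integers are read as big-endian. A small worked example (not part of the patch, assumes a little-endian host where `to_big_endian` byte-swaps):

```cpp
// Bytes in memory: a = {0x01, 0x02}, b = {0x02, 0x01}. Read natively as
// uint16_t on a little-endian host: a = 0x0201 > b = 0x0102, yet
// memcmp(a, b, 2) < 0 because the first byte 0x01 < 0x02.
// Byte-swapping first restores memcmp order:
static_assert(__builtin_bswap16(0x0201) < __builtin_bswap16(0x0102), "");
```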
diff --git a/libc/src/string/memory_utils/backend_x86.h b/libc/src/string/memory_utils/backend_x86.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/backend_x86.h
@@ -0,0 +1,191 @@
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
+
+#if defined(LLVM_LIBC_ARCH_X86)
+#include "src/__support/CPP/TypeTraits.h" // ConditionalType, EnableIfType
+#include "src/string/memory_utils/backend_scalar.h"
+
+#ifdef __SSE2__
+#include <immintrin.h>
+#endif // __SSE2__
+
+#if defined(__SSE2__)
+#define HAS_M128 true
+#else
+#define HAS_M128 false
+#endif
+
+#if defined(__AVX2__)
+#define HAS_M256 true
+#else
+#define HAS_M256 false
+#endif
+
+#if defined(__AVX512F__) and defined(__AVX512BW__)
+#define HAS_M512 true
+#else
+#define HAS_M512 false
+#endif
+
+namespace __llvm_libc {
+struct X86Backend : public Scalar64BitBackend {
+  static constexpr bool IS_BACKEND_TYPE = true;
+
+  // Scalar types delegate to the base class; vector types are provided by the
+  // explicit specializations below, so the generic overload is only reached
+  // for the unsupported non-temporal unaligned combination.
+  template <typename T, Temporality TS, Aligned AS,
+            cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
+  static inline T load(const T *src) {
+    return Scalar64BitBackend::template load<T, TS, AS>(src);
+  }
+
+  template <
+      typename T, Temporality TS, Aligned AS,
+      cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
+  static inline T load(const T *src) {
+    DeferredStaticAssert("X86 non-temporal load needs aligned access");
+    return {};
+  }
+
+  template <typename T, Temporality TS, Aligned AS,
+            cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
+  static inline void store(T *dst, T value) {
+    Scalar64BitBackend::template store<T, TS, AS>(dst, value);
+  }
+
+  template <
+      typename T, Temporality TS, Aligned AS,
+      cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
+  static inline void store(T *dst, T value) {
+    DeferredStaticAssert("X86 non-temporal store needs aligned access");
+  }
+
+  template <typename T> static inline T splat(ubyte value) {
+    return Scalar64BitBackend::template splat<T>(value);
+  }
+
+  template <typename T> static inline uint64_t not_equals(T v1, T v2) {
+    return Scalar64BitBackend::template not_equals<T>(v1, v2);
+  }
+
+  template <typename T,
+            cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
+  static inline int32_t three_way_cmp(T v1, T v2) {
+    return Scalar64BitBackend::template three_way_cmp<T>(v1, v2);
+  }
+
+  template <typename T,
+            cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>,
+                              bool> = true>
+  static inline int32_t three_way_cmp(T v1, T v2) {
+    return char_diff(reinterpret_cast<const char *>(&v1),
+                     reinterpret_cast<const char *>(&v2), not_equals(v1, v2));
+  }
+
+  template <size_t Size>
+  using getNextType = cpp::ConditionalType<
+      (HAS_M512 && Size >= 64), __m512i,
+      cpp::ConditionalType<
+          (HAS_M256 && Size >= 32), __m256i,
+          cpp::ConditionalType<(HAS_M128 && Size >= 16), __m128i,
+                               Scalar64BitBackend::getNextType<Size>>>>;
+
+private:
+  static inline int32_t char_diff(const char *a, const char *b,
+                                  uint64_t mask) {
+    const size_t diff_index = __builtin_ctzll(mask);
+    const int16_t ca = (unsigned char)a[diff_index];
+    const int16_t cb = (unsigned char)b[diff_index];
+    return ca - cb;
+  }
+};
+
+static inline void repmovsb(void *dst, const void *src, size_t runtime_size) {
+  asm volatile("rep movsb"
+               : "+D"(dst), "+S"(src), "+c"(runtime_size)
+               :
+               : "memory");
+}
+
+#define SPECIALIZE_LOAD(T, OS, AS, INTRINSIC)                                  \
+  template <> inline T X86Backend::load<T, OS, AS>(const T *src) {            \
+    return INTRINSIC(const_cast<T *>(src));                                   \
+  }
+#define SPECIALIZE_STORE(T, OS, AS, INTRINSIC)                                 \
+  template <> inline void X86Backend::store<T, OS, AS>(T * dst, T value) {    \
+    INTRINSIC(dst, value);                                                     \
+  }
+
+#if HAS_M128
+SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_load_si128)
+SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_loadu_si128)
+SPECIALIZE_LOAD(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
+                _mm_stream_load_si128)
+SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_store_si128)
+SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_storeu_si128)
+SPECIALIZE_STORE(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
+                 _mm_stream_si128)
+template <> inline __m128i X86Backend::splat<__m128i>(ubyte value) {
+  return _mm_set1_epi8(__builtin_bit_cast(char, value));
+}
+template <>
+inline uint64_t X86Backend::not_equals<__m128i>(__m128i a, __m128i b) {
+  using T = char __attribute__((__vector_size__(16)));
+  return _mm_movemask_epi8(T(a) != T(b));
+}
+#endif // HAS_M128
+
+#if HAS_M256
+SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::YES, _mm256_load_si256)
+SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::NO, _mm256_loadu_si256)
+SPECIALIZE_LOAD(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
+                _mm256_stream_load_si256)
+SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::YES,
+                 _mm256_store_si256)
+SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::NO,
+                 _mm256_storeu_si256)
+SPECIALIZE_STORE(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
+                 _mm256_stream_si256)
+template <> inline __m256i X86Backend::splat<__m256i>(ubyte value) {
+  return _mm256_set1_epi8(__builtin_bit_cast(char, value));
+}
+template <>
+inline uint64_t X86Backend::not_equals<__m256i>(__m256i a, __m256i b) {
+  using T = char __attribute__((__vector_size__(32)));
+  return _mm256_movemask_epi8(T(a) != T(b));
+}
+#endif // HAS_M256
+
+#if HAS_M512
+SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::YES, _mm512_load_si512)
+SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::NO, _mm512_loadu_si512)
+SPECIALIZE_LOAD(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
+                _mm512_stream_load_si512)
+SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::YES,
+                 _mm512_store_si512)
+SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::NO,
+                 _mm512_storeu_si512)
+SPECIALIZE_STORE(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
+                 _mm512_stream_si512)
+template <> inline __m512i X86Backend::splat<__m512i>(ubyte value) {
+  return _mm512_broadcastb_epi8(_mm_set1_epi8(__builtin_bit_cast(char, value)));
+}
+template <>
+inline uint64_t X86Backend::not_equals<__m512i>(__m512i a, __m512i b) {
+  return _mm512_cmpneq_epi8_mask(a, b);
+}
+#endif // HAS_M512
+
+namespace x86 {
+using _1 = SizedOp<X86Backend, 1>;
+using _2 = SizedOp<X86Backend, 2>;
+using _3 = SizedOp<X86Backend, 3>;
+using _4 = SizedOp<X86Backend, 4>;
+using _8 = SizedOp<X86Backend, 8>;
+using _16 = SizedOp<X86Backend, 16>;
+using _32 = SizedOp<X86Backend, 32>;
+using _64 = SizedOp<X86Backend, 64>;
+using _128 = SizedOp<X86Backend, 128>;
+} // namespace x86
+
+} // namespace __llvm_libc
+
+#endif // defined(LLVM_LIBC_ARCH_X86)
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
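To make the SPECIALIZE_LOAD machinery above concrete, the first invocation under HAS_M128 expands (modulo formatting) to a full specialization of `X86Backend::load`; this is illustrative only and assumes the `<T, OS, AS>` specialization arguments as written in the macro above.

```cpp
// Expansion of:
//   SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_load_si128)
template <>
inline __m128i X86Backend::load<__m128i, Temporality::TEMPORAL, Aligned::YES>(
    const __m128i *src) {
  // const_cast is needed because some SSE load intrinsics take a non-const
  // pointer even though they only read from it.
  return _mm_load_si128(const_cast<__m128i *>(src));
}
```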
diff --git a/libc/src/string/memory_utils/backends.h b/libc/src/string/memory_utils/backends.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/backends.h
@@ -0,0 +1,57 @@
+//===-- Elementary operations to compose memory primitives ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the concept of a Backend.
+// It constitutes the lowest level of the framework and is akin to instruction
+// selection. It defines how to implement aligned/unaligned,
+// temporal/non-temporal loads and stores for a particular architecture as well
+// as efficient ways to fill and compare types.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
+
+#include "src/string/memory_utils/address.h"  // ubyte, Temporality, Aligned
+#include "src/string/memory_utils/sized_op.h" // SizedOp
+#include <stddef.h>                           // size_t
+#include <stdint.h>                           // uint##_t
+
+namespace __llvm_libc {
+
+// Backends must implement the following interface.
+struct NoBackend {
+  static constexpr bool IS_BACKEND_TYPE = true;
+
+  // Loads a T from `src`.
+  template <typename T, Temporality TS, Aligned AS>
+  static T load(const T *src);
+
+  // Stores `value` at `dst`.
+  template <typename T, Temporality TS, Aligned AS>
+  static void store(T *dst, T value);
+
+  // Returns a T filled with `value`.
+  template <typename T> static T splat(ubyte value);
+
+  // Returns zero iff v1 == v2.
+  template <typename T> static uint64_t not_equals(T v1, T v2);
+
+  // Returns zero iff v1 == v2, a negative value if v1 < v2 and a positive
+  // value otherwise.
+  template <typename T> static int32_t three_way_cmp(T v1, T v2);
+
+  // Returns the type that should be used to consume Size bytes.
+  template <size_t Size> using getNextType = void;
+};
+
+} // namespace __llvm_libc
+
+// We inline all backend implementations here to simplify the build system.
+// Each file needs to be guarded with the appropriate LLVM_LIBC_ARCH_XXX ifdef.
+#include "src/string/memory_utils/backend_aarch64.h"
+#include "src/string/memory_utils/backend_scalar.h"
+#include "src/string/memory_utils/backend_x86.h"
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
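As an illustration of the `getNextType` part of the interface (using the scalar backend defined earlier, not part of the patch): a backend reports the widest of its types that fits in the requested size, and SizedOp below consumes the remainder recursively.

```cpp
static_assert(cpp::IsSameV<Scalar64BitBackend::getNextType<1>, uint8_t>, "");
static_assert(cpp::IsSameV<Scalar64BitBackend::getNextType<3>, uint16_t>, "");
static_assert(cpp::IsSameV<Scalar64BitBackend::getNextType<8>, uint64_t>, "");
static_assert(cpp::IsSameV<Scalar64BitBackend::getNextType<100>, uint64_t>, "");
```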
diff --git a/libc/src/string/memory_utils/sized_op.h b/libc/src/string/memory_utils/sized_op.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/sized_op.h
@@ -0,0 +1,138 @@
+//===-- Sized Operations --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SizedOp struct that serves as the middle end of the
+// framework. It implements sized memory operations by breaking them down into
+// operations on simpler types whose availability is described by the Backend.
+// It also provides a way to load and store sized chunks of memory (necessary
+// for the move operation). SizedOp instances are the building blocks of higher
+// order algorithms like HeadTail, Align or Loop.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
+
+#include <stddef.h> // size_t
+
+#ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
+#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE                                    \
+  __has_builtin(__builtin_memcpy_inline)
+#endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
+
+namespace __llvm_libc {
+
+template <typename Backend, size_t Size> struct SizedOp {
+  static constexpr size_t SIZE = Size;
+
+private:
+  static_assert(Backend::IS_BACKEND_TYPE);
+  static_assert(Size > 0);
+  using type = typename Backend::template getNextType<Size>;
+  static constexpr size_t TYPE_SIZE = sizeof(type);
+  static_assert(Size >= TYPE_SIZE);
+  static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE;
+  using NextBlock = SizedOp<Backend, NEXT_SIZE>;
+
+  template <typename T, typename AddrT> static constexpr Aligned isAligned() {
+    static_assert(IsAddressType<AddrT>::Value);
+    return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES
+                                                                 : Aligned::NO;
+  }
+
+  template <typename SrcAddrT> static inline auto loadType(SrcAddrT src) {
+    static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
+    constexpr auto AS = isAligned<type, SrcAddrT>();
+    constexpr auto TS = SrcAddrT::TEMPORALITY;
+    return Backend::template load<type, TS, AS>(as<type>(src));
+  }
+
+  template <typename DstAddrT>
+  static inline void storeType(type value, DstAddrT dst) {
+    static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
+    constexpr auto AS = isAligned<type, DstAddrT>();
+    constexpr auto TS = DstAddrT::TEMPORALITY;
+    return Backend::template store<type, TS, AS>(as<type>(dst), value);
+  }
+
+  struct Value {
+    alignas(alignof(type)) ubyte payload[Size];
+  };
+
+public:
+  template <typename DstAddrT, typename SrcAddrT>
+  static inline void copy(DstAddrT dst, SrcAddrT src) {
+    static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
+    static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
+    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE &&
+                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL &&
+                  SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) {
+      // Delegate the optimized copy to the compiler.
+      __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size);
+      return;
+    }
+    storeType(loadType(src), dst);
+    if constexpr (NEXT_SIZE)
+      NextBlock::copy(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
+  }
+
+  template <typename DstAddrT, typename SrcAddrT>
+  static inline void move(DstAddrT dst, SrcAddrT src) {
+    const auto payload = loadType(src);
+    if constexpr (NEXT_SIZE)
+      NextBlock::move(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
+    storeType(payload, dst);
+  }
+
+  template <typename DstAddrT>
+  static inline void set(DstAddrT dst, ubyte value) {
+    storeType(Backend::template splat<type>(value), dst);
+    if constexpr (NEXT_SIZE)
+      NextBlock::set(offsetAddr<TYPE_SIZE>(dst), value);
+  }
+
+  template <typename SrcAddrT1, typename SrcAddrT2>
+  static inline uint64_t different(SrcAddrT1 src1, SrcAddrT2 src2) {
+    const uint64_t current =
+        Backend::template not_equals<type>(loadType(src1), loadType(src2));
+    if constexpr (NEXT_SIZE) {
+      return current | (NextBlock::different(offsetAddr<TYPE_SIZE>(src1),
+                                             offsetAddr<TYPE_SIZE>(src2)));
+    } else {
+      return current;
+    }
+  }
+
+  template <typename SrcAddrT1, typename SrcAddrT2>
+  static inline int32_t three_way_cmp(SrcAddrT1 src1, SrcAddrT2 src2) {
+    const auto a = loadType(src1);
+    const auto b = loadType(src2);
+    if (Backend::template not_equals<type>(a, b))
+      return Backend::template three_way_cmp<type>(a, b);
+    if constexpr (NEXT_SIZE)
+      return NextBlock::three_way_cmp(offsetAddr<TYPE_SIZE>(src1),
+                                      offsetAddr<TYPE_SIZE>(src2));
+    return 0;
+  }
+
+  template <typename SrcAddrT> static Value load(SrcAddrT src) {
+    Value output;
+    copy(DstAddr<alignof(type)>(output.payload), src);
+    return output;
+  }
+
+  template <typename DstAddrT> static void store(DstAddrT dst, Value value) {
+    copy(dst, SrcAddr<alignof(type)>(value.payload));
+  }
+};
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
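To see how the SizedOp recursion bottoms out, here is the decomposition of a 7-byte operation with the scalar backend; this walkthrough is illustrative only and is not part of the patch.

```cpp
// SizedOp<Scalar64BitBackend, 7>: type = uint32_t, TYPE_SIZE = 4, NEXT_SIZE = 3
// SizedOp<Scalar64BitBackend, 3>: type = uint16_t, TYPE_SIZE = 2, NEXT_SIZE = 1
// SizedOp<Scalar64BitBackend, 1>: type = uint8_t,  TYPE_SIZE = 1, NEXT_SIZE = 0
// So copy() issues a 4-byte, then a 2-byte, then a 1-byte load/store pair,
// unless the whole copy is delegated to __builtin_memcpy_inline.
using Copy7 = SizedOp<Scalar64BitBackend, 7>;
```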
diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt
--- a/libc/test/src/string/memory_utils/CMakeLists.txt
+++ b/libc/test/src/string/memory_utils/CMakeLists.txt
@@ -4,6 +4,7 @@
     libc_string_unittests
   SRCS
     address_test.cpp
+    backend_test.cpp
     elements_test.cpp
     memory_access_test.cpp
     utils_test.cpp
diff --git a/libc/test/src/string/memory_utils/backend_test.cpp b/libc/test/src/string/memory_utils/backend_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/string/memory_utils/backend_test.cpp
@@ -0,0 +1,202 @@
+#include "src/__support/CPP/Array.h"
+#include "src/__support/CPP/ArrayRef.h"
+#include "src/__support/CPP/Bit.h"
+#include "src/__support/architectures.h"
+#include "src/string/memory_utils/backends.h"
+#include "utils/UnitTest/Test.h"
+#include <string.h> // memcmp
+
+namespace __llvm_libc {
+
+template <size_t Size> using Buffer = cpp::Array<char, Size>;
+
+static char GetRandomChar() {
+  static constexpr const uint64_t a = 1103515245;
+  static constexpr const uint64_t c = 12345;
+  static constexpr const uint64_t m = 1ULL << 31;
+  static uint64_t seed = 123456789;
+  seed = (a * seed + c) % m;
+  return seed;
+}
+
+static void Randomize(cpp::MutableArrayRef<char> buffer) {
+  for (auto &current : buffer)
+    current = GetRandomChar();
+}
+
+template <size_t Size> static Buffer<Size> GetRandomBuffer() {
+  Buffer<Size> buffer;
+  Randomize(buffer);
+  return buffer;
+}
+
+template <typename Backend, size_t Size> struct TestBackend {
+  static_assert(Backend::IS_BACKEND_TYPE);
+  using BufferT = Buffer<Size>;
+  using T = typename Backend::template getNextType<Size>;
+  static_assert(sizeof(T) == Size);
+  static constexpr size_t SIZE = Size;
+
+  // Returns the splatted type as a BufferT.
+  static BufferT splat(ubyte value) {
+    return bit_cast<BufferT>(Backend::template splat<T>(value));
+  }
+
+  // Turns BufferT into T and delegates to Backend::not_equals.
+  static uint64_t not_equals(const BufferT &v1, const BufferT &v2) {
+    return Backend::template not_equals<T>(bit_cast<T>(v1), bit_cast<T>(v2));
+  }
+
+  // Turns BufferT into T and delegates to Backend::three_way_cmp.
+  static int32_t three_way_cmp(const BufferT &v1, const BufferT &v2) {
+    return Backend::template three_way_cmp<T>(bit_cast<T>(v1), bit_cast<T>(v2));
+  }
+};
+
+using BackendTypes = testing::TypeList< //
+#if defined(LLVM_LIBC_ARCH_AARCH64)
+    TestBackend<Scalar64BitBackend, 1>, //
+    TestBackend<Scalar64BitBackend, 2>, //
+    TestBackend<Scalar64BitBackend, 4>, //
+    TestBackend<Scalar64BitBackend, 8>, //
+#endif
+#if defined(LLVM_LIBC_ARCH_X86) //
+    TestBackend<X86Backend, 1>, //
+    TestBackend<X86Backend, 2>, //
+    TestBackend<X86Backend, 4>, //
+    TestBackend<X86Backend, 8>, //
+#if HAS_M128
+    TestBackend<X86Backend, 16>, //
+#endif
+#if HAS_M256
+    TestBackend<X86Backend, 32>, //
+#endif
+#if HAS_M512
+    TestBackend<X86Backend, 64>, //
+#endif
+#endif // defined(LLVM_LIBC_ARCH_X86)
+    TestBackend<Scalar64BitBackend, 1>, //
+    TestBackend<Scalar64BitBackend, 2>, //
+    TestBackend<Scalar64BitBackend, 4>, //
+    TestBackend<Scalar64BitBackend, 8>  //
+    >;
+
+TYPED_TEST(LlvmLibcMemoryBackend, splat, BackendTypes) {
+  for (auto value : cpp::Array<uint8_t, 3>{0u, 1u, 255u}) {
+    alignas(64) const auto stored = ParamType::splat(bit_cast<ubyte>(value));
+    for (size_t i = 0; i < ParamType::SIZE; ++i)
+      EXPECT_EQ(bit_cast<uint8_t>(stored[i]), value);
+  }
+}
+
+TYPED_TEST(LlvmLibcMemoryBackend, not_equals, BackendTypes) {
+  alignas(64) const auto a = GetRandomBuffer<ParamType::SIZE>();
+  EXPECT_EQ(ParamType::not_equals(a, a), 0UL);
+  for (size_t i = 0; i < a.size(); ++i) {
+    alignas(64) auto b = a;
+    ++b[i];
+    EXPECT_NE(ParamType::not_equals(a, b), 0UL);
+    EXPECT_NE(ParamType::not_equals(b, a), 0UL);
+  }
+}
+
+TYPED_TEST(LlvmLibcMemoryBackend, three_way_cmp, BackendTypes) {
+  alignas(64) const auto a = GetRandomBuffer<ParamType::SIZE>();
+  EXPECT_EQ(ParamType::three_way_cmp(a, a), 0);
+  for (size_t i = 0; i < a.size(); ++i) {
+    alignas(64) auto b = a;
+    ++b[i];
+    const auto cmp = memcmp(&a, &b, sizeof(a));
+    ASSERT_NE(cmp, 0);
+    if (cmp > 0) {
+      EXPECT_GT(ParamType::three_way_cmp(a, b), 0);
+      EXPECT_LT(ParamType::three_way_cmp(b, a), 0);
+    } else {
+      EXPECT_LT(ParamType::three_way_cmp(a, b), 0);
+      EXPECT_GT(ParamType::three_way_cmp(b, a), 0);
+    }
+  }
+}
+
+template <typename Backend, size_t Size, Temporality TS, Aligned AS>
+struct LdStBackend {
+  static_assert(Backend::IS_BACKEND_TYPE);
+  using BufferT = Buffer<Size>;
+  using T = typename Backend::template getNextType<Size>;
+  static_assert(sizeof(T) == Size);
+  static constexpr size_t SIZE = Size;
+
+  static BufferT load(const BufferT &ref) {
+    const auto *ptr = bit_cast<const T *>(ref.data());
+    const T value = Backend::template load<T, TS, AS>(ptr);
+    return bit_cast<BufferT>(value);
+  }
+
+  static void store(BufferT &ref, const BufferT value) {
+    auto *ptr = bit_cast<T *>(ref.data());
+    Backend::template store<T, TS, AS>(ptr, bit_cast<T>(value));
+  }
+};
+
+using LoadStoreBackends = testing::TypeList< //
+#if defined(LLVM_LIBC_ARCH_X86) //
+    LdStBackend<X86Backend, 1, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<X86Backend, 1, Temporality::TEMPORAL, Aligned::NO>,  //
+    LdStBackend<X86Backend, 2, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<X86Backend, 2, Temporality::TEMPORAL, Aligned::NO>,  //
+    LdStBackend<X86Backend, 4, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<X86Backend, 4, Temporality::TEMPORAL, Aligned::NO>,  //
+    LdStBackend<X86Backend, 8, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<X86Backend, 8, Temporality::TEMPORAL, Aligned::NO>,  //
+#if HAS_M128
+    LdStBackend<X86Backend, 16, Temporality::TEMPORAL, Aligned::YES>,     //
+    LdStBackend<X86Backend, 16, Temporality::TEMPORAL, Aligned::NO>,      //
+    LdStBackend<X86Backend, 16, Temporality::NON_TEMPORAL, Aligned::YES>, //
+#endif
+#if HAS_M256
+    LdStBackend<X86Backend, 32, Temporality::TEMPORAL, Aligned::YES>,     //
+    LdStBackend<X86Backend, 32, Temporality::TEMPORAL, Aligned::NO>,      //
+    LdStBackend<X86Backend, 32, Temporality::NON_TEMPORAL, Aligned::YES>, //
+#endif
+#if HAS_M512
+    LdStBackend<X86Backend, 64, Temporality::TEMPORAL, Aligned::YES>,     //
+    LdStBackend<X86Backend, 64, Temporality::TEMPORAL, Aligned::NO>,      //
+    LdStBackend<X86Backend, 64, Temporality::NON_TEMPORAL, Aligned::YES>, //
+#endif
+#endif // defined(LLVM_LIBC_ARCH_X86)
+#if defined(LLVM_LIBC_ARCH_AARCH64)
+    LdStBackend<Scalar64BitBackend, 1, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<Scalar64BitBackend, 1, Temporality::TEMPORAL, Aligned::NO>,  //
+    LdStBackend<Scalar64BitBackend, 2, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<Scalar64BitBackend, 2, Temporality::TEMPORAL, Aligned::NO>,  //
+    LdStBackend<Scalar64BitBackend, 4, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<Scalar64BitBackend, 4, Temporality::TEMPORAL, Aligned::NO>,  //
+    LdStBackend<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::NO>,  //
+#endif
+    LdStBackend<Scalar64BitBackend, 1, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<Scalar64BitBackend, 1, Temporality::TEMPORAL, Aligned::NO>,  //
+    LdStBackend<Scalar64BitBackend, 2, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<Scalar64BitBackend, 2, Temporality::TEMPORAL, Aligned::NO>,  //
+    LdStBackend<Scalar64BitBackend, 4, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<Scalar64BitBackend, 4, Temporality::TEMPORAL, Aligned::NO>,  //
+    LdStBackend<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::YES>, //
+    LdStBackend<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::NO>   //
+    >;
+
+TYPED_TEST(LlvmLibcMemoryBackend, load, LoadStoreBackends) {
+  alignas(64) const auto expected = GetRandomBuffer<ParamType::SIZE>();
+  const auto loaded = ParamType::load(expected);
+  for (size_t i = 0; i < ParamType::SIZE; ++i)
+    EXPECT_EQ(loaded[i], expected[i]);
+}
+
+TYPED_TEST(LlvmLibcMemoryBackend, store, LoadStoreBackends) {
+  alignas(64) const auto expected = GetRandomBuffer<ParamType::SIZE>();
+  alignas(64) typename ParamType::BufferT stored;
+  ParamType::store(stored, expected);
+  for (size_t i = 0; i < ParamType::SIZE; ++i)
+    EXPECT_EQ(stored[i], expected[i]);
+}
+
+} // namespace __llvm_libc