diff --git a/libc/src/__support/CPP/TypeTraits.h b/libc/src/__support/CPP/TypeTraits.h
--- a/libc/src/__support/CPP/TypeTraits.h
+++ b/libc/src/__support/CPP/TypeTraits.h
@@ -15,7 +15,9 @@
 namespace cpp {

 template <bool B, typename T> struct EnableIf;
-template <typename T> struct EnableIf<true, T> { typedef T Type; };
+template <typename T> struct EnableIf<true, T> {
+  typedef T Type;
+};

 template <bool B, typename T>
 using EnableIfType = typename EnableIf<B, T>::Type;
@@ -28,7 +30,9 @@
   static constexpr bool Value = false;
 };

-template <typename T> struct TypeIdentity { typedef T Type; };
+template <typename T> struct TypeIdentity {
+  typedef T Type;
+};

 template <typename T1, typename T2> struct IsSame : public FalseValue {};
 template <typename T> struct IsSame<T, T> : public TrueValue {};
@@ -59,6 +63,10 @@
       ;
 };

+template <typename Type> struct IsEnum {
+  static constexpr bool Value = __is_enum(Type);
+};
+
 template <typename T> struct IsPointerTypeNoCV : public FalseValue {};
 template <typename T> struct IsPointerTypeNoCV<T *> : public TrueValue {};
 template <typename T> struct IsPointerType {
@@ -77,6 +85,16 @@
       IsIntegral<TypeNoCV>::Value || IsFloatingPointType<TypeNoCV>::Value;
 };

+// Compile time type selection.
+template <bool B, typename TrueT, typename FalseT> struct Conditional {
+  using type = TrueT;
+};
+template <typename TrueT, typename FalseT>
+struct Conditional<false, TrueT, FalseT> {
+  using type = FalseT;
+};
+template <bool B, typename TrueT, typename FalseT>
+using ConditionalType = typename Conditional<B, TrueT, FalseT>::type;
+
 } // namespace cpp
 } // namespace __llvm_libc
diff --git a/libc/src/__support/common.h b/libc/src/__support/common.h
--- a/libc/src/__support/common.h
+++ b/libc/src/__support/common.h
@@ -15,6 +15,18 @@
 #define unlikely(x) __builtin_expect(x, 0)
 #define UNUSED __attribute__((unused))

+#ifdef __has_attribute
+#define LIBC_HAVE_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define LIBC_HAVE_ATTRIBUTE(x) 0
+#endif
+
+#if LIBC_HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__))
+#define LIBC_ATTRIBUTE_FUNC_ALIGN(bytes) __attribute__((aligned(bytes)))
+#else
+#define LIBC_ATTRIBUTE_FUNC_ALIGN(bytes)
+#endif
+
 #ifndef LLVM_LIBC_FUNCTION_ATTR
 #define LLVM_LIBC_FUNCTION_ATTR
 #endif
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -59,8 +59,7 @@
 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
   Parser(const char *__restrict new_str, internal::ArgList &args)
       : str(new_str), args_cur(args), args_start(args) {
-    inline_memset(reinterpret_cast<char *>(desc_arr), 0,
-                  DESC_ARR_LEN * sizeof(TypeDesc));
+    inline_memset(desc_arr, ubyte::ZERO, sizeof(desc_arr));
   }
 #else
   Parser(const char *__restrict new_str, internal::ArgList &args)
diff --git a/libc/src/stdio/printf_core/writer.cpp b/libc/src/stdio/printf_core/writer.cpp
--- a/libc/src/stdio/printf_core/writer.cpp
+++ b/libc/src/stdio/printf_core/writer.cpp
@@ -25,7 +25,7 @@
 void Writer::write_chars(char new_char, size_t length) {
   constexpr size_t BUFF_SIZE = 8;
   char buff[BUFF_SIZE];
-  inline_memset(buff, new_char, BUFF_SIZE);
+  inline_memset(buff, static_cast<ubyte>(new_char), BUFF_SIZE);
   while (length > BUFF_SIZE) {
     write(buff, BUFF_SIZE);
     length -= BUFF_SIZE;
diff --git a/libc/src/string/bcmp.cpp b/libc/src/string/bcmp.cpp
--- a/libc/src/string/bcmp.cpp
+++ b/libc/src/string/bcmp.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//

 #include "src/string/bcmp.h"
+
 #include "src/__support/common.h"
 #include "src/string/memory_utils/bcmp_implementations.h"

@@ -14,8 +15,7 @@

 LLVM_LIBC_FUNCTION(int, bcmp,
                    (const void *lhs, const void *rhs, size_t count)) {
-  return inline_bcmp(static_cast<const char *>(lhs),
-                     static_cast<const char *>(rhs), count);
+  return inline_bcmp(lhs, rhs, count);
 }

 } // namespace __llvm_libc
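For reference, a minimal usage sketch of the new cpp::ConditionalType and cpp::IsEnum utilities (illustration only, not part of the patch):

    #include "src/__support/CPP/TypeTraits.h"

    using namespace __llvm_libc;

    // ConditionalType selects a type at compile time (like std::conditional_t).
    static_assert(cpp::IsSame<cpp::ConditionalType<true, int, long>, int>::Value);
    static_assert(cpp::IsSame<cpp::ConditionalType<false, int, long>, long>::Value);

    // IsEnum wraps the __is_enum compiler builtin.
    enum class Color : unsigned char { RED };
    static_assert(cpp::IsEnum<Color>::Value);
    static_assert(!cpp::IsEnum<int>::Value);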
diff --git a/libc/src/string/bzero.cpp b/libc/src/string/bzero.cpp
--- a/libc/src/string/bzero.cpp
+++ b/libc/src/string/bzero.cpp
@@ -7,13 +7,14 @@
 //===----------------------------------------------------------------------===//

 #include "src/string/bzero.h"
+
 #include "src/__support/common.h"
 #include "src/string/memory_utils/memset_implementations.h"

 namespace __llvm_libc {

 LLVM_LIBC_FUNCTION(void, bzero, (void *ptr, size_t count)) {
-  inline_memset(reinterpret_cast<char *>(ptr), 0, count);
+  inline_memset(ptr, ubyte::ZERO, count);
 }

 } // namespace __llvm_libc
diff --git a/libc/src/string/memcmp.cpp b/libc/src/string/memcmp.cpp
--- a/libc/src/string/memcmp.cpp
+++ b/libc/src/string/memcmp.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//

 #include "src/string/memcmp.h"
+
+#include "src/__support/common.h"
 #include "src/string/memory_utils/memcmp_implementations.h"

 #include <stddef.h> // size_t

@@ -15,8 +17,7 @@

 LLVM_LIBC_FUNCTION(int, memcmp,
                    (const void *lhs, const void *rhs, size_t count)) {
-  return inline_memcmp(static_cast<const char *>(lhs),
-                       static_cast<const char *>(rhs), count);
+  return inline_memcmp(lhs, rhs, count);
 }

 } // namespace __llvm_libc
diff --git a/libc/src/string/memcpy.cpp b/libc/src/string/memcpy.cpp
--- a/libc/src/string/memcpy.cpp
+++ b/libc/src/string/memcpy.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//

 #include "src/string/memcpy.h"
+
 #include "src/__support/common.h"
 #include "src/string/memory_utils/memcpy_implementations.h"

@@ -15,8 +16,7 @@

 LLVM_LIBC_FUNCTION(void *, memcpy,
                    (void *__restrict dst, const void *__restrict src,
                     size_t size)) {
-  inline_memcpy(reinterpret_cast<char *>(dst),
-                reinterpret_cast<const char *>(src), size);
+  inline_memcpy(dst, src, size);
   return dst;
 }
diff --git a/libc/src/string/memmove.cpp b/libc/src/string/memmove.cpp
--- a/libc/src/string/memmove.cpp
+++ b/libc/src/string/memmove.cpp
@@ -9,42 +9,42 @@
 #include "src/string/memmove.h"

 #include "src/__support/common.h"
-#include "src/__support/integer_operations.h"
-#include "src/string/memory_utils/elements.h"
+#include "src/string/memory_utils/algorithm.h"
+#include "src/string/memory_utils/backends.h"
 #include <stddef.h> // size_t, ptrdiff_t

 namespace __llvm_libc {

-static inline void inline_memmove(char *dst, const char *src, size_t count) {
-  using namespace __llvm_libc::scalar;
+static inline void inline_memmove(DstAddr<1> dst, SrcAddr<1> src,
+                                  size_t count) {
+  using namespace scalar;
   if (count == 0)
     return;
   if (count == 1)
-    return move<_1>(dst, src);
+    return _1::move(dst, src);
   if (count <= 4)
-    return move<HeadTail<_2>>(dst, src, count);
+    return HeadTail<_2>::move(dst, src, count);
   if (count <= 8)
-    return move<HeadTail<_4>>(dst, src, count);
+    return HeadTail<_4>::move(dst, src, count);
   if (count <= 16)
-    return move<HeadTail<_8>>(dst, src, count);
+    return HeadTail<_8>::move(dst, src, count);
   if (count <= 32)
-    return move<HeadTail<_16>>(dst, src, count);
+    return HeadTail<_16>::move(dst, src, count);
   if (count <= 64)
-    return move<HeadTail<_32>>(dst, src, count);
+    return HeadTail<_32>::move(dst, src, count);
   if (count <= 128)
-    return move<HeadTail<_64>>(dst, src, count);
+    return HeadTail<_64>::move(dst, src, count);
   using AlignedMoveLoop = Align<_16, Arg::Src>::Then<Loop<_64>>;
-  if (dst < src)
-    return move<AlignedMoveLoop>(dst, src, count);
-  else if (dst > src)
-    return move_backward<AlignedMoveLoop>(dst, src, count);
+  if (dst.ptr() < src.ptr())
+    return AlignedMoveLoop::move(dst, src, count);
+  else if (dst.ptr() > src.ptr())
+    return AlignedMoveLoop::move_backward(dst, src, count);
 }

 LLVM_LIBC_FUNCTION(void *, memmove,
                    (void *dst, const void *src, size_t count)) {
-  inline_memmove(reinterpret_cast<char *>(dst),
-                 reinterpret_cast<const char *>(src), count);
+  inline_memmove(dst, src, count);
   return dst;
 }
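The pointer comparison in inline_memmove selects the copy direction so overlapping buffers are handled correctly; a plain byte-by-byte sketch of the same reasoning (illustrative, not the code above):

    #include <stddef.h>

    // When dst < src, copying forward reads each byte before it can be
    // overwritten; when dst > src, the loop must run backward instead.
    static void byte_memmove(char *dst, const char *src, size_t count) {
      if (dst < src)
        for (size_t i = 0; i < count; ++i) // forward
          dst[i] = src[i];
      else if (dst > src)
        for (size_t i = count; i-- > 0;) // backward
          dst[i] = src[i];
    }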
diff --git a/libc/src/string/memory_utils/address.h b/libc/src/string/memory_utils/address.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/address.h
@@ -0,0 +1,127 @@
+//===-- Strongly typed address with alignment and access semantics -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ADDRESS_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ADDRESS_H
+
+#include "src/__support/CPP/TypeTraits.h"  // cpp::ConditionalType
+#include "src/string/memory_utils/utils.h" // is_power2
+#include <stddef.h>                        // size_t
+#include <stdint.h>                        // uint8_t, uint16_t, uint32_t, uint64_t
+
+namespace __llvm_libc {
+
+// Utility to enable static_assert(false) in templates.
+template <bool flag = false>
+static void DeferredStaticAssert(const char *msg) {
+  static_assert(flag, "compilation error");
+}
+
+// A non-coercible type to represent raw data.
+enum class ubyte : unsigned char { ZERO = 0 };
+
+// Address attribute specifying whether the underlying load / store operations
+// are temporal or non-temporal.
+enum class Temporality { TEMPORAL, NON_TEMPORAL };
+
+// Address attribute specifying whether the underlying load / store operations
+// are aligned or unaligned.
+enum class Aligned { NO, YES };
+
+// Address attribute to discriminate between readable and writable addresses.
+enum class Permission { Read, Write };
+
+// Address is semantically equivalent to a pointer but also conveys compile
+// time information that helps with instruction selection (aligned/unaligned,
+// temporal/non-temporal).
+template <size_t Alignment, Permission P, Temporality TS> struct Address {
+  static_assert(is_power2(Alignment));
+  static constexpr size_t ALIGNMENT = Alignment;
+  static constexpr Permission PERMISSION = P;
+  static constexpr Temporality TEMPORALITY = TS;
+  static constexpr bool IS_READ = P == Permission::Read;
+  static constexpr bool IS_WRITE = P == Permission::Write;
+  using PointeeType = cpp::ConditionalType<!IS_WRITE, const ubyte, ubyte>;
+  using VoidType = cpp::ConditionalType<!IS_WRITE, const void, void>;
+
+  Address(VoidType *ptr) : ptr_(reinterpret_cast<PointeeType *>(ptr)) {}
+
+  PointeeType *ptr() const {
+    return reinterpret_cast<PointeeType *>(
+        __builtin_assume_aligned(ptr_, ALIGNMENT));
+  }
+
+  PointeeType *const ptr_;
+};
+
+template <typename T> struct IsAddressType : public cpp::FalseValue {};
+template <size_t Alignment, Permission P, Temporality TS>
+struct IsAddressType<Address<Alignment, P, TS>> : public cpp::TrueValue {};
+
+// Reinterpret the address as a pointer to T.
+// This is not UB since the underlying pointer always refers to a `char` in a
+// buffer of raw data.
+template <typename T, typename AddrT> static T *as(AddrT addr) {
+  static_assert(IsAddressType<AddrT>::Value);
+  return reinterpret_cast<T *>(addr.ptr());
+}
+
+// Offsets the address by a compile time amount; this allows propagating
+// alignment whenever possible.
+template <size_t ByteOffset, typename AddrT>
+static auto offsetAddr(AddrT addr) {
+  static_assert(IsAddressType<AddrT>::Value);
+  auto *const NewPointee = addr.ptr_ + ByteOffset;
+  constexpr size_t ByteOffsetModuloAlignment = ByteOffset % AddrT::ALIGNMENT;
+  if constexpr (ByteOffsetModuloAlignment == 0) {
+    return AddrT(NewPointee);
+  } else if constexpr (!is_power2(ByteOffsetModuloAlignment)) {
+    return Address<1, AddrT::PERMISSION, AddrT::TEMPORALITY>(NewPointee);
+  } else {
+    static_assert(ByteOffsetModuloAlignment < AddrT::ALIGNMENT);
+    static_assert(is_power2(ByteOffsetModuloAlignment));
+    return Address<ByteOffsetModuloAlignment, AddrT::PERMISSION,
+                   AddrT::TEMPORALITY>(NewPointee);
+  }
+}
+
+// Offsets the address by a runtime amount but assuming that the resulting
+// address will be Alignment aligned.
+template <size_t Alignment, typename AddrT>
+static auto offsetAddrAssumeAligned(AddrT addr, size_t offset) {
+  static_assert(IsAddressType<AddrT>::Value);
+  return Address<Alignment, AddrT::PERMISSION, AddrT::TEMPORALITY>(addr.ptr_ +
+                                                                   offset);
+}
+
+// Offsets the address by a runtime amount that is assumed to be a multiple of
+// Offset. This allows propagating the address alignment whenever possible.
+template <size_t Offset, typename AddrT>
+static auto offsetAddrMultiplesOf(AddrT addr, ptrdiff_t offset) {
+  static_assert(IsAddressType<AddrT>::Value);
+  constexpr size_t AL = Offset < AddrT::ALIGNMENT ? Offset : AddrT::ALIGNMENT;
+  return offsetAddrAssumeAligned<AL>(addr, offset);
+}
+
+// User friendly aliases for common address types.
+template <size_t Alignment>
+using SrcAddr = Address<Alignment, Permission::Read, Temporality::TEMPORAL>;
+template <size_t Alignment>
+using DstAddr = Address<Alignment, Permission::Write, Temporality::TEMPORAL>;
+template <size_t Alignment>
+using NtSrcAddr =
+    Address<Alignment, Permission::Read, Temporality::NON_TEMPORAL>;
+template <size_t Alignment>
+using NtDstAddr =
+    Address<Alignment, Permission::Write, Temporality::NON_TEMPORAL>;
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ADDRESS_H
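The compile-time alignment propagation above can be exercised with static_asserts; a small sketch (mirroring the address_test.cpp cases at the end of this patch):

    #include "src/string/memory_utils/address.h"

    using namespace __llvm_libc;

    // 8-byte aligned address offset by 8: still 8-byte aligned.
    static_assert(decltype(offsetAddr<8>(SrcAddr<8>(nullptr)))::ALIGNMENT == 8);
    // 16-byte aligned address offset by 4: only 4-byte aligned.
    static_assert(decltype(offsetAddr<4>(SrcAddr<16>(nullptr)))::ALIGNMENT == 4);
    // 4-byte aligned address offset by 6: 2-byte aligned (6 % 4 == 2).
    static_assert(decltype(offsetAddr<6>(SrcAddr<4>(nullptr)))::ALIGNMENT == 2);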
diff --git a/libc/src/string/memory_utils/algorithm.h b/libc/src/string/memory_utils/algorithm.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/algorithm.h
@@ -0,0 +1,420 @@
+//===-- Algorithms to compose sized memory operations --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Higher order primitives that build upon the SizedOp facility.
+// They constitute the basic blocks for composing memory functions.
+// This file defines the following operations:
+// - Skip
+// - Tail
+// - HeadTail
+// - Loop
+// - Align
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
+
+#include "src/string/memory_utils/address.h" // Address
+#include "src/string/memory_utils/utils.h"   // offset_to_next_aligned
+
+#include <stddef.h> // ptrdiff_t
+
+namespace __llvm_libc {
+
+// An operation that allows skipping the specified amount of bytes.
+template <size_t Bytes> struct Skip {
+  template <typename NextT> struct Then {
+    template <typename DstAddrT>
+    static inline void set(DstAddrT dst, ubyte value) {
+      NextT::set(offsetAddr<Bytes>(dst), value);
+    }
+
+    template <typename SrcAddrT1, typename SrcAddrT2>
+    static inline uint64_t different(SrcAddrT1 src1, SrcAddrT2 src2) {
+      return NextT::different(offsetAddr<Bytes>(src1),
+                              offsetAddr<Bytes>(src2));
+    }
+
+    template <typename SrcAddrT1, typename SrcAddrT2>
+    static inline int32_t three_way_cmp(SrcAddrT1 src1, SrcAddrT2 src2) {
+      return NextT::three_way_cmp(offsetAddr<Bytes>(src1),
+                                  offsetAddr<Bytes>(src2));
+    }
+
+    template <typename SrcAddrT1, typename SrcAddrT2>
+    static inline int32_t three_way_cmp(SrcAddrT1 src1, SrcAddrT2 src2,
+                                        size_t runtime_size) {
+      return NextT::three_way_cmp(offsetAddr<Bytes>(src1),
+                                  offsetAddr<Bytes>(src2),
+                                  runtime_size - Bytes);
+    }
+  };
+};
+
+// Compute the address of a tail operation.
+// Because of the runtime size, we lose the alignment information.
+template <size_t Size, typename AddrT>
+static auto tailAddr(AddrT addr, size_t runtime_size) {
+  static_assert(IsAddressType<AddrT>::Value);
+  return offsetAddrAssumeAligned<1>(addr, runtime_size - Size);
+}
+
+// Perform the operation on the last 'Size' bytes of the buffer.
+//
+// e.g. with
+// [1234567812345678123]
+// [__XXXXXXXXXXXXXX___]
+// [________XXXXXXXX___]
+//
+// Precondition: `runtime_size >= Size`.
+template <typename SizedOp> struct Tail {
+  static constexpr size_t SIZE = SizedOp::SIZE;
+
+  template <typename DstAddrT, typename SrcAddrT>
+  static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+    SizedOp::copy(tailAddr<SIZE>(dst, runtime_size),
+                  tailAddr<SIZE>(src, runtime_size));
+  }
+
+  template <typename DstAddrT, typename SrcAddrT>
+  static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+    SizedOp::move(tailAddr<SIZE>(dst, runtime_size),
+                  tailAddr<SIZE>(src, runtime_size));
+  }
+
+  template <typename DstAddrT>
+  static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
+    SizedOp::set(tailAddr<SIZE>(dst, runtime_size), value);
+  }
+
+  template <typename SrcAddrT1, typename SrcAddrT2>
+  static inline uint64_t different(SrcAddrT1 src1, SrcAddrT2 src2,
+                                   size_t runtime_size) {
+    return SizedOp::different(tailAddr<SIZE>(src1, runtime_size),
+                              tailAddr<SIZE>(src2, runtime_size));
+  }
+
+  template <typename SrcAddrT1, typename SrcAddrT2>
+  static inline int32_t three_way_cmp(SrcAddrT1 src1, SrcAddrT2 src2,
+                                      size_t runtime_size) {
+    return SizedOp::three_way_cmp(tailAddr<SIZE>(src1, runtime_size),
+                                  tailAddr<SIZE>(src2, runtime_size));
+  }
+};
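Tail turns a fixed-size operation plus a runtime size into a variable-size one by anchoring the block at the end of the buffer; in plain C terms (sketch, assuming n >= 8):

    #include <stdint.h>
    #include <string.h>

    // What Tail<_8>::copy(dst, src, n) boils down to:
    // copy the 8 bytes that end exactly at dst + n.
    static void tail8_copy(char *dst, const char *src, size_t n) {
      uint64_t chunk;
      memcpy(&chunk, src + n - 8, sizeof(chunk)); // load the last 8 bytes
      memcpy(dst + n - 8, &chunk, sizeof(chunk)); // store the last 8 bytes
    }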
+// Perform the operation on the first and last bytes of the buffer.
+// This is useful for overlapping operations.
+//
+// e.g. with
+// [1234567812345678123]
+// [__XXXXXXXXXXXXXX___]
+// [__XXXXXXXX_________]
+// [________XXXXXXXX___]
+//
+// Precondition: `runtime_size >= Size && runtime_size <= 2 x Size`.
+template <typename SizedOp> struct HeadTail {
+  template <typename DstAddrT, typename SrcAddrT>
+  static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+    SizedOp::copy(dst, src);
+    Tail<SizedOp>::copy(dst, src, runtime_size);
+  }
+
+  template <typename DstAddrT, typename SrcAddrT>
+  static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+    static constexpr size_t SIZE = SizedOp::SIZE;
+    auto head = SizedOp::load(src);
+    auto tail = SizedOp::load(tailAddr<SIZE>(src, runtime_size));
+    SizedOp::store(tailAddr<SIZE>(dst, runtime_size), tail);
+    SizedOp::store(dst, head);
+  }
+
+  template <typename DstAddrT>
+  static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
+    SizedOp::set(dst, value);
+    Tail<SizedOp>::set(dst, value, runtime_size);
+  }
+
+  template <typename SrcAddrT1, typename SrcAddrT2>
+  static inline uint64_t different(SrcAddrT1 src1, SrcAddrT2 src2,
+                                   size_t runtime_size) {
+    if (const auto res = SizedOp::different(src1, src2))
+      return res;
+    return Tail<SizedOp>::different(src1, src2, runtime_size);
+  }
+
+  template <typename SrcAddrT1, typename SrcAddrT2>
+  static inline int32_t three_way_cmp(SrcAddrT1 src1, SrcAddrT2 src2,
+                                      size_t runtime_size) {
+    if (const auto res = SizedOp::three_way_cmp(src1, src2))
+      return res;
+    return Tail<SizedOp>::three_way_cmp(src1, src2, runtime_size);
+  }
+};
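Because head and tail may overlap, a single HeadTail instance covers every size in [Size, 2 x Size] with exactly two fixed-size operations; a plain C sketch for Size == 4 (illustrative only):

    #include <stdint.h>
    #include <string.h>

    // What HeadTail<_4>::copy(dst, src, n) boils down to for 4 <= n <= 8:
    // the blocks overlap in the middle when n < 8 and meet exactly at n == 8.
    static void headtail4_copy(char *dst, const char *src, size_t n) {
      uint32_t head, tail;
      memcpy(&head, src, sizeof(head));         // first 4 bytes
      memcpy(&tail, src + n - 4, sizeof(tail)); // last 4 bytes
      memcpy(dst, &head, sizeof(head));
      memcpy(dst + n - 4, &tail, sizeof(tail));
    }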
+// Simple loop ending with a Tail operation.
+//
+// e.g. with
+// [12345678123456781234567812345678]
+// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
+// [__XXXXXXXX_______________________]
+// [__________XXXXXXXX_______________]
+// [__________________XXXXXXXX_______]
+// [______________________XXXXXXXX___]
+//
+// Precondition:
+// - runtime_size >= Size
+template <typename SizedOp> struct Loop {
+  static constexpr size_t SIZE = SizedOp::SIZE;
+
+  template <typename DstAddrT, typename SrcAddrT>
+  static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+    size_t offset = 0;
+    do {
+      SizedOp::copy(offsetAddrMultiplesOf<SIZE>(dst, offset),
+                    offsetAddrMultiplesOf<SIZE>(src, offset));
+      offset += SIZE;
+    } while (offset < runtime_size - SIZE);
+    Tail<SizedOp>::copy(dst, src, runtime_size);
+  }
+
+  // Move forward suitable when dst < src. We load the tail bytes before
+  // handling the loop.
+  //
+  // e.g. Moving two bytes
+  // [   |   |   |   |   |]
+  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
+  // [_________________________LLLLLLLL___]
+  // [___LLLLLLLL_________________________]
+  // [_SSSSSSSS___________________________]
+  // [___________LLLLLLLL_________________]
+  // [_________SSSSSSSS___________________]
+  // [___________________LLLLLLLL_________]
+  // [_________________SSSSSSSS___________]
+  // [_______________________SSSSSSSS_____]
+  template <typename DstAddrT, typename SrcAddrT>
+  static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+    const auto tail_value = SizedOp::load(tailAddr<SIZE>(src, runtime_size));
+    size_t offset = 0;
+    do {
+      SizedOp::move(offsetAddrMultiplesOf<SIZE>(dst, offset),
+                    offsetAddrMultiplesOf<SIZE>(src, offset));
+      offset += SIZE;
+    } while (offset < runtime_size - SIZE);
+    SizedOp::store(tailAddr<SIZE>(dst, runtime_size), tail_value);
+  }
+
+  // Move backward suitable when dst > src. We load the head bytes before
+  // handling the loop.
+  //
+  // e.g. Moving two bytes
+  // [   |   |   |   |   |]
+  // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
+  // [___LLLLLLLL_________________________]
+  // [_________________________LLLLLLLL___]
+  // [___________________________SSSSSSSS_]
+  // [_________________LLLLLLLL___________]
+  // [___________________SSSSSSSS_________]
+  // [_________LLLLLLLL___________________]
+  // [___________SSSSSSSS_________________]
+  // [_____SSSSSSSS_______________________]
+  template <typename DstAddrT, typename SrcAddrT>
+  static inline void move_backward(DstAddrT dst, SrcAddrT src,
+                                   size_t runtime_size) {
+    const auto head_value = SizedOp::load(src);
+    ptrdiff_t offset = runtime_size - SIZE;
+    do {
+      SizedOp::move(offsetAddrMultiplesOf<SIZE>(dst, offset),
+                    offsetAddrMultiplesOf<SIZE>(src, offset));
+      offset -= SIZE;
+    } while (offset >= 0);
+    SizedOp::store(dst, head_value);
+  }
+
+  template <typename DstAddrT>
+  static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
+    size_t offset = 0;
+    do {
+      SizedOp::set(offsetAddrMultiplesOf<SIZE>(dst, offset), value);
+      offset += SIZE;
+    } while (offset < runtime_size - SIZE);
+    Tail<SizedOp>::set(dst, value, runtime_size);
+  }
+
+  template <typename SrcAddrT1, typename SrcAddrT2>
+  static inline uint64_t different(SrcAddrT1 src1, SrcAddrT2 src2,
+                                   size_t runtime_size) {
+    size_t offset = 0;
+    do {
+      if (auto res =
+              SizedOp::different(offsetAddrMultiplesOf<SIZE>(src1, offset),
+                                 offsetAddrMultiplesOf<SIZE>(src2, offset)))
+        return res;
+      offset += SIZE;
+    } while (offset < runtime_size - SIZE);
+    return Tail<SizedOp>::different(src1, src2, runtime_size);
+  }
+
+  template <typename SrcAddrT1, typename SrcAddrT2>
+  static inline int32_t three_way_cmp(SrcAddrT1 src1, SrcAddrT2 src2,
+                                      size_t runtime_size) {
+    size_t offset = 0;
+    do {
+      if (auto res = SizedOp::three_way_cmp(
+              offsetAddrMultiplesOf<SIZE>(src1, offset),
+              offsetAddrMultiplesOf<SIZE>(src2, offset)))
+        return res;
+      offset += SIZE;
+    } while (offset < runtime_size - SIZE);
+    return Tail<SizedOp>::three_way_cmp(src1, src2, runtime_size);
+  }
+};
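Loop walks fixed-size blocks from the front and delegates the ragged end to Tail, which may overlap the last full block; the copy variant reduces to the following plain C sketch (n >= 8 assumed):

    #include <string.h>

    // What Loop<_8>::copy(dst, src, n) boils down to.
    static void loop8_copy(char *dst, const char *src, size_t n) {
      size_t offset = 0;
      do {
        memcpy(dst + offset, src + offset, 8);
        offset += 8;
      } while (offset < n - 8);
      memcpy(dst + n - 8, src + n - 8, 8); // tail, may overlap the last block
    }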
+// Aligns and calls the subsequent NextT operation.
+//
+// e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
+// Align<16, Arg::Dst>::Then<Loop<32>>::copy(dst, src, runtime_size);
+enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
+template <typename SizedOp, Arg AlignOn = Arg::_1> struct Align {
+  template <typename NextT> struct Then {
+    template <typename DstAddrT, typename SrcAddrT>
+    static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+      SizedOp::copy(dst, src);
+      auto aligned = align(dst, src, runtime_size);
+      NextT::copy(aligned.arg1, aligned.arg2, aligned.size);
+    }
+
+    // Move forward suitable when dst < src. The alignment is performed with
+    // a HeadTail operation of size ∈ [Alignment, 2 x Alignment].
+    //
+    // e.g. Moving two bytes and making sure src is then aligned.
+    // [  |       |       |       |      ]
+    // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
+    // [____LLLLLLLL_____________________]
+    // [___________LLLLLLLL______________]
+    // [_SSSSSSSS________________________]
+    // [________SSSSSSSS_________________]
+    //
+    // e.g. Moving two bytes and making sure dst is then aligned.
+    // [  |       |       |       |      ]
+    // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
+    // [____LLLLLLLL_____________________]
+    // [______LLLLLLLL___________________]
+    // [_SSSSSSSS________________________]
+    // [___SSSSSSSS______________________]
+    template <typename DstAddrT, typename SrcAddrT>
+    static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
+      auto aligned_after_begin = align(dst, src, runtime_size);
+      // We move pointers forward by Size so we can perform HeadTail.
+      auto aligned = aligned_after_begin.stepForward();
+      HeadTail<SizedOp>::move(dst, src, runtime_size - aligned.size);
+      NextT::move(aligned.arg1, aligned.arg2, aligned.size);
+    }
+
+    // Move backward suitable when dst > src. The alignment is performed with
+    // a HeadTail operation of size ∈ [Alignment, 2 x Alignment].
+    //
+    // e.g. Moving two bytes backward and making sure src is then aligned.
+    // [  |       |       |       |      ]
+    // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
+    // [_________________LLLLLLLL________]
+    // [___________________LLLLLLLL______]
+    // [____________________SSSSSSSS_____]
+    // [______________________SSSSSSSS___]
+    //
+    // e.g. Moving two bytes and making sure dst is then aligned.
+    // [  |       |       |       |      ]
+    // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
+    // [_______________LLLLLLLL__________]
+    // [___________________LLLLLLLL______]
+    // [__________________SSSSSSSS_______]
+    // [______________________SSSSSSSS___]
+    template <typename DstAddrT, typename SrcAddrT>
+    static inline void move_backward(DstAddrT dst, SrcAddrT src,
+                                     size_t runtime_size) {
+      const auto dst_end = offsetAddrAssumeAligned<1>(dst, runtime_size);
+      const auto src_end = offsetAddrAssumeAligned<1>(src, runtime_size);
+      auto aligned_after_end = align(dst_end, src_end, 0);
+      // We move pointers back by 2 x Size so we can perform HeadTail.
+      auto aligned = aligned_after_end.stepBack().stepBack();
+      HeadTail<SizedOp>::move(aligned.arg1, aligned.arg2, aligned.size);
+      NextT::move_backward(dst, src, runtime_size - aligned.size);
+    }
+
+    template <typename DstAddrT>
+    static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
+      SizedOp::set(dst, value);
+      DstAddrT _(nullptr);
+      auto aligned = align(dst, _, runtime_size);
+      NextT::set(aligned.arg1, value, aligned.size);
+    }
+
+    template <typename SrcAddrT1, typename SrcAddrT2>
+    static inline uint64_t different(SrcAddrT1 src1, SrcAddrT2 src2,
+                                     size_t runtime_size) {
+      if (const auto res = SizedOp::different(src1, src2))
+        return res;
+      auto aligned = align(src1, src2, runtime_size);
+      return NextT::different(aligned.arg1, aligned.arg2, aligned.size);
+    }
+
+    template <typename SrcAddrT1, typename SrcAddrT2>
+    static inline int32_t three_way_cmp(SrcAddrT1 src1, SrcAddrT2 src2,
+                                        size_t runtime_size) {
+      if (const auto res = SizedOp::three_way_cmp(src1, src2))
+        return res;
+      auto aligned = align(src1, src2, runtime_size);
+      return NextT::three_way_cmp(aligned.arg1, aligned.arg2, aligned.size);
+    }
+  };
+
+private:
+  static constexpr size_t SIZE = SizedOp::SIZE;
+  static_assert(SIZE > 1);
+
+  template <typename Arg1AddrT, typename Arg2AddrT> struct Aligned {
+    Arg1AddrT arg1;
+    Arg2AddrT arg2;
+    size_t size;
+
+    Aligned stepForward() const {
+      return Aligned{offsetAddrMultiplesOf<SIZE>(arg1, SIZE),
+                     offsetAddrMultiplesOf<SIZE>(arg2, SIZE), size - SIZE};
+    }
+
+    Aligned stepBack() const {
+      return Aligned{offsetAddrMultiplesOf<SIZE>(arg1, -SIZE),
+                     offsetAddrMultiplesOf<SIZE>(arg2, -SIZE), size + SIZE};
+    }
+  };
+
+  template <typename Arg1AddrT, typename Arg2AddrT>
+  static auto makeAligned(Arg1AddrT arg1, Arg2AddrT arg2, size_t size) {
+    return Aligned<Arg1AddrT, Arg2AddrT>{arg1, arg2, size};
+  }
+
+  template <typename Arg1AddrT, typename Arg2AddrT>
+  static auto align(Arg1AddrT arg1, Arg2AddrT arg2, size_t runtime_size) {
+    static_assert(IsAddressType<Arg1AddrT>::Value);
+    static_assert(IsAddressType<Arg2AddrT>::Value);
+    if constexpr (AlignOn == Arg::_1) {
+      auto offset = offset_to_next_aligned<SIZE>(arg1.ptr_);
+      return makeAligned(offsetAddrAssumeAligned<SIZE>(arg1, offset),
+                         offsetAddrAssumeAligned<1>(arg2, offset),
+                         runtime_size - offset);
+    } else if constexpr (AlignOn == Arg::_2) {
+      auto offset = offset_to_next_aligned<SIZE>(arg2.ptr_);
+      return makeAligned(offsetAddrAssumeAligned<1>(arg1, offset),
+                         offsetAddrAssumeAligned<SIZE>(arg2, offset),
+                         runtime_size - offset);
+    } else {
+      DeferredStaticAssert("AlignOn must be either Arg::_1 or Arg::_2");
+    }
+  }
+};
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H
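Read left to right, a composed operation applies front to back: one unaligned block, a pointer bump to reach alignment, then the next operation on the rest. A hypothetical helper using the scalar backend defined below (sketch; precondition count >= 64 so the loop runs at least once):

    #include "src/string/memory_utils/algorithm.h"
    #include "src/string/memory_utils/backends.h"

    namespace __llvm_libc {
    // Copy with one unaligned 16-byte block, then 32-byte blocks from a
    // 16-byte aligned destination.
    static void copy_align_then_loop(DstAddr<1> dst, SrcAddr<1> src,
                                     size_t count) {
      using namespace scalar;
      Align<_16, Arg::Dst>::Then<Loop<_32>>::copy(dst, src, count);
    }
    } // namespace __llvm_libc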
diff --git a/libc/src/string/memory_utils/backend_aarch64.h b/libc/src/string/memory_utils/backend_aarch64.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/backend_aarch64.h
@@ -0,0 +1,63 @@
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
+
+#if defined(LLVM_LIBC_ARCH_AARCH64)
+#include "src/string/memory_utils/backend_scalar.h"
+
+#ifdef __ARM_NEON
+#include <arm_neon.h>
+#endif
+
+namespace __llvm_libc {
+
+struct Aarch64Backend : public ScalarBackend {
+  static constexpr bool IS_BACKEND_TYPE = true;
+
+  template <typename T, Temporality TS, Aligned AS,
+            cpp::EnableIfType<ScalarBackend::IsScalarType<T>, bool> = true>
+  static inline T load(const T *src) {
+    return ScalarBackend::template load<T, TS, AS>(src);
+  }
+};
+
+// Implementation of the SizedOp abstraction for the set operation.
+struct Zva64 {
+  static constexpr size_t SIZE = 64;
+
+  template <typename DstAddrT>
+  static inline void set(DstAddrT dst, ubyte value) {
+#if __SIZEOF_POINTER__ == 4
+    asm("dc zva, %w[dst]" : : [dst] "r"(dst) : "memory");
+#else
+    asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory");
+#endif
+  }
+};
+
+inline static bool hasZva() {
+  uint64_t zva_val;
+  asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val));
+  // DC ZVA is permitted if DZP, bit [4], is zero.
+  // BS, bits [3:0], is log2 of the block size in words.
+  // So the next line checks whether the instruction is permitted and the
+  // block size is 16 words (i.e. 64 bytes).
+  return (zva_val & 0b11111) == 0b00100;
+}
+
+namespace aarch64 {
+using _1 = SizedOp<Aarch64Backend, 1>;
+using _2 = SizedOp<Aarch64Backend, 2>;
+using _3 = SizedOp<Aarch64Backend, 3>;
+using _4 = SizedOp<Aarch64Backend, 4>;
+using _8 = SizedOp<Aarch64Backend, 8>;
+using _16 = SizedOp<Aarch64Backend, 16>;
+using _32 = SizedOp<Aarch64Backend, 32>;
+using _64 = SizedOp<Aarch64Backend, 64>;
+using _128 = SizedOp<Aarch64Backend, 128>;
+} // namespace aarch64
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_ARCH_AARCH64
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
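The dczid_el0 test decodes as: bit 4 (DZP) must be clear for DC ZVA to be permitted, and BS in bits [3:0] is log2 of the block size in 4-byte words, so the accepted value 0b00100 means 2^4 words = 64 bytes. The same decoding spelled out (sketch, hypothetical helper):

    #include <stdint.h>

    // Returns the DC ZVA block size in bytes, or 0 if prohibited.
    static uint64_t zva_block_size(uint64_t dczid) {
      if (dczid & 0b10000) // DZP, bit [4]: 1 means prohibited
        return 0;
      const uint64_t log2_words = dczid & 0b1111; // BS, bits [3:0]
      return (1ULL << log2_words) * 4;            // 4-byte words
    }
    // hasZva() above is equivalent to: zva_block_size(dczid_el0) == 64.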
diff --git a/libc/src/string/memory_utils/backend_scalar.h b/libc/src/string/memory_utils/backend_scalar.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/backend_scalar.h
@@ -0,0 +1,93 @@
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
+
+#include "src/__support/CPP/TypeTraits.h" // ConditionalType, EnableIfType
+#include "src/__support/endian.h"
+
+namespace __llvm_libc {
+
+struct ScalarBackend {
+  static constexpr bool IS_BACKEND_TYPE = true;
+
+  template <typename T>
+  static constexpr bool IsScalarType =
+      cpp::IsSameV<T, uint8_t> || cpp::IsSameV<T, uint16_t> ||
+      cpp::IsSameV<T, uint32_t> || cpp::IsSameV<T, uint64_t>;
+
+  template <typename T, Temporality TS, Aligned AS>
+  static inline T load(const T *src) {
+    static_assert(IsScalarType<T>);
+    static_assert(TS == Temporality::TEMPORAL,
+                  "Scalar load does not support non-temporal access");
+    return *src;
+  }
+
+  template <typename T, Temporality TS, Aligned AS>
+  static inline void store(T *dst, T value) {
+    static_assert(IsScalarType<T>);
+    static_assert(TS == Temporality::TEMPORAL,
+                  "Scalar store does not support non-temporal access");
+    *dst = value;
+  }
+
+  template <typename T> static inline T splat(ubyte value) {
+    static_assert(IsScalarType<T>);
+    return (T(~0) / T(0xFF)) * T(value);
+  }
+
+  template <typename T> static inline uint64_t not_equals(T v1, T v2) {
+    static_assert(IsScalarType<T>);
+    return v1 ^ v2;
+  }
+
+  template <typename T> static inline int32_t three_way_cmp(T v1, T v2) {
+    DeferredStaticAssert("not implemented");
+  }
+
+  template <size_t Size>
+  using getNextType = cpp::ConditionalType<
+      Size >= 8, uint64_t,
+      cpp::ConditionalType<Size >= 4, uint32_t,
+                           cpp::ConditionalType<Size >= 2, uint16_t,
+                                                uint8_t>>>;
+};
+
+template <>
+int32_t inline ScalarBackend::three_way_cmp<uint8_t>(uint8_t a, uint8_t b) {
+  const int16_t la = Endian::to_big_endian(a);
+  const int16_t lb = Endian::to_big_endian(b);
+  return la - lb;
+}
+template <>
+int32_t inline ScalarBackend::three_way_cmp<uint16_t>(uint16_t a, uint16_t b) {
+  const int32_t la = Endian::to_big_endian(a);
+  const int32_t lb = Endian::to_big_endian(b);
+  return la - lb;
+}
+template <>
+int32_t inline ScalarBackend::three_way_cmp<uint32_t>(uint32_t a, uint32_t b) {
+  const uint32_t la = Endian::to_big_endian(a);
+  const uint32_t lb = Endian::to_big_endian(b);
+  return la > lb ? 1 : la < lb ? -1 : 0;
+}
+template <>
+int32_t inline ScalarBackend::three_way_cmp<uint64_t>(uint64_t a, uint64_t b) {
+  const uint64_t la = Endian::to_big_endian(a);
+  const uint64_t lb = Endian::to_big_endian(b);
+  return la > lb ? 1 : la < lb ? -1 : 0;
+}
+
+namespace scalar {
+using _1 = SizedOp<ScalarBackend, 1>;
+using _2 = SizedOp<ScalarBackend, 2>;
+using _3 = SizedOp<ScalarBackend, 3>;
+using _4 = SizedOp<ScalarBackend, 4>;
+using _8 = SizedOp<ScalarBackend, 8>;
+using _16 = SizedOp<ScalarBackend, 16>;
+using _32 = SizedOp<ScalarBackend, 32>;
+using _64 = SizedOp<ScalarBackend, 64>;
+using _128 = SizedOp<ScalarBackend, 128>;
+} // namespace scalar
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
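The splat formula relies on T(~0) / T(0xFF) being the all-ones-per-byte constant 0x0101...01 for any unsigned T, so multiplying by the byte value replicates it into every byte; for example with uint32_t:

    #include <stdint.h>

    static_assert(uint32_t(~0) / uint32_t(0xFF) == 0x01010101u);
    static_assert(uint32_t(0x01010101u) * 0xABu == 0xABABABABu);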
diff --git a/libc/src/string/memory_utils/backend_x86.h b/libc/src/string/memory_utils/backend_x86.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/backend_x86.h
@@ -0,0 +1,189 @@
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
+
+#if defined(LLVM_LIBC_ARCH_X86)
+#include "src/__support/CPP/TypeTraits.h" // ConditionalType, EnableIfType
+#include "src/string/memory_utils/backend_scalar.h"
+
+#ifdef __SSE2__
+#include <immintrin.h>
+#endif // __SSE2__
+
+#if defined(__SSE2__)
+#define HAS_M128 true
+#else
+#define HAS_M128 false
+#endif
+
+#if defined(__AVX2__)
+#define HAS_M256 true
+#else
+#define HAS_M256 false
+#endif
+
+#if defined(__AVX512F__) and defined(__AVX512BW__)
+#define HAS_M512 true
+#else
+#define HAS_M512 false
+#endif
+
+namespace __llvm_libc {
+struct X86Backend : public ScalarBackend {
+  static constexpr bool IS_BACKEND_TYPE = true;
+
+  template <typename T, Temporality TS, Aligned AS,
+            cpp::EnableIfType<ScalarBackend::IsScalarType<T>, bool> = true>
+  static inline T load(const T *src) {
+    return ScalarBackend::template load<T, TS, AS>(src);
+  }
+
+  template <typename T, Temporality TS, Aligned AS,
+            cpp::EnableIfType<TS == Temporality::NON_TEMPORAL &&
+                                  AS == Aligned::NO,
+                              bool> = true>
+  static inline T load(const T *src) {
+    DeferredStaticAssert("X86 non-temporal load needs aligned access");
+    return {};
+  }
+
+  template <typename T, Temporality TS, Aligned AS,
+            cpp::EnableIfType<ScalarBackend::IsScalarType<T>, bool> = true>
+  static inline void store(T *dst, T value) {
+    ScalarBackend::template store<T, TS, AS>(dst, value);
+  }
+
+  template <typename T, Temporality TS, Aligned AS,
+            cpp::EnableIfType<TS == Temporality::NON_TEMPORAL &&
+                                  AS == Aligned::NO,
+                              bool> = true>
+  static inline void store(T *dst, T value) {
+    DeferredStaticAssert("X86 non-temporal store needs aligned access");
+  }
+
+  template <typename T> static inline T splat(ubyte value) {
+    return ScalarBackend::template splat<T>(value);
+  }
+
+  template <typename T> static inline uint64_t not_equals(T v1, T v2) {
+    return ScalarBackend::template not_equals<T>(v1, v2);
+  }
+
+  template <typename T,
+            cpp::EnableIfType<ScalarBackend::IsScalarType<T>, bool> = true>
+  static inline int32_t three_way_cmp(T v1, T v2) {
+    return ScalarBackend::template three_way_cmp<T>(v1, v2);
+  }
+
+  template <typename T,
+            cpp::EnableIfType<!ScalarBackend::IsScalarType<T>, bool> = true>
+  static inline int32_t three_way_cmp(T v1, T v2) {
+    return char_diff(reinterpret_cast<char *>(&v1),
+                     reinterpret_cast<char *>(&v2), not_equals(v1, v2));
+  }
+
+  template <size_t Size>
+  using getNextType = cpp::ConditionalType<
+      (HAS_M512 && Size >= 64), __m512i,
+      cpp::ConditionalType<
+          (HAS_M256 && Size >= 32), __m256i,
+          cpp::ConditionalType<(HAS_M128 && Size >= 16), __m128i,
+                               ScalarBackend::getNextType<Size>>>>;
+
+private:
+  static inline int32_t char_diff(const char *a, const char *b,
+                                  uint64_t mask) {
+    const size_t diff_index = __builtin_ctzll(mask);
+    const int16_t ca = (unsigned char)a[diff_index];
+    const int16_t cb = (unsigned char)b[diff_index];
+    return ca - cb;
+  }
+};
+
+static inline void repmovsb(void *dst, const void *src, size_t runtime_size) {
+  asm volatile("rep movsb"
+               : "+D"(dst), "+S"(src), "+c"(runtime_size)
+               :
+               : "memory");
+}
+
+#define SPECIALIZE_LOAD(T, OS, AS, INTRISIC)                                   \
+  template <> inline T X86Backend::load<T, OS, AS>(const T *src) {             \
+    return INTRISIC(const_cast<T *>(src));                                     \
+  }
+#define SPECIALIZE_STORE(T, OS, AS, INTRISIC)                                  \
+  template <> inline void X86Backend::store<T, OS, AS>(T * dst, T value) {     \
+    INTRISIC(dst, value);                                                      \
+  }
+
+#if HAS_M128
+SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_load_si128)
+SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_loadu_si128)
+SPECIALIZE_LOAD(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
+                _mm_stream_load_si128)
+SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_store_si128)
+SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_storeu_si128)
+SPECIALIZE_STORE(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
+                 _mm_stream_si128)
+template <> inline __m128i X86Backend::splat<__m128i>(ubyte value) {
+  return _mm_set1_epi8(__builtin_bit_cast(char, value));
+}
+template <>
+inline uint64_t X86Backend::not_equals<__m128i>(__m128i a, __m128i b) {
+  using T = char __attribute__((__vector_size__(16)));
+  return _mm_movemask_epi8(T(a) != T(b));
+}
+#endif // HAS_M128
+
+#if HAS_M256
+SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::YES, _mm256_load_si256)
+SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::NO, _mm256_loadu_si256)
+SPECIALIZE_LOAD(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
+                _mm256_stream_load_si256)
+SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::YES,
+                 _mm256_store_si256)
+SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::NO,
+                 _mm256_storeu_si256)
+SPECIALIZE_STORE(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
+                 _mm256_stream_si256)
+template <> inline __m256i X86Backend::splat<__m256i>(ubyte value) {
+  return _mm256_set1_epi8(__builtin_bit_cast(char, value));
+}
+template <>
+inline uint64_t X86Backend::not_equals<__m256i>(__m256i a, __m256i b) {
+  using T = char __attribute__((__vector_size__(32)));
+  return _mm256_movemask_epi8(T(a) != T(b));
+}
+#endif // HAS_M256
+
+#if HAS_M512
+SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::YES, _mm512_load_si512)
+SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::NO, _mm512_loadu_si512)
+SPECIALIZE_LOAD(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
+                _mm512_stream_load_si512)
+SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::YES,
+                 _mm512_store_si512)
+SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::NO,
+                 _mm512_storeu_si512)
+SPECIALIZE_STORE(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
+                 _mm512_stream_si512)
+template <> inline __m512i X86Backend::splat<__m512i>(ubyte value) {
+  return _mm512_broadcastb_epi8(_mm_set1_epi8(__builtin_bit_cast(char, value)));
+}
+template <>
+inline uint64_t X86Backend::not_equals<__m512i>(__m512i a, __m512i b) {
+  return _mm512_cmpneq_epi8_mask(a, b);
+}
+#endif // HAS_M512
+
+namespace x86 {
+using _1 = SizedOp<X86Backend, 1>;
+using _2 = SizedOp<X86Backend, 2>;
+using _3 = SizedOp<X86Backend, 3>;
+using _4 = SizedOp<X86Backend, 4>;
+using _8 = SizedOp<X86Backend, 8>;
+using _16 = SizedOp<X86Backend, 16>;
+using _32 = SizedOp<X86Backend, 32>;
+using _64 = SizedOp<X86Backend, 64>;
+using _128 = SizedOp<X86Backend, 128>;
+} // namespace x86
+
+} // namespace __llvm_libc
+
+#endif // defined(LLVM_LIBC_ARCH_X86)
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
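The vector not_equals overloads compare bytewise and compress the result into a bitmask (one bit per byte via movemask for SSE/AVX, or the AVX-512 compare mask directly), so the result is zero exactly when the blocks are equal; char_diff then recovers the first differing byte with a count-trailing-zeros. A one-line sketch of that last step:

    #include <stdint.h>

    // Index of the first differing byte encoded in a not_equals mask.
    static int first_diff_index(uint64_t mask) {
      return mask ? __builtin_ctzll(mask) : -1; // -1: blocks are equal
    }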
diff --git a/libc/src/string/memory_utils/backends.h b/libc/src/string/memory_utils/backends.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/backends.h
@@ -0,0 +1,56 @@
+//===-- Elementary operations to compose memory primitives ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the concept of a Backend.
+// It constitutes the lowest level of the framework and is akin to instruction
+// selection. It defines how to implement aligned/unaligned,
+// temporal/non-temporal loads and stores for a particular architecture as well
+// as efficient ways to fill and compare types.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
+
+#include "src/string/memory_utils/address.h"  // Temporality, Aligned
+#include "src/string/memory_utils/sized_op.h" // SizedOp
+#include <stddef.h>                           // size_t
+#include <stdint.h>                           // uint##_t
+
+namespace __llvm_libc {
+
+// Backends must implement the following interface.
+struct NoBackend {
+  static constexpr bool IS_BACKEND_TYPE = true;
+
+  // Loads a T from `src`.
+  template <typename T, Temporality TS, Aligned AS>
+  static T load(const T *src);
+
+  // Stores `value` at `dst`.
+  template <typename T, Temporality TS, Aligned AS>
+  static void store(T *dst, T value);
+
+  // Returns a T filled with `value`.
+  template <typename T> static T splat(ubyte value);
+
+  // Returns zero iff v1 == v2.
+  template <typename T> static uint64_t not_equals(T v1, T v2);
+
+  // Returns zero iff v1 == v2, a negative value if v1 < v2 and a positive
+  // value otherwise.
+  template <typename T> static int32_t three_way_cmp(T v1, T v2);
+
+  // The widest type to use to process Size bytes.
+  template <size_t Size> using getNextType = void;
+};
+
+} // namespace __llvm_libc
+
+// We inline all backend implementations here to simplify the build system.
+// Each file needs to be guarded with the appropriate LLVM_LIBC_ARCH_XXX ifdef.
+#include "src/string/memory_utils/backend_aarch64.h"
+#include "src/string/memory_utils/backend_scalar.h"
+#include "src/string/memory_utils/backend_x86.h"
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
diff --git a/libc/src/string/memory_utils/bcmp_implementations.h b/libc/src/string/memory_utils/bcmp_implementations.h
--- a/libc/src/string/memory_utils/bcmp_implementations.h
+++ b/libc/src/string/memory_utils/bcmp_implementations.h
@@ -10,50 +10,45 @@
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H

 #include "src/__support/architectures.h"
-#include "src/__support/common.h"
-#include "src/string/memory_utils/elements.h"
+#include "src/string/memory_utils/algorithm.h"
+#include "src/string/memory_utils/backends.h"

 #include <stddef.h> // size_t

 namespace __llvm_libc {

-// Fixed-size difference between 'lhs' and 'rhs'.
-template <typename Element> bool differs(const char *lhs, const char *rhs) {
-  return !Element::equals(lhs, rhs);
-}
-// Runtime-size difference between 'lhs' and 'rhs'.
-template <typename Element>
-bool differs(const char *lhs, const char *rhs, size_t size) {
-  return !Element::equals(lhs, rhs, size);
-}
-
-static inline int inline_bcmp(const char *lhs, const char *rhs, size_t count) {
+static inline uint64_t inline_bcmp_uint64_t(SrcAddr<1> lhs, SrcAddr<1> rhs,
+                                            size_t count) {
 #if defined(LLVM_LIBC_ARCH_X86)
-  using namespace ::__llvm_libc::x86;
+  using namespace x86;
 #elif defined(LLVM_LIBC_ARCH_AARCH64)
-  using namespace ::__llvm_libc::aarch64;
+  using namespace aarch64;
 #else
-  using namespace ::__llvm_libc::scalar;
+  using namespace scalar;
 #endif
   if (count == 0)
     return 0;
   if (count == 1)
-    return differs<_1>(lhs, rhs);
+    return _1::different(lhs, rhs);
   if (count == 2)
-    return differs<_2>(lhs, rhs);
+    return _2::different(lhs, rhs);
   if (count == 3)
-    return differs<_3>(lhs, rhs);
+    return _3::different(lhs, rhs);
   if (count <= 8)
-    return differs<HeadTail<_4>>(lhs, rhs, count);
+    return HeadTail<_4>::different(lhs, rhs, count);
   if (count <= 16)
-    return differs<HeadTail<_8>>(lhs, rhs, count);
+    return HeadTail<_8>::different(lhs, rhs, count);
   if (count <= 32)
-    return differs<HeadTail<_16>>(lhs, rhs, count);
+    return HeadTail<_16>::different(lhs, rhs, count);
   if (count <= 64)
-    return differs<HeadTail<_32>>(lhs, rhs, count);
+    return HeadTail<_32>::different(lhs, rhs, count);
   if (count <= 128)
-    return differs<HeadTail<_64>>(lhs, rhs, count);
-  return differs<Align<_32>::Then<Loop<_32>>>(lhs, rhs, count);
+    return HeadTail<_64>::different(lhs, rhs, count);
+  return Align<_32>::Then<Loop<_32>>::different(lhs, rhs, count);
+}
+
+static inline int inline_bcmp(SrcAddr<1> lhs, SrcAddr<1> rhs, size_t count) {
+  return !!inline_bcmp_uint64_t(lhs, rhs, count);
 }

 } // namespace __llvm_libc
diff --git a/libc/src/string/memory_utils/memcmp_implementations.h b/libc/src/string/memory_utils/memcmp_implementations.h
--- a/libc/src/string/memory_utils/memcmp_implementations.h
+++ b/libc/src/string/memory_utils/memcmp_implementations.h
@@ -10,93 +10,91 @@
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP_IMPLEMENTATIONS_H

 #include "src/__support/architectures.h"
-#include "src/__support/common.h"
-#include "src/string/memory_utils/elements.h"
+#include "src/string/memory_utils/algorithm.h"
+#include "src/string/memory_utils/backends.h"

 #include <stddef.h> // size_t

 namespace __llvm_libc {

-static inline int inline_memcmp(const char *lhs, const char *rhs,
-                                size_t count) {
+static inline int inline_memcmp(SrcAddr<1> lhs, SrcAddr<1> rhs, size_t count) {
 #if defined(LLVM_LIBC_ARCH_X86)
   /////////////////////////////////////////////////////////////////////////////
   // LLVM_LIBC_ARCH_X86
   /////////////////////////////////////////////////////////////////////////////
-  using namespace __llvm_libc::x86;
+  using namespace x86;
   if (count == 0)
     return 0;
   if (count == 1)
-    return three_way_compare<_1>(lhs, rhs);
+    return _1::three_way_cmp(lhs, rhs);
   if (count == 2)
-    return three_way_compare<_2>(lhs, rhs);
+    return _2::three_way_cmp(lhs, rhs);
   if (count == 3)
-    return three_way_compare<_3>(lhs, rhs);
+    return _3::three_way_cmp(lhs, rhs);
   if (count <= 8)
-    return three_way_compare<HeadTail<_4>>(lhs, rhs, count);
+    return HeadTail<_4>::three_way_cmp(lhs, rhs, count);
   if (count <= 16)
-    return three_way_compare<HeadTail<_8>>(lhs, rhs, count);
+    return HeadTail<_8>::three_way_cmp(lhs, rhs, count);
   if (count <= 32)
-    return three_way_compare<HeadTail<_16>>(lhs, rhs, count);
+    return HeadTail<_16>::three_way_cmp(lhs, rhs, count);
   if (count <= 64)
-    return three_way_compare<HeadTail<_32>>(lhs, rhs, count);
+    return HeadTail<_32>::three_way_cmp(lhs, rhs, count);
   if (count <= 128)
-    return three_way_compare<HeadTail<_64>>(lhs, rhs, count);
-  return three_way_compare<Align<_32>::Then<Loop<_32>>>(lhs, rhs, count);
+    return HeadTail<_64>::three_way_cmp(lhs, rhs, count);
+  return Align<_32>::Then<Loop<_32>>::three_way_cmp(lhs, rhs, count);
 #elif defined(LLVM_LIBC_ARCH_AARCH64)
   /////////////////////////////////////////////////////////////////////////////
   // LLVM_LIBC_ARCH_AARCH64
   /////////////////////////////////////////////////////////////////////////////
-  using namespace ::__llvm_libc::aarch64;
+  using namespace aarch64;
   if (count == 0) // [0, 0]
     return 0;
   if (count == 1) // [1, 1]
-    return three_way_compare<_1>(lhs, rhs);
+    return _1::three_way_cmp(lhs, rhs);
   if (count == 2) // [2, 2]
-    return three_way_compare<_2>(lhs, rhs);
+    return _2::three_way_cmp(lhs, rhs);
   if (count == 3) // [3, 3]
-    return three_way_compare<_3>(lhs, rhs);
+    return _3::three_way_cmp(lhs, rhs);
   if (count < 8) // [4, 7]
-    return three_way_compare<HeadTail<_4>>(lhs, rhs, count);
+    return HeadTail<_4>::three_way_cmp(lhs, rhs, count);
   if (count < 16) // [8, 15]
-    return three_way_compare<HeadTail<_8>>(lhs, rhs, count);
+    return HeadTail<_8>::three_way_cmp(lhs, rhs, count);
   if (unlikely(count >= 128)) // [128, ∞]
-    return three_way_compare<Align<_16>::Then<Loop<_32>>>(lhs, rhs, count);
-  if (!equals<_16>(lhs, rhs)) // [16, 16]
-    return three_way_compare<_16>(lhs, rhs);
+    return Align<_16>::Then<Loop<_32>>::three_way_cmp(lhs, rhs, count);
+  if (_16::different(lhs, rhs)) // [16, 16]
+    return _16::three_way_cmp(lhs, rhs);
   if (count < 32) // [17, 31]
-    return three_way_compare<Tail<_16>>(lhs, rhs, count);
-  if (!equals<Skip<16>::Then<_16>>(lhs, rhs)) // [32, 32]
-    return three_way_compare<Skip<16>::Then<_16>>(lhs, rhs);
+    return Tail<_16>::three_way_cmp(lhs, rhs, count);
+  if (Skip<16>::Then<_16>::different(lhs, rhs)) // [32, 32]
+    return Skip<16>::Then<_16>::three_way_cmp(lhs, rhs);
   if (count < 64) // [33, 63]
-    return three_way_compare<Tail<_32>>(lhs, rhs, count);
+    return Tail<_32>::three_way_cmp(lhs, rhs, count);
   // [64, 127]
-  return three_way_compare<Skip<32>::Then<Tail<_64>>>(lhs, rhs, count);
+  return Skip<32>::Then<Tail<_64>>::three_way_cmp(lhs, rhs, count);
 #else
   /////////////////////////////////////////////////////////////////////////////
   // Default
   /////////////////////////////////////////////////////////////////////////////
-  using namespace ::__llvm_libc::scalar;
-
+  using namespace scalar;
   if (count == 0)
     return 0;
   if (count == 1)
-    return three_way_compare<_1>(lhs, rhs);
+    return _1::three_way_cmp(lhs, rhs);
   if (count == 2)
-    return three_way_compare<_2>(lhs, rhs);
+    return _2::three_way_cmp(lhs, rhs);
   if (count == 3)
-    return three_way_compare<_3>(lhs, rhs);
+    return _3::three_way_cmp(lhs, rhs);
   if (count <= 8)
-    return three_way_compare<HeadTail<_4>>(lhs, rhs, count);
+    return HeadTail<_4>::three_way_cmp(lhs, rhs, count);
   if (count <= 16)
-    return three_way_compare<HeadTail<_8>>(lhs, rhs, count);
+    return HeadTail<_8>::three_way_cmp(lhs, rhs, count);
   if (count <= 32)
-    return three_way_compare<HeadTail<_16>>(lhs, rhs, count);
+    return HeadTail<_16>::three_way_cmp(lhs, rhs, count);
   if (count <= 64)
-    return three_way_compare<HeadTail<_32>>(lhs, rhs, count);
+    return HeadTail<_32>::three_way_cmp(lhs, rhs, count);
   if (count <= 128)
-    return three_way_compare<HeadTail<_64>>(lhs, rhs, count);
-  return three_way_compare<Align<_32>::Then<Loop<_32>>>(lhs, rhs, count);
+    return HeadTail<_64>::three_way_cmp(lhs, rhs, count);
+  return Align<_32>::Then<Loop<_32>>::three_way_cmp(lhs, rhs, count);
 #endif
 }
diff --git a/libc/src/string/memory_utils/memcpy_implementations.h b/libc/src/string/memory_utils/memcpy_implementations.h
--- a/libc/src/string/memory_utils/memcpy_implementations.h
+++ b/libc/src/string/memory_utils/memcpy_implementations.h
@@ -9,10 +9,11 @@
 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H

+#include "src/__support/CPP/TypeTraits.h" // cpp::ConditionalType
 #include "src/__support/architectures.h"
-#include "src/__support/common.h"
-#include "src/string/memory_utils/elements.h"
-#include "src/string/memory_utils/utils.h"
+#include "src/__support/common.h" // LLVM_LIBC_IS_DEFINED
+#include "src/string/memory_utils/algorithm.h"
+#include "src/string/memory_utils/backends.h"

 #include <stddef.h> // size_t

@@ -37,14 +38,12 @@

 namespace __llvm_libc {

-static inline void inline_memcpy(char *__restrict dst,
-                                 const char *__restrict src, size_t count) {
+static inline void inline_memcpy(DstAddr<1> dst, SrcAddr<1> src,
+                                 size_t count) {
 #if defined(LLVM_LIBC_ARCH_X86)
   /////////////////////////////////////////////////////////////////////////////
   // LLVM_LIBC_ARCH_X86
   /////////////////////////////////////////////////////////////////////////////
-  using namespace __llvm_libc::x86;
-
   // Whether to use only rep;movsb.
   constexpr bool USE_ONLY_REP_MOVSB =
       LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB);
@@ -59,96 +58,91 @@
       -1;
 #endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE

+  using namespace x86;
+
   // Whether target supports AVX instructions.
   constexpr bool HAS_AVX = LLVM_LIBC_IS_DEFINED(__AVX__);
-
-#if defined(__AVX__)
-  using LoopBlockSize = _64;
-#else
-  using LoopBlockSize = _32;
-#endif
-
   if (USE_ONLY_REP_MOVSB)
-    return copy<Accelerator>(dst, src, count);
-
+    return repmovsb(as<void>(dst), as<const void>(src), count);
   if (count == 0)
     return;
   if (count == 1)
-    return copy<_1>(dst, src);
+    return _1::copy(dst, src);
   if (count == 2)
-    return copy<_2>(dst, src);
+    return _2::copy(dst, src);
   if (count == 3)
-    return copy<_3>(dst, src);
+    return _3::copy(dst, src);
   if (count == 4)
-    return copy<_4>(dst, src);
+    return _4::copy(dst, src);
   if (count < 8)
-    return copy<HeadTail<_4>>(dst, src, count);
+    return HeadTail<_4>::copy(dst, src, count);
   if (count < 16)
-    return copy<HeadTail<_8>>(dst, src, count);
+    return HeadTail<_8>::copy(dst, src, count);
   if (count < 32)
-    return copy<HeadTail<_16>>(dst, src, count);
+    return HeadTail<_16>::copy(dst, src, count);
   if (count < 64)
-    return copy<HeadTail<_32>>(dst, src, count);
+    return HeadTail<_32>::copy(dst, src, count);
   if (count < 128)
-    return copy<HeadTail<_64>>(dst, src, count);
+    return HeadTail<_64>::copy(dst, src, count);
   if (HAS_AVX && count < 256)
-    return copy<HeadTail<_128>>(dst, src, count);
+    return HeadTail<_128>::copy(dst, src, count);
+  using LoopBlock = cpp::ConditionalType<HAS_AVX, _64, _32>;
   if (count <= REP_MOVS_B_SIZE)
-    return copy<Align<_32, Arg::Dst>::Then<Loop<LoopBlockSize>>>(dst, src,
-                                                                 count);
-  return copy<Accelerator>(dst, src, count);
+    return Align<_32, Arg::Dst>::Then<Loop<LoopBlock>>::copy(dst, src, count);
+  return repmovsb(as<void>(dst), as<const void>(src), count);
+
 #elif defined(LLVM_LIBC_ARCH_AARCH64)
   /////////////////////////////////////////////////////////////////////////////
   // LLVM_LIBC_ARCH_AARCH64
   /////////////////////////////////////////////////////////////////////////////
-  using namespace __llvm_libc::scalar;
+  using namespace scalar;
   if (count == 0)
     return;
   if (count == 1)
-    return copy<_1>(dst, src);
+    return _1::copy(dst, src);
   if (count == 2)
-    return copy<_2>(dst, src);
+    return _2::copy(dst, src);
   if (count == 3)
-    return copy<_3>(dst, src);
+    return _3::copy(dst, src);
   if (count == 4)
-    return copy<_4>(dst, src);
+    return _4::copy(dst, src);
   if (count < 8)
-    return copy<HeadTail<_4>>(dst, src, count);
+    return HeadTail<_4>::copy(dst, src, count);
   if (count < 16)
-    return copy<HeadTail<_8>>(dst, src, count);
+    return HeadTail<_8>::copy(dst, src, count);
   if (count < 32)
-    return copy<HeadTail<_16>>(dst, src, count);
+    return HeadTail<_16>::copy(dst, src, count);
   if (count < 64)
-    return copy<HeadTail<_32>>(dst, src, count);
+    return HeadTail<_32>::copy(dst, src, count);
   if (count < 128)
-    return copy<HeadTail<_64>>(dst, src, count);
-  return copy<Align<_16, Arg::Src>::Then<Loop<_64>>>(dst, src, count);
+    return HeadTail<_64>::copy(dst, src, count);
+  return Align<_16, Arg::Src>::Then<Loop<_64>>::copy(dst, src, count);
 #else
   /////////////////////////////////////////////////////////////////////////////
   // Default
   /////////////////////////////////////////////////////////////////////////////
-  using namespace __llvm_libc::scalar;
+  using namespace scalar;
   if (count == 0)
     return;
   if (count == 1)
-    return copy<_1>(dst, src);
+    return _1::copy(dst, src);
   if (count == 2)
-    return copy<_2>(dst, src);
+    return _2::copy(dst, src);
   if (count == 3)
-    return copy<_3>(dst, src);
+    return _3::copy(dst, src);
   if (count == 4)
-    return copy<_4>(dst, src);
+    return _4::copy(dst, src);
   if (count < 8)
-    return copy<HeadTail<_4>>(dst, src, count);
+    return HeadTail<_4>::copy(dst, src, count);
   if (count < 16)
-    return copy<HeadTail<_8>>(dst, src, count);
+    return HeadTail<_8>::copy(dst, src, count);
   if (count < 32)
-    return copy<HeadTail<_16>>(dst, src, count);
+    return HeadTail<_16>::copy(dst, src, count);
   if (count < 64)
-    return copy<HeadTail<_32>>(dst, src, count);
+    return HeadTail<_32>::copy(dst, src, count);
   if (count < 128)
-    return copy<HeadTail<_64>>(dst, src, count);
-  return copy<Align<_32, Arg::Src>::Then<Loop<_32>>>(dst, src, count);
+    return HeadTail<_64>::copy(dst, src, count);
+  return Align<_32, Arg::Src>::Then<Loop<_32>>::copy(dst, src, count);
 #endif
 }
diff --git a/libc/src/string/memory_utils/memset_implementations.h b/libc/src/string/memory_utils/memset_implementations.h
--- a/libc/src/string/memory_utils/memset_implementations.h
+++ b/libc/src/string/memory_utils/memset_implementations.h
@@ -10,8 +10,8 @@
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_IMPLEMENTATIONS_H

 #include "src/__support/architectures.h"
-#include "src/string/memory_utils/elements.h"
-#include "src/string/memory_utils/utils.h"
+#include "src/string/memory_utils/algorithm.h"
+#include "src/string/memory_utils/backends.h"

 #include <stddef.h> // size_t

@@ -48,85 +48,86 @@
 // advance. SetAlignedBlocks<64> may waste up to 63 Bytes, SetAlignedBlocks<32>
 // may waste up to 31 Bytes. Benchmarks showed that SetAlignedBlocks<64> was not
 // superior for sizes that mattered.
-inline static void inline_memset(char *dst, unsigned char value,
-                                 size_t count) {
+inline static void inline_memset(DstAddr<1> dst, ubyte value, size_t count) {
 #if defined(LLVM_LIBC_ARCH_X86)
   /////////////////////////////////////////////////////////////////////////////
   // LLVM_LIBC_ARCH_X86
   /////////////////////////////////////////////////////////////////////////////
-  using namespace __llvm_libc::x86;
+  using namespace x86;
   if (count == 0)
     return;
   if (count == 1)
-    return splat_set<_1>(dst, value);
+    return _1::set(dst, value);
   if (count == 2)
-    return splat_set<_2>(dst, value);
+    return _2::set(dst, value);
   if (count == 3)
-    return splat_set<_3>(dst, value);
+    return _3::set(dst, value);
   if (count <= 8)
-    return splat_set<HeadTail<_4>>(dst, value, count);
+    return HeadTail<_4>::set(dst, value, count);
   if (count <= 16)
-    return splat_set<HeadTail<_8>>(dst, value, count);
+    return HeadTail<_8>::set(dst, value, count);
   if (count <= 32)
-    return splat_set<HeadTail<_16>>(dst, value, count);
+    return HeadTail<_16>::set(dst, value, count);
   if (count <= 64)
-    return splat_set<HeadTail<_32>>(dst, value, count);
+    return HeadTail<_32>::set(dst, value, count);
   if (count <= 128)
-    return splat_set<HeadTail<_64>>(dst, value, count);
-  return splat_set<Align<_32, Arg::Dst>::Then<Loop<_32>>>(dst, value, count);
+    return HeadTail<_64>::set(dst, value, count);
+  return Align<_32, Arg::Dst>::Then<Loop<_32>>::set(dst, value, count);
 #elif defined(LLVM_LIBC_ARCH_AARCH64)
   /////////////////////////////////////////////////////////////////////////////
   // LLVM_LIBC_ARCH_AARCH64
   /////////////////////////////////////////////////////////////////////////////
-  using namespace __llvm_libc::aarch64_memset;
+  using namespace aarch64;
   if (count == 0)
     return;
   if (count <= 3) {
-    splat_set<_1>(dst, value);
+    _1::set(dst, value);
     if (count > 1)
-      splat_set<Tail<_2>>(dst, value, count);
+      Tail<_2>::set(dst, value, count);
     return;
   }
   if (count <= 8)
-    return splat_set<HeadTail<_4>>(dst, value, count);
+    return HeadTail<_4>::set(dst, value, count);
   if (count <= 16)
-    return splat_set<HeadTail<_8>>(dst, value, count);
+    return HeadTail<_8>::set(dst, value, count);
   if (count <= 32)
-    return splat_set<HeadTail<_16>>(dst, value, count);
+    return HeadTail<_16>::set(dst, value, count);
   if (count <= 96) {
-    splat_set<_32>(dst, value);
+    _32::set(dst, value);
     if (count <= 64)
-      return splat_set<Tail<_32>>(dst, value, count);
-    splat_set<Skip<32>::Then<_32>>(dst, value);
-    splat_set<Tail<_32>>(dst, value, count);
+      return Tail<_32>::set(dst, value, count);
+    Skip<32>::Then<_32>::set(dst, value);
+    Tail<_32>::set(dst, value, count);
     return;
   }
-  if (count < 448 || value != 0 || !AArch64ZVA(dst, count))
-    return splat_set<Align<_16>::Then<Loop<_64>>>(dst, value, count);
+  if (count >= 448 && value == ubyte::ZERO && hasZva())
+    return Align<_64, Arg::_1>::Then<Loop<Zva64>>::set(dst, ubyte::ZERO,
+                                                       count);
+  else
+    return Align<_16, Arg::_1>::Then<Loop<_64>>::set(dst, value, count);
 #else
   /////////////////////////////////////////////////////////////////////////////
   // Default
   /////////////////////////////////////////////////////////////////////////////
-  using namespace ::__llvm_libc::scalar;
-
+  using namespace scalar;
   if (count == 0)
     return;
   if (count == 1)
-    return splat_set<_1>(dst, value);
+    return _1::set(dst, value);
   if (count == 2)
-    return splat_set<_2>(dst, value);
+    return _2::set(dst, value);
   if (count == 3)
-    return splat_set<_3>(dst, value);
+    return _3::set(dst, value);
   if (count <= 8)
-    return splat_set<HeadTail<_4>>(dst, value, count);
+    return HeadTail<_4>::set(dst, value, count);
   if (count <= 16)
-    return splat_set<HeadTail<_8>>(dst, value, count);
+    return HeadTail<_8>::set(dst, value, count);
   if (count <= 32)
-    return splat_set<HeadTail<_16>>(dst, value, count);
+    return HeadTail<_16>::set(dst, value, count);
   if (count <= 64)
-    return splat_set<HeadTail<_32>>(dst, value, count);
+    return HeadTail<_32>::set(dst, value, count);
   if (count <= 128)
-    return splat_set<HeadTail<_64>>(dst, value, count);
-  return splat_set<Align<_32, Arg::Dst>::Then<Loop<_32>>>(dst, value, count);
+    return HeadTail<_64>::set(dst, value, count);
+  return Align<_32, Arg::Dst>::Then<Loop<_32>>::set(dst, value, count);
 #endif
 }
diff --git a/libc/src/string/memory_utils/sized_op.h b/libc/src/string/memory_utils/sized_op.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/sized_op.h
@@ -0,0 +1,138 @@
+//===-- Sized Operations --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SizedOp struct that serves as the middle end of the
+// framework. It implements sized memory operations by breaking them down into
+// simpler types whose availability is described in the Backend. It also
+// provides a way to load and store sized chunks of memory (necessary for the
+// move operation). SizedOp are the building blocks of higher order algorithms
+// like HeadTail, Align or Loop.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
+
+#include <stddef.h> // size_t
+
+#ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
+#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE                                    \
+  __has_builtin(__builtin_memcpy_inline)
+#endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
+
+namespace __llvm_libc {
+
+template <typename Backend, size_t Size> struct SizedOp {
+  static constexpr size_t SIZE = Size;
+
+private:
+  static_assert(Backend::IS_BACKEND_TYPE);
+  static_assert(Size > 0);
+  using type = typename Backend::template getNextType<Size>;
+  static constexpr size_t TYPE_SIZE = sizeof(type);
+  static_assert(Size >= TYPE_SIZE);
+  static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE;
+  using NextBlock = SizedOp<Backend, NEXT_SIZE>;
+
+  template <typename T, typename AddrT> static constexpr Aligned isAligned() {
+    static_assert(IsAddressType<AddrT>::Value);
+    return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES
+                                                                 : Aligned::NO;
+  }
+
+  template <typename SrcAddrT> static inline auto loadType(SrcAddrT src) {
+    static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
+    constexpr auto AS = isAligned<type, SrcAddrT>();
+    constexpr auto TS = SrcAddrT::TEMPORALITY;
+    return Backend::template load<type, TS, AS>(as<type>(src));
+  }
+
+  template <typename DstAddrT>
+  static inline void storeType(type value, DstAddrT dst) {
+    static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
+    constexpr auto AS = isAligned<type, DstAddrT>();
+    constexpr auto TS = DstAddrT::TEMPORALITY;
+    return Backend::template store<type, TS, AS>(as<type>(dst), value);
+  }
+
+  struct Value {
+    alignas(alignof(type)) ubyte payload[Size];
+  };
+
+public:
+  template <typename DstAddrT, typename SrcAddrT>
+  static inline void copy(DstAddrT dst, SrcAddrT src) {
+    static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
+    static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
+    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE &&
+                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL &&
+                  SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) {
+      // Delegate the optimized copy to the compiler.
+ __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size); + return; + } + storeType(loadType(src), dst); + if constexpr (NEXT_SIZE) + NextBlock::copy(offsetAddr(dst), offsetAddr(src)); + } + + template + static inline void move(DstAddrT dst, SrcAddrT src) { + const auto payload = loadType(src); + if constexpr (NEXT_SIZE) + NextBlock::move(offsetAddr(dst), offsetAddr(src)); + storeType(payload, dst); + } + + template + static inline void set(DstAddrT dst, ubyte value) { + storeType(Backend::template splat(value), dst); + if constexpr (NEXT_SIZE) + NextBlock::set(offsetAddr(dst), value); + } + + template + static inline uint64_t different(SrcAddrT1 src1, SrcAddrT2 src2) { + const uint64_t current = + Backend::template not_equals(loadType(src1), loadType(src2)); + if constexpr (NEXT_SIZE) { + return current | (NextBlock::different(offsetAddr(src1), + offsetAddr(src2))); + } else { + return current; + } + } + + template + static inline int32_t three_way_cmp(SrcAddrT1 src1, SrcAddrT2 src2) { + const auto a = loadType(src1); + const auto b = loadType(src2); + if (Backend::template not_equals(a, b)) + return Backend::template three_way_cmp(a, b); + // if (int32_t res = three_way_cmp(a, b)) { + // return res; + // } + if constexpr (NEXT_SIZE) + return NextBlock::three_way_cmp(offsetAddr(src1), + offsetAddr(src2)); + return 0; + } + + template static Value load(SrcAddrT src) { + Value output; + copy(DstAddr(output.payload), src); + return output; + } + + template static void store(DstAddrT dst, Value value) { + copy(dst, SrcAddr(value.payload)); + } +}; + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H diff --git a/libc/src/string/mempcpy.cpp b/libc/src/string/mempcpy.cpp --- a/libc/src/string/mempcpy.cpp +++ b/libc/src/string/mempcpy.cpp @@ -15,11 +15,10 @@ namespace __llvm_libc { LLVM_LIBC_FUNCTION(void *, mempcpy, - (void *__restrict dest, const void *__restrict src, + (void *__restrict dst, const void *__restrict src, size_t count)) { - char *result = reinterpret_cast(dest); - inline_memcpy(result, reinterpret_cast(src), count); - return result + count; + inline_memcpy(dst, src, count); + return reinterpret_cast(dst) + count; } } // namespace __llvm_libc diff --git a/libc/src/string/memset.cpp b/libc/src/string/memset.cpp --- a/libc/src/string/memset.cpp +++ b/libc/src/string/memset.cpp @@ -7,14 +7,14 @@ //===----------------------------------------------------------------------===// #include "src/string/memset.h" + #include "src/__support/common.h" #include "src/string/memory_utils/memset_implementations.h" namespace __llvm_libc { LLVM_LIBC_FUNCTION(void *, memset, (void *dst, int value, size_t count)) { - inline_memset(reinterpret_cast(dst), - static_cast(value), count); + inline_memset(dst, static_cast(value), count); return dst; } diff --git a/libc/src/string/stpncpy.cpp b/libc/src/string/stpncpy.cpp --- a/libc/src/string/stpncpy.cpp +++ b/libc/src/string/stpncpy.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "src/string/stpncpy.h" -#include "src/string/memory_utils/memset_implementations.h" #include "src/__support/common.h" +#include "src/string/memory_utils/memset_implementations.h" namespace __llvm_libc { @@ -22,7 +22,7 @@ dest[i] = src[i]; // When n>strlen(src), n-strlen(src) \0 are appended. 
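  // (ubyte::ZERO rather than a literal 0: the reworked inline_memset takes a
  // strongly typed ubyte fill value, so the old implicit int-to-char
  // conversion no longer applies.)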
if (n > i) - inline_memset(dest + i, 0, n - i); + inline_memset(dest + i, ubyte::ZERO, n - i); return dest + i; } diff --git a/libc/src/string/strlcpy.cpp b/libc/src/string/strlcpy.cpp --- a/libc/src/string/strlcpy.cpp +++ b/libc/src/string/strlcpy.cpp @@ -24,7 +24,7 @@ return len; size_t n = len < size - 1 ? len : size - 1; inline_memcpy(dst, src, n); - inline_memset(dst + n, 0, size - n); + inline_memset(dst + n, ubyte::ZERO, size - n); return len; } diff --git a/libc/test/src/string/memory_utils/CMakeLists.txt b/libc/test/src/string/memory_utils/CMakeLists.txt --- a/libc/test/src/string/memory_utils/CMakeLists.txt +++ b/libc/test/src/string/memory_utils/CMakeLists.txt @@ -3,6 +3,9 @@ SUITE libc_string_unittests SRCS + address_test.cpp + algorithm_test.cpp + backend_test.cpp elements_test.cpp memory_access_test.cpp utils_test.cpp diff --git a/libc/test/src/string/memory_utils/address_test.cpp b/libc/test/src/string/memory_utils/address_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/memory_utils/address_test.cpp @@ -0,0 +1,78 @@ +#include "utils/UnitTest/Test.h" +#include + +namespace __llvm_libc { + +TEST(LlvmLibcAddress, AliasAreAddresses) { + ASSERT_TRUE(IsAddressType>::Value); + ASSERT_TRUE(IsAddressType>::Value); + ASSERT_TRUE(IsAddressType>::Value); + ASSERT_TRUE(IsAddressType>::Value); +} + +TEST(LlvmLibcAddress, AliasHaveRightPermissions) { + ASSERT_TRUE(SrcAddr<1>::IS_READ); + ASSERT_TRUE(NtSrcAddr<1>::IS_READ); + ASSERT_TRUE(DstAddr<1>::IS_WRITE); + ASSERT_TRUE(NtDstAddr<1>::IS_WRITE); +} + +TEST(LlvmLibcAddress, AliasHaveRightSemantic) { + ASSERT_EQ(SrcAddr<1>::TEMPORALITY, Temporality::TEMPORAL); + ASSERT_EQ(DstAddr<1>::TEMPORALITY, Temporality::TEMPORAL); + ASSERT_EQ(NtSrcAddr<1>::TEMPORALITY, Temporality::NON_TEMPORAL); + ASSERT_EQ(NtDstAddr<1>::TEMPORALITY, Temporality::NON_TEMPORAL); +} + +TEST(LlvmLibcAddress, AliasHaveRightAlignment) { + ASSERT_EQ(SrcAddr<1>::ALIGNMENT, size_t(1)); + ASSERT_EQ(SrcAddr<4>::ALIGNMENT, size_t(4)); +} + +TEST(LlvmLibcAddress, NarrowAlignment) { + // Address 8-byte aligned, offset by 8. + ASSERT_EQ(offsetAddr<8>(SrcAddr<8>(nullptr)).ALIGNMENT, 8UL); + // Address 16-byte aligned, offset by 4. + ASSERT_EQ(offsetAddr<4>(SrcAddr<16>(nullptr)).ALIGNMENT, 4UL); + // Address 4-byte aligned, offset by 16. + ASSERT_EQ(offsetAddr<16>(SrcAddr<4>(nullptr)).ALIGNMENT, 4UL); + // Address 4-byte aligned, offset by 1. + ASSERT_EQ(offsetAddr<1>(SrcAddr<4>(nullptr)).ALIGNMENT, 1UL); + // Address 4-byte aligned, offset by 2. + ASSERT_EQ(offsetAddr<2>(SrcAddr<4>(nullptr)).ALIGNMENT, 2UL); + // Address 4-byte aligned, offset by 6. + ASSERT_EQ(offsetAddr<6>(SrcAddr<4>(nullptr)).ALIGNMENT, 2UL); + // Address 4-byte aligned, offset by 10. 
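+  // As the cases above suggest, offsetAddr<Offset> can only guarantee
+  // gcd(ALIGNMENT, Offset): e.g. a 4-byte aligned address moved by 10 bytes
+  // is known to be 2-byte aligned, nothing more.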
+ ASSERT_EQ(offsetAddr<10>(SrcAddr<4>(nullptr)).ALIGNMENT, 2UL); +} + +TEST(LlvmLibcAddress, OffsetAddr) { + ubyte a; + SrcAddr<1> addr(&a); + ASSERT_EQ((const void *)offsetAddr<4>(addr).ptr(), (const void *)(&a + 4)); + ASSERT_EQ((const void *)offsetAddr<32>(addr).ptr(), (const void *)(&a + 32)); +} + +TEST(LlvmLibcAddress, AssumeAligned) { + SrcAddr<16> addr(nullptr); + ASSERT_EQ(offsetAddrAssumeAligned<8>(addr, 0).ALIGNMENT, 8UL); + ASSERT_EQ(offsetAddrAssumeAligned<1>(addr, 0).ALIGNMENT, 1UL); + ASSERT_EQ(offsetAddrMultiplesOf<4>(addr, 0).ALIGNMENT, 4UL); + ASSERT_EQ(offsetAddrMultiplesOf<32>(addr, 0).ALIGNMENT, 16UL); +} + +TEST(LlvmLibcAddress, offsetAddrAssumeAligned) { + ubyte a; + SrcAddr<1> addr(&a); + ASSERT_EQ((const void *)offsetAddrAssumeAligned<1>(addr, 17).ptr(), + (const void *)(&a + 17)); +} + +TEST(LlvmLibcAddress, offsetAddrMultiplesOf) { + ubyte a; + SrcAddr<1> addr(&a); + ASSERT_EQ((const void *)offsetAddrMultiplesOf<4>(addr, 16).ptr(), + (const void *)(&a + 16)); +} + +} // namespace __llvm_libc diff --git a/libc/test/src/string/memory_utils/algorithm_test.cpp b/libc/test/src/string/memory_utils/algorithm_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/memory_utils/algorithm_test.cpp @@ -0,0 +1,529 @@ + +#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE 0 + +#include "utils/UnitTest/Test.h" +#include +#include +#include + +#include + +namespace __llvm_libc { + +struct alignas(64) Buffer : cpp::Array { + bool contains(const char *ptr) const { + return ptr >= data() && ptr < (data() + size()); + } + size_t getOffset(const char *ptr) const { return ptr - data(); } + void fill(char c) { + for (auto itr = begin(); itr != end(); ++itr) + *itr = c; + } +}; + +static Buffer buffer1; +static Buffer buffer2; +static std::ostringstream LOG; + +struct TestBackend { + static constexpr bool IS_BACKEND_TYPE = true; + + template static void log(const char *Action, const char *ptr) { + LOG << Action << "<" << sizeof(T) << "> "; + if (buffer1.contains(ptr)) + LOG << "a[" << buffer1.getOffset(ptr) << "]"; + else if (buffer2.contains(ptr)) + LOG << "b[" << buffer2.getOffset(ptr) << "]"; + LOG << "\n"; + } + + template + static T load(const T *src) { + log((AS == Aligned::YES ? "LdA" : "LdU"), + reinterpret_cast(src)); + return ScalarBackend::load(src); + } + + template + static void store(T *dst, T value) { + log((AS == Aligned::YES ? 
"StA" : "StU"), + reinterpret_cast(dst)); + ScalarBackend::store(dst, value); + } + + template static inline T splat(ubyte value) { + LOG << "Splat<" << sizeof(T) << "> " << (unsigned)value << '\n'; + return ScalarBackend::splat(value); + } + + template static inline uint64_t not_equals(T v1, T v2) { + LOG << "Neq<" << sizeof(T) << ">\n"; + return ScalarBackend::not_equals(v1, v2); + } + + template static inline int32_t three_way_cmp(T v1, T v2) { + LOG << "Diff<" << sizeof(T) << ">\n"; + return ScalarBackend::three_way_cmp(v1, v2); + } + + template using getNextType = ScalarBackend::getNextType; +}; + +struct LlvmLibcAlgorithm : public testing::Test { + void SetUp() override { + LOG = std::ostringstream(); + LOG << '\n'; + } + + void fillEqual() { + buffer1.fill('a'); + buffer2.fill('a'); + } + + void fillDifferent() { + buffer1.fill('a'); + buffer2.fill('b'); + } + + const char *getTrace() { + trace_ = LOG.str(); + return trace_.c_str(); + } + + const char *stripComments(const char *expected) { + expected_.clear(); + std::stringstream ss(expected); + std::string line; + while (std::getline(ss, line, '\n')) { + const auto pos = line.find('#'); + if (pos == std::string::npos) { + expected_ += line; + } else { + auto log = line.substr(0, pos); + while (!log.empty() && std::isspace(log.back())) + log.pop_back(); + expected_ += log; + } + expected_ += '\n'; + } + return expected_.c_str(); + } + + template SrcAddr buf1(size_t offset = 0) const { + return buffer1.data() + offset; + } + template SrcAddr buf2(size_t offset = 0) const { + return buffer2.data() + offset; + } + template DstAddr dst(size_t offset = 0) const { + return buffer1.data() + offset; + } + template SrcAddr src(size_t offset = 0) const { + return buffer2.data() + offset; + } + +private: + std::string trace_; + std::string expected_; +}; + +using _8 = SizedOp; + +/////////////////////////////////////////////////////////////////////////////// +//// Testing fixed fized forward operations +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, copy_1) { + SizedOp::copy(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<1> b[0] +StU<1> a[0] +)")); +} + +TEST_F(LlvmLibcAlgorithm, copy_15) { + SizedOp::copy(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] +LdU<4> b[8] +StU<4> a[8] +LdU<2> b[12] +StU<2> a[12] +LdU<1> b[14] +StU<1> a[14] +)")); +} + +TEST_F(LlvmLibcAlgorithm, copy_16) { + SizedOp::copy(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] +LdU<8> b[8] +StU<8> a[8] +)")); +} + +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, move_1) { + SizedOp::move(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<1> b[0] +StU<1> a[0] +)")); +} + +TEST_F(LlvmLibcAlgorithm, move_15) { + SizedOp::move(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +LdU<4> b[8] +LdU<2> b[12] +LdU<1> b[14] +StU<1> a[14] +StU<2> a[12] +StU<4> a[8] +StU<8> a[0] +)")); +} + +TEST_F(LlvmLibcAlgorithm, move_16) { + SizedOp::move(dst(), src()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +LdU<8> b[8] +StU<8> a[8] +StU<8> a[0] +)")); +} + +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, set_1) { + SizedOp::set(dst(), ubyte{42}); + EXPECT_STREQ(getTrace(), stripComments(R"( +Splat<1> 42 +StU<1> a[0] +)")); +} + +TEST_F(LlvmLibcAlgorithm, set_15) { + 
SizedOp::set(dst(), ubyte{42}); + EXPECT_STREQ(getTrace(), stripComments(R"( +Splat<8> 42 +StU<8> a[0] +Splat<4> 42 +StU<4> a[8] +Splat<2> 42 +StU<2> a[12] +Splat<1> 42 +StU<1> a[14] +)")); +} + +TEST_F(LlvmLibcAlgorithm, set_16) { + SizedOp::set(dst(), ubyte{42}); + EXPECT_STREQ(getTrace(), stripComments(R"( +Splat<8> 42 +StU<8> a[0] +Splat<8> 42 +StU<8> a[8] +)")); +} + +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, different_1) { + fillEqual(); + SizedOp::different(buf1(), buf2()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<1> a[0] +LdU<1> b[0] +Neq<1> +)")); +} + +TEST_F(LlvmLibcAlgorithm, different_15) { + fillEqual(); + SizedOp::different(buf1(), buf2()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Neq<8> +LdU<4> a[8] +LdU<4> b[8] +Neq<4> +LdU<2> a[12] +LdU<2> b[12] +Neq<2> +LdU<1> a[14] +LdU<1> b[14] +Neq<1> +)")); +} + +TEST_F(LlvmLibcAlgorithm, different_15_no_shortcircuit) { + fillDifferent(); + SizedOp::different(buf1(), buf2()); + // If buffer compare different we continue to aggregate. + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Neq<8> +LdU<4> a[8] +LdU<4> b[8] +Neq<4> +LdU<2> a[12] +LdU<2> b[12] +Neq<2> +LdU<1> a[14] +LdU<1> b[14] +Neq<1> +)")); +} + +TEST_F(LlvmLibcAlgorithm, different_16) { + fillEqual(); + SizedOp::different(buf1(), buf2()); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Neq<8> +LdU<8> a[8] +LdU<8> b[8] +Neq<8> +)")); +} + +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_1) { + fillEqual(); + SizedOp::three_way_cmp(buf1(), buf2()); + // Buffer compare equal, returning 0 and no call to Diff. + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<1> a[0] +LdU<1> b[0] +Neq<1> +)")); +} + +TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_15) { + fillEqual(); + SizedOp::three_way_cmp(buf1(), buf2()); + // Buffer compare equal, returning 0 and no call to Diff. + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Neq<8> +LdU<4> a[8] +LdU<4> b[8] +Neq<4> +LdU<2> a[12] +LdU<2> b[12] +Neq<2> +LdU<1> a[14] +LdU<1> b[14] +Neq<1> +)")); +} + +TEST_F(LlvmLibcAlgorithm, three_way_cmp_neq_15_shortcircuit) { + fillDifferent(); + SizedOp::three_way_cmp(buf1(), buf2()); + // If buffer compare different we stop early. + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Neq<8> +Diff<8> +)")); +} + +TEST_F(LlvmLibcAlgorithm, three_way_cmp_eq_16) { + fillEqual(); + SizedOp::three_way_cmp(buf1(), buf2()); + // Buffer compare equal, returning 0 and no call to Diff. 
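+  // (SizedOp::three_way_cmp probes each chunk with the cheap not_equals
+  // predicate and only computes the expensive three-way result when a chunk
+  // differs, so equal buffers leave Neq entries but no Diff in the trace.)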
+ EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[0] +LdU<8> b[0] +Neq<8> +LdU<8> a[8] +LdU<8> b[8] +Neq<8> +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +//// Testing tail operations +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, tail_copy_8) { + Tail<_8>::copy(dst(), src(), 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[8] +StU<8> a[8] +)")); +} + +TEST_F(LlvmLibcAlgorithm, tail_move_8) { + Tail<_8>::move(dst(), src(), 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[8] +StU<8> a[8] +)")); +} + +TEST_F(LlvmLibcAlgorithm, tail_set_8) { + Tail<_8>::set(dst(), ubyte{42}, 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +Splat<8> 42 +StU<8> a[8] +)")); +} + +TEST_F(LlvmLibcAlgorithm, tail_different_8) { + fillEqual(); + Tail<_8>::different(buf1(), buf2(), 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[8] +LdU<8> b[8] +Neq<8> +)")); +} + +TEST_F(LlvmLibcAlgorithm, tail_three_way_cmp_8) { + fillEqual(); + Tail<_8>::three_way_cmp(buf1(), buf2(), 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> a[8] +LdU<8> b[8] +Neq<8> +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +//// Testing HeadTail operations +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, head_tail_copy_8) { + HeadTail<_8>::copy(dst(), src(), 16); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] +LdU<8> b[8] +StU<8> a[8] +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +//// Testing Loop operations +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, loop_copy_one_iteration_and_tail) { + Loop<_8>::copy(dst(), src(), 10); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] # covers 0-7 +LdU<8> b[2] +StU<8> a[2] # covers 2-9 +)")); +} + +TEST_F(LlvmLibcAlgorithm, loop_copy_two_iteration_and_tail) { + Loop<_8>::copy(dst(), src(), 17); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] # covers 0-7 +LdU<8> b[8] +StU<8> a[8] # covers 8-15 +LdU<8> b[9] +StU<8> a[9] # covers 9-16 +)")); +} + +TEST_F(LlvmLibcAlgorithm, loop_with_one_turn_is_inefficient_but_ok) { + Loop<_8>::copy(dst(), src(), 8); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] # first iteration covers 0-7 +LdU<8> b[0] # tail also covers 0-7 but since Loop is supposed to be used +StU<8> a[0] # with a sufficient number of iterations the tail cost is amortised +)")); +} + +TEST_F(LlvmLibcAlgorithm, loop_with_round_number_of_turn) { + Loop<_8>::copy(dst(), src(), 24); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StU<8> a[0] # first iteration covers 0-7 +LdU<8> b[8] +StU<8> a[8] # second iteration covers 8-15 +LdU<8> b[16] +StU<8> a[16] +)")); +} + +TEST_F(LlvmLibcAlgorithm, dst_aligned_loop) { + Loop<_8>::copy(dst<16>(), src(), 23); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[0] +StA<8> a[0] # store is aligned on 16B +LdU<8> b[8] +StA<8> a[8] # subsequent stores are aligned +LdU<8> b[15] +StU<8> a[15] # Tail is always unaligned +)")); +} + +TEST_F(LlvmLibcAlgorithm, aligned_loop) { + Loop<_8>::copy(dst<16>(), src<8>(), 23); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdA<8> b[0] # load is aligned on 8B +StA<8> a[0] # store is aligned on 16B +LdA<8> b[8] # subsequent loads are aligned +StA<8> a[8] # subsequent 
stores are aligned +LdU<8> b[15] # Tail is always unaligned +StU<8> a[15] # Tail is always unaligned +)")); +} + +/////////////////////////////////////////////////////////////////////////////// +//// Testing Align operations +/////////////////////////////////////////////////////////////////////////////// + +TEST_F(LlvmLibcAlgorithm, align_dst_copy_8) { + Align<_8, Arg::Dst>::Then>::copy(dst(2), src(3), 31); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[3] +StU<8> a[2] # First store covers unaligned bytes +LdU<8> b[9] +StA<8> a[8] # First aligned store +LdU<8> b[17] +StA<8> a[16] # Subsequent stores are aligned +LdU<8> b[25] +StA<8> a[24] # Subsequent stores are aligned +LdU<8> b[26] +StU<8> a[25] # Last store covers remaining bytes +)")); +} + +TEST_F(LlvmLibcAlgorithm, align_src_copy_8) { + Align<_8, Arg::Src>::Then>::copy(dst(2), src(3), 31); + EXPECT_STREQ(getTrace(), stripComments(R"( +LdU<8> b[3] # First load covers unaligned bytes +StU<8> a[2] +LdA<8> b[8] # First aligned load +StU<8> a[7] +LdA<8> b[16] # Subsequent loads are aligned +StU<8> a[15] +LdA<8> b[24] # Subsequent loads are aligned +StU<8> a[23] +LdU<8> b[26] # Last load covers remaining bytes +StU<8> a[25] +)")); +} + +} // namespace __llvm_libc diff --git a/libc/test/src/string/memory_utils/backend_test.cpp b/libc/test/src/string/memory_utils/backend_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/memory_utils/backend_test.cpp @@ -0,0 +1,184 @@ + +#include "src/__support/CPP/Array.h" +#include "src/__support/CPP/ArrayRef.h" +#include "src/__support/CPP/Bit.h" +#include "src/__support/architectures.h" +#include "src/string/memory_utils/backends.h" +#include "utils/UnitTest/Test.h" +#include + +namespace __llvm_libc { + +template using Buffer = cpp::Array; + +static char GetRandomChar() { + static constexpr const uint64_t a = 1103515245; + static constexpr const uint64_t c = 12345; + static constexpr const uint64_t m = 1ULL << 31; + static uint64_t seed = 123456789; + seed = (a * seed + c) % m; + return seed; +} + +static void Randomize(cpp::MutableArrayRef buffer) { + for (auto ¤t : buffer) + current = GetRandomChar(); +} + +template static Buffer GetRandomBuffer() { + Buffer buffer; + Randomize(buffer); + return buffer; +} + +template struct Conf { + static_assert(Backend::IS_BACKEND_TYPE); + using BufferT = Buffer; + using T = typename Backend::template getNextType; + static_assert(sizeof(T) == Size); + static constexpr size_t SIZE = Size; + + static BufferT splat(ubyte value) { + return bit_cast(Backend::template splat(value)); + } + + static uint64_t not_equals(const BufferT &v1, const BufferT &v2) { + return Backend::template not_equals(bit_cast(v1), bit_cast(v2)); + } + + static int32_t three_way_cmp(const BufferT &v1, const BufferT &v2) { + return Backend::template three_way_cmp(bit_cast(v1), bit_cast(v2)); + } +}; + +using FunctionTypes = testing::TypeList< // +#if defined(LLVM_LIBC_ARCH_X86) // + Conf, // + Conf, // + Conf, // + Conf, // +#if HAS_M128 + Conf, // +#endif +#if HAS_M256 + Conf, // +#endif +#if HAS_M512 + Conf, // +#endif +#endif // defined(LLVM_LIBC_ARCH_X86) + Conf, // + Conf, // + Conf, // + Conf // + >; + +TYPED_TEST(LlvmLibcMemoryBackend, splat, FunctionTypes) { + for (auto value : cpp::Array{0u, 1u, 255u}) { + alignas(64) const auto stored = ParamType::splat(bit_cast(value)); + for (size_t i = 0; i < ParamType::SIZE; ++i) + EXPECT_EQ(bit_cast(stored[i]), value); + } +} + +TYPED_TEST(LlvmLibcMemoryBackend, not_equals, FunctionTypes) { + alignas(64) const auto a = 
GetRandomBuffer(); + EXPECT_EQ(ParamType::not_equals(a, a), 0UL); + for (size_t i = 0; i < a.size(); ++i) { + alignas(64) auto b = a; + ++b[i]; + EXPECT_NE(ParamType::not_equals(a, b), 0UL); + EXPECT_NE(ParamType::not_equals(b, a), 0UL); + } +} + +TYPED_TEST(LlvmLibcMemoryBackend, three_way_cmp, FunctionTypes) { + alignas(64) const auto a = GetRandomBuffer(); + EXPECT_EQ(ParamType::three_way_cmp(a, a), 0); + for (size_t i = 0; i < a.size(); ++i) { + alignas(64) auto b = a; + ++b[i]; + const auto cmp = memcmp(&a, &b, sizeof(a)); + ASSERT_NE(cmp, 0); + if (cmp > 0) { + EXPECT_GT(ParamType::three_way_cmp(a, b), 0); + EXPECT_LT(ParamType::three_way_cmp(b, a), 0); + } else { + EXPECT_LT(ParamType::three_way_cmp(a, b), 0); + EXPECT_GT(ParamType::three_way_cmp(b, a), 0); + } + } +} + +template +struct LoadStoreConf { + static_assert(Backend::IS_BACKEND_TYPE); + using BufferT = Buffer; + using T = typename Backend::template getNextType; + static_assert(sizeof(T) == Size); + static constexpr size_t SIZE = Size; + + static BufferT load(const BufferT &ref) { + const auto *ptr = bit_cast(ref.data()); + const T value = Backend::template load(ptr); + return bit_cast(value); + } + + static void store(BufferT &ref, const BufferT value) { + auto *ptr = bit_cast(ref.data()); + Backend::template store(ptr, bit_cast(value)); + } +}; + +using LoadStoreTypes = testing::TypeList< // +#if defined(LLVM_LIBC_ARCH_X86) // + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // +#if HAS_M128 + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // +#endif +#if HAS_M256 + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // +#endif +#if HAS_M512 + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // +#endif +#endif // defined(LLVM_LIBC_ARCH_X86) + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf, // + LoadStoreConf // + >; + +TYPED_TEST(LlvmLibcMemoryBackend, load, LoadStoreTypes) { + alignas(64) const auto expected = GetRandomBuffer(); + const auto loaded = ParamType::load(expected); + for (size_t i = 0; i < ParamType::SIZE; ++i) + EXPECT_EQ(loaded[i], expected[i]); +} + +TYPED_TEST(LlvmLibcMemoryBackend, store, LoadStoreTypes) { + alignas(64) const auto expected = GetRandomBuffer(); + alignas(64) typename ParamType::BufferT stored; + ParamType::store(stored, expected); + for (size_t i = 0; i < ParamType::SIZE; ++i) + EXPECT_EQ(stored[i], expected[i]); +} + +} // namespace __llvm_libc diff --git a/libc/utils/UnitTest/LibcTest.h b/libc/utils/UnitTest/LibcTest.h --- a/libc/utils/UnitTest/LibcTest.h +++ b/libc/utils/UnitTest/LibcTest.h @@ -89,6 +89,14 @@ return internal::test(Ctx, Cond, LHS, RHS, LHSStr, RHSStr, File, Line); } + template ::Value, int> = 0> + bool test(TestCondition Cond, ValType LHS, ValType RHS, const char *LHSStr, + const char *RHSStr, const char *File, unsigned long Line) { + return internal::test(Ctx, Cond, (long long)LHS, (long long)RHS, LHSStr, + RHSStr, File, Line); + } + template < typename ValType, cpp::EnableIfType::Value, ValType> = nullptr> diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -779,9 +779,13 @@ cc_library( name = "string_memory_utils", hdrs = [ - "src/string/memory_utils/elements.h", - 
"src/string/memory_utils/elements_aarch64.h", - "src/string/memory_utils/elements_x86.h", + "src/string/memory_utils/address.h", + "src/string/memory_utils/algorithm.h", + "src/string/memory_utils/backends.h", + "src/string/memory_utils/backend_scalar.h", + "src/string/memory_utils/backend_x86.h", + "src/string/memory_utils/backend_aarch64.h", + "src/string/memory_utils/sized_op.h", "src/string/memory_utils/utils.h", ], textual_hdrs = [ @@ -793,6 +797,7 @@ deps = [ ":__support_common", ":__support_cpp_bit", + ":__support_cpp_type_traits", ":libc_root", ], )