diff --git a/libc/src/__support/macros/properties/architectures.h b/libc/src/__support/macros/properties/architectures.h
--- a/libc/src/__support/macros/properties/architectures.h
+++ b/libc/src/__support/macros/properties/architectures.h
@@ -49,6 +49,10 @@
 #define LIBC_TARGET_ARCH_IS_RISCV64
 #endif
 
+#if defined(__riscv) && (__riscv_xlen == 32)
+#define LIBC_TARGET_ARCH_IS_RISCV32
+#endif
+
 #if (defined(LIBC_TARGET_ARCH_IS_AARCH64) || defined(LIBC_TARGET_ARCH_IS_ARM))
 #define LIBC_TARGET_ARCH_IS_ANY_ARM
 #endif
diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -18,6 +18,7 @@
     x86_64/memcmp_implementations.h
     x86_64/memcpy_implementations.h
   DEPS
+    libc.src.__support.common
    libc.src.__support.CPP.bit
    libc.src.__support.CPP.cstddef
    libc.src.__support.CPP.type_traits
diff --git a/libc/src/string/memory_utils/memcpy_implementations.h b/libc/src/string/memory_utils/memcpy_implementations.h
--- a/libc/src/string/memory_utils/memcpy_implementations.h
+++ b/libc/src/string/memory_utils/memcpy_implementations.h
@@ -26,24 +26,79 @@
 namespace __llvm_libc {
 
 [[maybe_unused]] LIBC_INLINE void
-inline_memcpy_embedded_tiny(Ptr __restrict dst, CPtr __restrict src,
-                            size_t count) {
+inline_memcpy_byte_per_byte(Ptr dst, CPtr src, size_t offset, size_t count) {
   LIBC_LOOP_NOUNROLL
-  for (size_t offset = 0; offset < count; ++offset)
-    builtin::Memcpy<1>::block(dst + offset, src + offset);
+  for (; offset < count; ++offset)
+    dst[offset] = src[offset];
+}
+
+[[maybe_unused]] LIBC_INLINE void
+inline_memcpy_aligned_access_32bit(Ptr __restrict dst, CPtr __restrict src,
+                                   size_t count) {
+  constexpr size_t kAlign = sizeof(uint32_t);
+  if (count <= 2 * kAlign)
+    return inline_memcpy_byte_per_byte(dst, src, 0, count);
+  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
+  inline_memcpy_byte_per_byte(dst, src, 0, bytes_to_dst_align);
+  size_t offset = bytes_to_dst_align;
+  size_t src_alignment = distance_to_align_down<kAlign>(src + offset);
+  for (; offset < count - kAlign; offset += kAlign) {
+    uint32_t value;
+    if (src_alignment == 0)
+      value = load32_aligned<uint32_t>(src, offset);
+    else if (src_alignment == 2)
+      value = load32_aligned<uint16_t, uint16_t>(src, offset);
+    else
+      value = load32_aligned<uint8_t, uint16_t, uint8_t>(src, offset);
+    store32_aligned<uint32_t>(value, dst, offset);
+  }
+  // remainder
+  inline_memcpy_byte_per_byte(dst, src, offset, count);
+}
+
+[[maybe_unused]] LIBC_INLINE void
+inline_memcpy_aligned_access_64bit(Ptr __restrict dst, CPtr __restrict src,
+                                   size_t count) {
+  constexpr size_t kAlign = sizeof(uint64_t);
+  if (count <= 2 * kAlign)
+    return inline_memcpy_byte_per_byte(dst, src, 0, count);
+  size_t bytes_to_dst_align = distance_to_align_up<kAlign>(dst);
+  inline_memcpy_byte_per_byte(dst, src, 0, bytes_to_dst_align);
+  size_t offset = bytes_to_dst_align;
+  size_t src_alignment = distance_to_align_down<kAlign>(src + offset);
+  for (; offset < count - kAlign; offset += kAlign) {
+    uint64_t value;
+    if (src_alignment == 0)
+      value = load64_aligned<uint64_t>(src, offset);
+    else if (src_alignment == 4)
+      value = load64_aligned<uint32_t, uint32_t>(src, offset);
+    else if (src_alignment == 2)
+      value =
+          load64_aligned<uint16_t, uint16_t, uint16_t, uint16_t>(src, offset);
+    else
+      value = load64_aligned<uint8_t, uint16_t, uint16_t, uint16_t, uint8_t>(
+          src, offset);
+    store64_aligned<uint64_t>(value, dst, offset);
+  }
+  // remainder
+  inline_memcpy_byte_per_byte(dst, src, offset, count);
 }
 
 LIBC_INLINE void inline_memcpy(Ptr __restrict dst, CPtr __restrict src,
                                size_t count) {
   using namespace __llvm_libc::builtin;
 #if defined(LIBC_COPT_MEMCPY_USE_EMBEDDED_TINY)
-  return inline_memcpy_embedded_tiny(dst, src, count);
+  return inline_memcpy_byte_per_byte(dst, src, 0, count);
 #elif defined(LIBC_TARGET_ARCH_IS_X86)
   return inline_memcpy_x86_maybe_interpose_repmovsb(dst, src, count);
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
   return inline_memcpy_aarch64(dst, src, count);
+#elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
+  return inline_memcpy_aligned_access_64bit(dst, src, count);
+#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
+  return inline_memcpy_aligned_access_32bit(dst, src, count);
 #else
-  return inline_memcpy_embedded_tiny(dst, src, count);
+  return inline_memcpy_byte_per_byte(dst, src, 0, count);
 #endif
 }
diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h
--- a/libc/src/string/memory_utils/utils.h
+++ b/libc/src/string/memory_utils/utils.h
@@ -12,8 +12,9 @@
 #include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/cstddef.h"
 #include "src/__support/CPP/type_traits.h"
-#include "src/__support/macros/attributes.h" //LIBC_INLINE
-#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN
+#include "src/__support/endian.h"
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/config.h"     // LIBC_HAS_BUILTIN
 
 #include <stddef.h> // size_t
 #include <stdint.h> // intptr_t / uintptr_t
@@ -97,8 +98,15 @@
 #ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
   __builtin_memcpy_inline(dst, src, Size);
 #else
+// In memory functions `memcpy_inline` is instantiated several times with
+// different values of the Size parameter. This doesn't play well with GCC's
+// Value Range Analysis, which wrongly detects out-of-bounds accesses. We
+// disable the 'array-bounds' warning for the purpose of this function.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
   for (size_t i = 0; i < Size; ++i)
     static_cast<char *>(dst)[i] = static_cast<const char *>(src)[i];
+#pragma GCC diagnostic pop
 #endif
 }
 
@@ -153,6 +161,81 @@
   memcpy_inline<sizeof(T)>(ptr, &value);
 }
 
+// On architectures that do not allow unaligned access, we perform several
+// aligned accesses and recombine them through shifts and logical operations.
+// For instance, if we know that the pointer is 2-byte aligned, we can
+// decompose a 64-bit operation into four 16-bit operations.
+
+// Loads a 'ValueType' by decomposing it into several loads that are assumed to
+// be aligned.
+// e.g. load_aligned<uint32_t, uint16_t, uint8_t, uint8_t>(ptr);
+template <typename ValueType, typename T, typename... TS>
+ValueType load_aligned(CPtr src) {
+  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
+  const ValueType value = load<T>(assume_aligned<sizeof(T)>(src));
+  if constexpr (sizeof...(TS) > 0) {
+    constexpr size_t shift = sizeof(T) * 8;
+    const ValueType next = load_aligned<ValueType, TS...>(src + sizeof(T));
+    if constexpr (Endian::IS_LITTLE)
+      return value | (next << shift);
+    else if constexpr (Endian::IS_BIG)
+      return (value << shift) | next;
+    else
+      deferred_static_assert("Invalid endianness");
+  } else {
+    return value;
+  }
+}
+
+// Alias for loading a 'uint32_t'.
+template <typename T, typename... TS>
+auto load32_aligned(CPtr src, size_t offset) {
+  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
+  return load_aligned<uint32_t, T, TS...>(src + offset);
+}
+
+// Alias for loading a 'uint64_t'.
+template <typename T, typename... TS>
+auto load64_aligned(CPtr src, size_t offset) {
+  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
+  return load_aligned<uint64_t, T, TS...>(src + offset);
+}
+
+// Stores a 'ValueType' by decomposing it into several stores that are assumed
+// to be aligned.
+// e.g. store_aligned<uint32_t, uint16_t, uint8_t, uint8_t>(value, ptr);
+template <typename ValueType, typename T, typename... TS>
+void store_aligned(ValueType value, Ptr dst) {
+  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
+  constexpr size_t shift = sizeof(T) * 8;
+  if constexpr (Endian::IS_LITTLE) {
+    store<T>(assume_aligned<sizeof(T)>(dst), value & ~T(0));
+    if constexpr (sizeof...(TS) > 0)
+      store_aligned<ValueType, TS...>(value >> shift, dst + sizeof(T));
+  } else if constexpr (Endian::IS_BIG) {
+    constexpr size_t OFFSET = (0 + ... + sizeof(TS));
+    store<T>(assume_aligned<sizeof(T)>(dst + OFFSET), value & ~T(0));
+    if constexpr (sizeof...(TS) > 0)
+      store_aligned<ValueType, TS...>(value >> shift, dst);
+  } else {
+    deferred_static_assert("Invalid endianness");
+  }
+}
+
+// Alias for storing a 'uint32_t'.
+template <typename T, typename... TS>
+void store32_aligned(uint32_t value, Ptr dst, size_t offset) {
+  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
+  store_aligned<uint32_t, T, TS...>(value, dst + offset);
+}
+
+// Alias for storing a 'uint64_t'.
+template <typename T, typename... TS>
+void store64_aligned(uint64_t value, Ptr dst, size_t offset) {
+  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
+  store_aligned<uint64_t, T, TS...>(value, dst + offset);
+}
+
 // Advances the pointers p1 and p2 by offset bytes and decrease count by the
 // same amount.
 template <typename T>
diff --git a/libc/test/src/string/memory_utils/utils_test.cpp b/libc/test/src/string/memory_utils/utils_test.cpp
--- a/libc/test/src/string/memory_utils/utils_test.cpp
+++ b/libc/test/src/string/memory_utils/utils_test.cpp
@@ -144,4 +144,44 @@
   }
 }
 
+TEST(LlvmLibcUtilsTest, LoadStoreAligned) {
+  const uint64_t init = 0xDEAD'C0DE'BEEF'F00D;
+  CPtr const src = reinterpret_cast<CPtr>(&init);
+  uint64_t store;
+  Ptr const dst = reinterpret_cast<Ptr>(&store);
+
+  using LoadFun = uint64_t (*)(CPtr);
+  using StoreFun = void (*)(uint64_t, Ptr);
+
+  {
+    LoadFun ld = load_aligned<uint64_t, uint64_t>;
+    StoreFun st = store_aligned<uint64_t, uint64_t>;
+    const uint64_t loaded = ld(src);
+    EXPECT_EQ(init, loaded);
+    store = 0;
+    st(init, dst);
+    EXPECT_EQ(init, store);
+  }
+
+  {
+    LoadFun ld = load_aligned<uint64_t, uint32_t, uint32_t>;
+    StoreFun st = store_aligned<uint64_t, uint32_t, uint32_t>;
+    const uint64_t loaded = ld(src);
+    EXPECT_EQ(init, loaded);
+    store = 0;
+    st(init, dst);
+    EXPECT_EQ(init, store);
+  }
+
+  {
+    LoadFun ld = load_aligned<uint64_t, uint32_t, uint16_t, uint8_t, uint8_t>;
+    StoreFun st = store_aligned<uint64_t, uint32_t, uint16_t, uint8_t, uint8_t>;
+    const uint64_t loaded = ld(src);
+    EXPECT_EQ(init, loaded);
+    store = 0;
+    st(init, dst);
+    EXPECT_EQ(init, store);
+  }
+}
+
 } // namespace __llvm_libc
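
For readers unfamiliar with the recombination trick used by the patch: the short standalone sketch below (not part of the patch; the helper name load32_from_2byte_aligned is invented for illustration) shows how a 32-bit value at a 2-byte-aligned address can be assembled from two aligned 16-bit loads on a little-endian target, which is essentially what load32_aligned<uint16_t, uint16_t> does in inline_memcpy_aligned_access_32bit.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Illustration only: rebuild a 32-bit value from two aligned 16-bit loads,
// mirroring load32_aligned<uint16_t, uint16_t> on a little-endian machine.
static uint32_t load32_from_2byte_aligned(const unsigned char *src) {
  uint16_t lo, hi;
  std::memcpy(&lo, src, sizeof(lo));     // first aligned 16-bit load
  std::memcpy(&hi, src + 2, sizeof(hi)); // second aligned 16-bit load
  // Little-endian recombination: low half first, high half shifted up.
  return uint32_t(lo) | (uint32_t(hi) << 16);
}

int main() {
  alignas(4) unsigned char buffer[8] = {0, 0, 0x0D, 0xF0, 0xEF, 0xBE, 0, 0};
  // buffer + 2 is 2-byte (but not 4-byte) aligned.
  std::printf("0x%08x\n", (unsigned)load32_from_2byte_aligned(buffer + 2));
  return 0; // prints 0xbeeff00d
}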