diff --git a/libc/src/string/memory_utils/memcpy_utils.h b/libc/src/string/memory_utils/memcpy_utils.h --- a/libc/src/string/memory_utils/memcpy_utils.h +++ b/libc/src/string/memory_utils/memcpy_utils.h @@ -72,28 +72,35 @@ // Copies `count` bytes by blocks of `kBlockSize` bytes. // Copies at the start and end of the buffer are unaligned. -// Copies in the middle of the buffer are aligned to `kBlockSize`. +// Copies in the middle of the buffer are aligned to `kAlignment`. // // e.g. with // [12345678123456781234567812345678] -// [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___] -// [__XXXXXXXX______________________] -// [________XXXXXXXX________________] -// [________________XXXXXXXX________] -// [_____________________XXXXXXXX___] +// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___] +// [__XXXX___________________________] +// [_____XXXXXXXX____________________] +// [_____________XXXXXXXX____________] +// [_____________________XXXXXXXX____] +// [______________________XXXXXXXX___] // -// Precondition: `count > 2 * kBlockSize` for efficiency. -// `count >= kBlockSize` for correctness. -template +// Precondition: `kAlignment <= kBlockSize` +// `count > 2 * kBlockSize` for efficiency. +// `count >= kBlockSize` for correctness. 
+template static void CopyAlignedBlocks(char *__restrict dst, const char *__restrict src, size_t count) { - CopyBlock(dst, src); // Copy first block + static_assert(is_power2(kAlignment), "kAlignment must be a power of two"); + static_assert(is_power2(kBlockSize), "kBlockSize must be a power of two"); + static_assert(kAlignment <= kBlockSize, + "kAlignment must be less or equal to block size"); + CopyBlock(dst, src); // Copy first block // Copy aligned blocks - const size_t ofla = offset_from_last_aligned(src); + const size_t ofla = offset_from_last_aligned(src); const size_t limit = count + ofla - kBlockSize; - for (size_t offset = kBlockSize; offset < limit; offset += kBlockSize) - CopyBlock(dst - ofla + offset, src - ofla + offset); + for (size_t offset = kAlignment; offset < limit; offset += kBlockSize) + CopyBlock(dst - ofla + offset, + assume_aligned(src - ofla + offset)); CopyLastBlock(dst, src, count); // Copy last block } diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h --- a/libc/src/string/memory_utils/utils.h +++ b/libc/src/string/memory_utils/utils.h @@ -60,6 +60,10 @@ return offset_to_next_aligned(ptr); } +template static T *assume_aligned(T *ptr) { + return reinterpret_cast(__builtin_assume_aligned(ptr, alignment)); +} + } // namespace __llvm_libc #endif // LLVM_LIBC_SRC_MEMORY_UTILS_H diff --git a/libc/test/src/string/memory_utils/memcpy_utils_test.cpp b/libc/test/src/string/memory_utils/memcpy_utils_test.cpp --- a/libc/test/src/string/memory_utils/memcpy_utils_test.cpp +++ b/libc/test/src/string/memory_utils/memcpy_utils_test.cpp @@ -211,7 +211,24 @@ EXPECT_STREQ(trace.Read(), "011121111111"); } -TEST(MemcpyUtilsTest, MaxReloads) { +TEST(MemcpyUtilsTest, CopyAlignedBlocksWithAlignment) { + auto &trace = GetTrace(); + // Source is aligned and multiple of alignment. 
+ // "11111111" + trace.Clear(); + CopyAlignedBlocks<8, 4>(I(0), I(0), 8); + EXPECT_STREQ(trace.Write(), "22221111"); + EXPECT_STREQ(trace.Read(), "22221111"); + + // Source is aligned but count is not a multiple of alignment. + // "111111111" + trace.Clear(); + CopyAlignedBlocks<8, 4>(I(0), I(0), 9); + EXPECT_STREQ(trace.Write(), "122211111"); + EXPECT_STREQ(trace.Read(), "122211111"); +} + +TEST(MemcpyUtilsTest, CopyAlignedBlocksMaxReloads) { auto &trace = GetTrace(); for (size_t alignment = 0; alignment < 32; ++alignment) { for (size_t count = 64; count < 768; ++count) { @@ -231,4 +248,24 @@ } } +TEST(MemcpyUtilsTest, CopyAlignedBlocksWithAlignmentMaxReloads) { + auto &trace = GetTrace(); + for (size_t alignment = 0; alignment < 32; ++alignment) { + for (size_t count = 64; count < 768; ++count) { + trace.Clear(); + // We should never reload more than twice when copying from count = 2x32. + CopyAlignedBlocks<32, 16>(I(alignment), I(0), count); + const char *const written = trace.Write(); + // First bytes are untouched. + for (size_t i = 0; i < alignment; ++i) + EXPECT_EQ(written[i], '0'); + // Next bytes are loaded once or twice but no more. + for (size_t i = alignment; i < count; ++i) { + EXPECT_GE(written[i], '1'); + EXPECT_LE(written[i], '2'); + } + } + } +} + } // namespace __llvm_libc