diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt --- a/libc/src/string/memory_utils/CMakeLists.txt +++ b/libc/src/string/memory_utils/CMakeLists.txt @@ -5,6 +5,7 @@ aarch64/bcmp_implementations.h aarch64/memcmp_implementations.h aarch64/memcpy_implementations.h + aarch64/memset_implementations.h bcmp_implementations.h bzero_implementations.h generic/aligned_access.h @@ -19,10 +20,12 @@ op_x86.h riscv/bcmp_implementations.h riscv/memcpy_implementations.h + riscv/memset_implementations.h utils.h x86_64/bcmp_implementations.h x86_64/memcmp_implementations.h x86_64/memcpy_implementations.h + x86_64/memset_implementations.h DEPS libc.src.__support.common libc.src.__support.CPP.bit diff --git a/libc/src/string/memory_utils/aarch64/memset_implementations.h b/libc/src/string/memory_utils/aarch64/memset_implementations.h new file mode 100644 --- /dev/null +++ b/libc/src/string/memory_utils/aarch64/memset_implementations.h @@ -0,0 +1,61 @@ +//===-- Memset implementation for aarch64 -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_MEMSET_IMPLEMENTATIONS_H +#define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_MEMSET_IMPLEMENTATIONS_H + +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/string/memory_utils/op_aarch64.h" +#include "src/string/memory_utils/op_generic.h" +#include "src/string/memory_utils/utils.h" // Ptr, CPtr + +#include // size_t + +namespace __llvm_libc { + +[[maybe_unused]] LIBC_INLINE static void +inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) { + static_assert(aarch64::kNeon, "aarch64 supports vector types"); + using uint128_t = generic_v128; + using uint256_t = generic_v256; + using uint512_t = generic_v512; + if (count == 0) + return; + if (count <= 3) { + generic::Memset::block(dst, value); + if (count > 1) + generic::Memset::tail(dst, value, count); + return; + } + if (count <= 8) + return generic::Memset::head_tail(dst, value, count); + if (count <= 16) + return generic::Memset::head_tail(dst, value, count); + if (count <= 32) + return generic::Memset::head_tail(dst, value, count); + if (count <= (32 + 64)) { + generic::Memset::block(dst, value); + if (count <= 64) + return generic::Memset::tail(dst, value, count); + generic::Memset::block(dst + 32, value); + generic::Memset::tail(dst, value, count); + return; + } + if (count >= 448 && value == 0 && aarch64::neon::hasZva()) { + generic::Memset::block(dst, 0); + align_to_next_boundary<64>(dst, count); + return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count); + } else { + generic::Memset::block(dst, value); + align_to_next_boundary<16>(dst, count); + return generic::Memset::loop_and_tail(dst, value, count); + } +} + +} // namespace __llvm_libc + +#endif // LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_MEMSET_IMPLEMENTATIONS_H diff --git a/libc/src/string/memory_utils/memset_implementations.h b/libc/src/string/memory_utils/memset_implementations.h --- a/libc/src/string/memory_utils/memset_implementations.h +++ b/libc/src/string/memory_utils/memset_implementations.h @@ -9,126 +9,35 @@ #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_IMPLEMENTATIONS_H #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_IMPLEMENTATIONS_H -#include "src/__support/common.h" -#include "src/__support/macros/optimization.h" -#include "src/__support/macros/properties/architectures.h" -#include "src/string/memory_utils/generic/aligned_access.h" -#include "src/string/memory_utils/generic/byte_per_byte.h" -#include "src/string/memory_utils/op_aarch64.h" -#include "src/string/memory_utils/op_builtin.h" -#include "src/string/memory_utils/op_generic.h" -#include "src/string/memory_utils/op_x86.h" -#include "src/string/memory_utils/utils.h" +#include "src/__support/macros/config.h" // LIBC_INLINE +#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_ +#include "src/string/memory_utils/utils.h" // Ptr, CPtr #include // size_t -namespace __llvm_libc { - -#if defined(LIBC_TARGET_ARCH_IS_X86) -[[maybe_unused]] LIBC_INLINE static void -inline_memset_x86(Ptr dst, uint8_t value, size_t count) { -#if defined(__AVX512F__) - using uint128_t = generic_v128; - using uint256_t = generic_v256; - using uint512_t = generic_v512; -#elif defined(__AVX__) - using uint128_t = generic_v128; - using uint256_t = generic_v256; - using uint512_t = cpp::array; -#elif defined(__SSE2__) - using uint128_t = generic_v128; - using uint256_t = cpp::array; - using uint512_t = cpp::array; -#else - using uint128_t = cpp::array; - using uint256_t = cpp::array; - using uint512_t = cpp::array; -#endif - - if (count == 0) - return; - if (count == 1) - return generic::Memset::block(dst, value); - if (count == 2) - return generic::Memset::block(dst, value); - if (count == 3) - return generic::MemsetSequence::block(dst, value); - if (count <= 8) - return generic::Memset::head_tail(dst, value, count); - if (count <= 16) - return generic::Memset::head_tail(dst, value, count); - if (count <= 32) - return generic::Memset::head_tail(dst, value, count); - if (count <= 64) - return generic::Memset::head_tail(dst, value, count); - if (count <= 128) - return generic::Memset::head_tail(dst, value, count); - // Aligned loop - generic::Memset::block(dst, value); - align_to_next_boundary<32>(dst, count); - return generic::Memset::loop_and_tail(dst, value, count); -} -#endif // defined(LIBC_TARGET_ARCH_IS_X86) - -#if defined(LIBC_TARGET_ARCH_IS_AARCH64) -[[maybe_unused]] LIBC_INLINE static void -inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) { - static_assert(aarch64::kNeon, "aarch64 supports vector types"); - using uint128_t = generic_v128; - using uint256_t = generic_v256; - using uint512_t = generic_v512; - if (count == 0) - return; - if (count <= 3) { - generic::Memset::block(dst, value); - if (count > 1) - generic::Memset::tail(dst, value, count); - return; - } - if (count <= 8) - return generic::Memset::head_tail(dst, value, count); - if (count <= 16) - return generic::Memset::head_tail(dst, value, count); - if (count <= 32) - return generic::Memset::head_tail(dst, value, count); - if (count <= (32 + 64)) { - generic::Memset::block(dst, value); - if (count <= 64) - return generic::Memset::tail(dst, value, count); - generic::Memset::block(dst + 32, value); - generic::Memset::tail(dst, value, count); - return; - } - if (count >= 448 && value == 0 && aarch64::neon::hasZva()) { - generic::Memset::block(dst, 0); - align_to_next_boundary<64>(dst, count); - return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count); - } else { - generic::Memset::block(dst, value); - align_to_next_boundary<16>(dst, count); - return generic::Memset::loop_and_tail(dst, value, count); - } -} -#endif // defined(LIBC_TARGET_ARCH_IS_AARCH64) - -LIBC_INLINE static void inline_memset(Ptr dst, uint8_t value, size_t count) { #if defined(LIBC_TARGET_ARCH_IS_X86) - return inline_memset_x86(dst, value, count); +#include "src/string/memory_utils/x86_64/memset_implementations.h" +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_x86 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) - return inline_memset_aarch64(dst, value, count); -#elif defined(LIBC_TARGET_ARCH_IS_RISCV64) - return inline_memset_aligned_access_64bit(dst, value, count); -#elif defined(LIBC_TARGET_ARCH_IS_RISCV32) - return inline_memset_aligned_access_32bit(dst, value, count); +#include "src/string/memory_utils/aarch64/memset_implementations.h" +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64 +#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV) +#include "src/string/memory_utils/riscv/memset_implementations.h" +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_riscv #else - return inline_memset_byte_per_byte(dst, value, count); +// We may want to error instead of defaulting to suboptimal implementation. +#include "src/string/memory_utils/generic/byte_per_byte.h" +#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_byte_per_byte #endif -} + +namespace __llvm_libc { LIBC_INLINE static void inline_memset(void *dst, uint8_t value, size_t count) { - inline_memset(reinterpret_cast(dst), value, count); + LIBC_SRC_STRING_MEMORY_UTILS_MEMSET(reinterpret_cast(dst), value, count); } } // namespace __llvm_libc +#undef LIBC_SRC_STRING_MEMORY_UTILS_MEMSET + #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_IMPLEMENTATIONS_H diff --git a/libc/src/string/memory_utils/riscv/memset_implementations.h b/libc/src/string/memory_utils/riscv/memset_implementations.h new file mode 100644 --- /dev/null +++ b/libc/src/string/memory_utils/riscv/memset_implementations.h @@ -0,0 +1,33 @@ +//===-- Memset implementation for riscv -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LIBC_SRC_STRING_MEMORY_UTILS_RISCV_MEMSET_IMPLEMENTATIONS_H +#define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_MEMSET_IMPLEMENTATIONS_H + +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64 +#include "src/string/memory_utils/generic/aligned_access.h" +#include "src/string/memory_utils/utils.h" // Ptr, CPtr + +#include // size_t + +namespace __llvm_libc { + +LIBC_INLINE static void inline_memset_riscv(Ptr dst, uint8_t value, + size_t count) { +#if defined(LIBC_TARGET_ARCH_IS_RISCV64) + return inline_memset_aligned_access_64bit(dst, value, count); +#elif defined(LIBC_TARGET_ARCH_IS_RISCV32) + return inline_memset_aligned_access_32bit(dst, value, count); +#else +#error "Unimplemented" +#endif +} + +} // namespace __llvm_libc + +#endif // LIBC_SRC_STRING_MEMORY_UTILS_RISCV_MEMSET_IMPLEMENTATIONS_H diff --git a/libc/src/string/memory_utils/x86_64/memset_implementations.h b/libc/src/string/memory_utils/x86_64/memset_implementations.h new file mode 100644 --- /dev/null +++ b/libc/src/string/memory_utils/x86_64/memset_implementations.h @@ -0,0 +1,65 @@ +//===-- Memset implementation for x86_64 ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LIBC_SRC_STRING_MEMORY_UTILS_X86_64_MEMSET_IMPLEMENTATIONS_H +#define LIBC_SRC_STRING_MEMORY_UTILS_X86_64_MEMSET_IMPLEMENTATIONS_H + +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/string/memory_utils/op_generic.h" +#include "src/string/memory_utils/op_x86.h" +#include "src/string/memory_utils/utils.h" // Ptr, CPtr + +#include // size_t + +namespace __llvm_libc { + +[[maybe_unused]] LIBC_INLINE static void +inline_memset_x86(Ptr dst, uint8_t value, size_t count) { +#if defined(__AVX512F__) + using uint128_t = generic_v128; + using uint256_t = generic_v256; + using uint512_t = generic_v512; +#elif defined(__AVX__) + using uint128_t = generic_v128; + using uint256_t = generic_v256; + using uint512_t = cpp::array; +#elif defined(__SSE2__) + using uint128_t = generic_v128; + using uint256_t = cpp::array; + using uint512_t = cpp::array; +#else + using uint128_t = cpp::array; + using uint256_t = cpp::array; + using uint512_t = cpp::array; +#endif + + if (count == 0) + return; + if (count == 1) + return generic::Memset::block(dst, value); + if (count == 2) + return generic::Memset::block(dst, value); + if (count == 3) + return generic::MemsetSequence::block(dst, value); + if (count <= 8) + return generic::Memset::head_tail(dst, value, count); + if (count <= 16) + return generic::Memset::head_tail(dst, value, count); + if (count <= 32) + return generic::Memset::head_tail(dst, value, count); + if (count <= 64) + return generic::Memset::head_tail(dst, value, count); + if (count <= 128) + return generic::Memset::head_tail(dst, value, count); + // Aligned loop + generic::Memset::block(dst, value); + align_to_next_boundary<32>(dst, count); + return generic::Memset::loop_and_tail(dst, value, count); +} +} // namespace __llvm_libc + +#endif // LIBC_SRC_STRING_MEMORY_UTILS_X86_64_MEMSET_IMPLEMENTATIONS_H diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -2012,6 +2012,7 @@ "src/string/memory_utils/aarch64/bcmp_implementations.h", "src/string/memory_utils/aarch64/memcmp_implementations.h", "src/string/memory_utils/aarch64/memcpy_implementations.h", + "src/string/memory_utils/aarch64/memset_implementations.h", "src/string/memory_utils/bcmp_implementations.h", "src/string/memory_utils/bzero_implementations.h", "src/string/memory_utils/generic/aligned_access.h", @@ -2023,11 +2024,13 @@ "src/string/memory_utils/memset_implementations.h", "src/string/memory_utils/riscv/bcmp_implementations.h", "src/string/memory_utils/riscv/memcpy_implementations.h", + "src/string/memory_utils/riscv/memset_implementations.h", "src/string/memory_utils/strcmp_implementations.h", "src/string/memory_utils/strstr_implementations.h", "src/string/memory_utils/x86_64/bcmp_implementations.h", "src/string/memory_utils/x86_64/memcmp_implementations.h", "src/string/memory_utils/x86_64/memcpy_implementations.h", + "src/string/memory_utils/x86_64/memset_implementations.h", ], deps = [ ":__support_common",