diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -38,6 +38,10 @@
 #define LIBC_TARGET_CPU_HAS_AVX512BW
 #endif
 
+#if defined(__riscv_vector)
+#define LIBC_TARGET_CPU_HAS_RISCV_VEXT
+#endif
+
 #if defined(__ARM_FEATURE_FMA) || (defined(__AVX2__) && defined(__FMA__)) ||   \
     defined(__NVPTX__) || defined(__AMDGPU__) || defined(__LIBC_RISCV_USE_FMA)
 #define LIBC_TARGET_CPU_HAS_FMA
diff --git a/libc/src/string/memory_utils/riscv/vext.h b/libc/src/string/memory_utils/riscv/vext.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/riscv/vext.h
@@ -0,0 +1,96 @@
+//===-- Memory functions vector implementations for riscv -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_RISCV_VECTOR_EXTENSION_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_VECTOR_EXTENSION_H
+
+#include "src/__support/macros/properties/cpu_features.h"
+
+#ifndef LIBC_TARGET_CPU_HAS_RISCV_VEXT
+#error "Include this file only if riscv vector extension is available"
+#endif
+
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/string/memory_utils/utils.h"   // Ptr, CPtr
+
+#include <riscv_vector.h>
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t
+
+namespace __llvm_libc {
+
+// Copies `count` bytes from `src` to `dst`, one vector strip at a time.
+// Each pass asks for a vector length covering the remaining bytes (e8, LMUL=8)
+// and advances by the granted amount; there is no separate scalar tail loop.
+LIBC_INLINE static void inline_memcpy_vext(uint8_t *dst, const uint8_t *src,
+                                           size_t count) {
+  while (count > 0) {
+    const size_t vl = __riscv_vsetvl_e8m8(count);
+    __riscv_vse8_v_u8m8(dst, __riscv_vle8_v_u8m8(src, vl), vl);
+    count -= vl;
+    src += vl;
+    dst += vl;
+  }
+}
+
+// Stores `count` copies of `value` starting at `dst`.
+// The splat register is built once at the maximum vector length; every store
+// in the loop then writes only its first `vl` elements.
+LIBC_INLINE static void inline_memset_vext(uint8_t *dst, const uint8_t value,
+                                           size_t count) {
+  const vuint8m8_t splat =
+      __riscv_vmv_v_x_u8m8(value, __riscv_vsetvlmax_e8m8());
+  while (count > 0) {
+    const size_t vl = __riscv_vsetvl_e8m8(count);
+    __riscv_vse8_v_u8m8(dst, splat, vl);
+    count -= vl;
+    dst += vl;
+  }
+}
+
+// Returns 0 when the first `count` bytes of `p1` and `p2` are equal and 1
+// otherwise.
+LIBC_INLINE static int inline_bcmp_vext(const uint8_t *p1, const uint8_t *p2,
+                                        size_t count) {
+  while (count > 0) {
+    const size_t vl = __riscv_vsetvl_e8m8(count);
+    const vuint8m8_t a = __riscv_vle8_v_u8m8(p1, vl);
+    const vuint8m8_t b = __riscv_vle8_v_u8m8(p2, vl);
+    const vbool1_t neq_mask = __riscv_vmsne_vv_u8m8_b1(a, b, vl);
+    // vfirst: index of the first set mask bit, or -1 when none is set.
+    const long first_neq_index = __riscv_vfirst_m_b1(neq_mask, vl);
+    if (first_neq_index >= 0)
+      return 1;
+    count -= vl;
+    p1 += vl;
+    p2 += vl;
+  }
+  return 0;
+}
+
+// Returns the difference between the first pair of differing bytes (read as
+// uint8_t values) within the first `count` bytes, or 0 if there is none.
+LIBC_INLINE static int inline_memcmp_vext(const uint8_t *p1, const uint8_t *p2,
+                                          size_t count) {
+  while (count > 0) {
+    const size_t vl = __riscv_vsetvl_e8m8(count);
+    const vuint8m8_t a = __riscv_vle8_v_u8m8(p1, vl);
+    const vuint8m8_t b = __riscv_vle8_v_u8m8(p2, vl);
+    const vbool1_t neq_mask = __riscv_vmsne_vv_u8m8_b1(a, b, vl);
+    const long first_neq_index = __riscv_vfirst_m_b1(neq_mask, vl);
+    if (first_neq_index >= 0)
+      return p1[first_neq_index] - p2[first_neq_index];
+    count -= vl;
+    p1 += vl;
+    p2 += vl;
+  }
+  return 0;
+}
+
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_RISCV_VECTOR_EXTENSION_H