diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
--- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
@@ -8,6 +8,9 @@
 if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
   set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA)
   set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
+elseif(${LIBC_TARGET_ARCHITECTURE_IS_RISCV64})
+  set(ALL_CPU_FEATURES RISCV_VEXT)
+  set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
 elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
   set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
 endif()
@@ -49,7 +52,8 @@
 endfunction()
 
 set(AVAILABLE_CPU_FEATURES "")
-if(LIBC_CROSSBUILD)
+
+if(LIBC_CROSSBUILD OR CMAKE_CROSSCOMPILING)
   # If we are doing a cross build, we will just assume that all CPU features
   # are available.
   set(AVAILABLE_CPU_FEATURES ${ALL_CPU_FEATURES})
diff --git a/libc/cmake/modules/cpu_features/check_RISCV_VEXT.cpp b/libc/cmake/modules/cpu_features/check_RISCV_VEXT.cpp
new file mode 100644
--- /dev/null
+++ b/libc/cmake/modules/cpu_features/check_RISCV_VEXT.cpp
@@ -0,0 +1,5 @@
+#include "src/__support/macros/properties/cpu_features.h"
+
+#ifndef LIBC_TARGET_CPU_HAS_RISCV_VEXT
+#error unsupported
+#endif
diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -38,6 +38,10 @@
 #define LIBC_TARGET_CPU_HAS_AVX512BW
 #endif
 
+#if defined(__riscv_vector)
+#define LIBC_TARGET_CPU_HAS_RISCV_VEXT
+#endif
+
 #if defined(__ARM_FEATURE_FMA) || (defined(__AVX2__) && defined(__FMA__)) ||  \
     defined(__NVPTX__) || defined(__AMDGPU__) || defined(__LIBC_RISCV_USE_FMA)
 #define LIBC_TARGET_CPU_HAS_FMA
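Taken together, the three hunks above form the feature-detection chain: the compiler predefines __riscv_vector whenever -march enables the V extension or a Zve* subset, the new cpu_features.h block maps that predefine onto LIBC_TARGET_CPU_HAS_RISCV_VEXT, and the CMake machinery compiles check_RISCV_VEXT.cpp to test whether a candidate flag set actually provides the feature. A minimal probe (illustrative only, not part of the patch) that fails at preprocessing time if the two macros ever disagree:

#include "src/__support/macros/properties/cpu_features.h"

// Holds on every target: both macros defined (RVV enabled) or both absent.
#if defined(__riscv_vector) != defined(LIBC_TARGET_CPU_HAS_RISCV_VEXT)
#error "cpu_features.h disagrees with the compiler about RVV availability"
#endif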
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -490,6 +490,9 @@
   add_bcmp(bcmp_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512BW)
   add_bcmp(bcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_bcmp(bcmp)
+elseif(LIBC_TARGET_ARCHITECTURE_IS_RISCV64)
+  add_bcmp(bcmp_riscv64_vext COMPILE_OPTIONS -march=rv64idzve32x1p0 REQUIRE RISCV_VEXT)
+  add_bcmp(bcmp)
 else()
   add_bcmp(bcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_bcmp(bcmp)
@@ -517,6 +520,9 @@
   add_bzero(bzero_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
   add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_bzero(bzero)
+elseif(LIBC_TARGET_ARCHITECTURE_IS_RISCV64)
+  add_bzero(bzero_riscv64_vext COMPILE_OPTIONS -march=rv64idzve32x1p0 REQUIRE RISCV_VEXT)
+  add_bzero(bzero)
 else()
   add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_bzero(bzero)
@@ -547,6 +553,9 @@
 elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
   add_memcmp(memcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_memcmp(memcmp)
+elseif(LIBC_TARGET_ARCHITECTURE_IS_RISCV64)
+  add_memcmp(memcmp_riscv64_vext COMPILE_OPTIONS -march=rv64idzve32x1p0 REQUIRE RISCV_VEXT)
+  add_memcmp(memcmp)
 else()
   add_memcmp(memcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_memcmp(memcmp)
@@ -581,6 +590,9 @@
   add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE} MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
   add_memcpy(memcpy MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
+elseif(LIBC_TARGET_ARCHITECTURE_IS_RISCV64)
+  add_memcpy(memcpy_riscv64_vext COMPILE_OPTIONS -march=rv64idzve32x1p0 REQUIRE RISCV_VEXT)
+  add_memcpy(memcpy)
 else()
   add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_memcpy(memcpy)
@@ -613,6 +625,9 @@
   add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE} MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
   add_memmove(memmove MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
+elseif(LIBC_TARGET_ARCHITECTURE_IS_RISCV64)
+  add_memmove(memmove_riscv64_vext COMPILE_OPTIONS -march=rv64idzve32x1p0 REQUIRE RISCV_VEXT)
+  add_memmove(memmove)
 else()
   add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_memmove(memmove)
@@ -645,6 +660,9 @@
   add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE} MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
   add_memset(memset MLLVM_COMPILE_OPTIONS "-tail-merge-threshold=0")
+elseif(LIBC_TARGET_ARCHITECTURE_IS_RISCV64)
+  add_memset(memset_riscv64_vext COMPILE_OPTIONS -march=rv64idzve32x1p0 REQUIRE RISCV_VEXT)
+  add_memset(memset)
 else()
   add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_memset(memset)
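Each *_riscv64_vext target above is the same generic entry point rebuilt with an RVV-enabling -march string, and REQUIRE RISCV_VEXT keeps it out of builds where the feature check fails. The vector implementations these variants pull in (inline_vext.h, further down) are all built on the vsetvl strip-mining idiom. A self-contained sketch of that idiom, assuming a toolchain that ships <riscv_vector.h>:

#include <riscv_vector.h>
#include <stddef.h>
#include <stdint.h>

// Strip-mining: vsetvl returns vl = min(count, VLMAX) on each iteration,
// so the final short chunk is handled by the same loop, no scalar tail.
static void copy_strip_mined(uint8_t *dst, const uint8_t *src, size_t count) {
  while (count > 0) {
    const size_t vl = __riscv_vsetvl_e8m8(count);
    __riscv_vse8_v_u8m8(dst, __riscv_vle8_v_u8m8(src, vl), vl);
    count -= vl;
    src += vl;
    dst += vl;
  }
}

Because vl adapts per iteration, the same binary scales across implementations with different vector lengths (VLEN), which is why no size-specific unrolled paths are needed.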
"src/string/memory_utils/riscv/inline_vext.h" +#endif // LIBC_TARGET_CPU_HAS_RISCV_VEXT + namespace __llvm_libc { [[maybe_unused]] LIBC_INLINE MemcmpReturnType inline_memcmp_riscv(CPtr p1, CPtr p2, size_t count) { -#if defined(LIBC_TARGET_ARCH_IS_RISCV64) +#if defined(LIBC_TARGET_CPU_HAS_RISCV_VEXT) + return inline_memcmp_riscv_vext(reinterpret_cast(p1), + reinterpret_cast(p2), count); +#elif defined(LIBC_TARGET_ARCH_IS_RISCV64) return inline_memcmp_aligned_access_64bit(p1, p2, count); #elif defined(LIBC_TARGET_ARCH_IS_RISCV32) return inline_memcmp_aligned_access_32bit(p1, p2, count); diff --git a/libc/src/string/memory_utils/riscv/inline_memcpy.h b/libc/src/string/memory_utils/riscv/inline_memcpy.h --- a/libc/src/string/memory_utils/riscv/inline_memcpy.h +++ b/libc/src/string/memory_utils/riscv/inline_memcpy.h @@ -8,18 +8,27 @@ #ifndef LIBC_SRC_STRING_MEMORY_UTILS_RISCV_INLINE_MEMCPY_H #define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_INLINE_MEMCPY_H -#include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64 +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/cpu_features.h" #include "src/string/memory_utils/generic/aligned_access.h" #include "src/string/memory_utils/utils.h" // Ptr, CPtr #include // size_t +#if defined(LIBC_TARGET_CPU_HAS_RISCV_VEXT) +#include "src/string/memory_utils/riscv/inline_vext.h" +#endif // LIBC_TARGET_CPU_HAS_RISCV_VEXT + namespace __llvm_libc { [[maybe_unused]] LIBC_INLINE void inline_memcpy_riscv(Ptr __restrict dst, CPtr __restrict src, size_t count) { -#if defined(LIBC_TARGET_ARCH_IS_RISCV64) +#if defined(LIBC_TARGET_CPU_HAS_RISCV_VEXT) + return inline_memcpy_riscv_vext(reinterpret_cast(dst), + reinterpret_cast(src), + count); +#elif defined(LIBC_TARGET_ARCH_IS_RISCV64) return inline_memcpy_aligned_access_64bit(dst, src, count); #elif defined(LIBC_TARGET_ARCH_IS_RISCV32) return inline_memcpy_aligned_access_32bit(dst, src, count); diff --git a/libc/src/string/memory_utils/riscv/inline_memset.h b/libc/src/string/memory_utils/riscv/inline_memset.h --- a/libc/src/string/memory_utils/riscv/inline_memset.h +++ b/libc/src/string/memory_utils/riscv/inline_memset.h @@ -8,18 +8,26 @@ #ifndef LIBC_SRC_STRING_MEMORY_UTILS_RISCV_INLINE_MEMSET_H #define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_INLINE_MEMSET_H -#include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64 +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/macros/properties/architectures.h" +#include "src/__support/macros/properties/cpu_features.h" #include "src/string/memory_utils/generic/aligned_access.h" #include "src/string/memory_utils/utils.h" // Ptr, CPtr #include // size_t +#if defined(LIBC_TARGET_CPU_HAS_RISCV_VEXT) +#include "src/string/memory_utils/riscv/inline_vext.h" +#endif // LIBC_TARGET_CPU_HAS_RISCV_VEXT + namespace __llvm_libc { LIBC_INLINE static void inline_memset_riscv(Ptr dst, uint8_t value, size_t count) { -#if defined(LIBC_TARGET_ARCH_IS_RISCV64) +#if defined(LIBC_TARGET_CPU_HAS_RISCV_VEXT) + return inline_memset_riscv_vext(reinterpret_cast(p1), value, + count); +#elif defined(LIBC_TARGET_ARCH_IS_RISCV64) return inline_memset_aligned_access_64bit(dst, value, count); #elif defined(LIBC_TARGET_ARCH_IS_RISCV32) return inline_memset_aligned_access_32bit(dst, value, count); diff 
diff --git a/libc/src/string/memory_utils/riscv/inline_vext.h b/libc/src/string/memory_utils/riscv/inline_vext.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memory_utils/riscv/inline_vext.h
@@ -0,0 +1,85 @@
+//===-- Memory functions vector implementations for riscv ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LIBC_SRC_STRING_MEMORY_UTILS_RISCV_VECTOR_EXTENSION_H
+#define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_VECTOR_EXTENSION_H
+
+#include "src/__support/macros/properties/cpu_features.h"
+
+#ifndef LIBC_TARGET_CPU_HAS_RISCV_VEXT
+#error "Include this file only if riscv vector extension is available"
+#endif
+
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/string/memory_utils/utils.h"   // Ptr, CPtr
+
+#include <riscv_vector.h>
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+LIBC_INLINE static void
+inline_memcpy_riscv_vext(uint8_t *dst, const uint8_t *src, size_t count) {
+  while (count > 0) {
+    const size_t vl = __riscv_vsetvl_e8m8(count);
+    __riscv_vse8_v_u8m8(dst, __riscv_vle8_v_u8m8(src, vl), vl);
+    count -= vl;
+    src += vl;
+    dst += vl;
+  }
+}
+
+LIBC_INLINE static void
+inline_memset_riscv_vext(uint8_t *dst, const uint8_t value, size_t count) {
+  const vuint8m8_t splat =
+      __riscv_vmv_v_x_u8m8(value, __riscv_vsetvlmax_e8m8());
+  while (count > 0) {
+    const size_t vl = __riscv_vsetvl_e8m8(count);
+    __riscv_vse8_v_u8m8(dst, splat, vl);
+    count -= vl;
+    dst += vl;
+  }
+}
+
+LIBC_INLINE static int inline_bcmp_riscv_vext(const uint8_t *p1,
+                                              const uint8_t *p2,
+                                              size_t count) {
+  while (count > 0) {
+    const size_t vl = __riscv_vsetvl_e8m8(count);
+    const vuint8m8_t a = __riscv_vle8_v_u8m8(p1, vl);
+    const vuint8m8_t b = __riscv_vle8_v_u8m8(p2, vl);
+    const vbool1_t neq_mask = __riscv_vmsne_vv_u8m8_b1(a, b, vl);
+    const long first_neq_index = __riscv_vfirst_m_b1(neq_mask, vl);
+    if (first_neq_index >= 0)
+      return 1;
+    count -= vl;
+    p1 += vl;
+    p2 += vl;
+  }
+  return 0;
+}
+
+LIBC_INLINE static int
+inline_memcmp_riscv_vext(const uint8_t *p1, const uint8_t *p2, size_t count) {
+  while (count > 0) {
+    const size_t vl = __riscv_vsetvl_e8m8(count);
+    const vuint8m8_t a = __riscv_vle8_v_u8m8(p1, vl);
+    const vuint8m8_t b = __riscv_vle8_v_u8m8(p2, vl);
+    const vbool1_t neq_mask = __riscv_vmsne_vv_u8m8_b1(a, b, vl);
+    const long first_neq_index = __riscv_vfirst_m_b1(neq_mask, vl);
+    if (first_neq_index >= 0)
+      return static_cast<int>(p1[first_neq_index]) -
+             static_cast<int>(p2[first_neq_index]);
+    count -= vl;
+    p1 += vl;
+    p2 += vl;
+  }
+  return 0;
+}
+
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_MEMORY_UTILS_RISCV_VECTOR_EXTENSION_H
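A minimal smoke test for the four routines above, assuming an RVV-capable toolchain with the llvm-libc source root on the include path (the file name and exact build/run line are illustrative, e.g. clang++ --target=riscv64-linux-gnu -march=rv64gcv, run under qemu-riscv64 or on RVV hardware):

#include "src/string/memory_utils/riscv/inline_vext.h"

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // 257 bytes: several m8 chunks on small-VLEN parts, plus an odd tail.
  uint8_t src[257], dst[257] = {};
  for (size_t i = 0; i < sizeof(src); ++i)
    src[i] = static_cast<uint8_t>(i * 7 + 1);

  __llvm_libc::inline_memcpy_riscv_vext(dst, src, sizeof(src));
  assert(std::memcmp(dst, src, sizeof(src)) == 0);
  assert(__llvm_libc::inline_bcmp_riscv_vext(dst, src, sizeof(src)) == 0);

  dst[200] ^= 0xFF; // single-byte mismatch deep in the buffer
  assert(__llvm_libc::inline_bcmp_riscv_vext(dst, src, sizeof(src)) != 0);
  assert(__llvm_libc::inline_memcmp_riscv_vext(dst, src, sizeof(src)) ==
         static_cast<int>(dst[200]) - static_cast<int>(src[200]));

  __llvm_libc::inline_memset_riscv_vext(dst, 0xAB, sizeof(dst));
  for (uint8_t b : dst)
    assert(b == 0xAB);
  return 0;
}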