diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake --- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake +++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake @@ -6,7 +6,7 @@ set(ALL_CPU_FEATURES "") if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) - set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX2 AVX512F FMA) + set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX2 AVX512F AVX512BW FMA) set(LIBC_COMPILE_OPTIONS_NATIVE -march=native) elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native) diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -355,7 +355,7 @@ add_bcmp(bcmp_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2) add_bcmp(bcmp_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2) add_bcmp(bcmp_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2) - add_bcmp(bcmp_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F) + add_bcmp(bcmp_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512BW) add_bcmp(bcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_bcmp(bcmp) else() @@ -382,7 +382,7 @@ add_bzero(bzero_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2) add_bzero(bzero_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2) add_bzero(bzero_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2) - add_bzero(bzero_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F) + add_bzero(bzero_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512BW) add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_bzero(bzero) else() @@ -409,7 +409,7 @@ add_memcmp(memcmp_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2) add_memcmp(memcmp_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2) add_memcmp(memcmp_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2) - add_memcmp(memcmp_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F) + add_memcmp(memcmp_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512BW) add_memcmp(memcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memcmp(memcmp) elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) @@ -439,7 +439,7 @@ add_memcpy(memcpy_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2) add_memcpy(memcpy_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2) add_memcpy(memcpy_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2) - add_memcpy(memcpy_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F) + add_memcpy(memcpy_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512BW) add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memcpy(memcpy) elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) @@ -471,7 +471,7 @@ add_memmove(memmove_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2) add_memmove(memmove_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2) add_memmove(memmove_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2) - add_memmove(memmove_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F) + add_memmove(memmove_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512BW) add_memmove(memmove_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memmove(memmove) elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) @@ -503,7 +503,7 @@ add_memset(memset_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2) add_memset(memset_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2) add_memset(memset_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2) - add_memset(memset_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F) + add_memset(memset_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512BW) add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memset(memset) elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) diff --git a/libc/src/string/memory_utils/op_x86.h b/libc/src/string/memory_utils/op_x86.h --- a/libc/src/string/memory_utils/op_x86.h +++ b/libc/src/string/memory_utils/op_x86.h @@ -20,15 +20,21 @@ #include "src/string/memory_utils/op_builtin.h" #include "src/string/memory_utils/op_generic.h" -#ifdef __SSE2__ +#if defined(__AVX512BW__) || defined(__AVX2__) || defined(__SSE2__) #include -#else +#endif + // Define fake functions to prevent the compiler from failing on undefined -// functions in case SSE2 is not present. +// functions in case the builtin is not present. +#ifndef __AVX512BW__ #define _mm512_cmpneq_epi8_mask(A, B) 0 -#define _mm_movemask_epi8(A) 0 +#endif +#ifndef __AVX2__ #define _mm256_movemask_epi8(A) 0 -#endif // __SSE2__ +#endif +#ifndef __SSE2__ +#define _mm_movemask_epi8(A) 0 +#endif namespace __llvm_libc::x86 {