diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
--- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
@@ -2,11 +2,19 @@
 # Cpu features definition and flags
 # ------------------------------------------------------------------------------
 
+# Initialize ALL_CPU_FEATURES as empty list.
+set(ALL_CPU_FEATURES "")
+
 if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
-  set(ALL_CPU_FEATURES SSE SSE2 AVX AVX2 AVX512F)
-  list(SORT ALL_CPU_FEATURES)
+  set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX2 AVX512F)
+  set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
+elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
+  set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
 endif()
 
+# Making sure ALL_CPU_FEATURES is sorted.
+list(SORT ALL_CPU_FEATURES)
+
 # Function to check whether the target CPU supports the provided set of features.
 # Usage:
 #   cpu_supports(
@@ -22,49 +30,6 @@
   endif()
 endfunction()
 
-# Function to compute the flags to pass down to the compiler.
-# Usage:
-#   compute_flags(
-#
-#     MARCH
-#     REQUIRE
-#     REJECT
-#   )
-function(compute_flags output_var)
-  cmake_parse_arguments(
-    "COMPUTE_FLAGS"
-    "" # Optional arguments
-    "MARCH" # Single value arguments
-    "REQUIRE;REJECT" # Multi value arguments
-    ${ARGN})
-  # Check that features are not required and rejected at the same time.
-  if(COMPUTE_FLAGS_REQUIRE AND COMPUTE_FLAGS_REJECT)
-    _intersection(var ${COMPUTE_FLAGS_REQUIRE} ${COMPUTE_FLAGS_REJECT})
-    if(var)
-      message(FATAL_ERROR "Cpu Features REQUIRE and REJECT ${var}")
-    endif()
-  endif()
-  # Generate the compiler flags in `current`.
-  if(${CMAKE_CXX_COMPILER_ID} MATCHES "Clang|GNU")
-    if(COMPUTE_FLAGS_MARCH)
-      list(APPEND current "-march=${COMPUTE_FLAGS_MARCH}")
-    endif()
-    foreach(feature IN LISTS COMPUTE_FLAGS_REQUIRE)
-      string(TOLOWER ${feature} lowercase_feature)
-      list(APPEND current "-m${lowercase_feature}")
-    endforeach()
-    foreach(feature IN LISTS COMPUTE_FLAGS_REJECT)
-      string(TOLOWER ${feature} lowercase_feature)
-      list(APPEND current "-mno-${lowercase_feature}")
-    endforeach()
-  else()
-    # In future, we can extend for other compilers.
-    message(FATAL_ERROR "Unkown compiler ${CMAKE_CXX_COMPILER_ID}.")
-  endif()
-  # Export the list of flags.
-  set(${output_var} "${current}" PARENT_SCOPE)
-endfunction()
-
 # ------------------------------------------------------------------------------
 # Internal helpers and utilities.
 # ------------------------------------------------------------------------------
@@ -94,54 +59,31 @@
 endfunction()
 _generate_check_code()
 
-# Compiles and runs the code generated above with the specified requirements.
-# This is helpful to infer which features a particular target supports or if
-# a specific features implies other features (e.g. BMI2 implies SSE2 and SSE).
-function(_check_defined_cpu_feature output_var)
-  cmake_parse_arguments(
-    "CHECK_DEFINED"
-    "" # Optional arguments
-    "MARCH" # Single value arguments
-    "REQUIRE;REJECT" # Multi value arguments
-    ${ARGN})
-  compute_flags(
-    flags
-    MARCH ${CHECK_DEFINED_MARCH}
-    REQUIRE ${CHECK_DEFINED_REQUIRE}
-    REJECT ${CHECK_DEFINED_REJECT})
+# Cached as STRING (not PATH) so that a value given via -DLIBC_CPU_FEATURES=...
+# is kept verbatim instead of being converted to an absolute path.
+set(LIBC_CPU_FEATURES "" CACHE STRING "Host supported CPU features")
+
+if(CMAKE_CROSSCOMPILING)
+  # The host is not probed when cross compiling: the user-provided list is
+  # validated against ALL_CPU_FEATURES instead.
+  _intersection(cpu_features "${ALL_CPU_FEATURES}" "${LIBC_CPU_FEATURES}")
+  if(NOT "${cpu_features}" STREQUAL "${LIBC_CPU_FEATURES}")
+    message(FATAL_ERROR "Unsupported CPU features in '${LIBC_CPU_FEATURES}'; supported: '${ALL_CPU_FEATURES}'")
+  endif()
+  set(LIBC_CPU_FEATURES "${cpu_features}")
+else()
+  # Populates the LIBC_CPU_FEATURES list from host.
   try_run(
     run_result compile_result "${CMAKE_CURRENT_BINARY_DIR}/check_${feature}"
     "${CMAKE_CURRENT_BINARY_DIR}/cpu_features/check_cpu_features.cpp"
-    COMPILE_DEFINITIONS ${flags}
+    COMPILE_DEFINITIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
     COMPILE_OUTPUT_VARIABLE compile_output
     RUN_OUTPUT_VARIABLE run_output)
   if("${run_result}" EQUAL 0)
-    set(${output_var}
-        "${run_output}"
-        PARENT_SCOPE)
+    set(LIBC_CPU_FEATURES "${run_output}")
   elseif(NOT ${compile_result})
     message(FATAL_ERROR "Failed to compile: ${compile_output}")
   else()
     message(FATAL_ERROR "Failed to run: ${run_output}")
   endif()
-endfunction()
-
-set(LIBC_CPU_FEATURES "" CACHE PATH "supported CPU features")
-
-if(CMAKE_CROSSCOMPILING)
-  _intersection(cpu_features "${ALL_CPU_FEATURES}" "${LIBC_CPU_FEATURES}")
-  if(NOT "${cpu_features}" STREQUAL "${LIBC_CPU_FEATURES}")
-    message(FATAL_ERROR "Unsupported CPU features: ${cpu_features}")
-  endif()
-  set(LIBC_CPU_FEATURES "${cpu_features}")
-else()
-  # Populates the LIBC_CPU_FEATURES list.
-  # Use -march=native only when the compiler supports it.
-  include(CheckCXXCompilerFlag)
-  CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
-  if(COMPILER_SUPPORTS_MARCH_NATIVE)
-    _check_defined_cpu_feature(LIBC_CPU_FEATURES MARCH native)
-  else()
-    _check_defined_cpu_feature(LIBC_CPU_FEATURES)
-  endif()
 endif()
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -186,20 +186,15 @@
   cmake_parse_arguments(
     "ADD_IMPL"
     "" # Optional arguments
-    "MARCH" # Single value arguments
-    "REQUIRE;REJECT;SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi value arguments
+    "" # Single value arguments
+    "REQUIRE;SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi value arguments
     ${ARGN})
-  compute_flags(flags
-    MARCH ${ADD_IMPL_MARCH}
-    REQUIRE ${ADD_IMPL_REQUIRE}
-    REJECT ${ADD_IMPL_REJECT}
-  )
   add_entrypoint_object(${impl_name}
     NAME ${name}
     SRCS ${ADD_IMPL_SRCS}
     HDRS ${ADD_IMPL_HDRS}
     DEPENDS ${ADD_IMPL_DEPENDS}
-    COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS} ${flags} -O2
+    COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS}
   )
   get_fq_target_name(${impl_name} fq_target_name)
   set_target_properties(${fq_target_name} PROPERTIES REQUIRE_CPU_FEATURES "${ADD_IMPL_REQUIRE}")
@@ -210,17 +205,6 @@
 # memcpy
 # ------------------------------------------------------------------------------
 
-# include the relevant architecture specific implementations
-if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
-  set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/${LIBC_TARGET_ARCHITECTURE}/memcpy.cpp)
-elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
-  set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/${LIBC_TARGET_ARCHITECTURE}/memcpy.cpp)
-#Disable tail merging as it leads to lower performance
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mllvm --tail-merge-threshold=0")
-else()
-  set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp)
-endif()
-
 function(add_memcpy memcpy_name)
   add_implementation(memcpy ${memcpy_name}
     SRCS ${MEMCPY_SRC}
@@ -235,8 +219,23 @@
 endfunction()
 
 if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
-  add_memcpy(memcpy MARCH native)
+  set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/x86_64/memcpy.cpp)
+  add_memcpy(memcpy_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
+  add_memcpy(memcpy_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
+  add_memcpy(memcpy_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
+  add_memcpy(memcpy_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
+  add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
+  add_memcpy(memcpy)
+elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
+  set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/aarch64/memcpy.cpp)
+  # Disable tail merging as it leads to lower performance.
+  # Note that '-mllvm' needs to be prefixed with 'SHELL:' to prevent CMake flag deduplication.
+  add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
+                             COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
+  add_memcpy(memcpy COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
 else()
+  set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp)
+  add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_memcpy(memcpy)
 endif()
 
@@ -258,8 +257,14 @@
 endfunction()
 
 if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
-  add_memset(memset MARCH native)
+  add_memset(memset_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
+  add_memset(memset_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
+  add_memset(memset_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
+  add_memset(memset_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
+  add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
+  add_memset(memset)
 else()
+  add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_memset(memset)
 endif()
 
@@ -282,15 +287,13 @@
 endfunction()
 
 if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
-  add_bzero(bzero MARCH native)
+  add_bzero(bzero_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
+  add_bzero(bzero_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
+  add_bzero(bzero_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
+  add_bzero(bzero_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
+  add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
+  add_bzero(bzero)
 else()
+  add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_bzero(bzero)
 endif()
-
-# ------------------------------------------------------------------------------
-# Add all other relevant implementations for the native target.
-# ------------------------------------------------------------------------------
-
-if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
-  include(${LIBC_TARGET_ARCHITECTURE}/CMakeLists.txt)
-endif()
diff --git a/libc/src/string/aarch64/CMakeLists.txt b/libc/src/string/aarch64/CMakeLists.txt
deleted file mode 100644
--- a/libc/src/string/aarch64/CMakeLists.txt
+++ /dev/null
@@ -1 +0,0 @@
-add_memcpy("memcpy_${LIBC_TARGET_ARCHITECTURE}")
diff --git a/libc/src/string/x86_64/CMakeLists.txt b/libc/src/string/x86_64/CMakeLists.txt
deleted file mode 100644
--- a/libc/src/string/x86_64/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-add_memcpy("memcpy_${LIBC_TARGET_ARCHITECTURE}_opt_none" REJECT "${ALL_CPU_FEATURES}")
-add_memcpy("memcpy_${LIBC_TARGET_ARCHITECTURE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
-add_memcpy("memcpy_${LIBC_TARGET_ARCHITECTURE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
-add_memcpy("memcpy_${LIBC_TARGET_ARCHITECTURE}_opt_avx512f" REQUIRE "AVX512F")
-
-add_memset("memset_${LIBC_TARGET_ARCHITECTURE}_opt_none" REJECT "${ALL_CPU_FEATURES}")
-add_memset("memset_${LIBC_TARGET_ARCHITECTURE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
-add_memset("memset_${LIBC_TARGET_ARCHITECTURE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
-add_memset("memset_${LIBC_TARGET_ARCHITECTURE}_opt_avx512f" REQUIRE "AVX512F")
-
-add_bzero("bzero_${LIBC_TARGET_ARCHITECTURE}_opt_none" REJECT "${ALL_CPU_FEATURES}")
-add_bzero("bzero_${LIBC_TARGET_ARCHITECTURE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
-add_bzero("bzero_${LIBC_TARGET_ARCHITECTURE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
-add_bzero("bzero_${LIBC_TARGET_ARCHITECTURE}_opt_avx512f" REQUIRE "AVX512F")
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -196,6 +196,8 @@
           libc_string_unittests
         DEPENDS
           ${fq_config_name}
+        COMPILE_OPTIONS
+          ${LIBC_COMPILE_OPTIONS_NATIVE}
         ${ARGN}
       )
     else()