diff --git a/CMakeLists.txt b/CMakeLists.txt
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -244,6 +244,16 @@
 if(NOT DEFINED X86CPU_ARCH AND ARCH STREQUAL "x86")
   include(DetectArchitecture)
   detect_x86_cpu_architecture(X86CPU_ARCH)
+  detect_x86_feature(sse HAVE_X86_SSE_INSTRUCTIONS)
+  detect_x86_feature(sse2 HAVE_X86_SSE2_INSTRUCTIONS)
+  detect_x86_feature(avx HAVE_X86_AVX_INSTRUCTIONS)
+  detect_x86_feature(avx2 HAVE_X86_AVX2_INSTRUCTIONS)
+  detect_x86_feature(avx512f HAVE_X86_AVX512F_INSTRUCTIONS)
+endif()
+# Probe for libmvec unless HAVE_LIBMVEC_X86 was preset (e.g. -DHAVE_LIBMVEC_X86=OFF).
+if(NOT DEFINED HAVE_LIBMVEC_X86 AND ARCH STREQUAL "x86")
+  include(DetectVectorLibrary)
+  detect_libmvec_x86(HAVE_LIBMVEC_X86)
 endif()
 if(NOT DEFINED ENDIAN)
   include(TestBigEndian)
diff --git a/SingleSource/UnitTests/Vectorizer/CMakeLists.txt b/SingleSource/UnitTests/Vectorizer/CMakeLists.txt
--- a/SingleSource/UnitTests/Vectorizer/CMakeLists.txt
+++ b/SingleSource/UnitTests/Vectorizer/CMakeLists.txt
@@ -1,2 +1,3 @@
+add_subdirectory(Veclib)
 llvm_singlesource()
 set_property(TARGET runtime-checks PROPERTY CXX_STANDARD 17)
diff --git a/SingleSource/UnitTests/Vectorizer/Veclib/CMakeLists.txt b/SingleSource/UnitTests/Vectorizer/Veclib/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/SingleSource/UnitTests/Vectorizer/Veclib/CMakeLists.txt
@@ -0,0 +1,33 @@
+# Tests of vectorized math calls lowered to a vector math library (-fveclib=).
+include(CheckCCompilerFlag)
+
+set(FP_TOLERANCE 0.000002)
+
+# llvm_singlesource_with_cflags_ldflags(<prefix> <cflags> <ldflags>)
+#
+# Calls llvm_singlesource(PREFIX <prefix>) after appending the ;-separated
+# <cflags> and <ldflags> to the CFLAGS and LDFLAGS lists. Both lists are only
+# modified inside this function's scope, so one call's flags never reach the next.
+function(llvm_singlesource_with_cflags_ldflags prefix additional_cflags additional_ldflags)
+  list(APPEND CFLAGS ${additional_cflags})
+  list(APPEND LDFLAGS ${additional_ldflags})
+  llvm_singlesource(PREFIX "${prefix}")
+endfunction()
+
+if(HAVE_LIBMVEC_X86)
+  check_c_compiler_flag(-fveclib=libmvec COMPILER_HAS_FVECLIB_LIBMVEC_FLAG)
+  if(COMPILER_HAS_FVECLIB_LIBMVEC_FLAG)
+    set(libmvec_ldflags "-lm;-fveclib=libmvec")
+    llvm_singlesource_with_cflags_ldflags("libmvec-native-"
+      "-fveclib=libmvec;-O2;-march=native" "${libmvec_ldflags}")
+    # One extra build per instruction set whose HAVE_X86_<ISA>_INSTRUCTIONS is true.
+    foreach(isa sse sse2 avx avx2 avx512f)
+      string(TOUPPER "${isa}" isa_upper)
+      if(HAVE_X86_${isa_upper}_INSTRUCTIONS)
+        llvm_singlesource_with_cflags_ldflags("libmvec-${isa}-"
+          "-fveclib=libmvec;-O2;-m${isa}" "${libmvec_ldflags}")
+      endif()
+    endforeach()
+  endif()
+endif()
+# Other vector math libraries may be added here...
diff --git a/SingleSource/UnitTests/Vectorizer/Veclib/sincos.h b/SingleSource/UnitTests/Vectorizer/Veclib/sincos.h
new file mode 100644
--- /dev/null
+++ b/SingleSource/UnitTests/Vectorizer/Veclib/sincos.h
@@ -0,0 +1,145 @@
+#define _GNU_SOURCE
+
+#include
+#include
+
+// inner loop vectorization with linear access
+void sincos_arr(REAL* sines, REAL* cosines, REAL* phases, int size) {
+  for (int i=0; i max_ulp_err * ulp) {
+    printf("%s.%s FAILED at %d: argument %g, value %g, reference %g, difference %g, (%g ULP).\n",
+           msg1, msg2, index, arg, val, ref, (val-ref), fabs(val-ref)/ulp);
+    return 1;
+  } else if (verbose) {
+    printf("%s.%s at %2d: argument %2g, value %10.7f, reference %10.7f.\n",
+           msg1, msg2, index, arg, val, ref);
+  }
+  return 0;
+}
+
+int check_arr(REAL* sines, REAL* coses, REAL* phases, int* indices, int size, int nests, REAL max_ulp_err, const char* msg)
+{
+  int fail = 0;
+
+  for (int i = 0; i < size; i++) {
+    int j = indices ? indices[i] : i;
+    long double ref_sin = sinl(phases[j]);
+    long double ref_cos = cosl(phases[j]);
+    for (int k=0; k
+
+typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
+
+#if __WORDSIZE==32
+void _ZGVbN4vvv_sincosf(__m128 x, __m128 sinptr, __m128 cosptr);
+#else
+void _ZGVbN4vvv_sincosf(__m128 x, __m128 sinptrlo, __m128 sinptrhi, __m128 cosptrlo, __m128 cosptrhi);
+#endif
+
+int main(void) {
+  union {__m128 vec; float arr[4];} x, sines, coses;
+  union {__m128 vec[2]; float* arr[4];} sinptr, cosptr;
+  for (int i=0; i<4; i++) {
+    x.arr[i] = 0.1f * i;
+    sinptr.arr[i] = &sines.arr[i];
+    cosptr.arr[i] = &coses.arr[i];
+  }
+  // use function from libmvec (the one we have a test for):
+#if __WORDSIZE==32
+  _ZGVbN4vvv_sincosf(x.vec, sinptr.vec[0], cosptr.vec[0]);
+#else
+  _ZGVbN4vvv_sincosf(x.vec, sinptr.vec[0], sinptr.vec[1], cosptr.vec[0], cosptr.vec[1]);
+#endif
+  // ensure the call cannot get optimized out:
+  int sin_is_less_than_cos = 1;
+  for (int i=0; i<4; i++) {
+    sin_is_less_than_cos &= (sines.arr[i] < coses.arr[i]);
+  }
+  return !sin_is_less_than_cos;
+}
diff --git a/cmake/modules/DetectVectorLibrary.cmake b/cmake/modules/DetectVectorLibrary.cmake
new file mode 100644
--- /dev/null
+++ b/cmake/modules/DetectVectorLibrary.cmake
@@ -0,0 +1,28 @@
+##===- DetectVectorLibrary.cmake ------------------------------------------===##
+#
+# Performs a try_run to determine if a vector library is available.
+#
+##===----------------------------------------------------------------------===##
+
+# detect_libmvec_x86(<variable>)
+#
+# Sets <variable> in the caller's scope to YES when the DetectLibmvecX86.c probe
+# compiles and links with -lmvec and try_run can execute it, and to NO otherwise.
+function(detect_libmvec_x86 variable)
+  # Append to the configured flags; replacing them would drop e.g. -m32.
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse")
+  try_run(HAVE_RUN_${variable} HAVE_COMPILE_${variable}
+    ${CMAKE_BINARY_DIR}
+    ${CMAKE_SOURCE_DIR}/cmake/modules/DetectLibmvecX86.c
+    LINK_LIBRARIES -lmvec)
+
+  set(libmvec_found NO)
+  if(HAVE_COMPILE_${variable} AND
+     NOT ("${HAVE_RUN_${variable}}" STREQUAL "FAILED_TO_RUN"))
+    set(libmvec_found YES)
+  endif()
+  set(${variable} ${libmvec_found} PARENT_SCOPE)
+  message(STATUS "Check libmvec X86 vector math library available: ${libmvec_found}")
+endfunction()
+
+# Add other vector libraries here...
diff --git a/cmake/modules/DetectX86FeatureFlag.c b/cmake/modules/DetectX86FeatureFlag.c
new file mode 100644
--- /dev/null
+++ b/cmake/modules/DetectX86FeatureFlag.c
@@ -0,0 +1,17 @@
+/* Prints YES when the CPU executing this program supports the feature named by
+ * the FEATURE_FLAG macro, and NO otherwise. FEATURE_FLAG is not defined in this
+ * file and must be supplied by the build (e.g. -DFEATURE_FLAG=avx2). */
+#include <stdio.h>
+
+#define xstr(s) str(s)
+#define str(s) #s
+
+int main(void) {
+  __builtin_cpu_init();
+  if (__builtin_cpu_supports(xstr(FEATURE_FLAG))) {
+    printf("YES");
+  } else {
+    printf("NO");
+  }
+  return 0;
+}