diff --git a/libc/src/__support/common.h b/libc/src/__support/common.h --- a/libc/src/__support/common.h +++ b/libc/src/__support/common.h @@ -29,4 +29,27 @@ #define LLVM_LIBC_FUNCTION(type, name, arglist) type name arglist #endif +// LLVM_LIBC_IS_DEFINED checks whether a particular macro is defined. +// Usage: constexpr bool kUseAvx = LLVM_LIBC_IS_DEFINED(__AVX__); +// +// This works by comparing the stringified version of the macro with and without +// evaluation. If FOO is not defined, both stringifications yield "FOO". If FOO +// is defined, one stringification yields "FOO" while the other yields its +// stringified value, e.g. "1". (`#define FOO FOO` is reported as undefined.) +#define LLVM_LIBC_IS_DEFINED(macro) \ + !details::same_string(LLVM_LIBC_IS_DEFINED__EVAL_AND_STRINGIZE(macro), #macro) +#define LLVM_LIBC_IS_DEFINED__EVAL_AND_STRINGIZE(s) #s +namespace details { +// C++14 version of compile-time string equality. +// Once we require C++17 we can leverage std::string_view, e.g. +// std::string_view(lhs) == rhs; +constexpr bool same_string(char const *lhs, char const *rhs) { + for (; *lhs || *rhs; ++lhs, ++rhs) + if (*lhs != *rhs) + return false; + return true; +} + +} // namespace details + #endif // LLVM_LIBC_SUPPORT_COMMON_H diff --git a/libc/src/string/x86/memcpy.cpp b/libc/src/string/x86/memcpy.cpp --- a/libc/src/string/x86/memcpy.cpp +++ b/libc/src/string/x86/memcpy.cpp @@ -12,6 +12,26 @@ namespace __llvm_libc { +// Whether to use only rep;movsb. +constexpr bool kUseOnlyRepMovsb = + LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB); + +// kRepMovsBSize == -1 : Only CopyAligned is used (-1 becomes SIZE_MAX). +// kRepMovsBSize == 0 : Only RepMovsb is used. +// else CopyAligned is used up to kRepMovsBSize and then RepMovsb. +constexpr size_t kRepMovsBSize = +#ifdef LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE + LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE; +#else + -1; +#endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE + +// Whether target supports AVX instructions. 
+constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__); + +// The chunk size used for the loop copy strategy: 64 with AVX, 32 otherwise. +constexpr size_t kLoopCopyBlockSize = kHasAvx ? 64 : 32; + static void CopyRepMovsb(char *__restrict dst, const char *__restrict src, size_t count) { // FIXME: Add MSVC support with @@ -21,12 +41,6 @@ asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory"); } -#if defined(__AVX__) -#define BEST_SIZE 64 -#else -#define BEST_SIZE 32 -#endif - // Design rationale // ================ // @@ -47,37 +61,35 @@ // with little change on the code side. static void memcpy_x86(char *__restrict dst, const char *__restrict src, size_t count) { - if (count == 0) - return; - if (count == 1) - return CopyBlock<1>(dst, src); - if (count == 2) - return CopyBlock<2>(dst, src); - if (count == 3) - return CopyBlock<3>(dst, src); - if (count == 4) - return CopyBlock<4>(dst, src); - if (count < 8) - return CopyBlockOverlap<4>(dst, src, count); - if (count < 16) - return CopyBlockOverlap<8>(dst, src, count); - if (count < 32) - return CopyBlockOverlap<16>(dst, src, count); - if (count < 64) - return CopyBlockOverlap<32>(dst, src, count); - if (count < 128) - return CopyBlockOverlap<64>(dst, src, count); -#if defined(__AVX__) - if (count < 256) - return CopyBlockOverlap<128>(dst, src, count); -#endif - // kRepMovsBSize == -1 : Only CopyAligned is used. - // kRepMovsBSize == 0 : Only RepMovsb is used. - // else CopyAligned is used to to kRepMovsBSize and then RepMovsb. 
- constexpr size_t kRepMovsBSize = -1; - if (count <= kRepMovsBSize) - return CopyAlignedBlocks(dst, src, count); - return CopyRepMovsb(dst, src, count); + if (kUseOnlyRepMovsb) { + return CopyRepMovsb(dst, src, count); + } else { + if (count == 0) + return; + if (count == 1) + return CopyBlock<1>(dst, src); + if (count == 2) + return CopyBlock<2>(dst, src); + if (count == 3) + return CopyBlock<3>(dst, src); + if (count == 4) + return CopyBlock<4>(dst, src); + if (count < 8) + return CopyBlockOverlap<4>(dst, src, count); + if (count < 16) + return CopyBlockOverlap<8>(dst, src, count); + if (count < 32) + return CopyBlockOverlap<16>(dst, src, count); + if (count < 64) + return CopyBlockOverlap<32>(dst, src, count); + if (count < 128) + return CopyBlockOverlap<64>(dst, src, count); + if (kHasAvx && count < 256) + return CopyBlockOverlap<128>(dst, src, count); + if (count <= kRepMovsBSize) + return CopyAlignedBlocks(dst, src, count); + return CopyRepMovsb(dst, src, count); + } } LLVM_LIBC_FUNCTION(void *, memcpy,