diff --git a/libc/src/__support/common.h b/libc/src/__support/common.h
--- a/libc/src/__support/common.h
+++ b/libc/src/__support/common.h
@@ -29,4 +29,27 @@
 #define LLVM_LIBC_FUNCTION(type, name, arglist) type name arglist
 #endif
 
+namespace __llvm_libc {
+namespace internal {
+constexpr bool same_string(char const *lhs, char const *rhs) {
+  for (; *lhs || *rhs; ++lhs, ++rhs)
+    if (*lhs != *rhs)
+      return false;
+  return true;
+}
+} // namespace internal
+} // namespace __llvm_libc
+
+// LLVM_LIBC_IS_DEFINED checks whether a particular macro is defined.
+// Usage: constexpr bool kUseAvx = LLVM_LIBC_IS_DEFINED(__AVX__);
+//
+// This works by comparing the stringified version of the macro with and without
+// evaluation. If FOO is not defined both stringifications yield "FOO". If FOO
+// is defined, one stringification yields "FOO" while the other yields its
+// stringified value "1".
+#define LLVM_LIBC_IS_DEFINED(macro)                                            \
+  !__llvm_libc::internal::same_string(                                         \
+      LLVM_LIBC_IS_DEFINED__EVAL_AND_STRINGIZE(macro), #macro)
+#define LLVM_LIBC_IS_DEFINED__EVAL_AND_STRINGIZE(s) #s
+
 #endif // LLVM_LIBC_SUPPORT_COMMON_H
diff --git a/libc/src/string/x86/memcpy.cpp b/libc/src/string/x86/memcpy.cpp
--- a/libc/src/string/x86/memcpy.cpp
+++ b/libc/src/string/x86/memcpy.cpp
@@ -12,6 +12,26 @@
 
 namespace __llvm_libc {
 
+// Whether to use only rep;movsb.
+constexpr bool kUseOnlyRepMovsb =
+    LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB);
+
+// kRepMovsBSize == -1 : Only CopyAligned is used.
+// kRepMovsBSize == 0 : Only RepMovsb is used.
+// else CopyAligned is used up to kRepMovsBSize and then RepMovsb.
+constexpr size_t kRepMovsBSize =
+#ifdef LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
+    LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;
+#else
+    -1;
+#endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
+
+// Whether target supports AVX instructions.
+constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__);
+
+// The chunk size used for the loop copy strategy.
+constexpr size_t kLoopCopyBlockSize = kHasAvx ? 64 : 32;
+
 static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
                          size_t count) {
   // FIXME: Add MSVC support with
@@ -21,12 +41,6 @@
   asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory");
 }
 
-#if defined(__AVX__)
-#define BEST_SIZE 64
-#else
-#define BEST_SIZE 32
-#endif
-
 // Design rationale
 // ================
 //
@@ -47,6 +61,9 @@
 // with little change on the code side.
 static void memcpy_x86(char *__restrict dst, const char *__restrict src,
                        size_t count) {
+  if (kUseOnlyRepMovsb)
+    return CopyRepMovsb(dst, src, count);
+
   if (count == 0)
     return;
   if (count == 1)
@@ -67,16 +84,10 @@
     return CopyBlockOverlap<32>(dst, src, count);
   if (count < 128)
     return CopyBlockOverlap<64>(dst, src, count);
-#if defined(__AVX__)
-  if (count < 256)
+  if (kHasAvx && count < 256)
     return CopyBlockOverlap<128>(dst, src, count);
-#endif
-  // kRepMovsBSize == -1 : Only CopyAligned is used.
-  // kRepMovsBSize == 0 : Only RepMovsb is used.
-  // else CopyAligned is used to to kRepMovsBSize and then RepMovsb.
-  constexpr size_t kRepMovsBSize = -1;
   if (count <= kRepMovsBSize)
-    return CopyAlignedBlocks<BEST_SIZE>(dst, src, count);
+    return CopyAlignedBlocks<kLoopCopyBlockSize>(dst, src, count);
   return CopyRepMovsb(dst, src, count);
 }
 