diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -31,7 +31,9 @@
   HDRS
     strlen.h
   DEPENDS
+    libc.config.${LIBC_TARGET_OS}.lowlevel_utils
     libc.include.string
+    libc.src.__support.lowlevel_utils
 )
 
 # ------------------------------------------------------------------------------
diff --git a/libc/src/string/strlen.cpp b/libc/src/string/strlen.cpp
--- a/libc/src/string/strlen.cpp
+++ b/libc/src/string/strlen.cpp
@@ -9,27 +9,95 @@
 #include "src/string/strlen.h"
 
 #include "src/__support/common.h"
+#include "src/__support/lowlevel_utils.h"
 #include <stdint.h>
+
+// __has_feature is a Clang extension. Provide a fallback so the sanitizer
+// checks below also preprocess on compilers that do not define it.
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+#include <sanitizer/asan_interface.h>
+#endif
 
 namespace __llvm_libc {
 
+// Bit masks for the word-at-a-time zero-byte test, selected by the size of
+// uintptr_t in bytes. The primary template marks unsupported word sizes, for
+// which strlen falls back to the byte-by-byte loop.
+template <size_t WordSize>
+struct Mask {
+  static constexpr bool opt = false;
+  static constexpr uintptr_t high = 0;
+  static constexpr uintptr_t low = 0;
+};
+
+// Masks for 32-bit words.
+template <>
+struct Mask<4> {
+  static constexpr bool opt = true;
+  static constexpr uintptr_t high = 0x80808080L;
+  static constexpr uintptr_t low = 0x01010101L;
+};
+
+// Masks for 64-bit words. The 32-bit pattern is widened with two 16-bit shifts
+// rather than one 32-bit shift: this specialization is also compiled on
+// targets where uintptr_t is 32 bits wide, and shifting by the full width is
+// not a valid constant expression there.
+template <>
+struct Mask<8> {
+  static constexpr bool opt = true;
+  static constexpr uintptr_t high =
+      ((Mask<4>::high << 16) << 16) | Mask<4>::high;
+  static constexpr uintptr_t low = ((Mask<4>::low << 16) << 16) | Mask<4>::low;
+};
+
+// Masks for the word size of the target.
+static constexpr bool word_optimized = Mask<sizeof(uintptr_t)>::opt;
+static constexpr uintptr_t high_mask = Mask<sizeof(uintptr_t)>::high;
+static constexpr uintptr_t low_mask = Mask<sizeof(uintptr_t)>::low;
+
+// Returns true if the word at |ptr| contains a zero byte. The load is left
+// uninstrumented because the word may extend past the end of the string.
+// Under AddressSanitizer, a word with no zero byte that overlaps a poisoned
+// region is reported as an error.
+__attribute__((no_sanitize("address", "bounds", "memory")))
+static inline bool safe_check_word(const uintptr_t *ptr) {
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+  void *poison_addr = __asan_region_is_poisoned(
+      const_cast<void *>(reinterpret_cast<const void *>(ptr)),
+      sizeof(uintptr_t));
+#endif
+
+  uintptr_t value = *ptr;
+  if (((value - low_mask) & ~value & high_mask) != 0) {
+    return true;
+  }
+
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+  if (poison_addr != nullptr) {
+    // A NUL byte was not found and some part of the word is OOB.
+    __asan_report_error(
+        reinterpret_cast<void *>(get_current_pc()),
+        reinterpret_cast<void *>(get_current_bp()),
+        reinterpret_cast<void *>(get_current_sp()), poison_addr, false, 1);
+  }
+#endif
+
+  return false;
+}
+
 // For LLP64, i.e., Windows, pointers are 64 bit and long is only 32 bit
 // Thus, this implementation uses uintptr_t instead of long
 size_t LLVM_LIBC_ENTRYPOINT(strlen)(const char *src) {
   const char *charPtr;
   const uintptr_t *ptrPtr;
-  uintptr_t highMask = 0x80808080L;
-  uintptr_t lowMask = 0x01010101L;
-
-  if (sizeof(uintptr_t) == 4) {
-    // do nothing
-  } else if (sizeof(uintptr_t) == 8) {
-    // setup 64 bit masks
-    highMask = ((highMask << 16) << 16) | highMask;
-    lowMask = ((lowMask << 16) << 16) | lowMask;
-  } else {
+
+  if (!word_optimized) {
     // fallback to slow path
     const char *end = src;
     while (*end != '\0')
@@ -48,9 +116,7 @@
 
   // process sizeof(uintptr_t) bytes at a time
   for (;;) {
-    uintptr_t value = *ptrPtr;
-
-    if (((value - lowMask) & ~value & highMask) != 0) {
+    if (safe_check_word(ptrPtr)) {
       // found a zero byte
       const char *cp = (const char *)ptrPtr;
 
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -29,6 +29,7 @@
   SRCS
     strlen_test.cpp
   DEPENDS
+    libc.include.signal
     libc.src.string.strlen
 )
 
diff --git a/libc/test/src/string/strlen_test.cpp b/libc/test/src/string/strlen_test.cpp
--- a/libc/test/src/string/strlen_test.cpp
+++ b/libc/test/src/string/strlen_test.cpp
@@ -9,6 +9,19 @@
 #include "src/string/strlen.h"
 #include "utils/UnitTest/Test.h"
 
+// __has_feature is a Clang extension. Provide a fallback so the sanitizer
+// checks below also preprocess on compilers that do not define it.
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+#include "include/signal.h"
+#include "src/signal/signal.h"
+
+#include <sanitizer/asan_interface.h>
+#endif
+
 TEST(StrLenTest, EmptyString) {
   const char *empty = "";
 
@@ -57,3 +70,48 @@
   size_t result = __llvm_libc::strlen(two);
   ASSERT_EQ((size_t)1, result);
 }
+
+// The two bytes after the terminator are poisoned. A word-sized read that
+// spans them must not be reported as long as the terminator is found.
+TEST(StrLenTest, SafeOOBRead) {
+  const char hello[8] = {'h', 'e', 'l', 'l', 'o', '\0', '0', '0'};
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+  __asan_poison_memory_region(hello + 6, 2);
+#endif
+
+  size_t result = __llvm_libc::strlen(hello);
+
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+  // Unpoison before returning so the stack slot does not stay poisoned for
+  // whatever runs next.
+  __asan_unpoison_memory_region(hello + 6, 2);
+#endif
+
+  ASSERT_EQ((size_t)5, result);
+}
+
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+// The array holds no terminator and its tail is poisoned, so strlen is
+// expected to exit with status 1.
+TEST(StrLenTest, OOBFailWhenCheckingWord) {
+  auto strlen_call = [] {
+    const char hello[8] = {'h', 'e', 'l', 'l', 'o', '0', '0', '0'};
+    // Poison bytes 3..7, stopping at the end of the array.
+    __asan_poison_memory_region(hello + 3, 5);
+    __llvm_libc::strlen(hello);
+  };
+  EXPECT_EXITS(strlen_call, 1);
+}
+
+// The terminator lies inside the poisoned bytes, so strlen is expected to
+// exit with status 1.
+TEST(StrLenTest, OOBFailWhenReadingLastByte) {
+  auto strlen_call = [] {
+    const char hello[8] = {'h', 'e', 'l', 'l', 'o', '\0', '0', '0'};
+    __asan_poison_memory_region(hello + 3, 5);
+    __llvm_libc::strlen(hello);
+  };
+  EXPECT_EXITS(strlen_call, 1);
+}
+
+#endif