diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -27,6 +27,7 @@
     libc.src.string.strcat
     libc.src.string.strlen
     libc.src.string.strcmp
+    libc.src.string.memchr
 
     # sys/mman.h entrypoints
     libc.src.sys.mman.mmap
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -44,6 +44,16 @@
     libc.include.string
 )
 
+add_entrypoint_object(
+  memchr
+  SRCS
+    memchr.cpp
+  HDRS
+    memchr.h
+  DEPENDS
+    libc.include.string
+)
+
 # Helper to define a function with multiple implementations
 # - Computes flags to satisfy required/rejected features and arch,
 # - Declares an entry point,
diff --git a/libc/src/string/memchr.h b/libc/src/string/memchr.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memchr.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for memchr ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMCHR_H
+#define LLVM_LIBC_SRC_STRING_MEMCHR_H
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+
+// Returns a pointer to the first of the leading `n` bytes of `src` that equals
+// `c` converted to unsigned char, or nullptr when no such byte exists.
+void *memchr(const void *src, int c, size_t n);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMCHR_H
diff --git a/libc/src/string/memchr.cpp b/libc/src/string/memchr.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memchr.cpp
@@ -0,0 +1,29 @@
+//===-- Implementation of memchr ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/memchr.h"
+#include "src/__support/common.h"
+#include <stddef.h>
+
+namespace __llvm_libc {
+
+// Scans the first `n` bytes of `src` for the first byte equal to `c`.
+// Returns a pointer to that byte, or nullptr if it does not occur.
+// TODO: Look at performance benefits of comparing words.
+void *LLVM_LIBC_ENTRYPOINT(memchr)(const void *src, int c, size_t n) {
+  const unsigned char *str = reinterpret_cast<const unsigned char *>(src);
+  // The C standard specifies that `c` is converted to unsigned char before the
+  // search. Comparing the raw int would miss every match whenever `c` lies
+  // outside [0, UCHAR_MAX], e.g. for a sign-extended negative char.
+  const unsigned char ch = static_cast<unsigned char>(c);
+  for (; n && *str != ch; --n, ++str)
+    ;
+  return n ? const_cast<unsigned char *>(str) : nullptr;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -42,6 +42,16 @@
     libc.src.string.strcmp
 )
 
+add_libc_unittest(
+  memchr_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    memchr_test.cpp
+  DEPENDS
+    libc.src.string.memchr
+)
+
 # Tests all implementations that can run on the host.
 function(add_libc_multi_impl_test name)
   get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)
@@ -66,4 +76,3 @@
 add_libc_multi_impl_test(memcpy SRCS memcpy_test.cpp)
 add_libc_multi_impl_test(memset SRCS memset_test.cpp)
 add_libc_multi_impl_test(bzero SRCS bzero_test.cpp)
-
diff --git a/libc/test/src/string/memchr_test.cpp b/libc/test/src/string/memchr_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/string/memchr_test.cpp
@@ -0,0 +1,124 @@
+//===-- Unittests for memchr ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/memchr.h"
+#include "utils/UnitTest/Test.h"
+#include <stddef.h>
+
+// A helper function that calls memchr and abstracts away the explicit cast for
+// readability purposes.
+const char *call_memchr(const void *src, int c, size_t size) {
+  return reinterpret_cast<const char *>(__llvm_libc::memchr(src, c, size));
+}
+
+TEST(MemChrTest, FindsCharacterAfterNullTerminator) {
+  // memchr should continue searching after a null terminator.
+  const size_t size = 5;
+  const unsigned char src[size] = {'a', '\0', 'b', 'c', '\0'};
+  // Should return 'b', 'c', '\0' even when after null terminator.
+  ASSERT_STREQ(call_memchr(src, 'b', size), "bc");
+}
+
+TEST(MemChrTest, FindsCharacterInNonNullTerminatedCollection) {
+  const size_t size = 3;
+  const unsigned char src[size] = {'a', 'b', 'c'};
+  // Should return 'b', 'c'.
+  const char *ret = call_memchr(src, 'b', size);
+  ASSERT_EQ(ret[0], 'b');
+  ASSERT_EQ(ret[1], 'c');
+}
+
+TEST(MemChrTest, FindsFirstCharacter) {
+  const size_t size = 6;
+  const unsigned char src[size] = {'a', 'b', 'c', 'd', 'e', '\0'};
+  // Should return original array since 'a' is the first character.
+  ASSERT_STREQ(call_memchr(src, 'a', size), "abcde");
+}
+
+TEST(MemChrTest, FindsMiddleCharacter) {
+  const size_t size = 6;
+  const unsigned char src[size] = {'a', 'b', 'c', 'd', 'e', '\0'};
+  // Should return characters after (and including) 'c'.
+  ASSERT_STREQ(call_memchr(src, 'c', size), "cde");
+}
+
+TEST(MemChrTest, FindsLastCharacterThatIsNotNullTerminator) {
+  const size_t size = 6;
+  const unsigned char src[size] = {'a', 'b', 'c', 'd', 'e', '\0'};
+  // Should return 'e' and null-terminator.
+  ASSERT_STREQ(call_memchr(src, 'e', size), "e");
+}
+
+TEST(MemChrTest, FindsNullTerminator) {
+  const size_t size = 6;
+  const unsigned char src[size] = {'a', 'b', 'c', 'd', 'e', '\0'};
+  // Should return null terminator.
+  ASSERT_STREQ(call_memchr(src, '\0', size), "");
+}
+
+TEST(MemChrTest, CharacterNotWithinStringShouldReturnNullptr) {
+  const size_t size = 4;
+  const unsigned char src[size] = {'1', '2', '3', '?'};
+  // Since 'z' is not within 'src', should return nullptr.
+  ASSERT_STREQ(call_memchr(src, 'z', size), nullptr);
+}
+
+TEST(MemChrTest, CharacterNotWithinSizeShouldReturnNullptr) {
+  const unsigned char src[5] = {'1', '2', '3', '4', '\0'};
+  // Since '4' is not the first or second character, this should return nullptr.
+  const size_t size = 2;
+  ASSERT_STREQ(call_memchr(src, '4', size), nullptr);
+}
+
+TEST(MemChrTest, TheSourceShouldNotChange) {
+  const size_t size = 6;
+  const unsigned char src[size] = {'a', 'b', 'c', 'd', 'e', '\0'};
+  // Compare against an independent literal; aliasing `src` itself would make
+  // the checks below trivially true.
+  const char *src_copy = "abcde";
+  // When the character is found, the source string should not change.
+  __llvm_libc::memchr(src, 'd', size);
+  ASSERT_STREQ(reinterpret_cast<const char *>(src), src_copy);
+  // Same case for when the character is not found.
+  __llvm_libc::memchr(src, 'z', size);
+  ASSERT_STREQ(reinterpret_cast<const char *>(src), src_copy);
+}
+
+TEST(MemChrTest, ShouldFindFirstOfDuplicates) {
+  const size_t size = 12; // 11 characters + null terminator.
+  const char *dups = "abc1def1ghi";
+  // 1 is duplicated in 'dups', but it should find the first copy.
+  ASSERT_STREQ(call_memchr(dups, '1', size), "1def1ghi");
+}
+
+TEST(MemChrTest, EmptyStringShouldOnlyMatchNullTerminator) {
+  const size_t size = 1; // Null terminator.
+  const char *empty_string = "";
+  // Null terminator should match.
+  ASSERT_STREQ(call_memchr(empty_string, '\0', size), "");
+  // All other characters should not match.
+  ASSERT_STREQ(call_memchr(empty_string, 'A', size), nullptr);
+  ASSERT_STREQ(call_memchr(empty_string, '9', size), nullptr);
+  ASSERT_STREQ(call_memchr(empty_string, '?', size), nullptr);
+}
+
+TEST(MemChrTest, SingleRepeatedCharacterShouldReturnFirst) {
+  const char *dups = "XXXXX";
+  const size_t size = 6; // 5 characters + null terminator.
+  // Should return original string since X is first character.
+  ASSERT_STREQ(call_memchr(dups, 'X', size), dups);
+}
+
+TEST(MemChrTest, SearchValueIsConvertedToUnsignedChar) {
+  const size_t size = 3;
+  const unsigned char src[size] = {'a', 0xFF, '\0'};
+  // -1 converted to unsigned char is 0xFF, so the second byte should match.
+  ASSERT_STREQ(call_memchr(src, -1, size), "\xFF");
+  // Values above UCHAR_MAX wrap around: 'a' + 256 should behave like 'a'.
+  ASSERT_STREQ(call_memchr(src, 'a' + 256, size), "a\xFF");
+}