diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -27,6 +27,7 @@
     libc.src.string.strcat
     libc.src.string.strlen
     libc.src.string.strcmp
+    libc.src.string.memchr
 
     # sys/mman.h entrypoints
     libc.src.sys.mman.mmap
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -44,6 +44,16 @@
     libc.include.string
 )
 
+add_entrypoint_object(
+  memchr
+  SRCS
+    memchr.cpp
+  HDRS
+    memchr.h
+  DEPENDS
+    libc.include.string
+)
+
 # Helper to define a function with multiple implementations
 # - Computes flags to satisfy required/rejected features and arch,
 # - Declares an entry point,
diff --git a/libc/src/string/memchr.h b/libc/src/string/memchr.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memchr.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for memchr ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMCHR_H
+#define LLVM_LIBC_SRC_STRING_MEMCHR_H
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+
+// Returns a pointer to the first byte equal to c (converted to unsigned char)
+// within the first n bytes of src, or a null pointer if there is no such byte.
+void *memchr(const void *src, int c, size_t n);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_MEMCHR_H
diff --git a/libc/src/string/memchr.cpp b/libc/src/string/memchr.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/string/memchr.cpp
@@ -0,0 +1,27 @@
+//===-- Implementation of memchr ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/memchr.h"
+#include "src/__support/common.h"
+#include <stddef.h>
+
+namespace __llvm_libc {
+
+// Scans the first n bytes of src for c (converted to unsigned char) and
+// returns a pointer to the first match, or nullptr if there is none.
+// TODO: Look at performance benefits of comparing words.
+void *LLVM_LIBC_ENTRYPOINT(memchr)(const void *src, int c, size_t n) {
+  const unsigned char *str = reinterpret_cast<const unsigned char *>(src);
+  const unsigned char ch = static_cast<unsigned char>(c);
+  for (; n && *str != ch; --n, ++str)
+    ;
+  // n is still nonzero only when the loop stopped on a matching byte.
+  return n ? const_cast<unsigned char *>(str) : nullptr;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -42,6 +42,16 @@
     libc.src.string.strcmp
 )
 
+add_libc_unittest(
+  memchr_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    memchr_test.cpp
+  DEPENDS
+    libc.src.string.memchr
+)
+
 # Tests all implementations that can run on the host.
 function(add_libc_multi_impl_test name)
   get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)
@@ -66,4 +76,3 @@
 add_libc_multi_impl_test(memcpy SRCS memcpy_test.cpp)
 add_libc_multi_impl_test(memset SRCS memset_test.cpp)
 add_libc_multi_impl_test(bzero SRCS bzero_test.cpp)
-
diff --git a/libc/test/src/string/memchr_test.cpp b/libc/test/src/string/memchr_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/string/memchr_test.cpp
@@ -0,0 +1,134 @@
+//===-- Unittests for memchr ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/memchr.h"
+#include "utils/UnitTest/Test.h"
+#include <stddef.h>
+
+// Calls memchr and converts the result to a C string so that the tests do not
+// have to repeat the cast.
+static const char *call_memchr(const void *src, int c, size_t size) {
+  return reinterpret_cast<const char *>(__llvm_libc::memchr(src, c, size));
+}
+
+TEST(MemChrTest, FindsCharacterAfterNullTerminator) {
+  // memchr should continue searching after a null terminator.
+  const size_t size = 5;
+  const unsigned char src[size] = {'a', '\0', 'b', 'c', '\0'};
+  // Should return 'b', 'c', '\0' even when after null terminator.
+  const char *ret = call_memchr(src, 'b', size);
+  ASSERT_EQ(ret[0], 'b');
+  ASSERT_EQ(ret[1], 'c');
+  ASSERT_EQ(ret[2], '\0');
+}
+
+TEST(MemChrTest, FindsCharacterInNonNullTerminatedCollection) {
+  const size_t size = 3;
+  const unsigned char src[size] = {'a', 'b', 'c'};
+  // Should return 'b', 'c'.
+  const char *ret = call_memchr(src, 'b', size);
+  ASSERT_EQ(ret[0], 'b');
+  ASSERT_EQ(ret[1], 'c');
+}
+
+TEST(MemChrTest, FindsFirstCharacter) {
+  const size_t size = 6;
+  const unsigned char src[size] = {'a', 'b', 'c', 'd', 'e', '\0'};
+  // Should return original array since 'a' is the first character.
+  const char *ret = call_memchr(src, 'a', size);
+  ASSERT_STREQ(ret, "abcde");
+}
+
+TEST(MemChrTest, FindsMiddleCharacter) {
+  const size_t size = 6;
+  const unsigned char src[size] = {'a', 'b', 'c', 'd', 'e', '\0'};
+  // Should return characters after (and including) 'c'.
+  const char *ret = call_memchr(src, 'c', size);
+  ASSERT_STREQ(ret, "cde");
+}
+
+TEST(MemChrTest, FindsLastCharacterThatIsNotNullTerminator) {
+  const size_t size = 6;
+  const unsigned char src[size] = {'a', 'b', 'c', 'd', 'e', '\0'};
+  // Should return 'e' and null-terminator.
+  const char *ret = call_memchr(src, 'e', size);
+  ASSERT_STREQ(ret, "e");
+}
+
+TEST(MemChrTest, FindsNullTerminator) {
+  const size_t size = 6;
+  const unsigned char src[size] = {'a', 'b', 'c', 'd', 'e', '\0'};
+  // Should return null-terminator.
+  const char *ret = call_memchr(src, '\0', size);
+  ASSERT_STREQ(ret, "");
+}
+
+TEST(MemChrTest, CharacterNotWithinStringShouldReturnZero) {
+  const size_t size = 4;
+  const unsigned char src[size] = {'1', '2', '3', '?'};
+  // Since 'z' is not within 'src', should return nullptr.
+  const char *ret = call_memchr(src, 'z', size);
+  ASSERT_STREQ(ret, nullptr);
+}
+
+TEST(MemChrTest, ShouldNotSearchBeyondGivenSize) {
+  const char *src = "abcde";
+  // 'd' is at index 3, so it must only be found once size exceeds 3.
+  ASSERT_STREQ(call_memchr(src, 'd', 0), nullptr);
+  ASSERT_STREQ(call_memchr(src, 'd', 3), nullptr);
+  ASSERT_STREQ(call_memchr(src, 'd', 4), "de");
+}
+
+TEST(MemChrTest, TheSourceShouldNotChange) {
+  const size_t size = 6;
+  const unsigned char src[size] = {'a', 'b', 'c', 'd', 'e', '\0'};
+  // Compare against an independent literal: a copy of the pointer would alias
+  // src and make these checks trivially true.
+  const char *expected = "abcde";
+  // When the character is found, the source string should not change.
+  __llvm_libc::memchr(src, 'd', size);
+  ASSERT_STREQ(reinterpret_cast<const char *>(src), expected);
+  // Same case for when the character is not found.
+  __llvm_libc::memchr(src, 'z', size);
+  ASSERT_STREQ(reinterpret_cast<const char *>(src), expected);
+}
+
+TEST(MemChrTest, ShouldFindFirstOfDuplicates) {
+  const size_t size = 12; // 11 characters + null terminator.
+  const char *dups = "abc1def1ghi";
+  // 1 is duplicated in 'dups', but it should find the first copy.
+  const char *ret = call_memchr(dups, '1', size);
+  ASSERT_STREQ(ret, "1def1ghi");
+}
+
+TEST(MemChrTest, EmptyStringShouldOnlyMatchNullTerminator) {
+  const size_t size = 1; // Null terminator.
+  const char *empty_string = "";
+  const void *src = empty_string;
+
+  const char *ret1 = call_memchr(src, 'A', size);
+  ASSERT_STREQ(ret1, nullptr);
+
+  const char *ret2 = call_memchr(src, '9', size);
+  ASSERT_STREQ(ret2, nullptr);
+
+  const char *ret3 = call_memchr(src, '?', size);
+  ASSERT_STREQ(ret3, nullptr);
+
+  // Null terminator should match.
+  const char *ret4 = call_memchr(src, '\0', size);
+  ASSERT_STREQ(ret4, "");
+}
+
+TEST(MemChrTest, SingleRepeatedCharacterShouldReturnFirst) {
+  const size_t size = 12; // 11 characters + null terminator.
+  const char *dups = "XXXXXXXXXXX";
+  // Should return original string since X is first character.
+  const char *ret = call_memchr(dups, 'X', size);
+  ASSERT_STREQ(ret, dups);
+}