diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -27,8 +27,10 @@
     libc.src.string.strcat
     libc.src.string.strchr
     libc.src.string.strcpy
+    libc.src.string.strcspn
     libc.src.string.strlen
     libc.src.string.strnlen
+    libc.src.string.strpbrk
     libc.src.string.strrchr
     libc.src.string.strspn
     libc.src.string.strstr
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -45,8 +45,10 @@
     libc.src.string.strchr
     libc.src.string.strcmp
     libc.src.string.strcpy
+    libc.src.string.strcspn
     libc.src.string.strlen
     libc.src.string.strnlen
+    libc.src.string.strpbrk
     libc.src.string.strrchr
     libc.src.string.strspn
     libc.src.string.strstr
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -1,5 +1,13 @@
 add_subdirectory(memory_utils)
 
+add_header_library(
+  string_utils
+  HDRS
+    string_utils.h
+  DEPENDS
+    libc.utils.CPP.standalone_cpp
+)
+
 add_entrypoint_object(
   strcat
   SRCS
@@ -94,12 +102,34 @@
     strrchr.h
 )
 
+add_entrypoint_object(
+  strcspn
+  SRCS
+    strcspn.cpp
+  HDRS
+    strcspn.h
+  DEPENDS
+    .string_utils
+)
+
 add_entrypoint_object(
   strspn
   SRCS
     strspn.cpp
   HDRS
     strspn.h
+  DEPENDS
+    libc.utils.CPP.standalone_cpp
+)
+
+add_entrypoint_object(
+  strpbrk
+  SRCS
+    strpbrk.cpp
+  HDRS
+    strpbrk.h
+  DEPENDS
+    .string_utils
 )
 
 # Helper to define a function with multiple implementations
diff --git a/libc/src/string/strcspn.h b/libc/src/string/strcspn.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/strcspn.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for strcspn -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRCSPN_H
+#define LLVM_LIBC_SRC_STRING_STRCSPN_H
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+
+size_t strcspn(const char *src, const char *segment);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_STRCSPN_H
diff --git a/libc/src/string/strcspn.cpp b/libc/src/string/strcspn.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/string/strcspn.cpp
@@ -0,0 +1,22 @@
+//===-- Implementation of strcspn -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strcspn.h"
+
+#include "src/__support/common.h"
+#include "src/string/string_utils.h"
+
+namespace __llvm_libc {
+
+// Returns the length of the initial part of 'src' that contains none of the
+// characters in 'segment'.
+size_t LLVM_LIBC_ENTRYPOINT(strcspn)(const char *src, const char *segment) {
+  return internal::complementary_span(src, segment);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/string_utils.h
@@ -0,0 +1,40 @@
+//===-- String utils --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SRC_STRING_STRING_UTILS_H
+#define LIBC_SRC_STRING_STRING_UTILS_H
+
+#include "src/string/memory_utils/utils.h"
+
+#include "utils/CPP/Bitset.h"
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+namespace internal {
+
+// Returns the length of the longest initial span of 'src' that contains no
+// character from 'segment'. If none of the characters in 'segment' occur in
+// 'src', this is the length of 'src'.
+static inline size_t complementary_span(const char *src, const char *segment) {
+  const char *initial = src;
+  cpp::Bitset<256> bitset;
+
+  // Index the bitset with the unsigned value of each character. Plain 'char'
+  // may be signed, in which case a byte >= 0x80 is negative and would not map
+  // to an index in [0, 256).
+  for (; *segment; ++segment)
+    bitset.set(static_cast<unsigned char>(*segment));
+  for (; *src && !bitset.test(static_cast<unsigned char>(*src)); ++src)
+    ;
+  return src - initial;
+}
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STRING_STRING_UTILS_H
diff --git a/libc/src/string/strpbrk.h b/libc/src/string/strpbrk.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/strpbrk.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for strpbrk -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRPBRK_H
+#define LLVM_LIBC_SRC_STRING_STRPBRK_H
+
+namespace __llvm_libc {
+
+char *strpbrk(const char *src, const char *breakset);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_STRPBRK_H
diff --git a/libc/src/string/strpbrk.cpp b/libc/src/string/strpbrk.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/string/strpbrk.cpp
@@ -0,0 +1,24 @@
+//===-- Implementation of strpbrk -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strpbrk.h"
+
+#include "src/__support/common.h"
+#include "src/string/string_utils.h"
+
+namespace __llvm_libc {
+
+// Returns a pointer to the first character in 'src' that also occurs in
+// 'breakset', or nullptr if there is no such character.
+char *LLVM_LIBC_ENTRYPOINT(strpbrk)(const char *src, const char *breakset) {
+  src += internal::complementary_span(src, breakset);
+  // Stopping on the null terminator means nothing in 'breakset' was found.
+  return *src ? const_cast<char *>(src) : nullptr;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/string/strspn.cpp b/libc/src/string/strspn.cpp
--- a/libc/src/string/strspn.cpp
+++ b/libc/src/string/strspn.cpp
@@ -11,7 +11,6 @@
 #include "src/__support/common.h"
 #include "utils/CPP/Bitset.h"
 #include <stddef.h>
-#include <stdint.h>
 
 namespace __llvm_libc {
 
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -102,6 +102,16 @@
     libc.src.string.strrchr
 )
 
+add_libc_unittest(
+  strcspn_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    strcspn_test.cpp
+  DEPENDS
+    libc.src.string.strcspn
+)
+
 add_libc_unittest(
   strspn_test
   SUITE
@@ -112,6 +122,15 @@
     libc.src.string.strspn
 )
 
+add_libc_unittest(
+  strpbrk_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    strpbrk_test.cpp
+  DEPENDS
+    libc.src.string.strpbrk
+)
 
 # Tests all implementations that can run on the host.
 function(add_libc_multi_impl_test name)
diff --git a/libc/test/src/string/strcspn_test.cpp b/libc/test/src/string/strcspn_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/string/strcspn_test.cpp
@@ -0,0 +1,57 @@
+//===-- Unittests for strcspn ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strcspn.h"
+
+#include "utils/UnitTest/Test.h"
+
+TEST(StrCSpnTest, ComplementarySpanShouldNotGoPastNullTerminator) {
+  const char src[5] = {'a', 'b', '\0', 'c', 'd'};
+  EXPECT_EQ(__llvm_libc::strcspn(src, "b"), size_t{1});
+  EXPECT_EQ(__llvm_libc::strcspn(src, "d"), size_t{2});
+
+  // Same goes for the segment to be searched for.
+  const char segment[5] = {'1', '2', '\0', '3', '4'};
+  EXPECT_EQ(__llvm_libc::strcspn("123", segment), size_t{0});
+}
+
+TEST(StrCSpnTest, ComplementarySpanForEachIndividualCharacter) {
+  const char *src = "12345";
+  // The complementary span size should increment accordingly.
+  EXPECT_EQ(__llvm_libc::strcspn(src, "1"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn(src, "2"), size_t{1});
+  EXPECT_EQ(__llvm_libc::strcspn(src, "3"), size_t{2});
+  EXPECT_EQ(__llvm_libc::strcspn(src, "4"), size_t{3});
+  EXPECT_EQ(__llvm_libc::strcspn(src, "5"), size_t{4});
+}
+
+TEST(StrCSpnTest, ComplementarySpanIsStringLengthIfNoCharacterFound) {
+  // Null terminator.
+  EXPECT_EQ(__llvm_libc::strcspn("", ""), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn("", "_"), size_t{0});
+  // Single character.
+  EXPECT_EQ(__llvm_libc::strcspn("a", "b"), size_t{1});
+  // Multiple characters.
+  EXPECT_EQ(__llvm_libc::strcspn("abc", "1"), size_t{3});
+}
+
+TEST(StrCSpnTest, DuplicatedCharactersNotPartOfComplementarySpan) {
+  // Complementary span should be zero in all these cases.
+  EXPECT_EQ(__llvm_libc::strcspn("a", "aa"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn("aa", "a"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn("aaa", "aa"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn("aaaa", "aa"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn("aaaa", "baa"), size_t{0});
+}
+
+TEST(StrCSpnTest, HandlesCharactersOutsideOfAscii) {
+  // Bytes with the high bit set are negative when plain 'char' is signed.
+  EXPECT_EQ(__llvm_libc::strcspn("ab\x80", "\x80"), size_t{2});
+  EXPECT_EQ(__llvm_libc::strcspn("\xff", "\xff"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strcspn("a\xff", "\x80"), size_t{2});
+}
diff --git a/libc/test/src/string/strpbrk_test.cpp b/libc/test/src/string/strpbrk_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/string/strpbrk_test.cpp
@@ -0,0 +1,67 @@
+//===-- Unittests for strpbrk ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strpbrk.h"
+
+#include "utils/UnitTest/Test.h"
+
+TEST(StrPBrkTest, EmptyStringShouldReturnNullptr) {
+  // The search should not include the null terminator.
+  EXPECT_STREQ(__llvm_libc::strpbrk("", ""), nullptr);
+  EXPECT_STREQ(__llvm_libc::strpbrk("_", ""), nullptr);
+  EXPECT_STREQ(__llvm_libc::strpbrk("", "_"), nullptr);
+}
+
+TEST(StrPBrkTest, ShouldNotFindAnythingAfterNullTerminator) {
+  const char src[4] = {'a', 'b', '\0', 'c'};
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "c"), nullptr);
+}
+
+TEST(StrPBrkTest, ShouldReturnNullptrIfNoCharactersFound) {
+  EXPECT_STREQ(__llvm_libc::strpbrk("12345", "abcdef"), nullptr);
+}
+
+TEST(StrPBrkTest, FindsFirstCharacter) {
+  const char *src = "12345";
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "1"), "12345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "-1"), "12345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "1_"), "12345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "f1_"), "12345");
+  ASSERT_STREQ(src, "12345");
+}
+
+TEST(StrPBrkTest, FindsMiddleCharacter) {
+  const char *src = "12345";
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "3"), "345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "?3"), "345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "3F"), "345");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "z3_"), "345");
+  ASSERT_STREQ(src, "12345");
+}
+
+TEST(StrPBrkTest, FindsLastCharacter) {
+  const char *src = "12345";
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "5"), "5");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "r5"), "5");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "59"), "5");
+  EXPECT_STREQ(__llvm_libc::strpbrk(src, "n5_"), "5");
+  ASSERT_STREQ(src, "12345");
+}
+
+TEST(StrPBrkTest, FindsFirstOfRepeated) {
+  EXPECT_STREQ(__llvm_libc::strpbrk("A,B,C,D", ","), ",B,C,D");
+}
+
+TEST(StrPBrkTest, FindsFirstInBreakset) {
+  EXPECT_STREQ(__llvm_libc::strpbrk("12345", "34"), "345");
+}
+
+TEST(StrPBrkTest, FindsCharacterOutsideOfAscii) {
+  // Bytes with the high bit set must be matched like any other character.
+  EXPECT_STREQ(__llvm_libc::strpbrk("ab\xffxy", "\xff"), "\xffxy");
+}