diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -21,6 +21,7 @@ libc.src.string.strlen libc.src.string.strnlen libc.src.string.strrchr + libc.src.string.strspn libc.src.string.strstr ) diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -39,6 +39,7 @@ libc.src.string.strlen libc.src.string.strnlen libc.src.string.strrchr + libc.src.string.strspn libc.src.string.strstr # sys/mman.h entrypoints diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -94,6 +94,14 @@ strrchr.h ) +add_entrypoint_object( + strspn + SRCS + strspn.cpp + HDRS + strspn.h +) + # Helper to define a function with multiple implementations # - Computes flags to satisfy required/rejected features and arch, # - Declares an entry point, diff --git a/libc/src/string/strspn.h b/libc/src/string/strspn.h new file mode 100644 --- /dev/null +++ b/libc/src/string/strspn.h @@ -0,0 +1,20 @@ +//===-- Implementation header for strspn ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_STRSPN_H +#define LLVM_LIBC_SRC_STRING_STRSPN_H + +#include + +namespace __llvm_libc { + +size_t strspn(const char *src, const char *segment); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_STRSPN_H diff --git a/libc/src/string/strspn.cpp b/libc/src/string/strspn.cpp new file mode 100644 --- /dev/null +++ b/libc/src/string/strspn.cpp @@ -0,0 +1,50 @@ +//===-- Implementation of strspn ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strspn.h" + +#include "src/__support/common.h" +#include + +namespace __llvm_libc { + +constexpr size_t exploded_unsigned_char_bit = 8 * sizeof(size_t); + +// (256 / 8) / sizeof(size_t). +constexpr size_t bitset_size = 32 / sizeof(size_t); + +static void toggle_bit(size_t *bitset, const size_t ch) { + const size_t mask = size_t{1} << (ch % exploded_unsigned_char_bit); + bitset[ch / exploded_unsigned_char_bit] |= mask; +} + +static bool contains_bit(const size_t *bitset, const size_t ch) { + const size_t mask = size_t{1} << (ch % exploded_unsigned_char_bit); + return bitset[ch / exploded_unsigned_char_bit] & mask; +} + +size_t LLVM_LIBC_ENTRYPOINT(strspn)(const char *src, const char *segment) { + if (!segment[0]) + return 0; + + const char *initial = src; + if (!segment[1]) { // The segment is a single character. + for (; *src == *segment; ++src) + ; + return src - initial; + } + + size_t bitset[bitset_size] = {0}; + for (; *segment; ++segment) + toggle_bit(bitset, *segment); + for (; *src && contains_bit(bitset, *src); ++src) + ; + return src - initial; +} + +} // namespace __llvm_libc diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -102,6 +102,17 @@ libc.src.string.strrchr ) +add_libc_unittest( + strspn_test + SUITE + libc_string_unittests + SRCS + strspn_test.cpp + DEPENDS + libc.src.string.strspn +) + + # Tests all implementations that can run on the host. function(add_libc_multi_impl_test name) get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations) diff --git a/libc/test/src/string/strspn_test.cpp b/libc/test/src/string/strspn_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/strspn_test.cpp @@ -0,0 +1,83 @@ +//===-- Unittests for strspn ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strspn.h" + +#include "utils/UnitTest/Test.h" + +TEST(StrSpnTest, EmptyStringShouldReturnZero) { + // The search should not include the null terminator. + EXPECT_EQ(__llvm_libc::strspn("", ""), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn("_", ""), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn("", "_"), size_t{0}); +} + +TEST(StrSpnTest, ShouldNotMatchAnythingAfterNullTerminator) { + const char src[4] = {'a', 'b', '\0', 'c'}; + EXPECT_EQ(__llvm_libc::strspn(src, "ab"), size_t{2}); + EXPECT_EQ(__llvm_libc::strspn(src, "c"), size_t{0}); + + // Same goes for the segment to be searched for. + const char segment[4] = {'1', '2', '\0', '3'}; + EXPECT_EQ(__llvm_libc::strspn("123", segment), size_t{2}); +} + +TEST(StrSpnTest, OnlyFirstCharacterSegmentShouldMatch) { + const char *src = "12345"; + EXPECT_EQ(__llvm_libc::strspn(src, "1"), size_t{1}); + // Since '1' is not within the segment, the span + // size should remain zero. + EXPECT_EQ(__llvm_libc::strspn(src, "2"), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn(src, "3"), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn(src, "4"), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn(src, "5"), size_t{0}); +} + +TEST(StrSpnTest, UnmatchedCharacterShouldNotCount) { + EXPECT_EQ(__llvm_libc::strspn("a", "b"), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn("abcdef", "1"), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn("123", "4"), size_t{0}); +} + +TEST(StrSpnTest, SequentialCharactersShouldMatch) { + const char *src = "abcde"; + EXPECT_EQ(__llvm_libc::strspn(src, "a"), size_t{1}); + EXPECT_EQ(__llvm_libc::strspn(src, "ab"), size_t{2}); + EXPECT_EQ(__llvm_libc::strspn(src, "abc"), size_t{3}); + EXPECT_EQ(__llvm_libc::strspn(src, "abcd"), size_t{4}); + EXPECT_EQ(__llvm_libc::strspn(src, "abcde"), size_t{5}); + // Same thing for when the roles are reversed. + EXPECT_EQ(__llvm_libc::strspn("abcde", src), size_t{5}); + EXPECT_EQ(__llvm_libc::strspn("abcd", src), size_t{4}); + EXPECT_EQ(__llvm_libc::strspn("abc", src), size_t{3}); + EXPECT_EQ(__llvm_libc::strspn("ab", src), size_t{2}); + EXPECT_EQ(__llvm_libc::strspn("a", src), size_t{1}); +} + +TEST(StrSpnTest, NonSequentialCharactersShouldNotMatch) { + const char *src = "123456789"; + EXPECT_EQ(__llvm_libc::strspn(src, "_1_abc_2_def_3_"), size_t{3}); + // Only matches 4 since '5' is not within the span. + EXPECT_EQ(__llvm_libc::strspn(src, "67__34abc12"), size_t{4}); +} + +TEST(StrSpnTest, ReverseCharactersShouldNotMatch) { + EXPECT_EQ(__llvm_libc::strspn("12345", "54321"), size_t{5}); + EXPECT_EQ(__llvm_libc::strspn("12345", "432"), size_t{0}); + // Only matches 1 since '2' is not within the span. + EXPECT_EQ(__llvm_libc::strspn("12345", "51"), size_t{1}); +} + +TEST(StrSpnTest, DuplicatedCharactersToBeSearchedForShouldStillMatch) { + // Only a single character, so only 1 match. + EXPECT_EQ(__llvm_libc::strspn("a", "aa"), size_t{1}); + // This should count once for each 'a' in the source string. + EXPECT_EQ(__llvm_libc::strspn("aa", "aa"), size_t{2}); + EXPECT_EQ(__llvm_libc::strspn("aaa", "aa"), size_t{3}); + EXPECT_EQ(__llvm_libc::strspn("aaaa", "aa"), size_t{4}); +}