diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -21,6 +21,7 @@ libc.src.string.strlen libc.src.string.strnlen libc.src.string.strrchr + libc.src.string.strspn libc.src.string.strstr ) diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -39,6 +39,7 @@ libc.src.string.strlen libc.src.string.strnlen libc.src.string.strrchr + libc.src.string.strspn libc.src.string.strstr # sys/mman.h entrypoints diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -94,6 +94,14 @@ strrchr.h ) +add_entrypoint_object( + strspn + SRCS + strspn.cpp + HDRS + strspn.h +) + # Helper to define a function with multiple implementations # - Computes flags to satisfy required/rejected features and arch, # - Declares an entry point, diff --git a/libc/src/string/strspn.h b/libc/src/string/strspn.h new file mode 100644 --- /dev/null +++ b/libc/src/string/strspn.h @@ -0,0 +1,20 @@ +//===-- Implementation header for strspn ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_STRSPN_H +#define LLVM_LIBC_SRC_STRING_STRSPN_H + +#include + +namespace __llvm_libc { + +size_t strspn(const char *src, const char *segment); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_STRSPN_H diff --git a/libc/src/string/strspn.cpp b/libc/src/string/strspn.cpp new file mode 100644 --- /dev/null +++ b/libc/src/string/strspn.cpp @@ -0,0 +1,62 @@ +//===-- Implementation of strspn ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strspn.h" + +#include "src/__support/common.h" +#include + +namespace __llvm_libc { + +// 8 bits of a character, multiplied by a factor of sizeof(size_t). +static constexpr size_t EXPLODED_CHAR_BIT = 8 * sizeof(size_t); + +// The size of the byteset used. The following must hold: +// 1 <= sizeof(size_t) <= 32. +static constexpr size_t BYTESET_SIZE = 32 / sizeof(size_t); + +// Toggles the bit produced by 'c' in the byteset provided. +static inline void toggle_bit(size_t *byteset, unsigned char c) { + const size_t ch = c; + const size_t mask = size_t{1} << (ch % EXPLODED_CHAR_BIT); + byteset[ch / EXPLODED_CHAR_BIT] |= mask; +} + +// Determines whether the byteset contains the bit produced by 'c'. +static inline bool contains_bit(size_t *byteset, unsigned char c) { + const size_t ch = c; + const size_t mask = size_t{1} << (ch % EXPLODED_CHAR_BIT); + return byteset[ch / EXPLODED_CHAR_BIT] & mask; +} + +// A simpler form would use a 256-character array, and simply toggle +// the value of the unsigned character to 1 if it exists within the +// segment. This reduces the size of the table by manipulating bits +// instead of bytes. +size_t LLVM_LIBC_ENTRYPOINT(strspn)(const char *src, const char *segment) { + if (!segment[0]) + return 0; + + const char *initial = src; + if (!segment[1]) { + // The segment is a single character. + for (; *src == *segment; ++src) + ; + return src - initial; + } + + size_t byteset[BYTESET_SIZE] = {0}; + + for (; *segment; ++segment) + toggle_bit(byteset, *segment); + for (; *src && contains_bit(byteset, *src); ++src) + ; + return src - initial; +} + +} // namespace __llvm_libc diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -102,6 +102,17 @@ libc.src.string.strrchr ) +add_libc_unittest( + strspn_test + SUITE + libc_string_unittests + SRCS + strspn_test.cpp + DEPENDS + libc.src.string.strspn +) + + # Tests all implementations that can run on the host. function(add_libc_multi_impl_test name) get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations) diff --git a/libc/test/src/string/strspn_test.cpp b/libc/test/src/string/strspn_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/strspn_test.cpp @@ -0,0 +1,83 @@ +//===-- Unittests for strspn ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strspn.h" + +#include "utils/UnitTest/Test.h" + +TEST(StrSpnTest, EmptyStringShouldReturnZero) { + // The search should not include the null terminator. + EXPECT_EQ(__llvm_libc::strspn("", ""), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn("_", ""), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn("", "_"), size_t{0}); +} + +TEST(StrSpnTest, ShouldNotMatchAnythingAfterNullTerminator) { + const char src[4] = {'a', 'b', '\0', 'c'}; + EXPECT_EQ(__llvm_libc::strspn(src, "ab"), size_t{2}); + EXPECT_EQ(__llvm_libc::strspn(src, "c"), size_t{0}); + + // Same goes for the segment to be searched for. + const char segment[4] = {'1', '2', '\0', '3'}; + EXPECT_EQ(__llvm_libc::strspn("123", segment), size_t{2}); +} + +TEST(StrSpnTest, OnlyFirstCharacterSegmentShouldMatch) { + const char *src = "12345"; + EXPECT_EQ(__llvm_libc::strspn(src, "1"), size_t{1}); + // Since '1' is not within the segment, the span + // size should remain zero. + EXPECT_EQ(__llvm_libc::strspn(src, "2"), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn(src, "3"), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn(src, "4"), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn(src, "5"), size_t{0}); +} + +TEST(StrSpnTest, UnmatchedCharacterShouldNotCount) { + EXPECT_EQ(__llvm_libc::strspn("a", "b"), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn("abcdef", "1"), size_t{0}); + EXPECT_EQ(__llvm_libc::strspn("123", "4"), size_t{0}); +} + +TEST(StrSpnTest, SequentialCharactersShouldMatch) { + const char *src = "abcde"; + EXPECT_EQ(__llvm_libc::strspn(src, "a"), size_t{1}); + EXPECT_EQ(__llvm_libc::strspn(src, "ab"), size_t{2}); + EXPECT_EQ(__llvm_libc::strspn(src, "abc"), size_t{3}); + EXPECT_EQ(__llvm_libc::strspn(src, "abcd"), size_t{4}); + EXPECT_EQ(__llvm_libc::strspn(src, "abcde"), size_t{5}); + // Same thing for when the roles are reversed. + EXPECT_EQ(__llvm_libc::strspn("abcde", src), size_t{5}); + EXPECT_EQ(__llvm_libc::strspn("abcd", src), size_t{4}); + EXPECT_EQ(__llvm_libc::strspn("abc", src), size_t{3}); + EXPECT_EQ(__llvm_libc::strspn("ab", src), size_t{2}); + EXPECT_EQ(__llvm_libc::strspn("a", src), size_t{1}); +} + +TEST(StrSpnTest, NonSequentialCharactersShouldNotMatch) { + const char *src = "123456789"; + EXPECT_EQ(__llvm_libc::strspn(src, "_1_abc_2_def_3_"), size_t{3}); + // Only matches 4 since '5' is not within the span. + EXPECT_EQ(__llvm_libc::strspn(src, "67__34abc12"), size_t{4}); +} + +TEST(StrSpnTest, ReverseCharactersShouldNotMatch) { + EXPECT_EQ(__llvm_libc::strspn("12345", "54321"), size_t{5}); + EXPECT_EQ(__llvm_libc::strspn("12345", "432"), size_t{0}); + // Only matches 1 since '2' is not within the span. + EXPECT_EQ(__llvm_libc::strspn("12345", "51"), size_t{1}); +} + +TEST(StrSpnTest, DuplicatedCharactersToBeSearchedForShouldStillMatch) { + // Only a single character, so only 1 match. + EXPECT_EQ(__llvm_libc::strspn("a", "aa"), size_t{1}); + // This should count once for each 'a' in the source string. + EXPECT_EQ(__llvm_libc::strspn("aa", "aa"), size_t{2}); + EXPECT_EQ(__llvm_libc::strspn("aaa", "aa"), size_t{3}); + EXPECT_EQ(__llvm_libc::strspn("aaaa", "aa"), size_t{4}); +}