diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -34,6 +34,7 @@ libc.src.string.strrchr libc.src.string.strspn libc.src.string.strstr + libc.src.string.strtok ) set(TARGET_LIBM_ENTRYPOINTS diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -52,6 +52,7 @@ libc.src.string.strrchr libc.src.string.strspn libc.src.string.strstr + libc.src.string.strtok # sys/mman.h entrypoints libc.src.sys.mman.mmap diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -132,6 +132,16 @@ .string_utils ) +add_entrypoint_object( + strtok + SRCS + strtok.cpp + HDRS + strtok.h + DEPENDS + libc.utils.CPP.standalone_cpp +) + # Helper to define a function with multiple implementations # - Computes flags to satisfy required/rejected features and arch, # - Declares an entry point, diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -9,8 +9,6 @@ #ifndef LIBC_SRC_STRING_STRING_UTILS_H #define LIBC_SRC_STRING_STRING_UTILS_H -#include "src/string/memory_utils/utils.h" - #include "utils/CPP/Bitset.h" #include // size_t diff --git a/libc/src/string/strtok.h b/libc/src/string/strtok.h new file mode 100644 --- /dev/null +++ b/libc/src/string/strtok.h @@ -0,0 +1,18 @@ +//===-- Implementation header for strtok ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRTOK_H
+#define LLVM_LIBC_SRC_STRING_STRTOK_H
+
+namespace __llvm_libc {
+
+char *strtok(char *src, const char *delimiter_string);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_STRTOK_H
diff --git a/libc/src/string/strtok.cpp b/libc/src/string/strtok.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/string/strtok.cpp
@@ -0,0 +1,65 @@
+//===-- Implementation of strtok ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strtok.h"
+
+#include "src/__support/common.h"
+#include "utils/CPP/Bitset.h"
+
+namespace __llvm_libc {
+
+// Position at which the next strtok(nullptr, ...) call resumes scanning. It is
+// nullptr until the first call that supplies a string.
+static char *strtok_str = nullptr;
+
+// Splits a string into tokens separated by any character of
+// |delimiter_string|.
+//
+// |src| is the string to tokenize, or nullptr to continue with the string
+// given to an earlier call. Returns a pointer to the next token, or nullptr
+// when no token is left. The delimiter that ends the token is overwritten
+// with '\0', so the string is modified in place.
+char *LLVM_LIBC_ENTRYPOINT(strtok)(char *src, const char *delimiter_string) {
+  if (src == nullptr) {
+    src = strtok_str;
+    // Nothing has been saved yet, so there is nothing to resume from.
+    if (src == nullptr)
+      return nullptr;
+  }
+
+  // Index the set through unsigned char: plain char may be signed, and a
+  // negative value is not a valid index into the 256-bit set.
+  cpp::Bitset<256> delimiter_set;
+  for (; *delimiter_string; ++delimiter_string)
+    delimiter_set.set(static_cast<unsigned char>(*delimiter_string));
+
+  // Skip the delimiters in front of the token.
+  for (; *src && delimiter_set.test(static_cast<unsigned char>(*src)); ++src)
+    ;
+  if (!*src) {
+    // Remember the end of this string so that a later strtok(nullptr, ...)
+    // does not resume inside a string from an older call.
+    strtok_str = src;
+    return nullptr;
+  }
+
+  char *token = src;
+  for (; *src && !delimiter_set.test(static_cast<unsigned char>(*src)); ++src)
+    ;
+
+  // Terminate the token and save the position just past it. If the token
+  // ends the string, the saved position stays on the null terminator.
+  strtok_str = src;
+  if (*strtok_str) {
+    *strtok_str = '\0';
+    ++strtok_str;
+  }
+  return token;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -132,6 +132,16 @@
     libc.src.string.strpbrk
 )
 
+add_libc_unittest(
+  strtok_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    strtok_test.cpp
+  DEPENDS
+    libc.src.string.strtok
+)
+
 # Tests all implementations that can run on the host.
 function(add_libc_multi_impl_test name)
   get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)
diff --git a/libc/test/src/string/strtok_test.cpp b/libc/test/src/string/strtok_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/string/strtok_test.cpp
@@ -0,0 +1,78 @@
+//===-- Unittests for strtok ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strtok.h"
+#include "utils/UnitTest/Test.h"
+
+// A string with no token yields nullptr; a string with no delimiter in it is
+// returned whole.
+TEST(StrTokTest, NoTokenFound) {
+  char no_chars[] = "";
+  ASSERT_STREQ(__llvm_libc::strtok(no_chars, ""), nullptr);
+  ASSERT_STREQ(__llvm_libc::strtok(no_chars, "_"), nullptr);
+
+  char one_char[] = "_";
+  ASSERT_STREQ(__llvm_libc::strtok(one_char, ""), "_");
+
+  char several_chars[] = "1,2";
+  ASSERT_STREQ(__llvm_libc::strtok(several_chars, ":"), "1,2");
+}
+
+TEST(StrTokTest, DelimiterAsFirstCharacterShouldBeIgnored) {
+  char input[] = ".123";
+  ASSERT_STREQ(__llvm_libc::strtok(input, "."), "123");
+}
+
+TEST(StrTokTest, DelimiterIsMiddleCharacter) {
+  char input[] = "12,34";
+  ASSERT_STREQ(__llvm_libc::strtok(input, ","), "12");
+}
+
+TEST(StrTokTest, DelimiterAsLastCharacterShouldBeIgnored) {
+  char input[] = "1234:";
+  ASSERT_STREQ(__llvm_libc::strtok(input, ":"), "1234");
+}
+
+TEST(StrTokTest, MultipleDelimiters) {
+  char input[] = "12,.34";
+  // Every call restarts on the same buffer, which an earlier call may
+  // already have cut short by writing a terminator into it.
+  ASSERT_STREQ(__llvm_libc::strtok(input, "."), "12,");
+  ASSERT_STREQ(__llvm_libc::strtok(input, ".,"), "12");
+  ASSERT_STREQ(__llvm_libc::strtok(input, ",."), "12");
+  ASSERT_STREQ(__llvm_libc::strtok(input, ":,."), "12");
+}
+
+TEST(StrTokTest, ShouldNotGoPastNullTerminator) {
+  char input[] = {'1', '2', '\0', ',', '3'};
+  // The characters behind the embedded '\0' must not become part of a token.
+  ASSERT_STREQ(__llvm_libc::strtok(input, ","), "12");
+}
+
+TEST(StrTokTest, SubsequentCallsShouldFindFollowingDelimiters) {
+  char input[] = "12,34.56";
+  // Only the first call gets the string; the following ones pass nullptr.
+  ASSERT_STREQ(__llvm_libc::strtok(input, ",."), "12");
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, ",."), "34");
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, ",."), "56");
+  // The input is exhausted, so there is no further token.
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, "_:,_"), nullptr);
+  // Once the end of the string has been reached, further calls keep
+  // returning nullptr.
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, "_:,_"), nullptr);
+}
+
+TEST(StrTokTest, DelimitersShouldNotBeIncludedInToken) {
+  char input[] = "__ab__:_cd__:__ef__:__";
+  // The delimiter string differs from call to call.
+  ASSERT_STREQ(__llvm_libc::strtok(input, "_:"), "ab");
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, ":_"), "cd");
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, "_:,"), "ef");
+  // Only delimiter characters remain after "ef".
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, "_:,_"), nullptr);
+}