diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -35,6 +35,7 @@ libc.src.string.strspn libc.src.string.strstr libc.src.string.strtok + libc.src.string.strtok_r ) set(TARGET_LIBM_ENTRYPOINTS diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -236,6 +236,7 @@ "strspn", "strstr", "strtok", + "strtok_r", "strxfrm", ]; diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -53,6 +53,7 @@ libc.src.string.strspn libc.src.string.strstr libc.src.string.strtok + libc.src.string.strtok_r # sys/mman.h entrypoints libc.src.sys.mman.mmap diff --git a/libc/spec/posix.td b/libc/spec/posix.td --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -11,8 +11,12 @@ def ConstRestrictStructSigactionPtr : ConstType; def POSIX : StandardSpec<"POSIX"> { + // TODO: Change naming so that they're consistent with other files. 
PtrType CharPtr = PtrType; ConstType ConstCharPtr = ConstType; + RestrictedPtrType RestrictedCharPtr = RestrictedPtrType; + ConstType ConstRestrictedCharPtr = ConstType; + RestrictedPtrType CharRestrictedDoublePtr = RestrictedPtrType; NamedType OffTType = NamedType<"off_t">; NamedType SSizeTType = NamedType<"ssize_t">; @@ -221,6 +225,11 @@ RetValSpec, [ArgSpec, ArgSpec] >, + FunctionSpec< + "strtok_r", + RetValSpec, + [ArgSpec, ArgSpec] + >, ] >; diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -139,7 +139,17 @@ HDRS strtok.h DEPENDS - libc.utils.CPP.standalone_cpp + .string_utils +) + +add_entrypoint_object( + strtok_r + SRCS + strtok_r.cpp + HDRS + strtok_r.h + DEPENDS + .string_utils ) # Helper to define a function with multiple implementations diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -28,6 +28,46 @@ return src - initial; } +// Shared tokenizer behind strtok and strtok_r. On the first call, 'src' is +// scanned for the first character not found in 'delimiter_string'. From there +// it scans until a character in 'delimiter_string' or the null terminator is +// found; that span is the token. A delimiter that ends the token is +// overwritten with a null terminator, and the token is returned. The position +// to resume from is stored through 'saveptr'; passing a nullptr for 'src' on a +// later call resumes from that position. Returns nullptr when no token is +// left, or when both 'src' and '*saveptr' are nullptr. +static inline char *string_token(char *src, const char *delimiter_string, + char **saveptr) { + // Resume from the saved position when no new string is supplied. If there is + // no saved position either, there is nothing to tokenize. + if (src == nullptr && (src = *saveptr) == nullptr) + return nullptr; +
+ // Index the set by unsigned char value so the index stays within [0, 255] + // even where plain char is signed. + cpp::Bitset<256> delimiter_set; + for (; *delimiter_string; ++delimiter_string) + delimiter_set.set(static_cast<unsigned char>(*delimiter_string)); + + // Skip leading delimiters. + for (; *src && delimiter_set.test(static_cast<unsigned char>(*src)); ++src) + ; + if (!*src) { + *saveptr = src; + return nullptr; + } + char *token = src; + // Advance to the end of the token. + for (; *src && !delimiter_set.test(static_cast<unsigned char>(*src)); ++src) + ; + if (*src) { + *src = '\0'; + ++src; + } + *saveptr = src; + return token; +} + } // namespace internal } // namespace __llvm_libc diff --git a/libc/src/string/strtok.cpp b/libc/src/string/strtok.cpp --- a/libc/src/string/strtok.cpp +++ b/libc/src/string/strtok.cpp @@ -9,34 +9,16 @@ #include "src/string/strtok.h" #include "src/__support/common.h" -#include "utils/CPP/Bitset.h" +#include "src/string/string_utils.h" namespace __llvm_libc { static char *strtok_str = nullptr; +// TODO: Place restrict qualifier where necessary for this and other function +// arguments. char *LLVM_LIBC_ENTRYPOINT(strtok)(char *src, const char *delimiter_string) { - cpp::Bitset<256> delimiter_set; - for (; *delimiter_string; ++delimiter_string) - delimiter_set.set(*delimiter_string); - - src = src ? src : strtok_str; - for (; *src && delimiter_set.test(*src); ++src) - ; - if (!*src) { - strtok_str = src; - return nullptr; - } - char *token = src; - for (; *src && !delimiter_set.test(*src); ++src) - ; - - strtok_str = src; - if (*strtok_str) { - *strtok_str = '\0'; - ++strtok_str; - } - return token; + return internal::string_token(src, delimiter_string, &strtok_str); } } // namespace __llvm_libc diff --git a/libc/src/string/strtok_r.h b/libc/src/string/strtok_r.h new file mode 100644 --- /dev/null +++ b/libc/src/string/strtok_r.h @@ -0,0 +1,18 @@ +//===-- Implementation header for strtok_r ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_STRTOK_R_H +#define LLVM_LIBC_SRC_STRING_STRTOK_R_H + +namespace __llvm_libc { + +char *strtok_r(char *src, const char *delimiter_string, char **saveptr); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_STRTOK_R_H diff --git a/libc/src/string/strtok_r.cpp b/libc/src/string/strtok_r.cpp new file mode 100644 --- /dev/null +++ b/libc/src/string/strtok_r.cpp @@ -0,0 +1,23 @@ +//===-- Implementation of strtok_r ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strtok_r.h" + +#include "src/__support/common.h" +#include "src/string/string_utils.h" + +namespace __llvm_libc { + +// Forwards to internal::string_token. The position to resume from on a later +// call is kept in the caller-provided 'saveptr'. +char *LLVM_LIBC_ENTRYPOINT(strtok_r)(char *src, const char *delimiter_string, + char **saveptr) { + return internal::string_token(src, delimiter_string, saveptr); +} + +} // namespace __llvm_libc diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -142,6 +142,16 @@ libc.src.string.strtok ) +add_libc_unittest( + strtok_r_test + SUITE + libc_string_unittests + SRCS + strtok_r_test.cpp + DEPENDS + libc.src.string.strtok_r +) + # Tests all implementations that can run on the host.
function(add_libc_multi_impl_test name) get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations) diff --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/strtok_r_test.cpp @@ -0,0 +1,111 @@ +//===-- Unittests for strtok_r -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strtok_r.h" +#include "utils/UnitTest/Test.h" + +TEST(StrTokReentrantTest, NoTokenFound) { + { // Empty source and delimiter string. + char empty[] = ""; + char *reserve = nullptr; + ASSERT_STREQ(__llvm_libc::strtok_r(empty, "", &reserve), nullptr); + // Another call to ensure that 'reserve' is not in a bad state. + ASSERT_STREQ(__llvm_libc::strtok_r(empty, "", &reserve), nullptr); + ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "", &reserve), nullptr); + } + { // Empty source and single character delimiter string. + char empty[] = ""; + char *reserve = nullptr; + ASSERT_STREQ(__llvm_libc::strtok_r(empty, "_", &reserve), nullptr); + // Another call to ensure that 'reserve' is not in a bad state. + ASSERT_STREQ(__llvm_libc::strtok_r(empty, "_", &reserve), nullptr); + ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_", &reserve), nullptr); + } + { // Same character source and delimiter string. + char single[] = "_"; + char *reserve = nullptr; + ASSERT_STREQ(__llvm_libc::strtok_r(single, "_", &reserve), nullptr); + // Another call to ensure that 'reserve' is not in a bad state.
+ ASSERT_STREQ(__llvm_libc::strtok_r(single, "_", &reserve), nullptr); + ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_", &reserve), nullptr); + } + { // Delimiter not in source, so the whole source is returned as one token. + char multiple[] = "1,2"; + char *reserve = nullptr; + ASSERT_STREQ(__llvm_libc::strtok_r(multiple, ":", &reserve), "1,2"); + // Another call to ensure that 'reserve' is not in a bad state. + ASSERT_STREQ(__llvm_libc::strtok_r(multiple, ":", &reserve), "1,2"); + ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ":", &reserve), nullptr); + } +} + +TEST(StrTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) { + char src[] = ".123"; + char *reserve = nullptr; + ASSERT_STREQ(__llvm_libc::strtok_r(src, ".", &reserve), "123"); + // Another call to ensure that 'reserve' is not in a bad state. + ASSERT_STREQ(__llvm_libc::strtok_r(src, ".", &reserve), "123"); + ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ".", &reserve), nullptr); +} + +TEST(StrTokReentrantTest, DelimiterIsMiddleCharacter) { + char src[] = "12,34"; + char *reserve = nullptr; + ASSERT_STREQ(__llvm_libc::strtok_r(src, ",", &reserve), "12"); + // Another call to ensure that 'reserve' is not in a bad state. + ASSERT_STREQ(__llvm_libc::strtok_r(src, ",", &reserve), "12"); + ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ",", &reserve), nullptr); +} + +TEST(StrTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) { + char src[] = "1234:"; + char *reserve = nullptr; + ASSERT_STREQ(__llvm_libc::strtok_r(src, ":", &reserve), "1234"); + // Another call to ensure that 'reserve' is not in a bad state. + ASSERT_STREQ(__llvm_libc::strtok_r(src, ":", &reserve), "1234"); + ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ":", &reserve), nullptr); +} + +TEST(StrTokReentrantTest, ShouldNotGoPastNullTerminator) { + char src[] = {'1', '2', '\0', ',', '3'}; + char *reserve = nullptr; + ASSERT_STREQ(__llvm_libc::strtok_r(src, ",", &reserve), "12"); + // Another call to ensure that 'reserve' is not in a bad state.
+ ASSERT_STREQ(__llvm_libc::strtok_r(src, ",", &reserve), "12"); + ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ",", &reserve), nullptr); +} + +TEST(StrTokReentrantTest, SubsequentCallsShouldFindFollowingDelimiters) { + char src[] = "12,34.56"; + char *reserve = nullptr; + char *token = __llvm_libc::strtok_r(src, ",.", &reserve); + ASSERT_STREQ(token, "12"); + token = __llvm_libc::strtok_r(nullptr, ",.", &reserve); + ASSERT_STREQ(token, "34"); + token = __llvm_libc::strtok_r(nullptr, ",.", &reserve); + ASSERT_STREQ(token, "56"); + token = __llvm_libc::strtok_r(nullptr, "_:,_", &reserve); + ASSERT_STREQ(token, nullptr); + // Subsequent calls after hitting the end of the string should also return + // nullptr. + token = __llvm_libc::strtok_r(nullptr, "_:,_", &reserve); + ASSERT_STREQ(token, nullptr); +} + +TEST(StrTokReentrantTest, DelimitersShouldNotBeIncludedInToken) { + char src[] = "__ab__:_cd__:__ef__:__"; + char *reserve = nullptr; + char *token = __llvm_libc::strtok_r(src, "_:", &reserve); + ASSERT_STREQ(token, "ab"); + token = __llvm_libc::strtok_r(nullptr, ":_", &reserve); + ASSERT_STREQ(token, "cd"); + token = __llvm_libc::strtok_r(nullptr, "_:,", &reserve); + ASSERT_STREQ(token, "ef"); + token = __llvm_libc::strtok_r(nullptr, "_:,_", &reserve); + ASSERT_STREQ(token, nullptr); +}