diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -35,6 +35,7 @@ libc.src.string.strspn libc.src.string.strstr libc.src.string.strtok + libc.src.string.strtok_r ) set(TARGET_LIBM_ENTRYPOINTS diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -236,6 +236,7 @@ "strspn", "strstr", "strtok", + "strtok_r", "strxfrm", ]; diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -53,6 +53,7 @@ libc.src.string.strspn libc.src.string.strstr libc.src.string.strtok + libc.src.string.strtok_r # sys/mman.h entrypoints libc.src.sys.mman.mmap diff --git a/libc/spec/posix.td b/libc/spec/posix.td --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -13,6 +13,9 @@ def POSIX : StandardSpec<"POSIX"> { PtrType CharPtr = PtrType; ConstType ConstCharPtr = ConstType; + RestrictedPtrType RestrictedCharPtr = RestrictedPtrType; + ConstType ConstRestrictedCharPtr = ConstType; + RestrictedPtrType CharRestrictedDoublePtr = RestrictedPtrType; NamedType OffTType = NamedType<"off_t">; NamedType SSizeTType = NamedType<"ssize_t">; @@ -221,6 +224,11 @@ RetValSpec, [ArgSpec, ArgSpec] >, + FunctionSpec< + "strtok_r", + RetValSpec, + [ArgSpec, ArgSpec] + >, ] >; diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -142,6 +142,16 @@ libc.utils.CPP.standalone_cpp ) +add_entrypoint_object( + strtok_r + SRCS + strtok_r.cpp + HDRS + strtok_r.h + DEPENDS + libc.utils.CPP.standalone_cpp +) + # Helper to define a function with multiple implementations # - Computes flags to satisfy required/rejected features and arch, # - Declares an entry point, diff --git 
a/libc/src/string/strtok_r.h b/libc/src/string/strtok_r.h
new file mode 100644
--- /dev/null
+++ b/libc/src/string/strtok_r.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for strtok_r ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRTOK_R_H
+#define LLVM_LIBC_SRC_STRING_STRTOK_R_H
+
+namespace __llvm_libc {
+
+char *strtok_r(char *src, const char *delimiter_string, char **saveptr);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_STRTOK_R_H
diff --git a/libc/src/string/strtok_r.cpp b/libc/src/string/strtok_r.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/string/strtok_r.cpp
@@ -0,0 +1,63 @@
+//===-- Implementation of strtok_r ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strtok_r.h"
+
+#include "src/__support/common.h"
+#include "utils/CPP/Bitset.h"
+
+namespace __llvm_libc {
+
+// Reentrant tokenizer: the scan position lives in caller-owned *saveptr, and
+// this function keeps no state of its own between calls.
+//
+// src              - string to tokenize; when null, scanning resumes at
+//                    *saveptr.
+// delimiter_string - null-terminated set of delimiter bytes.
+// saveptr          - receives the position at which a later call resumes.
+//
+// Returns the next token, null-terminated in place by overwriting the
+// delimiter that follows it, or nullptr when no token remains (or when both
+// src and *saveptr are null).
+char *LLVM_LIBC_ENTRYPOINT(strtok_r)(char *src, const char *delimiter_string,
+                                     char **saveptr) {
+  // Index the set by unsigned char value: a plain char holding a byte >= 0x80
+  // is negative where char is signed and would not be a valid index.
+  cpp::Bitset<256> delimiter_set;
+  for (; *delimiter_string; ++delimiter_string)
+    delimiter_set.set(static_cast<unsigned char>(*delimiter_string));
+
+  src = src ? src : *saveptr;
+  // Nothing to scan: no string was passed and no position was saved.
+  if (!src)
+    return nullptr;
+
+  // Skip the delimiters in front of the token.
+  for (; *src && delimiter_set.test(static_cast<unsigned char>(*src)); ++src)
+    ;
+  if (!*src) {
+    *saveptr = src;
+    return nullptr;
+  }
+
+  // Advance to the first delimiter (or the terminator) after the token.
+  char *token = src;
+  for (; *src && !delimiter_set.test(static_cast<unsigned char>(*src)); ++src)
+    ;
+
+  // Cut the token off at the delimiter and resume just past it; when the token
+  // runs up to the terminator, resume at the terminator itself.
+  *saveptr = src;
+  if (**saveptr) {
+    **saveptr = '\0';
+    ++(*saveptr);
+  }
+  return token;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -142,6 +142,16 @@
     libc.src.string.strtok
 )
 
+add_libc_unittest(
+  strtok_r_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    strtok_r_test.cpp
+  DEPENDS
+    libc.src.string.strtok_r
+)
+
 # Tests all implementations that can run on the host.
 function(add_libc_multi_impl_test name)
   get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)
diff --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/string/strtok_r_test.cpp
@@ -0,0 +1,92 @@
+//===-- Unittests for strtok_r -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strtok_r.h"
+#include "utils/UnitTest/Test.h"
+
+TEST(StrTokReentrantTest, NoTokenFound) {
+  // An empty input yields no token, whether or not any delimiters are
+  // supplied.
+  char empty[] = "";
+  char *empty_state = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(empty, "", &empty_state), nullptr);
+  ASSERT_STREQ(__llvm_libc::strtok_r(empty, "_", &empty_state), nullptr);
+
+  // With an empty delimiter set, the whole input is the token.
+  char single[] = "_";
+  char *single_state = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(single, "", &single_state), "_");
+
+  // A delimiter that never occurs leaves the input as a single token.
+  char multiple[] = "1,2";
+  char *multiple_state = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(multiple, ":", &multiple_state), "1,2");
+}
+
+TEST(StrTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
+  // The leading '.' is skipped rather than producing an empty token.
+  char input[] = ".123";
+  char *saveptr = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, ".", &saveptr), "123");
+}
+
+TEST(StrTokReentrantTest, DelimiterIsMiddleCharacter) {
+  // The token ends at the first ','.
+  char input[] = "12,34";
+  char *saveptr = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, ",", &saveptr), "12");
+}
+
+TEST(StrTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
+  // The trailing ':' is not part of the token.
+  char input[] = "1234:";
+  char *saveptr = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, ":", &saveptr), "1234");
+}
+
+TEST(StrTokReentrantTest, MultipleDelimiters) {
+  // Every call restarts from the beginning of the same buffer, which earlier
+  // calls have already truncated in place.
+  char input[] = "12,.34";
+  char *saveptr = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, ".", &saveptr), "12,");
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, ".,", &saveptr), "12");
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, ",.", &saveptr), "12");
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, ":,.", &saveptr), "12");
+}
+
+TEST(StrTokReentrantTest, ShouldNotGoPastNullTerminator) {
+  // The array holds more bytes after its first '\0'; the token must end there.
+  char input[] = {'1', '2', '\0', ',', '3'};
+  char *saveptr = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, ",", &saveptr), "12");
+}
+
+TEST(StrTokReentrantTest, SubsequentCallsShouldFindFollowingDelimiters) {
+  char input[] = "12,34.56";
+  char *saveptr = nullptr;
+  // Passing nullptr as the string resumes from the position kept in saveptr.
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, ",.", &saveptr), "12");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ",.", &saveptr), "34");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ",.", &saveptr), "56");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,_", &saveptr), nullptr);
+  // Subsequent calls after hitting the end of the string should also return
+  // nullptr.
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,_", &saveptr), nullptr);
+}
+
+TEST(StrTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
+  // Runs of delimiters around each token are skipped, and the delimiter set
+  // may change from one call to the next.
+  char input[] = "__ab__:_cd__:__ef__:__";
+  char *saveptr = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, "_:", &saveptr), "ab");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ":_", &saveptr), "cd");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,", &saveptr), "ef");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,_", &saveptr), nullptr);
+}