diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -11,6 +11,7 @@ libc.src.string.strlen libc.src.string.memchr libc.src.string.strchr + libc.str.string.strstr ) set(TARGET_LIBM_ENTRYPOINTS diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -29,6 +29,7 @@ libc.src.string.strcmp libc.src.string.memchr libc.src.string.strchr + libc.src.string.strstr # sys/mman.h entrypoints libc.src.sys.mman.mmap diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -60,6 +60,16 @@ strchr.h ) +add_entrypoint_object( + strstr + SRCS + strstr.cpp + HDRS + strstr.h + DEPENDS + .strlen +) + # Helper to define a function with multiple implementations # - Computes flags to satisfy required/rejected features and arch, # - Declares an entry point, diff --git a/libc/src/string/strstr.h b/libc/src/string/strstr.h new file mode 100644 --- /dev/null +++ b/libc/src/string/strstr.h @@ -0,0 +1,18 @@ +//===-- Implementation header for strstr ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_STRSTR_H +#define LLVM_LIBC_SRC_STRING_STRSTR_H + +namespace __llvm_libc { + +char *strstr(const char *haystack, const char *needle); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_STRSTR_H diff --git a/libc/src/string/strstr.cpp b/libc/src/string/strstr.cpp new file mode 100644 --- /dev/null +++ b/libc/src/string/strstr.cpp @@ -0,0 +1,35 @@ +//===-- Implementation of strstr ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strstr.h" + +#include "src/__support/common.h" +#include "src/string/strlen.h" +#include + +namespace __llvm_libc { + +// TODO: This is a simple brute force implementation that uses strlen. +// This can (and will) be improved upon using well known string matching +// algorithms such as Boyer-Moore. +char *LLVM_LIBC_ENTRYPOINT(strstr)(const char *haystack, const char *needle) { + const size_t hsize = strlen(haystack); + const size_t nsize = strlen(needle); + if (nsize > hsize) + return nullptr; + size_t j; + for (size_t i = 0; i <= hsize - nsize; ++i) { + for (j = 0; j < nsize && haystack[i + j] == needle[j]; ++j) + ; + if (j == nsize) + return const_cast(haystack + i); + } + return nullptr; +} + +} // namespace __llvm_libc diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -62,6 +62,16 @@ libc.src.string.strchr ) +add_libc_unittest( + strstr_test + SUITE + libc_string_unittests + SRCS + strstr_test.cpp + DEPENDS + libc.src.string.strstr +) + # Tests all implementations that can run on the host. function(add_libc_multi_impl_test name) get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations) diff --git a/libc/test/src/string/strstr_test.cpp b/libc/test/src/string/strstr_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/strstr_test.cpp @@ -0,0 +1,134 @@ +//===-- Unittests for strstr ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strstr.h" +#include "utils/UnitTest/Test.h" + +TEST(StrStrTest, NeedleNotInHaystack) { + const char *haystack = "12345"; + const char *needle = "a"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), nullptr); +} + +TEST(StrStrTest, NeedleIsEmptyString) { + const char *haystack = "12345"; + const char *needle = ""; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), haystack); +} + +TEST(StrStrTest, HaystackIsEmptyString) { + const char *haystack = ""; + const char *needle = "12345"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), nullptr); +} + +TEST(StrStrTest, HaystackAndNeedleAreEmptyStrings) { + const char *haystack = ""; + const char *needle = ""; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), ""); +} + +TEST(StrStrTest, HaystackAndNeedleAreSingleCharacters) { + const char *haystack = "a"; + // Same characer returns that character. + ASSERT_STREQ(__llvm_libc::strstr(haystack, /*needle=*/"a"), "a"); + // Different character returns nullptr. + ASSERT_STREQ(__llvm_libc::strstr(haystack, /*needle=*/"b"), nullptr); +} + +TEST(StrStrTest, NeedleEqualToHaystack) { + const char *haystack = "12345"; + const char *needle = "12345"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "12345"); +} + +TEST(StrStrTest, NeedleSmallerThanHaystack) { + const char *haystack = "12345"; + const char *needle = "345"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "345"); +} + +TEST(StrStrTest, NeedleLargerThanHaystack) { + const char *haystack = "123"; + const char *needle = "12345"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), nullptr); +} + +TEST(StrStrTest, NeedleAtBeginning) { + const char *haystack = "12345"; + const char *needle = "12"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "12345"); +} + +TEST(StrStrTest, NeedleInMiddle) { + const char *haystack = "abcdefghi"; + const char *needle = "def"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "defghi"); +} + +TEST(StrStrTest, NeedleDirectlyBeforeNullTerminator) { + const char *haystack = "abcdefghi"; + const char *needle = "ghi"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "ghi"); +} + +TEST(StrStrTest, NeedlePastNullTerminator) { + const char haystack[5] = {'1', '2', '\0', '3', '4'}; + // Shouldn't find anything after the null terminator. + ASSERT_STREQ(__llvm_libc::strstr(haystack, /*needle=*/"3"), nullptr); + ASSERT_STREQ(__llvm_libc::strstr(haystack, /*needle=*/"4"), nullptr); +} + +TEST(StrStrTest, PartialNeedle) { + const char *haystack = "la_ap_lap"; + const char *needle = "lap"; + // Shouldn't find la or ap. + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "lap"); +} + +TEST(StrStrTest, MisspelledNeedle) { + const char *haystack = "atalloftwocities...wait, tale"; + const char *needle = "tale"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "tale"); +} + +TEST(StrStrTest, AnagramNeedle) { + const char *haystack = "dgo_ogd_god_odg_gdo_dog"; + const char *needle = "dog"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "dog"); +} + +TEST(StrStrTest, MorphedNeedle) { + // Changes a single letter in the needle to mismatch with the haystack. + const char *haystack = "once upon a time"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, "time"), "time"); + ASSERT_STREQ(__llvm_libc::strstr(haystack, "lime"), nullptr); + ASSERT_STREQ(__llvm_libc::strstr(haystack, "tome"), nullptr); + ASSERT_STREQ(__llvm_libc::strstr(haystack, "tire"), nullptr); + ASSERT_STREQ(__llvm_libc::strstr(haystack, "timo"), nullptr); +} + +TEST(StrStrTest, SummedDifferenceMethodUnsignedIntegers) { + // This is a bug found in a variation of the Rabin-Karp algorithm for + // sub-string search, which used unsigned integers instead of signed integers. + // When taking the difference between two characters, it is possible that it + // is negative. This bug resulted in returning nullptr when looking + // for "x", which should not be the case. + const char *haystack1 = "tx"; + const char *needle1 = "x"; + // First character of haystack: 't' = 116 + // First character of needle: 'x' = 120 + // 't' - 'x' = -4, which would lead to problems if using unsigned integers + // here. + ASSERT_STREQ(__llvm_libc::strstr(haystack1, needle1), "x"); + + // Similar case when switched. + const char *haystack2 = "xt"; + const char *needle2 = "t"; + ASSERT_STREQ(__llvm_libc::strstr(haystack2, needle2), "t"); +}