diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -28,6 +28,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt --- a/libc/config/darwin/arm/entrypoints.txt +++ b/libc/config/darwin/arm/entrypoints.txt @@ -28,6 +28,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/darwin/x86_64/entrypoints.txt b/libc/config/darwin/x86_64/entrypoints.txt --- a/libc/config/darwin/x86_64/entrypoints.txt +++ b/libc/config/darwin/x86_64/entrypoints.txt @@ -24,6 +24,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -24,6 +24,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -37,6 +37,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -28,6 +28,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/riscv64/entrypoints.txt b/libc/config/linux/riscv64/entrypoints.txt --- a/libc/config/linux/riscv64/entrypoints.txt +++ b/libc/config/linux/riscv64/entrypoints.txt @@ -37,6 +37,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -37,6 +37,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt --- a/libc/config/windows/entrypoints.txt +++ b/libc/config/windows/entrypoints.txt @@ -25,6 +25,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/spec/gnu_ext.td b/libc/spec/gnu_ext.td --- a/libc/spec/gnu_ext.td +++ b/libc/spec/gnu_ext.td @@ -57,7 +57,12 @@ [], // Macros [], // Types [], // Enumerations - [ + [ + FunctionSpec< + "memmem", + RetValSpec, + [ArgSpec, ArgSpec, ArgSpec, ArgSpec, FunctionSpec< "memrchr", RetValSpec, diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -49,6 +49,16 @@ .memory_utils.memcpy_implementation ) +add_entrypoint_object( + memmem + SRCS + memmem.cpp + HDRS + memmem.h + DEPENDS + .memory_utils.memcpy_implementation +) + add_entrypoint_object( memchr SRCS @@ -145,7 +155,7 @@ HDRS strcasestr.h DEPENDS - .memory_utils.strstr_implementation + .memory_utils.memmem_implementation libc.src.__support.ctype_utils ) @@ -362,7 +372,7 @@ HDRS strstr.h DEPENDS - .memory_utils.strstr_implementation + .memory_utils.memmem_implementation ) add_entrypoint_object( diff --git a/libc/src/string/memmem.h b/libc/src/string/memmem.h new file mode 100644 --- /dev/null +++ b/libc/src/string/memmem.h @@ -0,0 +1,21 @@ +//===-- Implementation header for memmem ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMMOVE_H +#define LLVM_LIBC_SRC_STRING_MEMMOVE_H + +#include // For size_t + +namespace __llvm_libc { + +void *memmem(const void *haystack, size_t haystack_len, const void *needle, + size_t needle_len); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMMOVE_H diff --git a/libc/src/string/strstr.cpp b/libc/src/string/memmem.cpp copy from libc/src/string/strstr.cpp copy to libc/src/string/memmem.cpp --- a/libc/src/string/strstr.cpp +++ b/libc/src/string/memmem.cpp @@ -1,4 +1,4 @@ -//===-- Implementation of strstr ------------------------------------------===// +//===-- Implementation of memmem _-----------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,18 +6,18 @@ // //===----------------------------------------------------------------------===// -#include "src/string/strstr.h" - +#include "src/string/memmem.h" #include "src/__support/common.h" -#include "src/string/memory_utils/strstr_implementations.h" +#include "src/string/memory_utils/memmem_implementations.h" namespace __llvm_libc { -// TODO: This is a simple brute force implementation. This can be -// improved upon using well known string matching algorithms. -LLVM_LIBC_FUNCTION(char *, strstr, (const char *haystack, const char *needle)) { +LLVM_LIBC_FUNCTION(void *, memmem, + (const void *haystack, size_t haystack_len, + const void *needle, size_t needle_len)) { auto comp = [](char l, char r) -> int { return l - r; }; - return strstr_implementation(haystack, needle, comp); + return memmem_implementation(haystack, haystack_len, needle, needle_len, + comp); } } // namespace __llvm_libc diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt --- a/libc/src/string/memory_utils/CMakeLists.txt +++ b/libc/src/string/memory_utils/CMakeLists.txt @@ -72,7 +72,7 @@ ) add_header_library( - strstr_implementation + memmem_implementation HDRS - strstr_implementations.h + memmem_implementations.h ) diff --git a/libc/src/string/memory_utils/memmem_implementations.h b/libc/src/string/memory_utils/memmem_implementations.h new file mode 100644 --- /dev/null +++ b/libc/src/string/memory_utils/memmem_implementations.h @@ -0,0 +1,42 @@ +//===-- memmem implementation -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H +#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H + +#include + +namespace __llvm_libc { + +template +constexpr static void * +memmem_implementation(const void *haystack, size_t haystack_len, + const void *needle, size_t needle_len, Comp &&comp) { + // TODO: simple brute force implementation. This can be + // improved upon using well known string matching algorithms. + if (!needle_len) + return const_cast(haystack); + + if (needle_len > haystack_len) + return nullptr; + + const unsigned char *h = static_cast(haystack); + const unsigned char *n = static_cast(needle); + for (size_t i = 0; i <= (haystack_len - needle_len); ++i) { + size_t j = 0; + for (; j < needle_len && !comp(h[i + j], n[j]); ++j) + ; + if (j == needle_len) + return const_cast(h + i); + } + return nullptr; +} + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H diff --git a/libc/src/string/memory_utils/strstr_implementations.h b/libc/src/string/memory_utils/strstr_implementations.h deleted file mode 100644 --- a/libc/src/string/memory_utils/strstr_implementations.h +++ /dev/null @@ -1,33 +0,0 @@ -//===-- str{,case}str implementation ----------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_STRSTR_IMPLEMENTATIONS_H -#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_STRSTR_IMPLEMENTATIONS_H - -#include - -namespace __llvm_libc { - -template -constexpr static char *strstr_implementation(const char *haystack, - const char *needle, Comp &&comp) { - // TODO: This is a simple brute force implementation. This can be - // improved upon using well known string matching algorithms. - for (size_t i = 0; comp(haystack[i], 0); ++i) { - size_t j = 0; - for (; comp(haystack[i + j], 0) && !comp(haystack[i + j], needle[j]); ++j) - ; - if (!comp(needle[j], 0)) - return const_cast(haystack + i); - } - return nullptr; -} - -} // namespace __llvm_libc - -#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_STRSTR_IMPLEMENTATIONS_H diff --git a/libc/src/string/strcasestr.cpp b/libc/src/string/strcasestr.cpp --- a/libc/src/string/strcasestr.cpp +++ b/libc/src/string/strcasestr.cpp @@ -10,7 +10,8 @@ #include "src/__support/common.h" #include "src/__support/ctype_utils.h" -#include "src/string/memory_utils/strstr_implementations.h" +#include "src/string/memory_utils/memmem_implementations.h" +#include "src/string/string_utils.h" namespace __llvm_libc { @@ -22,7 +23,11 @@ return __llvm_libc::internal::tolower(a) - __llvm_libc::internal::tolower(b); }; - return strstr_implementation(haystack, needle, case_cmp); + void *result = memmem_implementation( + static_cast(haystack), internal::string_length(haystack), + static_cast(needle), internal::string_length(needle), + case_cmp); + return static_cast(result); } } // namespace __llvm_libc diff --git a/libc/src/string/strstr.cpp b/libc/src/string/strstr.cpp --- a/libc/src/string/strstr.cpp +++ b/libc/src/string/strstr.cpp @@ -9,7 +9,8 @@ #include "src/string/strstr.h" #include "src/__support/common.h" -#include "src/string/memory_utils/strstr_implementations.h" +#include "src/string/memory_utils/memmem_implementations.h" +#include "src/string/string_utils.h" namespace __llvm_libc { @@ -17,7 +18,10 @@ // improved upon using well known string matching algorithms. LLVM_LIBC_FUNCTION(char *, strstr, (const char *haystack, const char *needle)) { auto comp = [](char l, char r) -> int { return l - r; }; - return strstr_implementation(haystack, needle, comp); + void *result = memmem_implementation( + static_cast(haystack), internal::string_length(haystack), + static_cast(needle), internal::string_length(needle), comp); + return static_cast(result); } } // namespace __llvm_libc diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -34,6 +34,16 @@ libc.src.string.mempcpy ) +add_libc_unittest( + memmem_test + SUITE + libc_string_unittests + SRCS + memmem_test.cpp + DEPENDS + libc.src.string.memmem +) + add_libc_unittest( memchr_test SUITE diff --git a/libc/test/src/string/memmem_test.cpp b/libc/test/src/string/memmem_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/memmem_test.cpp @@ -0,0 +1,135 @@ +//===-- Unittests for memmem ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memmem.h" +#include "test/UnitTest/Test.h" + +#include "src/string/string_utils.h" + +namespace __llvm_libc { + +TEST(LlvmLibcMemmemTest, EmptyHaystackEmptyNeedleReturnsHaystck) { + char *h = nullptr; + char *n = nullptr; + void *result = __llvm_libc::memmem(h, 0, n, 0); + ASSERT_EQ(static_cast(result), h); +} + +TEST(LlvmLibcMemmemTest, EmptyHaystackNonEmptyNeedleReturnsNull) { + char *h = nullptr; + char n[] = {'a', 'b', 'c'}; + void *result = __llvm_libc::memmem(h, 0, n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); +} + +TEST(LlvmLibcMemmemTest, EmptyNeedleReturnsHaystack) { + char h[] = {'a', 'b', 'c'}; + char *n = nullptr; + void *result = __llvm_libc::memmem(h, sizeof(h), n, 0); + ASSERT_EQ(static_cast(result), h); +} + +TEST(LlvmLibcMemmemTest, ExactMatchReturnsHaystack) { + char h[] = {'a', 'b', 'c'}; + char n[] = {'a', 'b', 'c'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast(result), h); +} + +TEST(LlvmLibcMemmemTest, ReturnFirstMatchOfNeedle) { + char h[] = {'a', 'a', 'b', 'c'}; + char n[] = {'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast(result), h); +} + +TEST(LlvmLibcMemmemTest, ReturnFirstExactMatchOfNeedle) { + { + char h[] = {'a', 'b', 'a', 'c', 'a', 'a'}; + char n[] = {'a', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast(result), h + 4); + } + { + char h[] = {'a', 'a', 'b', 'a', 'b', 'a'}; + char n[] = {'a', 'b', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast(result), h + 1); + } +} + +TEST(LlvmLibcMemmemTest, NullTerminatorDoesNotInterruptMatch) { + char h[] = {'\0', 'a', 'b'}; + char n[] = {'a', 'b'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast(result), h + 1); +} + +TEST(LlvmLibcMemmemTest, ReturnNullIfNoExactMatch) { + { + char h[] = {'a'}; + char n[] = {'a', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } + { + char h[] = { + 'a', + 'A', + }; + char n[] = {'a', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } + { + char h[] = {'a'}; + char n[] = {'a', '\0'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } + { + char h[] = {'\0'}; + char n[] = { + '\0', + '\0', + }; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } +} + +TEST(LlvmLibcMemmemTest, ReturnMatchOfSpecifiedNeedleLength) { + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'x', 'y', 'z'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, 0); + ASSERT_EQ(static_cast(result), h); + } + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'b', 'c', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, 2); + ASSERT_EQ(static_cast(result), h + 1); + } +} + +TEST(LlvmLibcMemmemTest, ReturnNullIfInadequateHaystackLength) { + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'c'}; + void *result = __llvm_libc::memmem(h, 2, n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'a', 'b', 'c'}; + void *result = __llvm_libc::memmem(h, 2, n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } +} +} // namespace __llvm_libc diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1590,7 +1590,7 @@ "src/string/memory_utils/memmove_implementations.h", "src/string/memory_utils/memset_implementations.h", "src/string/memory_utils/strcmp_implementations.h", - "src/string/memory_utils/strstr_implementations.h", + "src/string/memory_utils/memmem_implementations.h", "src/string/memory_utils/x86_64/memcmp_implementations.h", "src/string/memory_utils/x86_64/memcpy_implementations.h", ],