diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -28,6 +28,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt --- a/libc/config/darwin/arm/entrypoints.txt +++ b/libc/config/darwin/arm/entrypoints.txt @@ -28,6 +28,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/darwin/x86_64/entrypoints.txt b/libc/config/darwin/x86_64/entrypoints.txt --- a/libc/config/darwin/x86_64/entrypoints.txt +++ b/libc/config/darwin/x86_64/entrypoints.txt @@ -24,6 +24,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -24,6 +24,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -37,6 +37,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -28,6 +28,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/riscv64/entrypoints.txt b/libc/config/linux/riscv64/entrypoints.txt --- a/libc/config/linux/riscv64/entrypoints.txt +++ b/libc/config/linux/riscv64/entrypoints.txt @@ -37,6 +37,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -37,6 +37,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt --- a/libc/config/windows/entrypoints.txt +++ b/libc/config/windows/entrypoints.txt @@ -25,6 +25,7 @@ libc.src.string.memchr libc.src.string.memcmp libc.src.string.memcpy + libc.src.string.memmem libc.src.string.memmove libc.src.string.mempcpy libc.src.string.memrchr diff --git a/libc/spec/gnu_ext.td b/libc/spec/gnu_ext.td --- a/libc/spec/gnu_ext.td +++ b/libc/spec/gnu_ext.td @@ -57,7 +57,12 @@ [], // Macros [], // Types [], // Enumerations - [ + [ + FunctionSpec< + "memmem", + RetValSpec, + [ArgSpec, ArgSpec, ArgSpec, ArgSpec, FunctionSpec< "memrchr", RetValSpec, diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -49,6 +49,16 @@ .memory_utils.memcpy_implementation ) +add_entrypoint_object( + memmem + SRCS + memmem.cpp + HDRS + memmem.h + DEPENDS + .memory_utils.memmem_implementation +) + add_entrypoint_object( memchr SRCS diff --git a/libc/src/string/memmem.h b/libc/src/string/memmem.h new file mode 100644 --- /dev/null +++ b/libc/src/string/memmem.h @@ -0,0 +1,21 @@ +//===-- Implementation header for memmem ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMMEM_H +#define LLVM_LIBC_SRC_STRING_MEMMEM_H + +#include // For size_t + +namespace __llvm_libc { + +void *memmem(const void *haystack, size_t haystack_len, const void *needle, + size_t needle_len); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMMEM_H diff --git a/libc/src/string/memmem.cpp b/libc/src/string/memmem.cpp new file mode 100644 --- /dev/null +++ b/libc/src/string/memmem.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of memmem ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memmem.h" +#include "src/__support/common.h" +#include "src/string/memory_utils/memmem_implementations.h" + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(void *, memmem, + (const void *haystack, size_t haystack_len, + const void *needle, size_t needle_len)) { + constexpr auto comp = [](unsigned char l, unsigned char r) -> int { + return l - r; + }; + return memmem_implementation(haystack, haystack_len, needle, needle_len, + comp); +} + +} // namespace __llvm_libc diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt --- a/libc/src/string/memory_utils/CMakeLists.txt +++ b/libc/src/string/memory_utils/CMakeLists.txt @@ -72,7 +72,13 @@ ) add_header_library( - strstr_implementation + strstr_implementation HDRS strstr_implementations.h ) + +add_header_library( + memmem_implementation + HDRS + memmem_implementations.h +) diff --git a/libc/src/string/memory_utils/memmem_implementations.h b/libc/src/string/memory_utils/memmem_implementations.h new file mode 100644 --- /dev/null +++ b/libc/src/string/memory_utils/memmem_implementations.h @@ -0,0 +1,42 @@ +//===-- memmem implementation -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H +#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H + +#include + +namespace __llvm_libc { + +template +constexpr static void * +memmem_implementation(const void *haystack, size_t haystack_len, + const void *needle, size_t needle_len, Comp &&comp) { + // TODO: simple brute force implementation. This can be + // improved upon using well known string matching algorithms. + if (!needle_len) + return const_cast(haystack); + + if (needle_len > haystack_len) + return nullptr; + + const unsigned char *h = static_cast(haystack); + const unsigned char *n = static_cast(needle); + for (size_t i = 0; i <= (haystack_len - needle_len); ++i) { + size_t j = 0; + for (; j < needle_len && !comp(h[i + j], n[j]); ++j) + ; + if (j == needle_len) + return const_cast(h + i); + } + return nullptr; +} + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMMEM_IMPLEMENTATIONS_H diff --git a/libc/src/string/memory_utils/strstr_implementations.h b/libc/src/string/memory_utils/strstr_implementations.h --- a/libc/src/string/memory_utils/strstr_implementations.h +++ b/libc/src/string/memory_utils/strstr_implementations.h @@ -9,6 +9,8 @@ #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_STRSTR_IMPLEMENTATIONS_H #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_STRSTR_IMPLEMENTATIONS_H +#include "src/string/memory_utils/memmem_implementations.h" +#include "src/string/string_utils.h" #include namespace __llvm_libc { @@ -16,16 +18,10 @@ template constexpr static char *strstr_implementation(const char *haystack, const char *needle, Comp &&comp) { - // TODO: This is a simple brute force implementation. This can be - // improved upon using well known string matching algorithms. - for (size_t i = 0; comp(haystack[i], 0); ++i) { - size_t j = 0; - for (; comp(haystack[i + j], 0) && !comp(haystack[i + j], needle[j]); ++j) - ; - if (!comp(needle[j], 0)) - return const_cast(haystack + i); - } - return nullptr; + void *result = memmem_implementation( + static_cast(haystack), internal::string_length(haystack), + static_cast(needle), internal::string_length(needle), comp); + return static_cast(result); } } // namespace __llvm_libc diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -34,6 +34,16 @@ libc.src.string.mempcpy ) +add_libc_unittest( + memmem_test + SUITE + libc_string_unittests + SRCS + memmem_test.cpp + DEPENDS + libc.src.string.memmem +) + add_libc_unittest( memchr_test SUITE diff --git a/libc/test/src/string/memmem_test.cpp b/libc/test/src/string/memmem_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/string/memmem_test.cpp @@ -0,0 +1,129 @@ +//===-- Unittests for memmem ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/memmem.h" +#include "test/UnitTest/Test.h" + +#include "src/string/string_utils.h" + +namespace __llvm_libc { + +TEST(LlvmLibcMemmemTest, EmptyHaystackEmptyNeedleReturnsHaystck) { + char *h = nullptr; + char *n = nullptr; + void *result = __llvm_libc::memmem(h, 0, n, 0); + ASSERT_EQ(static_cast(result), h); +} + +TEST(LlvmLibcMemmemTest, EmptyHaystackNonEmptyNeedleReturnsNull) { + char *h = nullptr; + char n[] = {'a', 'b', 'c'}; + void *result = __llvm_libc::memmem(h, 0, n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); +} + +TEST(LlvmLibcMemmemTest, EmptyNeedleReturnsHaystack) { + char h[] = {'a', 'b', 'c'}; + char *n = nullptr; + void *result = __llvm_libc::memmem(h, sizeof(h), n, 0); + ASSERT_EQ(static_cast(result), h); +} + +TEST(LlvmLibcMemmemTest, ExactMatchReturnsHaystack) { + char h[] = {'a', 'b', 'c'}; + char n[] = {'a', 'b', 'c'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast(result), h); +} + +TEST(LlvmLibcMemmemTest, ReturnFirstMatchOfNeedle) { + char h[] = {'a', 'a', 'b', 'c'}; + char n[] = {'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast(result), h); +} + +TEST(LlvmLibcMemmemTest, ReturnFirstExactMatchOfNeedle) { + { + char h[] = {'a', 'b', 'a', 'c', 'a', 'a'}; + char n[] = {'a', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast(result), h + 4); + } + { + char h[] = {'a', 'a', 'b', 'a', 'b', 'a'}; + char n[] = {'a', 'b', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast(result), h + 1); + } +} + +TEST(LlvmLibcMemmemTest, NullTerminatorDoesNotInterruptMatch) { + char h[] = {'\0', 'a', 'b'}; + char n[] = {'a', 'b'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(static_cast(result), h + 1); +} + +TEST(LlvmLibcMemmemTest, ReturnNullIfNoExactMatch) { + { + char h[] = {'a'}; + char n[] = {'a', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } + { + char h[] = {'a', 'A'}; + char n[] = {'a', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } + { + char h[] = {'a'}; + char n[] = {'a', '\0'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } + { + char h[] = {'\0'}; + char n[] = {'\0', '\0'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } +} + +TEST(LlvmLibcMemmemTest, ReturnMatchOfSpecifiedNeedleLength) { + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'x', 'y', 'z'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, 0); + ASSERT_EQ(static_cast(result), h); + } + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'b', 'c', 'a'}; + void *result = __llvm_libc::memmem(h, sizeof(h), n, 2); + ASSERT_EQ(static_cast(result), h + 1); + } +} + +TEST(LlvmLibcMemmemTest, ReturnNullIfInadequateHaystackLength) { + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'c'}; + void *result = __llvm_libc::memmem(h, 2, n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } + { + char h[] = {'a', 'b', 'c'}; + char n[] = {'a', 'b', 'c'}; + void *result = __llvm_libc::memmem(h, 2, n, sizeof(n)); + ASSERT_EQ(result, static_cast(nullptr)); + } +} +} // namespace __llvm_libc diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1591,6 +1591,7 @@ "src/string/memory_utils/memset_implementations.h", "src/string/memory_utils/strcmp_implementations.h", "src/string/memory_utils/strstr_implementations.h", + "src/string/memory_utils/memmem_implementations.h", "src/string/memory_utils/x86_64/memcmp_implementations.h", "src/string/memory_utils/x86_64/memcpy_implementations.h", ],