diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -239,6 +239,14 @@ COMMON_INTERCEPT_FUNCTION(fn) #endif +#if SANITIZER_GLIBC +#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER(fn, ver) \ + COMMON_INTERCEPT_FUNCTION_VER(fn, ver) +#else +#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER(fn, ver) \ + COMMON_INTERCEPT_FUNCTION(fn) +#endif + #ifndef COMMON_INTERCEPTOR_MEMSET_IMPL #define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \ { \ @@ -7779,7 +7787,7 @@ } #define INIT_REGEX \ COMMON_INTERCEPT_FUNCTION(regcomp); \ - COMMON_INTERCEPT_FUNCTION(regexec); \ + COMMON_INTERCEPT_FUNCTION_GLIBC_VER(regexec, "GLIBC_2.3.4"); \ COMMON_INTERCEPT_FUNCTION(regerror); \ COMMON_INTERCEPT_FUNCTION(regfree); #else diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp @@ -0,0 +1,69 @@ +// RUN: %clangxx -O0 -g %s -o %t && %run %t +/// Check that REG_STARTEND is handled correctly. +/// This is a regression test for https://github.com/google/sanitizers/issues/1371 +/// Previously, on glibc systems, the interceptor was calling __compat_regexec +/// (regexec@GLIBC_2.2.5) instead of the newer __regexec (regexec@GLIBC_2.3.4). +/// The __compat_regexec strips the REG_STARTEND flag but does not report an error +/// if other flags are present. This can result in infinite loops for programs that +/// use REG_STARTEND to find all matches inside a buffer (since ignoring REG_STARTEND +/// means that the search always starts from the first character).
+
+#include <assert.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Match string[start,end) via REG_STARTEND; abort unless result == expected.
+void test_matched(const regex_t *preg, const char *string, size_t start,
+                  size_t end, const char *expected) { // nullptr: no match
+  regmatch_t match[1];
+  match[0].rm_so = start; // REG_STARTEND reads the window from match[0].
+  match[0].rm_eo = end;
+  int rv = regexec(preg, string, 1, match, REG_STARTEND);
+  if (rv == 0) {
+    int matchlen = (int)(match[0].rm_eo - match[0].rm_so); // set on success
+    const char *matchstart = string + match[0].rm_so;
+    printf("%s[%zu,%zu): matched: '%.*s'\n", string, start, end,
+           matchlen, matchstart);
+    if (expected == nullptr) {
+      printf("ERROR: expected no match but got '%.*s'\n",
+             matchlen, matchstart);
+      abort();
+    }
+    if ((size_t)matchlen != strlen(expected) ||
+        memcmp(matchstart, expected, strlen(expected)) != 0) {
+      printf("ERROR: expected '%s' match but got '%.*s'\n",
+             expected, matchlen, matchstart);
+      abort();
+    }
+    printf("SUCCESS\n");
+  } else if (rv == REG_NOMATCH) {
+    printf("%s[%zu,%zu): no match\n", string, start, end);
+    if (expected != nullptr) {
+      printf("ERROR: expected '%s' match but got no match\n", expected);
+      abort();
+    }
+    printf("SUCCESS\n");
+  } else {
+    printf("ERROR: unexpected regexec return value %d\n", rv);
+    abort();
+  }
+}
+
+int main(void) {
+  printf("Started test\n");
+  regex_t regex;
+  int rv = regcomp(&regex, "[A-Z][A-Z]", 0);
+  assert(!rv);
+  test_matched(&regex, "ABCD", 0, 4, "AB");
+  test_matched(&regex, "ABCD", 0, 1, nullptr); // Not long enough
+  test_matched(&regex, "ABCD", 1, 4, "BC");
+  test_matched(&regex, "ABCD", 1, 2, nullptr); // Not long enough
+  test_matched(&regex, "ABCD", 2, 4, "CD");
+  test_matched(&regex, "ABCD", 2, 3, nullptr); // Not long enough
+  test_matched(&regex, "ABCD", 3, 4, nullptr); // Not long enough
+  regfree(&regex);
+  printf("Finished test\n");
+  return 0;
+}