diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h --- a/compiler-rt/lib/asan/asan_interceptors.h +++ b/compiler-rt/lib/asan/asan_interceptors.h @@ -145,6 +145,13 @@ VReport(1, "AddressSanitizer: failed to intercept '%s@@%s'\n", #name, \ #ver); \ } while (0) +#define ASAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) \ + do { \ + if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name)) \ + VReport(1, "AddressSanitizer: failed to intercept '%s@@%s' or '%s'\n", \ + #name, #ver, #name); \ + } while (0) + #else // OS X interceptors don't need to be initialized with INTERCEPT_FUNCTION. #define ASAN_INTERCEPT_FUNC(name) diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -90,8 +90,10 @@ (void) ctx; \ #define COMMON_INTERCEPT_FUNCTION(name) ASAN_INTERCEPT_FUNC(name) -#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \ +#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \ ASAN_INTERCEPT_FUNC_VER(name, ver) +#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \ + ASAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \ ASAN_WRITE_RANGE(ctx, ptr, size) #define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \ @@ -672,6 +674,7 @@ // Intercept threading-related functions #if ASAN_INTERCEPT_PTHREAD_CREATE +// TODO: this should probably have an unversioned fallback for newer arches? #if defined(ASAN_PTHREAD_CREATE_VERSION) ASAN_INTERCEPT_FUNC_VER(pthread_create, ASAN_PTHREAD_CREATE_VERSION); #else diff --git a/compiler-rt/lib/memprof/memprof_interceptors.h b/compiler-rt/lib/memprof/memprof_interceptors.h --- a/compiler-rt/lib/memprof/memprof_interceptors.h +++ b/compiler-rt/lib/memprof/memprof_interceptors.h @@ -50,5 +50,11 @@ if (!INTERCEPT_FUNCTION_VER(name, ver)) \ VReport(1, "MemProfiler: failed to intercept '%s@@%s'\n", #name, #ver); \ } while (0) +#define MEMPROF_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) \ + do { \ + if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name)) \ + VReport(1, "MemProfiler: failed to intercept '%s@@%s' or '%s'\n", #name, \ + #ver, #name); \ + } while (0) #endif // MEMPROF_INTERCEPTORS_H diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp --- a/compiler-rt/lib/memprof/memprof_interceptors.cpp +++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp @@ -59,6 +59,8 @@ #define COMMON_INTERCEPT_FUNCTION(name) MEMPROF_INTERCEPT_FUNC(name) #define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \ MEMPROF_INTERCEPT_FUNC_VER(name, ver) +#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \ + MEMPROF_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \ MEMPROF_WRITE_RANGE(ptr, size) #define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \ diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp --- a/compiler-rt/lib/msan/msan_interceptors.cpp +++ b/compiler-rt/lib/msan/msan_interceptors.cpp @@ -1257,10 +1257,18 @@ VReport(1, "MemorySanitizer: failed to intercept '%s@@%s'\n", #name, \ #ver); \ } while (0) +#define MSAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) \ + do { \ + if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name)) \ + VReport(1, "MemorySanitizer: failed to intercept '%s@@%s' or '%s'\n", \ + #name, #ver, #name); \ + } while (0) #define COMMON_INTERCEPT_FUNCTION(name) MSAN_INTERCEPT_FUNC(name) -#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \ +#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \ MSAN_INTERCEPT_FUNC_VER(name, ver) +#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \ + MSAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) #define COMMON_INTERCEPTOR_UNPOISON_PARAM(count) \ UnpoisonParam(count) #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \ diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -239,6 +239,23 @@ COMMON_INTERCEPT_FUNCTION(fn) #endif +#if SANITIZER_GLIBC +// If we could not find the versioned symbol, fall back to an unversioned +// lookup. This is needed to work around a GLibc bug that causes dlsym +// with RTLD_NEXT to return the oldest versioned symbol. +// See https://sourceware.org/bugzilla/show_bug.cgi?id=14932. +// For certain symbols (e.g. regexec) we have to perform a versioned lookup, +// but that versioned symbol will only exist for architectures where the +// oldest Glibc version pre-dates support for that architecture. +// For example, regexec@GLIBC_2.3.4 exists on x86_64, but not RISC-V. +// See also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98920. +#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(fn, ver) \ + COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(fn, ver) +#else +#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(fn, ver) \ + COMMON_INTERCEPT_FUNCTION(fn) +#endif + #ifndef COMMON_INTERCEPTOR_MEMSET_IMPL #define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \ { \ @@ -7781,7 +7798,7 @@ } #define INIT_REGEX \ COMMON_INTERCEPT_FUNCTION(regcomp); \ - COMMON_INTERCEPT_FUNCTION(regexec); \ + COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(regexec, "GLIBC_2.3.4"); \ COMMON_INTERCEPT_FUNCTION(regerror); \ COMMON_INTERCEPT_FUNCTION(regfree); #else diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -2263,6 +2263,8 @@ #define COMMON_INTERCEPT_FUNCTION(name) INTERCEPT_FUNCTION(name) #define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \ INTERCEPT_FUNCTION_VER(name, ver) +#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \ + (INTERCEPT_FUNCTION_VER(name, ver) || INTERCEPT_FUNCTION(name)) #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \ MemoryAccessRange(((TsanInterceptorContext *)ctx)->thr, \ diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/regex_startend.cpp @@ -0,0 +1,61 @@ +// RUN: %clangxx -O0 -g %s -o %t && %run %t +/// Check that REG_STARTEND is handled correctly. +/// This is a regression test for https://github.com/google/sanitizers/issues/1371 +/// Previously, on GLibc systems, the interceptor was calling __compat_regexec +/// (regexec@GLIBC_2.2.5) insead of the newer __regexec (regexec@GLIBC_2.3.4). +/// The __compat_regexec strips the REG_STARTEND flag but does not report an error +/// if other flags are present. This can result in infinite loops for programs that +/// use REG_STARTEND to find all matches inside a buffer (since ignoring REG_STARTEND +/// means that the search always starts from the first character). + +#include +#include +#include +#include +#include + +void test_matched(const regex_t *preg, const char *string, size_t start, + size_t end, const char *expected) { + regmatch_t match[1]; + match[0].rm_so = start; + match[0].rm_eo = end; + int rv = regexec(preg, string, 1, match, REG_STARTEND); + int matchlen = (int)(match[0].rm_eo - match[0].rm_so); + const char *matchstart = string + match[0].rm_so; + if (rv == 0) { + if (expected == nullptr) { + fprintf(stderr, "ERROR: expected no match but got '%.*s'\n", + matchlen, matchstart); + abort(); + } else if ((size_t)matchlen != strlen(expected) || + memcmp(matchstart, expected, strlen(expected)) != 0) { + fprintf(stderr, "ERROR: expected '%s' match but got '%.*s'\n", + expected, matchlen, matchstart); + abort(); + } + } else if (rv == REG_NOMATCH) { + if (expected != nullptr) { + fprintf(stderr, "ERROR: expected '%s' match but got no match\n", expected); + abort(); + } + } else { + printf("ERROR: unexpected regexec return value %d\n", rv); + abort(); + } +} + +int main(void) { + regex_t regex; + int rv = regcomp(®ex, "[A-Z][A-Z]", 0); + assert(!rv); + test_matched(®ex, "ABCD", 0, 4, "AB"); + test_matched(®ex, "ABCD", 0, 1, nullptr); // Not long enough + test_matched(®ex, "ABCD", 1, 4, "BC"); + test_matched(®ex, "ABCD", 1, 2, nullptr); // Not long enough + test_matched(®ex, "ABCD", 2, 4, "CD"); + test_matched(®ex, "ABCD", 2, 3, nullptr); // Not long enough + test_matched(®ex, "ABCD", 3, 4, nullptr); // Not long enough + regfree(®ex); + printf("Successful test\n"); + return 0; +}