Index: lib/sanitizer_common/sanitizer_common_interceptors.inc
===================================================================
--- lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -7362,6 +7362,102 @@
 #define INIT_GETVFSSTAT
 #endif
 
+#if SANITIZER_INTERCEPT_REGEX
+// regcomp: checks the pattern string; on success marks *preg as written.
+INTERCEPTOR(int, regcomp, void *preg, const char *pattern, int cflags) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, regcomp, preg, pattern, cflags);
+  if (pattern)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, pattern, REAL(strlen)(pattern) + 1);
+  int res = REAL(regcomp)(preg, pattern, cflags);
+  if (!res)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, preg, struct_regex_sz);
+  return res;
+}
+// regexec: checks *preg and the subject string; on a match marks the nmatch
+// entries of pmatch as written.
+INTERCEPTOR(int, regexec, const void *preg, const char *string, SIZE_T nmatch,
+            struct __sanitizer_regmatch *pmatch, int eflags) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, regexec, preg, string, nmatch, pmatch, eflags);
+  if (preg)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
+  if (string)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, string, REAL(strlen)(string) + 1);
+  int res = REAL(regexec)(preg, string, nmatch, pmatch, eflags);
+  if (!res && pmatch)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pmatch, nmatch * struct_regmatch_sz);
+  return res;
+}
+// regerror: checks *preg and marks the message stored in errbuf as written.
+INTERCEPTOR(SIZE_T, regerror, int errcode, const void *preg, char *errbuf,
+            SIZE_T errbuf_size) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, regerror, errcode, preg, errbuf, errbuf_size);
+  if (preg)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
+  SIZE_T res = REAL(regerror)(errcode, preg, errbuf, errbuf_size);
+  // The return value is the size needed for the whole message and may exceed
+  // errbuf_size, so measure what was actually stored (it is NUL-terminated).
+  if (errbuf && errbuf_size > 0)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, errbuf, REAL(strlen)(errbuf) + 1);
+  return res;
+}
+// regfree: checks that *preg is readable, then calls the real regfree.
+INTERCEPTOR(void, regfree, const void *preg) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, regfree, preg);
+  if (preg)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
+  REAL(regfree)(preg);
+}
+// regnsub: checks sub, rm and str; marks the string stored in buf as written.
+INTERCEPTOR(SSIZE_T, regnsub, char *buf, SIZE_T bufsiz, const char *sub,
+            const struct __sanitizer_regmatch *rm, const char *str) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, regnsub, buf, bufsiz, sub, rm, str);
+  if (sub)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, REAL(strlen)(sub) + 1);
+  // The implementation demands and hardcodes 10 elements
+  if (rm)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, rm, 10 * struct_regmatch_sz);
+  if (str)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, str, REAL(strlen)(str) + 1);
+  SSIZE_T res = REAL(regnsub)(buf, bufsiz, sub, rm, str);
+  if (res > 0 && buf)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, REAL(strlen)(buf) + 1);
+  return res;
+}
+// regasub: checks sub, rm and sstr; marks *buf and its string as written.
+INTERCEPTOR(SSIZE_T, regasub, char **buf, const char *sub,
+            const struct __sanitizer_regmatch *rm, const char *sstr) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, regasub, buf, sub, rm, sstr);
+  if (sub)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, REAL(strlen)(sub) + 1);
+  // Hardcode 10 elements as this is hardcoded size
+  if (rm)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, rm, 10 * struct_regmatch_sz);
+  if (sstr)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, sstr, REAL(strlen)(sstr) + 1);
+  SSIZE_T res = REAL(regasub)(buf, sub, rm, sstr);
+  if (res > 0 && buf) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, sizeof(char *));
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *buf, REAL(strlen)(*buf) + 1);
+  }
+  return res;
+}
+#define INIT_REGEX                                                             \
+  COMMON_INTERCEPT_FUNCTION(regcomp);                                          \
+  COMMON_INTERCEPT_FUNCTION(regexec);                                          \
+  COMMON_INTERCEPT_FUNCTION(regerror);                                         \
+  COMMON_INTERCEPT_FUNCTION(regfree);                                          \
+  COMMON_INTERCEPT_FUNCTION(regnsub);                                          \
+  COMMON_INTERCEPT_FUNCTION(regasub);
+#else
+#define INIT_REGEX
+#endif
+
 static void InitializeCommonInterceptors() {
   static u64 metadata_mem[sizeof(MetadataHashMap) / sizeof(u64) + 1];
   interceptor_metadata_map = new((void *)&metadata_mem) MetadataHashMap();
@@ -7619,6 +7715,7 @@
   INIT_MI_VECTOR_HASH;
   INIT_SETVBUF;
   INIT_GETVFSSTAT;
+  INIT_REGEX;
 
   INIT___PRINTF_CHK;
 }
Index: lib/sanitizer_common/sanitizer_platform_interceptors.h
===================================================================
--- lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -521,5 +521,6 @@
 #define SANITIZER_INTERCEPT_GETMNTINFO SI_NETBSD
 #define SANITIZER_INTERCEPT_MI_VECTOR_HASH SI_NETBSD
 #define SANITIZER_INTERCEPT_GETVFSSTAT SI_NETBSD
+#define SANITIZER_INTERCEPT_REGEX SI_NETBSD
 
 #endif  // #ifndef SANITIZER_PLATFORM_INTERCEPTORS_H
Index: lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
===================================================================
--- lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
+++ lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
@@ -61,6 +61,14 @@
 
 extern unsigned struct_kevent_sz;
 
+extern unsigned struct_regex_sz;
+extern unsigned struct_regmatch_sz;
+
+struct __sanitizer_regmatch {
+  OFF_T rm_so;
+  OFF_T rm_eo;
+};
+
 union __sanitizer_sigval {
   int sival_int;
   uptr sival_ptr;
Index: lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
===================================================================
--- lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
+++ lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc
@@ -253,6 +253,8 @@
 unsigned struct_timespec_sz = sizeof(struct timespec);
 unsigned struct_sembuf_sz = sizeof(struct sembuf);
 unsigned struct_kevent_sz = sizeof(struct kevent);
+unsigned struct_regex_sz = sizeof(regex_t);
+unsigned struct_regmatch_sz = sizeof(regmatch_t);
 unsigned struct_utimbuf_sz = sizeof(struct utimbuf);
 unsigned struct_itimerspec_sz = sizeof(struct itimerspec);
 unsigned struct_timex_sz = sizeof(struct timex);
Index: test/sanitizer_common/TestCases/NetBSD/regex.cc
===================================================================
--- /dev/null
+++ test/sanitizer_common/TestCases/NetBSD/regex.cc
@@ -0,0 +1,105 @@
+// RUN: %clangxx -O0 -g %s -o %t && %run %t 2>&1 | FileCheck %s
+
+#include <err.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void test_matched(const regex_t *preg, const char *string) {
+  int rv = regexec(preg, string, 0, NULL, 0);
+  if (!rv)
+    printf("%s: matched\n", string);
+  else if (rv == REG_NOMATCH)
+    printf("%s: not-matched\n", string);
+  else
+    errx(1, "regexec\n");
+}
+
+void test_print_matches(const regex_t *preg, const char *string) {
+  regmatch_t rm[10];
+  int rv = regexec(preg, string, __arraycount(rm), rm, 0);
+  if (!rv) {
+    for (size_t i = 0; i < __arraycount(rm); i++) {
+      // This condition shall be simplified, but verify that the data fields
+      // are accessible.
+      if (rm[i].rm_so == -1 && rm[i].rm_eo == -1)
+        continue;
+      printf("matched[%zu]='%.*s'\n", i, (int)(rm[i].rm_eo - rm[i].rm_so),
+             string + rm[i].rm_so);
+    }
+  } else if (rv == REG_NOMATCH)
+    printf("%s: not-matched\n", string);
+  else
+    errx(1, "regexec\n");
+}
+
+void test_nsub(const regex_t *preg, const char *string) {
+  regmatch_t rm[10];
+  int rv = regexec(preg, string, __arraycount(rm), rm, 0);
+  if (!rv) {
+    char buf[1024];
+    ssize_t ss = regnsub(buf, __arraycount(buf), "\\1xyz", rm, string);
+    if (ss == -1)
+      errx(1, "regnsub");
+    printf("'%s' -> '%s'\n", string, buf);
+  } else if (rv == REG_NOMATCH)
+    printf("%s: not-matched\n", string);
+  else
+    errx(1, "regexec\n");
+}
+
+void test_asub(const regex_t *preg, const char *string) {
+  regmatch_t rm[10];
+  int rv = regexec(preg, string, __arraycount(rm), rm, 0);
+  if (!rv) {
+    char *buf;
+    ssize_t ss = regasub(&buf, "\\1xyz", rm, string);
+    if (ss == -1)
+      errx(1, "regasub");
+    printf("'%s' -> '%s'\n", string, buf);
+    free(buf);
+  } else if (rv == REG_NOMATCH)
+    printf("%s: not-matched\n", string);
+  else
+    errx(1, "regexec\n");
+}
+
+int main(void) {
+  regex_t regex;
+  int rv;
+
+  printf("regex\n");
+
+  rv = regcomp(&regex, "[[:upper:]]\\([[:upper:]]\\)", 0);
+  if (rv)
+    errx(1, "regcomp");
+
+  test_matched(&regex, "abc");
+  test_matched(&regex, "ABC");
+
+  test_print_matches(&regex, "ABC");
+
+  test_nsub(&regex, "ABC DEF");
+  test_asub(&regex, "GHI JKL");
+
+  regfree(&regex);
+
+  rv = regcomp(&regex, "[[:upp:]]", 0);
+  if (!rv)
+    errx(1, "regcomp finished successfully");
+
+  char errbuf[1024];
+  regerror(rv, &regex, errbuf, sizeof errbuf);
+  printf("error: %s\n", errbuf);
+
+  // CHECK: regex
+  // CHECK: abc: not-matched
+  // CHECK: ABC: matched
+  // CHECK: matched[0]='AB'
+  // CHECK: matched[1]='B'
+  // CHECK: 'ABC DEF' -> 'Bxyz'
+  // CHECK: 'GHI JKL' -> 'Hxyz'
+  // CHECK: error:{{.*}}
+
+  return 0;
+}