Index: lib/sanitizer_common/sanitizer_common_interceptors.inc
===================================================================
--- lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -494,6 +494,48 @@
 #define INIT_STRCASESTR
 #endif
 
+#if SANITIZER_INTERCEPT_STRTOK
+
+INTERCEPTOR(char*, strtok, char *str, const char *delimiters) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, strtok, str, delimiters);
+  if (!common_flags()->intercept_strtok) {
+    return REAL(strtok)(str, delimiters);
+  }
+  if (common_flags()->strict_string_checks) {
+    // If strict_string_checks is enabled, we check the whole first argument
+    // string on the first call (strtok saves this string in a static buffer
+    // for subsequent calls). We do not need to check strtok's result.
+    // As the delimiters can change, we check them every call.
+    if (str != nullptr) {
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, str, REAL(strlen)(str) + 1);
+    }
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters,
+                                  REAL(strlen)(delimiters) + 1);
+    return REAL(strtok)(str, delimiters);
+  } else {
+    // However, when strict_string_checks is disabled we cannot check the
+    // whole string on the first call. Instead, we check the result string
+    // which is guaranteed to be a NUL-terminated substring of the first
+    // argument. We also conservatively check one character of str and the
+    // delimiters.
+    if (str != nullptr) {
+      COMMON_INTERCEPTOR_READ_STRING(ctx, str, 1);
+    }
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters, 1);
+    char *result = REAL(strtok)(str, delimiters);
+    if (result != nullptr) {
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, result, REAL(strlen)(result) + 1);
+    }
+    return result;
+  }
+}
+
+#define INIT_STRTOK COMMON_INTERCEPT_FUNCTION(strtok)
+#else
+#define INIT_STRTOK
+#endif
+
 #if SANITIZER_INTERCEPT_MEMMEM
 DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memmem, uptr called_pc,
                               const void *s1, SIZE_T len1, const void *s2,
@@ -6052,6 +6094,7 @@
   INIT_STRCHRNUL;
   INIT_STRRCHR;
   INIT_STRSPN;
+  INIT_STRTOK;
   INIT_STRPBRK;
   INIT_MEMSET;
   INIT_MEMMOVE;
Index: lib/sanitizer_common/sanitizer_flags.inc
===================================================================
--- lib/sanitizer_common/sanitizer_flags.inc
+++ lib/sanitizer_common/sanitizer_flags.inc
@@ -191,6 +191,9 @@
 COMMON_FLAG(bool, intercept_strspn, true,
             "If set, uses custom wrappers for strspn and strcspn function "
             "to find more errors.")
+COMMON_FLAG(bool, intercept_strtok, true,
+            "If set, uses a custom wrapper for the strtok function "
+            "to find more errors.")
 COMMON_FLAG(bool, intercept_strpbrk, true,
             "If set, uses custom wrappers for strpbrk function "
             "to find more errors.")
Index: lib/sanitizer_common/sanitizer_platform_interceptors.h
===================================================================
--- lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -74,6 +74,7 @@
 #define SANITIZER_INTERCEPT_STRCMP 1
 #define SANITIZER_INTERCEPT_STRSTR 1
 #define SANITIZER_INTERCEPT_STRCASESTR SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_STRTOK 1
 #define SANITIZER_INTERCEPT_STRCHR 1
 #define SANITIZER_INTERCEPT_STRCHRNUL SI_UNIX_NOT_MAC
 #define SANITIZER_INTERCEPT_STRRCHR 1
Index: test/asan/TestCases/strtok.c
===================================================================
--- test/asan/TestCases/strtok.c
+++ test/asan/TestCases/strtok.c
@@ -0,0 +1,108 @@
+// RUN: %clang_asan %s -o %t
+
+// Test overflows with strict_string_checks
+
+// RUN: %env_asan_opts=strict_string_checks=true not %run %t test1 2>&1 | \
+// RUN:    FileCheck %s --check-prefix=CHECK1
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test1 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true not %run %t test2 2>&1 | \
+// RUN:    FileCheck %s --check-prefix=CHECK2
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test2 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true not %run %t test3 2>&1 | \
+// RUN:    FileCheck %s --check-prefix=CHECK3
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test3 2>&1
+// RUN: %env_asan_opts=strict_string_checks=true %run %t test4 2>&1
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test4 2>&1
+
+// Test overflows with !strict_string_checks
+// RUN: %env_asan_opts=strict_string_checks=false not %run %t test5 2>&1 | \
+// RUN:    FileCheck %s --check-prefix=CHECK5
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test5 2>&1
+// RUN: %env_asan_opts=strict_string_checks=false not %run %t test6 2>&1 | \
+// RUN:    FileCheck %s --check-prefix=CHECK6
+// RUN: %env_asan_opts=intercept_strtok=false %run %t test6 2>&1
+
+
+#include <assert.h>
+#include <string.h>
+#include <sanitizer/asan_interface.h>
+
+// Check that we find overflows in the delimiters on the first call
+// with strict_string_checks.
+void test1() {
+  char *token;
+  char s[4] = "abc";
+  char token_delimiter[2] = "b";
+  __asan_poison_memory_region ((char *)&token_delimiter[1], 2);
+  token = strtok(s, token_delimiter);
+  // CHECK1:'token_delimiter' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  assert(strcmp(token, "a") == 0);
+}
+
+// Check that we find overflows in the delimiters on the second call (str == NULL)
+// with strict_string_checks.
+void test2() {
+  char *token;
+  char s[4] = "abc";
+  char token_delimiter[2] = "b";
+  token = strtok(s, token_delimiter);
+  assert(strcmp(token, "a") == 0);
+  __asan_poison_memory_region ((char *)&token_delimiter[1], 2);
+  token = strtok(NULL, token_delimiter);
+  // CHECK2:'token_delimiter' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  assert(strcmp(token, "c") == 0);
+}
+
+// Check that we find overflows in the string (only on the first call) with strict_string_checks.
+void test3() {
+  char *token;
+  char s[4] = "abc";
+  char token_delimiter[2] = "b";
+  __asan_poison_memory_region ((char *)&s[3], 2);
+  token = strtok(s, token_delimiter);
+  // CHECK3:'s' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  assert(token == s);
+}
+
+// Check that we do not crash when strtok returns NULL with strict_string_checks.
+void test4() {
+  char *token;
+  char s[] = "";
+  char token_delimiter[] = "a";
+  token = strtok(s, token_delimiter);
+  assert(token == NULL);
+}
+
+// Check that we find overflows in the string (only on the first call) with !strict_string_checks.
+void test5() {
+  char *token;
+  char s[4] = "abc";
+  char token_delimiter[2] = "d";
+  __asan_poison_memory_region ((char *)&s[2], 2);
+  __asan_poison_memory_region ((char *)&token_delimiter[1], 2);
+  token = strtok(s, token_delimiter);
+  // CHECK5:'s' <== Memory access at offset {{[0-9]+}} partially overflows this variable
+  assert(token == s);
+}
+
+// Check that we find overflows in the delimiters (only on the first call) with !strict_string_checks.
+void test6() {
+  char *token;
+  char s[4] = "abc";
+  char token_delimiter[1] = {'d'};
+  __asan_poison_memory_region ((char *)&token_delimiter[1], 2);
+  token = strtok(s, &token_delimiter[1]);
+  // CHECK6:'token_delimiter' <== Memory access at offset {{[0-9]+}} overflows this variable
+  assert(strcmp(token, "abc") == 0);
+}
+
+int main(int argc, char **argv) {
+  if (argc != 2) return 1;
+  if (!strcmp(argv[1], "test1")) test1();
+  if (!strcmp(argv[1], "test2")) test2();
+  if (!strcmp(argv[1], "test3")) test3();
+  if (!strcmp(argv[1], "test4")) test4();
+  if (!strcmp(argv[1], "test5")) test5();
+  if (!strcmp(argv[1], "test6")) test6();
+  return 0;
+}