Index: lib/dfsan/dfsan_custom.cc
===================================================================
--- lib/dfsan/dfsan_custom.cc
+++ lib/dfsan/dfsan_custom.cc
@@ -18,6 +18,7 @@
 #include "dfsan/dfsan.h"
 
 #include
+#include
 #include
 #include
 #include
@@ -26,6 +27,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -839,4 +842,304 @@
   *ret_label = 0;
   return write(fd, buf, count);
 }
+
+// Type used to extract a dfsan_label with va_arg()
+typedef int dfsan_label_va;
+
+// A chunk of data representing the output of formatting either a constant
+// string or a single format directive.
+struct Chunk {
+  // Address of the beginning of the formatted string
+  const char *ptr;
+  // Size of the formatted string
+  size_t size;
+
+  // Type of DFSan label (depends on the format directive)
+  enum {
+    // Constant string, no argument and thus no label
+    NONE = 0,
+    // Label for an argument of '%n'
+    IGNORED,
+    // Label for a '%s' argument
+    STRING,
+    // Label for any other type of argument
+    NUMERIC,
+  } label_type;
+
+  // Value of the argument (if label_type == STRING)
+  const char *arg;
+};
+
+// Formats the input. The output is stored in 'str' starting from offset
+// 'off'. The format directive is represented by the first 'format_size' bytes
+// of 'format'. If 'has_size' is true, 'size' bounds the number of output
+// bytes. Returns the return value of the vsnprintf call used to format the
+// input.
+static int format_chunk(char *str, size_t off, bool has_size, size_t size,
+                        const char *format, size_t format_size, ...) {
+  char *chunk_format = (char *) malloc(format_size + 1);
+  assert(chunk_format);
+  internal_memcpy(chunk_format, format, format_size);
+  chunk_format[format_size] = '\0';
+
+  va_list ap;
+  va_start(ap, format_size);
+  int r = 0;
+  if (has_size) {
+    r = vsnprintf(str + off, off < size ? size - off : 0, chunk_format, ap);
+  } else {
+    r = vsprintf(str + off, chunk_format, ap);
+  }
+  va_end(ap);
+
+  free(chunk_format);
+  return r;
+}
+
+// Formats the input and propagates the input labels to the output. The output
+// is stored in 'str'. If 'has_size' is true, 'size' bounds the number of
+// output bytes. 'format' and 'ap' are the format string and the list of
+// arguments for formatting. Returns the return value vsnprintf would return.
+//
+// The function tokenizes the format string in chunks representing either a
+// constant string or a single format directive (e.g., '%.3f') and formats each
+// chunk independently into the output string. This approach makes it possible
+// to figure out which bytes of the output string depend on which argument and
+// thus to propagate labels more precisely.
+//
+// NOTE: '*' field widths and precisions are not handled: their int arguments
+// (and the matching labels) are not consumed.
+static int format_buffer(char *str, bool has_size, size_t size,
+                         const char *format, va_list ap) {
+  Chunk *chunks = nullptr;
+  size_t num_chunks = 0;
+  size_t off = 0;
+
+  while (*format) {
+    chunks = (Chunk *)realloc(chunks, sizeof(Chunk) * ++num_chunks);
+    assert(chunks);
+    Chunk *chunk = chunks + (num_chunks - 1);
+    chunk->ptr = str + off;
+    chunk->arg = nullptr;
+    // Default for a directive that ends before its conversion specifier.
+    chunk->label_type = Chunk::NONE;
+
+    int status = 0;
+
+    if (*format != '%') {
+      // Ordinary character. Consume all the characters until a '%' or the end
+      // of the string.
+      size_t format_size = 0;
+      for (; *format && *format != '%'; ++format, ++format_size) {}
+      status = format_chunk(str, off, has_size, size, format - format_size,
+                            format_size);
+      chunk->label_type = Chunk::NONE;
+    } else {
+      // Conversion directive. Consume all the characters until a conversion
+      // specifier or the end of the string.
+      bool end_format = false;
+#define FORMAT_CHUNK(t)                                                  \
+      format_chunk(str, off, has_size, size, format - format_size,      \
+                   format_size + 1, va_arg(ap, t))
+
+      for (size_t format_size = 1; *++format && !end_format; ++format_size) {
+        switch (*format) {
+        case 'd':
+        case 'i':
+        case 'o':
+        case 'u':
+        case 'x':
+        case 'X':
+          switch (*(format - 1)) {
+          case 'h':
+            // Also covers the 'hh' case (since the size of the arg is still
+            // an int).
+            status = FORMAT_CHUNK(int);
+            break;
+          case 'l':
+            if (format_size >= 2 && *(format - 2) == 'l') {
+              status = FORMAT_CHUNK(long long int);
+            } else {
+              status = FORMAT_CHUNK(long int);
+            }
+            break;
+          case 'q':
+            status = FORMAT_CHUNK(long long int);
+            break;
+          case 'j':
+            status = FORMAT_CHUNK(intmax_t);
+            break;
+          case 'z':
+            status = FORMAT_CHUNK(size_t);
+            break;
+          case 't':
+            status = FORMAT_CHUNK(size_t);
+            break;
+          default:
+            status = FORMAT_CHUNK(int);
+          }
+          chunk->label_type = Chunk::NUMERIC;
+          end_format = true;
+          break;
+
+        case 'a':
+        case 'A':
+        case 'e':
+        case 'E':
+        case 'f':
+        case 'F':
+        case 'g':
+        case 'G':
+          if (*(format - 1) == 'L') {
+            status = FORMAT_CHUNK(long double);
+          } else {
+            status = FORMAT_CHUNK(double);
+          }
+          chunk->label_type = Chunk::NUMERIC;
+          end_format = true;
+          break;
+
+        case 'c':
+          status = FORMAT_CHUNK(int);
+          chunk->label_type = Chunk::NUMERIC;
+          end_format = true;
+          break;
+
+        case 's':
+          chunk->arg = va_arg(ap, char *);
+          status =
+              format_chunk(str, off, has_size, size,
+                           format - format_size, format_size + 1,
+                           chunk->arg);
+          chunk->label_type = Chunk::STRING;
+          end_format = true;
+          break;
+
+        case 'p':
+          status = FORMAT_CHUNK(void *);
+          chunk->label_type = Chunk::NUMERIC;
+          end_format = true;
+          break;
+
+        case 'n':
+          *(va_arg(ap, int *)) = (int)off;
+          chunk->label_type = Chunk::IGNORED;
+          end_format = true;
+          break;
+
+        case '%':
+          status = format_chunk(str, off, has_size, size,
+                                format - format_size, format_size + 1);
+          chunk->label_type = Chunk::NONE;
+          end_format = true;
+          break;
+
+        default:
+          break;
+        }
+      }
+#undef FORMAT_CHUNK
+    }
+
+    if (status < 0) {
+      free(chunks);
+      return status;
+    }
+
+    // A return value of {v,}snprintf of size or more means that the output was
+    // truncated.
+    if (has_size) {
+      if (off < size) {
+        size_t ustatus = (size_t) status;
+        size_t avail = size - off;
+        // On truncation only 'avail - 1' bytes are stored, followed by the
+        // terminating NUL.
+        chunk->size = ustatus >= avail ? avail - 1 : ustatus;
+      } else {
+        chunk->size = 0;
+      }
+    } else {
+      chunk->size = status;
+    }
+    off += status;
+  }
+
+  // Consume the labels of the output buffer, (optional) size, and format
+  // string.
+  //
+  // TODO(martignlo): Decide how to combine labels (e.g., whether to ignore or
+  // not the label of the format string).
+  va_arg(ap, dfsan_label_va);
+  if (has_size) {
+    va_arg(ap, dfsan_label_va);
+  }
+  va_arg(ap, dfsan_label_va);
+
+  // Label each output chunk according to the label supplied as argument to the
+  // function. We need to go through all the chunks and arguments even if the
+  // string was only partially printed ({v,}snprintf case).
+  for (size_t i = 0; i < num_chunks; ++i) {
+    const Chunk *chunk = chunks + i;
+
+    switch (chunk->label_type) {
+    case Chunk::NONE:
+      dfsan_set_label(0, (void*) chunk->ptr, chunk->size);
+      break;
+    case Chunk::IGNORED:
+      va_arg(ap, dfsan_label_va);
+      dfsan_set_label(0, (void*) chunk->ptr, chunk->size);
+      break;
+    case Chunk::NUMERIC: {
+      dfsan_label label = va_arg(ap, dfsan_label_va);
+      dfsan_set_label(label, (void*) chunk->ptr, chunk->size);
+      break;
+    }
+    case Chunk::STRING: {
+      // Consume the label of the pointer to the string
+      va_arg(ap, dfsan_label_va);
+      // Copy at most as many labels as bytes were actually stored for this
+      // chunk: a precision or the size bound may have cut the string short.
+      // NOTE: labels are copied to the start of the chunk, so they do not line
+      // up with the characters when a field width pads the string on the left.
+      size_t arg_len = strlen(chunk->arg);
+      size_t num_labels = arg_len < chunk->size ? arg_len : chunk->size;
+      dfsan_set_label(0, (void*) chunk->ptr, chunk->size);
+      internal_memcpy(shadow_for((void *) chunk->ptr),
+                      shadow_for((void *) chunk->arg),
+                      sizeof(dfsan_label) * num_labels);
+      break;
+    }
+    }
+  }
+
+  dfsan_label *ret_label_ptr = va_arg(ap, dfsan_label *);
+  *ret_label_ptr = 0;
+
+  free(chunks);
+
+  // Number of bytes written in total.
+  return off;
+}
+
+// Custom DFSan wrapper for sprintf(). The labels of the arguments follow the
+// regular arguments in the variable argument list (see format_buffer()).
+SANITIZER_INTERFACE_ATTRIBUTE
+int __dfsw_sprintf(char *str, const char *format, ...) {
+  va_list ap;
+  va_start(ap, format);
+  int ret = format_buffer(str, false, 0, format, ap);
+  va_end(ap);
+  return ret;
+}
+
+// Custom DFSan wrapper for snprintf(). The labels of the arguments follow the
+// regular arguments in the variable argument list (see format_buffer()).
+SANITIZER_INTERFACE_ATTRIBUTE
+int __dfsw_snprintf(char *str, size_t size, const char *format, ...) {
+  va_list ap;
+  va_start(ap, format);
+  int ret = format_buffer(str, true, size, format, ap);
+  va_end(ap);
+  return ret;
+}
 }
Index: lib/dfsan/done_abilist.txt
===================================================================
--- lib/dfsan/done_abilist.txt
+++ lib/dfsan/done_abilist.txt
@@ -208,9 +208,11 @@
 fun:sigaction=custom
 fun:gettimeofday=custom
 
+# sprintf-like
+fun:sprintf=custom
+fun:snprintf=custom
+
 # TODO: custom
-fun:snprintf=discard
-fun:vsnprintf=discard
 fun:asprintf=discard
 fun:qsort=discard
 
Index: lib/sanitizer_common/scripts/check_lint.sh
===================================================================
--- lib/sanitizer_common/scripts/check_lint.sh
+++ lib/sanitizer_common/scripts/check_lint.sh
@@ -29,7 +29,7 @@
 MSAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER}
 LSAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER}
 LSAN_LIT_TEST_LINT_FILTER=${LSAN_RTL_LINT_FILTER},-whitespace/line_length
-DFSAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/int,-runtime/printf,-runtime/references
+DFSAN_RTL_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/int,-runtime/printf,-runtime/references,-readability/function
 COMMON_RTL_INC_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/int,-runtime/sizeof,-runtime/printf,-readability/fn_size
 SANITIZER_INCLUDES_LINT_FILTER=${COMMON_LINT_FILTER},-runtime/int
 MKTEMP="mktemp -q /tmp/tmp.XXXXXXXXXX"
Index: test/dfsan/custom.c
===================================================================
--- test/dfsan/custom.c
+++ test/dfsan/custom.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -376,7 +377,7 @@
   dfsan_set_label(i_label, &fd, sizeof(fd));
   dfsan_set_label(j_label, &(buf[3]), 1);
   dfsan_set_label(k_label, &buf_len, sizeof(buf_len));
-  
+
   res = write(fd, buf, buf_len);
   assert(write_callback_count == 2);
   ASSERT_READ_ZERO_LABEL(&res, sizeof(res));
@@ -774,6 +775,97 @@
   close(fd);
 }
 
+void test_sprintf() {
+  char buf[2048];
+  memset(buf, 'a', sizeof(buf));
+
+  // Test formatting & label propagation.
+  dfsan_set_label(0, buf, sizeof(buf));
+  const char* s = "world";
+  int m = 8;
+  int d = 27;
+  dfsan_set_label(k_label, (void *) (s + 1), 2);
+  dfsan_set_label(i_label, &m, sizeof(m));
+  dfsan_set_label(j_label, &d, sizeof(d));
+  int n;
+  int r = sprintf(buf, "hello %s, %-d/%d/%d %f %n%d", s, 2014, m, d,
+                  12345.6781234, &n, 1000);
+  assert(r == 40);
+  assert(strcmp(buf, "hello world, 2014/8/27 12345.678123 1000") == 0);
+  ASSERT_READ_LABEL(buf, 7, 0);
+  ASSERT_READ_LABEL(buf + 7, 2, k_label);
+  ASSERT_READ_LABEL(buf + 9, 9, 0);
+  ASSERT_READ_LABEL(buf + 18, 1, i_label);
+  ASSERT_READ_LABEL(buf + 19, 1, 0);
+  ASSERT_READ_LABEL(buf + 20, 2, j_label);
+  ASSERT_READ_LABEL(buf + 22, 13, 0);
+  ASSERT_LABEL(r, 0);
+  assert(n == 36);
+
+  // Test formatting of all supported types of format strings.
+#define ASSERT_SPRINTF(expected, fmt, arg) {                \
+    memset(buf, 'a', sizeof(buf));                          \
+    assert(sprintf(buf, fmt, arg) == strlen(expected));     \
+    assert(strcmp(buf, expected) == 0);                     \
+  }
+  ASSERT_SPRINTF("foo -559038737 bar", "foo %d bar", 0xdeadbeef);
+  ASSERT_SPRINTF("foo 3735928559 bar", "foo %u bar", 0xdeadbeef);
+  ASSERT_SPRINTF("foo 12345 bar", "foo %i bar", 12345);
+  ASSERT_SPRINTF("foo 751 bar", "foo %o bar", 0751);
+  ASSERT_SPRINTF("foo babe bar", "foo %x bar", 0xbabe);
+  ASSERT_SPRINTF("foo 0000BABE bar", "foo %.8X bar", 0xbabe);
+  ASSERT_SPRINTF("foo -17 bar", "foo %hhd bar", 0xdeadbeef);
+  ASSERT_SPRINTF("foo -16657 bar", "foo %hd bar", 0xdeadbeef);
+  ASSERT_SPRINTF("foo deadbeefdeadbeef bar", "foo %lx bar", 0xdeadbeefdeadbeef);
+  ASSERT_SPRINTF("foo 0xdeadbeefdeadbeef bar", "foo %p bar",
+                 (void *) 0xdeadbeefdeadbeef);
+  ASSERT_SPRINTF("foo 18446744073709551615 bar", "foo %ju bar", (intmax_t) -1);
+  ASSERT_SPRINTF("foo 18446744073709551615 bar", "foo %zu bar", (size_t) -1);
+  ASSERT_SPRINTF("foo 18446744073709551615 bar", "foo %tu bar", (size_t) -1);
+
+  ASSERT_SPRINTF("foo 0x1.f9acffa7eb6bfp-4", "foo %a", 0.123456);
+  ASSERT_SPRINTF("foo 0X1.F9ACFFA7EB6BFP-4", "foo %A", 0.123456);
+  ASSERT_SPRINTF("foo 0.12346", "foo %.5f", 0.123456);
+  ASSERT_SPRINTF("foo 0.123456", "foo %g", 0.123456);
+  ASSERT_SPRINTF("foo 1.234560e-01", "foo %e", 0.123456);
+  ASSERT_SPRINTF("foo 1.234560E-01", "foo %E", 0.123456);
+  ASSERT_SPRINTF("foo 0.1234567891234560", "foo %.16Lf",
+                 (long double) 0.123456789123456);
+
+  ASSERT_SPRINTF("foo z bar", "foo %c bar", 'z');
+  ASSERT_SPRINTF("foo bar", "foo %s", "bar");
+  ASSERT_SPRINTF("foo % bar", "foo %% %s", "bar");
+
+  // %n already tested
+
+#undef ASSERT_SPRINTF
+}
+
+void test_snprintf() {
+  char buf[2048];
+  memset(buf, 'a', sizeof(buf));
+  dfsan_set_label(0, buf, sizeof(buf));
+  const char* s = "world";
+  int y = 2014;
+  int m = 8;
+  int d = 27;
+  dfsan_set_label(k_label, (void *) (s + 1), 2);
+  dfsan_set_label(i_label, &y, sizeof(y));
+  dfsan_set_label(j_label, &m, sizeof(m));
+  int r = snprintf(buf, 19, "hello %s, %-d/%d/%d %f", s, y, m, d,
+                   12345.6781234);
+  // The return value is the number of bytes that would have been written to
+  // the final string if enough space had been available.
+  assert(r == 35);
+  assert(memcmp(buf, "hello world, 2014/", 19) == 0);
+  ASSERT_READ_LABEL(buf, 7, 0);
+  ASSERT_READ_LABEL(buf + 7, 2, k_label);
+  ASSERT_READ_LABEL(buf + 9, 4, 0);
+  ASSERT_READ_LABEL(buf + 13, 4, i_label);
+  ASSERT_READ_LABEL(buf + 17, 2, 0);
+  ASSERT_LABEL(r, 0);
+}
+
 int main(void) {
   i_label = dfsan_create_label("i", 0);
   j_label = dfsan_create_label("j", 0);
@@ -810,7 +902,9 @@
   test_select();
   test_sigaction();
   test_sigemptyset();
+  test_snprintf();
   test_socketpair();
+  test_sprintf();
   test_stat();
   test_strcasecmp();
   test_strchr();