diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h --- a/compiler-rt/include/sanitizer/dfsan_interface.h +++ b/compiler-rt/include/sanitizer/dfsan_interface.h @@ -64,6 +64,12 @@ /// value. dfsan_label dfsan_get_label(long data); +/// Retrieves the immediate origin associated with the given data. The returned +/// origin may point to another origin. +/// +/// The type of 'data' is arbitrary. +dfsan_origin dfsan_get_origin(long data); + /// Retrieves the label associated with the data at the given address. dfsan_label dfsan_read_label(const void *addr, size_t size); @@ -111,6 +117,15 @@ void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2, size_t n, dfsan_label s1_label, dfsan_label s2_label, dfsan_label n_label); + +/// Prints the origin trace of the label at the address addr to stderr. It also +/// prints description at the beginning of the trace. If origin tracking is not +/// on, or the address is not labeled, it prints a warning instead. +void dfsan_print_origin_trace(const void *addr, const char *description); + +/// Retrieves the very first origin associated with the data at the given +/// address. Returns 0 if origin tracking is off or the address is not labeled. 
+dfsan_origin dfsan_get_init_origin(const void *addr); #ifdef __cplusplus } // extern "C" diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -31,6 +31,7 @@ #include "sanitizer_common/sanitizer_flags.h" #include "sanitizer_common/sanitizer_internal_defs.h" #include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_report_decorator.h" #include "sanitizer_common/sanitizer_stacktrace.h" using namespace __dfsan; @@ -699,6 +700,22 @@ return data_label; } +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label __dfso_dfsan_get_label( + long data, dfsan_label data_label, dfsan_label *ret_label, + dfsan_origin data_origin, dfsan_origin *ret_origin) { + *ret_label = 0; + *ret_origin = 0; + return data_label; +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin __dfso_dfsan_get_origin( + long data, dfsan_label data_label, dfsan_label *ret_label, + dfsan_origin data_origin, dfsan_origin *ret_origin) { + *ret_label = 0; + *ret_origin = 0; + return data_origin; +} + SANITIZER_INTERFACE_ATTRIBUTE dfsan_label dfsan_read_label(const void *addr, uptr size) { if (size == 0) @@ -764,6 +781,91 @@ } } +// Report decorator that adds a dedicated color for origin-trace lines. +class Decorator : public __sanitizer::SanitizerCommonDecorator { + public: + Decorator() : SanitizerCommonDecorator() {} + const char *Origin() const { return Magenta(); } +}; + +// Prints the chain of origins recorded for the label at addr, after a header +// line that includes description (empty if null). Prints only a warning when +// origin tracking is off or addr is unlabeled, and a warning after the header +// when the stored origin is not a chained origin. +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_trace( + const void *addr, const char *description) { + Decorator d; + + if (!__dfsan_get_track_origins()) { + Printf( + " %sDFSan: origin tracking is not enabled. 
Did you specify the " + "-dfsan-track-origins=1 option?%s\n", + d.Warning(), d.Default()); + return; + } + + const dfsan_label *shadow_addr = __dfsan::shadow_for(addr); + const dfsan_label label = *shadow_addr; + if (!label) { + Printf(" %sDFSan: no tainted value at %p%s\n", d.Warning(), addr, + d.Default()); + return; + } + + const dfsan_origin *origin_addr = __dfsan::origin_for(addr); + const dfsan_origin origin = *origin_addr; + + Printf(" %sTaint value 0x%x (at %p) origin tracking (%s)%s\n", d.Origin(), + label, addr, description ? description : "", d.Default()); + Origin o = Origin::FromRawId(origin); + bool found = false; + while (o.isChainedOrigin()) { + StackTrace stack; + u32 origin_id = o.raw_id(); + o = o.getNextChainedOrigin(&stack); + if (o.isChainedOrigin()) + Printf(" %sOrigin value: %x, Taint value was stored to memory at%s\n", + d.Origin(), origin_id, d.Default()); + else + Printf(" %sOrigin value: %x, Taint value was created at%s\n", d.Origin(), + origin_id, d.Default()); + stack.Print(); + found = true; + } + if (!found) + Printf( + " %sTaint value 0x%x (at %p) has invalid origin tracking. 
This can " + "be a bug of DFSan.%s\n", + d.Warning(), label, addr, d.Default()); +} + +// Follows the origin chain of the label at addr and returns the id of its last +// chained origin. Returns 0 when origin tracking is off, addr is unlabeled, or +// the stored origin is not a chained origin. +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin +dfsan_get_init_origin(const void *addr) { + if (!__dfsan_get_track_origins()) + return 0; + + const dfsan_label *shadow_addr = __dfsan::shadow_for(addr); + const dfsan_label label = *shadow_addr; + if (!label) + return 0; + + const dfsan_origin *origin_addr = __dfsan::origin_for(addr); + const dfsan_origin origin = *origin_addr; + + Origin o = Origin::FromRawId(origin); + while (o.isChainedOrigin()) { + StackTrace stack; + dfsan_origin origin_id = o.raw_id(); + o = o.getNextChainedOrigin(&stack); + if (!o.isChainedOrigin()) + return origin_id; + } + return 0; +} + #define GET_FATAL_STACK_TRACE_PC_BP(pc, bp) \ BufferedStackTrace stack; \ stack.Unwind(pc, bp, nullptr, common_flags()->fast_unwind_on_fatal); diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt --- a/compiler-rt/lib/dfsan/done_abilist.txt +++ b/compiler-rt/lib/dfsan/done_abilist.txt @@ -28,6 +28,13 @@ fun:dfsan_set_write_callback=custom fun:dfsan_flush=uninstrumented fun:dfsan_flush=discard +fun:dfsan_print_origin_trace=uninstrumented +fun:dfsan_print_origin_trace=discard +fun:dfsan_get_origin=uninstrumented +fun:dfsan_get_origin=custom +fun:dfsan_get_init_origin=uninstrumented +fun:dfsan_get_init_origin=discard + ############################################################################### # glibc diff --git a/compiler-rt/test/dfsan/origin_add_label.c b/compiler-rt/test/dfsan/origin_add_label.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_add_label.c @@ -0,0 +1,35 @@ +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true -mllvm -dfsan-instrumentation-with-call-threshold=0 %s -o %t && \ 
+// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +#include <sanitizer/dfsan_interface.h> + +__attribute__((noinline)) uint64_t foo(uint64_t a, uint64_t b) { return a + b; } + +int main(int argc, char *argv[]) { + uint64_t a = 10; + uint64_t b = 20; + dfsan_add_label(4, &a, sizeof(a)); + dfsan_add_label(8, &a, sizeof(a)); + uint64_t c = foo(a, b); + dfsan_print_origin_trace((int*)(&c), NULL); + dfsan_print_origin_trace((int*)(&c) + 1, NULL); +} + +// CHECK: Taint value 0xc {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_add_label.c:[[@LINE-7]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_add_label.c:[[@LINE-11]] + +// CHECK: Taint value 0xc {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_add_label.c:[[@LINE-14]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_add_label.c:[[@LINE-18]] diff --git a/compiler-rt/test/dfsan/origin_disabled.c b/compiler-rt/test/dfsan/origin_disabled.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_disabled.c @@ -0,0 +1,13 @@ +// RUN: %clang_dfsan -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +#include <sanitizer/dfsan_interface.h> + +int main(int argc, char *argv[]) { + uint64_t a = 10; + dfsan_set_label(8, &a, sizeof(a)); + dfsan_print_origin_trace(&a, NULL); +} + +// CHECK: DFSan: origin tracking is not enabled. Did you specify the -dfsan-track-origins=1 option? 
diff --git a/compiler-rt/test/dfsan/origin_invalid.c b/compiler-rt/test/dfsan/origin_invalid.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_invalid.c @@ -0,0 +1,18 @@ +// RUN: %clang_dfsan -mllvm -dfsan-fast-16-labels=true -mllvm -dfsan-track-origins=1 %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +#include <sanitizer/dfsan_interface.h> + +int main(int argc, char *argv[]) { + uint64_t a = 10; + dfsan_set_label(8, &a, sizeof(a)); + size_t origin_addr = + (((size_t)&a & ~0x700000000000LL + 0x200000000000LL) & ~0x3UL); + asm("mov %0, %%rax": :"r"(origin_addr)); + asm("movq $0, (%rax)"); + dfsan_print_origin_trace(&a, "invalid"); +} + +// CHECK: Taint value 0x8 (at {{.*}}) origin tracking (invalid) +// CHECK: Taint value 0x8 (at {{.*}}) has invalid origin tracking. This can be a bug of DFSan. diff --git a/compiler-rt/test/dfsan/origin_ld_lost.c b/compiler-rt/test/dfsan/origin_ld_lost.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_ld_lost.c @@ -0,0 +1,21 @@ +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// Test origin tracking can lose origins at 2-byte load with addr % 4 == 3. 
+ +#include <sanitizer/dfsan_interface.h> + +__attribute__((noinline)) uint16_t foo(uint16_t a, uint16_t b) { return a + b; } + +int main(int argc, char *argv[]) { + uint64_t a __attribute__((aligned(4))) = 1; + uint32_t b = 10; + dfsan_set_label(4, (uint8_t *)&a + 4, sizeof(uint8_t)); + uint16_t c = foo(*(uint16_t *)((uint8_t *)&a + 3), b); + dfsan_print_origin_trace(&c, "foo"); +} + +// CHECK: Taint value 0x4 {{.*}} origin tracking (foo) +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_ld_lost.c:[[@LINE-6]] diff --git a/compiler-rt/test/dfsan/origin_ldst.c b/compiler-rt/test/dfsan/origin_ldst.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_ldst.c @@ -0,0 +1,76 @@ +// RUN: %clang_dfsan -DTEST64 -DALIGN=8 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: %clang_dfsan -DTEST32 -DALIGN=4 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: %clang_dfsan -DALIGN=2 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: %clang_dfsan -DTEST64 -DALIGN=5 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: %clang_dfsan -DTEST32 -DALIGN=3 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: %clang_dfsan -DALIGN=1 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// Test origin tracking is accurate in terms of partial 
store/load, and +// different alignments. + +#include <sanitizer/dfsan_interface.h> + +#ifdef TEST64 +typedef uint64_t FULL_TYPE; +typedef uint32_t HALF_TYPE; +#elif defined(TEST32) +typedef uint32_t FULL_TYPE; +typedef uint16_t HALF_TYPE; +#else +typedef uint16_t FULL_TYPE; +typedef uint8_t HALF_TYPE; +#endif + +__attribute__((noinline)) FULL_TYPE foo(FULL_TYPE a, FULL_TYPE b) { return a + b; } + +int main(int argc, char *argv[]) { + char x __attribute__((aligned(ALIGN))) = 1, y = 2; + dfsan_set_label(8, &x, sizeof(x)); + char z __attribute__((aligned(ALIGN))) = x + y; + dfsan_print_origin_trace(&z, NULL); + + FULL_TYPE a __attribute__((aligned(ALIGN))) = 1; + FULL_TYPE b = 10; + dfsan_set_label(4, (HALF_TYPE *)&a + 1, sizeof(HALF_TYPE)); + FULL_TYPE c __attribute__((aligned(ALIGN))) = foo(a, b); + dfsan_print_origin_trace(&c, NULL); + dfsan_print_origin_trace((HALF_TYPE *)&c + 1, NULL); +} + +// CHECK: Taint value 0x8 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-13]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-17]] + +// CHECK: Taint value 0x4 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-14]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-18]] + +// CHECK: Taint value 0x4 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-21]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-25]] diff --git a/compiler-rt/test/dfsan/origin_limit.c b/compiler-rt/test/dfsan/origin_limit.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_limit.c @@ -0,0 +1,39 @@ +// RUN: 
%clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t +// +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: DFSAN_OPTIONS=origin_history_size=2 %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK2 < %t.out +// +// RUN: DFSAN_OPTIONS=origin_history_size=0 %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK0 < %t.out + +#include <sanitizer/dfsan_interface.h> + +#include <stdio.h> + +__attribute__((noinline)) int foo(int a, int b) { return a + b; } + +int main(int argc, char *argv[]) { + int a = 10; + dfsan_set_label(8, &a, sizeof(a)); + int c = 0; + for (int i = 0; i < 17; ++i) { + c = foo(a, c); + printf("%lx", (unsigned long)&c); + } + dfsan_print_origin_trace(&c, NULL); +} + +// CHECK: Taint value 0x8 {{.*}} origin tracking () +// CHECK-COUNT-14: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was created at + +// CHECK2: Taint value 0x8 {{.*}} origin tracking () +// CHECK2: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK2: Origin value: {{.*}}, Taint value was created at + +// CHECK0: Taint value 0x8 {{.*}} origin tracking () +// CHECK0-COUNT-16: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was created at diff --git a/compiler-rt/test/dfsan/origin_memset.c b/compiler-rt/test/dfsan/origin_memset.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_memset.c @@ -0,0 +1,46 @@ +// RUN: %clang_dfsan -DOFFSET=0 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK0 < %t.out + +// RUN: %clang_dfsan -DOFFSET=10 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK10 < %t.out + +#include <sanitizer/dfsan_interface.h> + +#include <string.h> + +int xx[10000]; + +volatile int idx = 30; + +__attribute__((noinline)) 
+void fn_g(int a, int b) { + memset(&xx[idx], a, sizeof(a)); + memset(&xx[idx + 10], b, sizeof(b)); +} + +__attribute__((noinline)) +void fn_f(int a, int b) { + fn_g(a, b); +} + +int main(int argc, char *argv[]) { + int volatile z1; + int volatile z2; + dfsan_set_label(8, (void *)&z1, sizeof(z1)); + dfsan_set_label(16, (void *)&z2, sizeof(z2)); + fn_f(z1, z2); + dfsan_print_origin_trace(&xx[idx + OFFSET], NULL); + return 0; +} + +// CHECK0: Taint value 0x8 {{.*}} origin tracking () +// CHECK0: Origin value: {{.*}}, Taint value was created at + +// CHECK0: #0 {{.*}} in main {{.*}}origin_memset.c:[[@LINE-10]] + +// CHECK10: Taint value 0x10 {{.*}} origin tracking () +// CHECK10: Origin value: {{.*}}, Taint value was created at + +// CHECK10: #0 {{.*}} in main {{.*}}origin_memset.c:[[@LINE-14]] diff --git a/compiler-rt/test/dfsan/origin_overlapped.c b/compiler-rt/test/dfsan/origin_overlapped.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_overlapped.c @@ -0,0 +1,23 @@ +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +#include <sanitizer/dfsan_interface.h> + +int main(int argc, char *argv[]) { + char volatile z1; + char volatile z2; + dfsan_set_label(8, (void *)&z1, sizeof(z1)); + dfsan_set_label(16, (void *)&z2, sizeof(z2)); // overwriting the old origin. 
+ char c = z1; + dfsan_print_origin_trace(&c, "bar"); + return 0; +} + +// CHECK: Taint value 0x8 {{.*}} origin tracking (bar) +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_overlapped.c:[[@LINE-7]] + +// CHECK: Origin value: {{.*}}, Taint value was created at + +// CHECK: #0 {{.*}} in main {{.*}}origin_overlapped.c:[[@LINE-12]] diff --git a/compiler-rt/test/dfsan/origin_set_label.c b/compiler-rt/test/dfsan/origin_set_label.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_set_label.c @@ -0,0 +1,34 @@ +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true -mllvm -dfsan-instrumentation-with-call-threshold=0 %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +#include <sanitizer/dfsan_interface.h> + +__attribute__((noinline)) uint64_t foo(uint64_t a, uint64_t b) { return a + b; } + +int main(int argc, char *argv[]) { + uint64_t a = 10; + uint64_t b = 20; + dfsan_set_label(8, &a, sizeof(a)); + uint64_t c = foo(a, b); + dfsan_print_origin_trace(&c, NULL); + dfsan_print_origin_trace((int*)(&c) + 1, NULL); +} + +// CHECK: Taint value 0x8 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_set_label.c:[[@LINE-7]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_set_label.c:[[@LINE-11]] + +// CHECK: Taint value 0x8 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_set_label.c:[[@LINE-14]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_set_label.c:[[@LINE-18]] diff --git 
a/compiler-rt/test/dfsan/origin_untainted.c b/compiler-rt/test/dfsan/origin_untainted.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_untainted.c @@ -0,0 +1,12 @@ +// RUN: %clang_dfsan -mllvm -dfsan-fast-16-labels=true -mllvm -dfsan-track-origins=1 %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +#include <sanitizer/dfsan_interface.h> + +int main(int argc, char *argv[]) { + uint64_t a = 10; + dfsan_print_origin_trace(&a, NULL); +} + +// CHECK: DFSan: no tainted value at {{.*}}