diff --git a/compiler-rt/lib/dfsan/dfsan.h b/compiler-rt/lib/dfsan/dfsan.h --- a/compiler-rt/lib/dfsan/dfsan.h +++ b/compiler-rt/lib/dfsan/dfsan.h @@ -43,6 +43,13 @@ void dfsan_clear_arg_tls(uptr offset, uptr size); // Zero out the TLS storage. void dfsan_clear_thread_local_state(); + +// Return the origin associated with the first taint byte in the size bytes +// from the address addr. +dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, uptr size); + +// Copy or move the origins of the len bytes from src to dst. +void dfsan_mem_origin_transfer(const void *dst, const void *src, uptr len); } // extern "C" template diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -49,14 +49,28 @@ // in DataFlowSanitizer.cpp. static const int kDFsanArgTlsSize = 800; static const int kDFsanRetvalTlsSize = 800; +static const int kDFsanArgOriginTlsSize = 800; SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64 __dfsan_retval_tls[kDFsanRetvalTlsSize / sizeof(u64)]; +SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 __dfsan_retval_origin_tls; SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64 __dfsan_arg_tls[kDFsanArgTlsSize / sizeof(u64)]; +SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 + __dfsan_arg_origin_tls[kDFsanArgOriginTlsSize / sizeof(u32)]; SANITIZER_INTERFACE_ATTRIBUTE uptr __dfsan_shadow_ptr_mask; +// Instrumented code may set this value in terms of -dfsan-track-origins. +// * undefined or 0: do not track origins. +// * 1: track origins at memory store operations. +// * 2: (TODO) track origins at memory store operations and callsites. +extern "C" SANITIZER_WEAK_ATTRIBUTE const int __dfsan_track_origins; + +int __dfsan_get_track_origins() { + return &__dfsan_track_origins ? 
__dfsan_track_origins : 0; +} + // On Linux/x86_64, memory is laid out as follows: // // +--------------------+ 0x800000000000 (top of memory) @@ -243,6 +257,26 @@ return label; } +// Return the union of all the n labels from addr at the high 32 bit, and the +// origin of the first taint byte at the low 32 bit. +extern "C" SANITIZER_INTERFACE_ATTRIBUTE u64 +__dfsan_load_label_and_origin(const void *addr, uptr n) { + dfsan_label label = 0; + u64 ret = 0; + uptr p = (uptr)addr; + dfsan_label *s = shadow_for((void *)p); + for (uptr i = 0; i < n; ++i, ++p, ++s) { + dfsan_label l = *s; + if (!l) + continue; + label |= l; + if (!ret) { + ret = *(dfsan_origin *)origin_for((void *)p); + } + } + return ret | (u64)label << 32; +} + extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_unimplemented(char *fname) { if (flags().warn_unimplemented) @@ -287,6 +321,20 @@ return label; } +// Return the origin of the first taint byte in the size bytes from the address +// addr. +static dfsan_origin GetOriginIfTainted(uptr addr, uptr size) { + for (uptr i = 0; i < size; ++i, ++addr) { + dfsan_label *s = shadow_for((void *)addr); + if (!is_shadow_addr_valid((uptr)s)) { + continue; + } + if (*s) + return *(dfsan_origin *)origin_for((void *)addr); + } + return 0; +} + // For platforms which support slow unwinder only, we need to restrict the store // context size to 1, basically only storing the current pc, because the slow // unwinder which is based on libunwind is not async signal safe and causes @@ -327,6 +375,163 @@ return chained.raw_id(); } +// Copy the origins of the size bytes from src to dst. The source and target +// memory ranges cannot be overlapped. This is used by memcpy. stack records the +// stack trace of the memcpy. +static void CopyOrigin(const void *dst, const void *src, uptr size, + StackTrace *stack) { + uptr d = (uptr)dst; + uptr beg = d & ~3UL; + // Copy left unaligned origin if that memory is tainted. 
+ if (beg < d) { + dfsan_origin o = GetOriginIfTainted((uptr)src, beg + 4 - d); + if (o) { + o = ChainOrigin(o, stack); + *(dfsan_origin *)origin_for((void *)beg) = o; + } + beg += 4; + } + + uptr end = (d + size) & ~3UL; + // If both ends fall into the same 4-byte slot, we are done. + if (end < beg) + return; + + // Copy right unaligned origin if that memory is tainted. + if (end < d + size) { + dfsan_origin o = + GetOriginIfTainted((uptr)src + (end - d), (d + size) - end); + if (o) { + o = ChainOrigin(o, stack); + *(dfsan_origin *)origin_for((void *)end) = o; + } + } + + if (beg < end) { + // Align src up. + uptr s = ((uptr)src + 3) & ~3UL; + dfsan_origin *src = (dfsan_origin *)origin_for((void *)s); + u64 *src_s = (u64 *)shadow_for((void *)s); + dfsan_origin *src_end = + (dfsan_origin *)origin_for((void *)(s + (end - beg))); + dfsan_origin *dst = (dfsan_origin *)origin_for((void *)beg); + dfsan_origin src_o = 0; + dfsan_origin dst_o = 0; + for (; src < src_end; ++src, ++src_s, ++dst) { + if (!*src_s) + continue; + if (*src != src_o) { + src_o = *src; + dst_o = ChainOrigin(src_o, stack); + } + *dst = dst_o; + } + } +} + +// Copy the origins of the size bytes from src to dst. The source and target +// memory ranges may be overlapped. So the copy is done in a reverse order. +// This is used by memmove. stack records the stack trace of the memmove. +static void ReverseCopyOrigin(const void *dst, const void *src, uptr size, + StackTrace *stack) { + uptr d = (uptr)dst; + uptr end = (d + size) & ~3UL; + + // Copy right unaligned origin if that memory is tainted. + if (end < d + size) { + dfsan_origin o = + GetOriginIfTainted((uptr)src + (end - d), (d + size) - end); + if (o) { + o = ChainOrigin(o, stack); + *(dfsan_origin *)origin_for((void *)end) = o; + } + } + + uptr beg = d & ~3UL; + + if (beg + 4 < end) { + // Align src up. 
+  uptr s = ((uptr)src + 3) & ~3UL; +  dfsan_origin *src = (dfsan_origin *)origin_for((void *)(s + end - beg - 4)); +  u64 *src_s = (u64 *)shadow_for((void *)(s + end - beg - 4)); +  dfsan_origin *src_begin = (dfsan_origin *)origin_for((void *)s); +  dfsan_origin *dst = (dfsan_origin *)origin_for((void *)(end - 4)); +  dfsan_origin src_o = 0; +  dfsan_origin dst_o = 0; +  for (; src >= src_begin; --src, --src_s, --dst) { +    if (!*src_s) +      continue; +    if (*src != src_o) { +      src_o = *src; +      dst_o = ChainOrigin(src_o, stack); +    } +    *dst = dst_o; +  } +  } + +  // Copy left unaligned origin if that memory is tainted. +  if (beg < d) { +    dfsan_origin o = GetOriginIfTainted((uptr)src, beg + 4 - d); +    if (o) { +      o = ChainOrigin(o, stack); +      *(dfsan_origin *)origin_for((void *)beg) = o; +    } +  } +} + +// Copy or move the origins of the len bytes from src to dst. The source and +// target memory ranges may or may not be overlapped. This is used by memory +// transfer operations. stack records the stack trace of the memory transfer +// operation. +static void MoveOrigin(const void *dst, const void *src, uptr size, +                       StackTrace *stack) { +  if (!has_valid_shadow_addr(dst) || +      !has_valid_shadow_addr((void *)((uptr)dst + size)) || +      !has_valid_shadow_addr(src) || +      !has_valid_shadow_addr((void *)((uptr)src + size))) { +    return; +  } +  // If destination origin range overlaps with source origin range, move +  // origins by copying origins in a reverse order; otherwise, copy origins in +  // a normal order. +  uptr src_aligned_beg = reinterpret_cast<uptr>(src) & ~3UL; +  uptr src_aligned_end = (reinterpret_cast<uptr>(src) + size) & ~3UL; +  uptr dst_aligned_beg = reinterpret_cast<uptr>(dst) & ~3UL; +  if (dst_aligned_beg < src_aligned_end && dst_aligned_beg >= src_aligned_beg) +    return ReverseCopyOrigin(dst, src, size, stack); +  return CopyOrigin(dst, src, size, stack); +} + +// Set the size bytes from the address dst to be the origin value. 
+static void SetOrigin(const void *dst, uptr size, u32 origin) { +  if (size == 0) +    return; + +  // Origin mapping is 4 bytes per 4 bytes of application memory. +  // Here we extend the range such that its left and right bounds are both +  // 4 byte aligned. +  uptr x = unaligned_origin_for((uptr)dst); +  uptr beg = x & ~3UL;  // align down. +  uptr end = (x + size + 3) & ~3UL;  // align up. +  u64 origin64 = ((u64)origin << 32) | origin; +  // This is like memset, but the value is 32-bit. We unroll by 2 to write +  // 64 bits at once. May want to unroll further to get 128-bit stores. +  if (beg & 7ULL) { +    if (*(u32 *)beg != origin) +      *(u32 *)beg = origin; +    beg += 4; +  } +  for (uptr addr = beg; addr < (end & ~7UL); addr += 8) { +    if (*(u64 *)addr == origin64) +      continue; +    *(u64 *)addr = origin64; +  } +  if (end & 7ULL) { +    if (*(u32 *)(end - 4) != origin) +      *(u32 *)(end - 4) = origin; +  } +} + static void WriteShadowIfDifferent(dfsan_label label, uptr shadow_addr, uptr size) { dfsan_label *labelp = (dfsan_label *)shadow_addr; @@ -346,6 +551,45 @@ } } + +// Return a new origin chain with the previous ID id and the current stack +// trace. +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin +__dfsan_chain_origin(dfsan_origin id) { +  GET_CALLER_PC_BP_SP; +  (void)sp; +  GET_STORE_STACK_TRACE_PC_BP(pc, bp); +  return ChainOrigin(id, &stack); +} + +// Copy or move the origins of the len bytes from src to dst. +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_mem_origin_transfer( +    const void *dst, const void *src, uptr len) { +  if (src == dst) +    return; +  GET_CALLER_PC_BP; +  GET_STORE_STACK_TRACE_PC_BP(pc, bp); +  MoveOrigin(dst, src, len, &stack); +} + +SANITIZER_INTERFACE_ATTRIBUTE void dfsan_mem_origin_transfer(const void *dst, +                                                             const void *src, +                                                             uptr len) { +  __dfsan_mem_origin_transfer(dst, src, len); +} + +// If the label is tainted, set the size bytes from the address p to be a new +// origin chain with the previous ID o and the current stack trace. 
This is +// used by instrumentation to reduce code size when too much code is inserted. +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_maybe_store_origin( + u16 s, void *p, uptr size, dfsan_origin o) { + if (UNLIKELY(s)) { + GET_CALLER_PC_BP_SP; + (void)sp; + GET_STORE_STACK_TRACE_PC_BP(pc, bp); + SetOrigin(p, size, ChainOrigin(o, &stack)); + } +} + extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_set_label( dfsan_label label, void *addr, uptr size) { const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr); @@ -407,6 +651,11 @@ return __dfsan_union_load(shadow_for(addr), size); } +SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin +dfsan_read_origin_of_first_taint(const void *addr, uptr size) { + return GetOriginIfTainted((uptr)addr, size); +} + extern "C" SANITIZER_INTERFACE_ATTRIBUTE const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) { return &__dfsan_label_info[label]; @@ -516,6 +765,12 @@ void dfsan_clear_thread_local_state() { internal_memset(__dfsan_arg_tls, 0, sizeof(__dfsan_arg_tls)); internal_memset(__dfsan_retval_tls, 0, sizeof(__dfsan_retval_tls)); + + if (__dfsan_get_track_origins()) { + internal_memset(__dfsan_arg_origin_tls, 0, sizeof(__dfsan_arg_origin_tls)); + internal_memset(&__dfsan_retval_origin_tls, 0, + sizeof(__dfsan_retval_origin_tls)); + } } static void InitializePlatformEarly() {