diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h --- a/compiler-rt/include/sanitizer/dfsan_interface.h +++ b/compiler-rt/include/sanitizer/dfsan_interface.h @@ -22,6 +22,7 @@ #endif typedef uint16_t dfsan_label; +typedef uint32_t dfsan_origin; /// Stores information associated with a specific label identifier. A label /// may be a base label created using dfsan_create_label, with associated @@ -63,6 +64,12 @@ /// value. dfsan_label dfsan_get_label(long data); +/// Retrieves the immediate origin associated with the given data. The returned +/// origin may point to another origin. +/// +/// The type of 'data' is arbitrary. +dfsan_origin dfsan_get_origin(long data); + /// Retrieves the label associated with the data at the given address. dfsan_label dfsan_read_label(const void *addr, size_t size); @@ -110,6 +117,16 @@ void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2, size_t n, dfsan_label s1_label, dfsan_label s2_label, dfsan_label n_label); + +/// Prints the origin trace of the label at the address addr. It also prints +/// description at the beginning of the trace. If origin tracking is not on, or +/// the address is not labeled, it prints nothing. +void dfsan_print_origin_trace(const void *addr, const char *description); + +/// Retrieves the very first origin associated with the data at the given +/// address. +dfsan_origin dfsan_get_init_origin(const void *addr); + #ifdef __cplusplus } // extern "C" diff --git a/compiler-rt/lib/dfsan/dfsan.h b/compiler-rt/lib/dfsan/dfsan.h --- a/compiler-rt/lib/dfsan/dfsan.h +++ b/compiler-rt/lib/dfsan/dfsan.h @@ -17,11 +17,13 @@ #include "sanitizer_common/sanitizer_internal_defs.h" #include "dfsan_platform.h" -using __sanitizer::uptr; using __sanitizer::u16; +using __sanitizer::u32; +using __sanitizer::uptr; // Copy declarations from public sanitizer/dfsan_interface.h header here. 
typedef u16 dfsan_label; +typedef u32 dfsan_origin; struct dfsan_label_info { dfsan_label l1; @@ -35,6 +37,15 @@ void dfsan_set_label(dfsan_label label, void *addr, uptr size); dfsan_label dfsan_read_label(const void *addr, uptr size); dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2); + +/// Retrieves the origin associated with the first tainted byte in +/// [addr, addr + size), or 0 if no byte is tainted. +dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, uptr size); + +void dfsan_set_label_origin(dfsan_label label, dfsan_origin origin, void *addr, + uptr size); + +void dfsan_mem_origin_transfer(const void *dst, const void *src, uptr len); } // extern "C" template @@ -54,6 +65,29 @@ return shadow_for(const_cast(ptr)); } +inline uptr unaligned_origin_for(uptr ptr) { + return OriginAddr() + (ptr & ShadowMask()); +} + +inline dfsan_origin *origin_for(void *ptr) { + auto aligned_addr = unaligned_origin_for(reinterpret_cast(ptr)) & + ~(sizeof(dfsan_origin) - 1); + return reinterpret_cast(aligned_addr); +} + +inline const dfsan_origin *origin_for(const void *ptr) { + return origin_for(const_cast(ptr)); +} + +inline bool is_shadow_addr_valid(uptr shadow_addr) { + return (uptr)shadow_addr >= ShadowAddr() && (uptr)shadow_addr < OriginAddr(); +} + +inline bool has_valid_shadow_addr(const void *ptr) { + dfsan_label *ptr_s = shadow_for((void *)ptr); + return is_shadow_addr_valid((uptr)ptr_s); +} + struct Flags { #define DFSAN_FLAG(Type, Name, DefaultValue, Description) Type Name; #include "dfsan_flags.inc" @@ -67,6 +101,30 @@ return flags_data; } +// Backup DFSan runtime TLS state. +// Implementation must be async-signal-safe and use small data size, because +// instances of this class may live on the signal handler stack. +// +// This is used only by signal handlers for now. DFSan uses TLS to pass metadata +// of arguments and return values. 
When an instrumented function accesses the +// TLS, if a signal callback happens, and the callback calls other instrumented +// functions that update the same TLS, the TLS is in an inconsistent state +// after the callback ends. This may cause either under-tainting or +// over-tainting. +// +// The current implementation simply resets TLS at restore. This prevents +// over-tainting. Although under-tainting may still happen, a taint flow can be +// found eventually if we run a DFSan-instrumented program multiple times. The +// alternative option is saving the entire TLS. However the TLS storage takes +// 2k bytes, and signal calls could be nested. So it does not seem worth it. +class ScopedThreadLocalStateBackup { + public: + ScopedThreadLocalStateBackup() { Backup(); } + ~ScopedThreadLocalStateBackup() { Restore(); } + void Backup(); + void Restore(); +}; + } // namespace __dfsan #endif // DFSAN_H diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -20,6 +20,9 @@ #include "dfsan/dfsan.h" +#include "dfsan/dfsan_chained_origin_depot.h" +#include "dfsan/dfsan_origin.h" +#include "dfsan/dfsan_thread.h" #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_file.h" @@ -27,6 +30,7 @@ #include "sanitizer_common/sanitizer_flags.h" #include "sanitizer_common/sanitizer_internal_defs.h" #include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_report_decorator.h" #include "sanitizer_common/sanitizer_stacktrace.h" using namespace __dfsan; @@ -48,11 +52,20 @@ SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64 __dfsan_retval_tls[kDFsanRetvalTlsSize / sizeof(u64)]; +SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 __dfsan_retval_origin_tls; SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64 __dfsan_arg_tls[kDFsanArgTlsSize / sizeof(u64)]; +SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 + 
__dfsan_arg_origin_tls[kDFsanArgTlsSize / sizeof(u32)]; SANITIZER_INTERFACE_ATTRIBUTE uptr __dfsan_shadow_ptr_mask; +extern "C" SANITIZER_WEAK_ATTRIBUTE const int __dfsan_track_origins; + +int __dfsan_get_track_origins() { + return &__dfsan_track_origins ? __dfsan_track_origins : 0; +} + // On Linux/x86_64, memory is laid out as follows: // // +--------------------+ 0x800000000000 (top of memory) @@ -61,9 +74,11 @@ // | | // | unused | // | | -// +--------------------+ 0x200200000000 (kUnusedAddr) +// +--------------------+ 0x300200000000 (kUnusedAddr) // | union table | -// +--------------------+ 0x200000000000 (kUnionTableAddr) +// +--------------------+ 0x300000000000 (kUnionTableAddr) +// | origin | +// +--------------------+ 0x200000000000 (kOriginAddr) // | shadow memory | // +--------------------+ 0x000000010000 (kShadowAddr) // | reserved by kernel | @@ -237,6 +252,24 @@ return label; } +extern "C" SANITIZER_INTERFACE_ATTRIBUTE u64 +__dfsan_load_label_and_origin(const void *addr, uptr n) { + dfsan_label label = 0; + u64 ret = 0; + uptr p = (uptr)addr; + dfsan_label *s = shadow_for((void *)p); + for (uptr i = 0; i < n; ++i, ++p, ++s) { + dfsan_label l = *s; + if (!l) + continue; + label |= l; + if (!ret) { + ret = *(dfsan_origin *)origin_for((void *)p); + } + } + return ret | (u64)label << 32; +} + extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_unimplemented(char *fname) { if (flags().warn_unimplemented) @@ -281,6 +314,203 @@ return label; } +static dfsan_origin GetOriginIfTainted(uptr addr, uptr size) { + for (uptr i = 0; i < size; ++i, ++addr) { + dfsan_label *s = shadow_for((void *)addr); + if (!is_shadow_addr_valid((uptr)s)) { + continue; + } + if (*s) + return *(dfsan_origin *)origin_for((void *)addr); + } + return 0; +} + +// For platforms which support slow unwinder only, we restrict the store context +// size to 1, basically only storing the current pc. 
We do this because the slow +// unwinder which is based on libunwind is not async signal safe and causes +// random freezes in forking applications as well as in signal handlers. +#define GET_STORE_STACK_TRACE_PC_BP(pc, bp) \ + BufferedStackTrace stack; \ + int size = flags().store_context_size; \ + if (!SANITIZER_CAN_FAST_UNWIND) \ + size = Min(size, 1); \ + stack.Unwind(pc, bp, nullptr, true, size); + +#define PRINT_CALLER_STACK_TRACE \ + { \ + GET_CALLER_PC_BP_SP; \ + (void)sp; \ + GET_STORE_STACK_TRACE_PC_BP(pc, bp) \ + stack.Print(); \ + } + +static u32 ChainOrigin(u32 id, StackTrace *stack, bool from_init = false) { + DFsanThread *t = GetCurrentThread(); + if (t && t->InSignalHandler()) + return id; + + if (!from_init && id == 0 && flags().check_origin_invariant) { + Printf(" DFSan ChainOrigin invalid invariant\n"); + PRINT_CALLER_STACK_TRACE + } + + Origin o = Origin::FromRawId(id); + stack->tag = StackTrace::TAG_UNKNOWN; + Origin chained = Origin::CreateChainedOrigin(o, stack); + return chained.raw_id(); +} + +static void CopyOrigin(const void *dst, const void *src, uptr size, + StackTrace *stack) { + // if (!MEM_IS_APP(dst) || !MEM_IS_APP(src)) return; + + uptr d = (uptr)dst; + uptr beg = d & ~3UL; + // Copy left unaligned origin if that memory is tainted. + if (beg < d) { + dfsan_origin o = GetOriginIfTainted((uptr)src, beg + 4 - d); + if (o) { + o = ChainOrigin(o, stack); + *(dfsan_origin *)origin_for((void *)beg) = o; + } + beg += 4; + } + + uptr end = (d + size) & ~3UL; + // If both ends fall into the same 4-byte slot, we are done. + if (end < beg) + return; + + // Copy right unaligned origin if that memory is tainted. + if (end < d + size) { + dfsan_origin o = + GetOriginIfTainted((uptr)src + (end - d), (d + size) - end); + if (o) { + o = ChainOrigin(o, stack); + *(dfsan_origin *)origin_for((void *)end) = o; + } + } + + if (beg < end) { + // Align src up. 
+ uptr s = ((uptr)src + 3) & ~3UL; + dfsan_origin *src = (dfsan_origin *)origin_for((void *)s); + u64 *src_s = (u64 *)shadow_for((void *)s); + dfsan_origin *src_end = + (dfsan_origin *)origin_for((void *)(s + (end - beg))); + dfsan_origin *dst = (dfsan_origin *)origin_for((void *)beg); + dfsan_origin src_o = 0; + dfsan_origin dst_o = 0; + for (; src < src_end; ++src, ++src_s, ++dst) { + if (!*src_s) + continue; + if (*src != src_o) { + src_o = *src; + dst_o = ChainOrigin(src_o, stack); + } + *dst = dst_o; + } + } +} + +static void ReverseCopyOrigin(const void *dst, const void *src, uptr size, + StackTrace *stack) { + // if (!MEM_IS_APP(dst) || !MEM_IS_APP(src)) return; + + uptr d = (uptr)dst; + uptr end = (d + size) & ~3UL; + + // Copy right unaligned origin if that memory is tainted. + if (end < d + size) { + dfsan_origin o = + GetOriginIfTainted((uptr)src + (end - d), (d + size) - end); + if (o) { + o = ChainOrigin(o, stack); + *(dfsan_origin *)origin_for((void *)end) = o; + } + } + + uptr beg = d & ~3UL; + + if (beg + 4 < end) { + // Align src up. + uptr s = ((uptr)src + 3) & ~3UL; + dfsan_origin *src = (dfsan_origin *)origin_for((void *)(s + end - beg - 4)); + u64 *src_s = (u64 *)shadow_for((void *)(s + end - beg - 4)); + dfsan_origin *src_begin = (dfsan_origin *)origin_for((void *)s); + dfsan_origin *dst = (dfsan_origin *)origin_for((void *)(end - 4)); + dfsan_origin src_o = 0; + dfsan_origin dst_o = 0; + for (; src >= src_begin; --src, --src_s, --dst) { + if (!*src_s) + continue; + if (*src != src_o) { + src_o = *src; + dst_o = ChainOrigin(src_o, stack); + } + *dst = dst_o; + } + } + + // Copy left unaligned origin if that memory is tainted. 
+ if (beg < d) { + dfsan_origin o = GetOriginIfTainted((uptr)src, beg + 4 - d); + if (o) { + o = ChainOrigin(o, stack); + *(dfsan_origin *)origin_for((void *)beg) = o; + } + } +} + +static void MoveOrigin(const void *dst, const void *src, uptr size, + StackTrace *stack) { + if (!has_valid_shadow_addr(dst) || + !has_valid_shadow_addr((void *)((uptr)dst + size)) || + !has_valid_shadow_addr(src) || + !has_valid_shadow_addr((void *)((uptr)src + size))) { + return; + } + // If destination origin range overlaps with source origin range, move + // origins by copying origins in a reverse order; otherwise, copy origins in + // a normal order. + uptr src_aligned_beg = reinterpret_cast(src) & ~3UL; + uptr src_aligned_end = (reinterpret_cast(src) + size) & ~3UL; + uptr dst_aligned_beg = reinterpret_cast(dst) & ~3UL; + if (dst_aligned_beg < src_aligned_end && dst_aligned_beg >= src_aligned_beg) + return ReverseCopyOrigin(dst, src, size, stack); + return CopyOrigin(dst, src, size, stack); +} + +static void SetOrigin(const void *dst, uptr size, u32 origin) { + if (size == 0) + return; + + // Origin mapping is 4 bytes per 4 bytes of application memory. + // Here we extend the range such that its left and right bounds are both + // 4 byte aligned. + uptr x = unaligned_origin_for((uptr)dst); + uptr beg = x & ~3UL; // align down. + uptr end = (x + size + 3) & ~3UL; // align up. + u64 origin64 = ((u64)origin << 32) | origin; + // This is like memset, but the value is 32-bit. We unroll by 2 to write + // 64 bits at once. May want to unroll further to get 128-bit stores. 
+ if (beg & 7ULL) { + if (*(u32 *)beg != origin) + *(u32 *)beg = origin; + beg += 4; + } + for (uptr addr = beg; addr < (end & ~7UL); addr += 8) { + if (*(u64 *)addr == origin64) + continue; + *(u64 *)addr = origin64; + } + if (end & 7ULL) { + if (*(u32 *)(end - 4) != origin) + *(u32 *)(end - 4) = origin; + } +} + static void WriteShadowIfDifferent(dfsan_label label, uptr shadow_addr, uptr size) { dfsan_label *labelp = (dfsan_label *)shadow_addr; @@ -300,15 +530,77 @@ } } -extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_set_label( - dfsan_label label, void *addr, uptr size) { +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin +__dfsan_chain_origin(dfsan_origin id) { + GET_CALLER_PC_BP_SP; + (void)sp; + GET_STORE_STACK_TRACE_PC_BP(pc, bp); + return ChainOrigin(id, &stack); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_mem_origin_transfer( + const void *dst, const void *src, uptr len) { + // if (!MEM_IS_APP(dst)) return; + // if (!MEM_IS_APP(src)) return; + if (src == dst) + return; + GET_CALLER_PC_BP; + GET_STORE_STACK_TRACE_PC_BP(pc, bp); + MoveOrigin(dst, src, len, &stack); +} + +SANITIZER_INTERFACE_ATTRIBUTE void dfsan_mem_origin_transfer(const void *dst, + const void *src, + uptr len) { + __dfsan_mem_origin_transfer(dst, src, len); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_maybe_store_origin( + u16 s, void *p, uptr size, dfsan_origin o) { + if (UNLIKELY(s)) { + GET_CALLER_PC_BP_SP; + (void)sp; + GET_STORE_STACK_TRACE_PC_BP(pc, bp); + SetOrigin(p, size, ChainOrigin(o, &stack)); + } +} + +static void ClearOriginForZeroLabel(void *addr, uptr size) { + // If label is 0, releases the whole pages within the origin address range. + // Origins that are not on those pages are left unchanged. 
+ const uptr beg_origin_addr = (uptr)__dfsan::origin_for(addr); + const void *end_addr = (void *)((uptr)addr + size); + const uptr end_origin_addr = (uptr)__dfsan::origin_for(end_addr); + const uptr page_size = GetPageSizeCached(); + const uptr beg_aligned = RoundUpTo(beg_origin_addr, page_size); + const uptr end_aligned = RoundDownTo(end_origin_addr, page_size); + + // dfsan_set_label can be called from the following cases + // 1) mapped ranges by new/delete and malloc/free. This case has origin memory + // size > 50k, and happens less frequently. + // 2) zero-filling internal data structures by utility libraries. This case + // has origin memory size < 16k, and happens more often. + // Set kNumPagesThreshold to be 4 to avoid releasing small pages. + const int kNumPagesThreshold = 4; + if (beg_aligned + kNumPagesThreshold * page_size >= end_aligned) + return; + + ReleaseMemoryPagesToOS(beg_aligned, end_aligned); +} + +void SetShadow(dfsan_label label, void *addr, uptr size, dfsan_origin origin) { const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr); if (0 != label) { WriteShadowIfDifferent(label, beg_shadow_addr, size); + if (__dfsan_get_track_origins()) + SetOrigin(addr, size, origin); return; } + if (__dfsan_get_track_origins()) + ClearOriginForZeroLabel(addr, size); + // If label is 0, releases the pages within the shadow address range, and sets // the shadow addresses not on the pages to be 0. 
const void *end_addr = (void *)((uptr)addr + size); @@ -332,13 +624,35 @@ WriteShadowIfDifferent(label, end_aligned, end_shadow_addr - end_aligned); } +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_set_label( + dfsan_label label, dfsan_origin origin, void *addr, uptr size) { + SetShadow(label, addr, size, origin); +} + +#define MAYBE_INIT_ORIGIN(label) \ + dfsan_origin init_origin = 0; \ + if (label && __dfsan_get_track_origins()) { \ + GET_CALLER_PC_BP; \ + GET_STORE_STACK_TRACE_PC_BP(pc, bp); \ + init_origin = ChainOrigin(0, &stack, true); \ + } + SANITIZER_INTERFACE_ATTRIBUTE void dfsan_set_label(dfsan_label label, void *addr, uptr size) { - __dfsan_set_label(label, addr, size); + MAYBE_INIT_ORIGIN(label) + SetShadow(label, addr, size, init_origin); } SANITIZER_INTERFACE_ATTRIBUTE void dfsan_add_label(dfsan_label label, void *addr, uptr size) { + if (0 == label) + return; + + if (__dfsan_get_track_origins()) { + MAYBE_INIT_ORIGIN(label) + SetOrigin(addr, size, init_origin); + } + for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp) if (*labelp != label) *labelp = __dfsan_union(*labelp, label); @@ -354,6 +668,22 @@ return data_label; } +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label __dfso_dfsan_get_label( + long data, dfsan_label data_label, dfsan_label *ret_label, + dfsan_origin data_origin, dfsan_origin *ret_origin) { + *ret_label = 0; + *ret_origin = 0; + return data_label; +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin __dfso_dfsan_get_origin( + long data, dfsan_label data_label, dfsan_label *ret_label, + dfsan_origin data_origin, dfsan_origin *ret_origin) { + *ret_label = 0; + *ret_origin = 0; + return data_origin; +} + SANITIZER_INTERFACE_ATTRIBUTE dfsan_label dfsan_read_label(const void *addr, uptr size) { if (size == 0) @@ -361,6 +691,18 @@ return __dfsan_union_load(shadow_for(addr), size); } +SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin +dfsan_read_origin_of_first_taint(const void *addr, uptr size) { + 
return GetOriginIfTainted((uptr)addr, size); +} + +SANITIZER_INTERFACE_ATTRIBUTE void dfsan_set_label_origin(dfsan_label label, + dfsan_origin origin, + void *addr, + uptr size) { + __dfsan_set_label(label, origin, addr, size); +} + extern "C" SANITIZER_INTERFACE_ATTRIBUTE const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) { return &__dfsan_label_info[label]; @@ -414,6 +756,75 @@ } } +class Decorator : public __sanitizer::SanitizerCommonDecorator { + public: + Decorator() : SanitizerCommonDecorator() {} + const char *Origin() const { return Magenta(); } + const char *Name() const { return Green(); } +}; + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_trace( + const void *addr, const char *description) { + if (!__dfsan_get_track_origins()) { + Printf(" DFSan does not track any origins\n"); + return; + } + + const dfsan_label *shadow_addr = __dfsan::shadow_for(addr); + const dfsan_label label = *shadow_addr; + if (!label) { + Printf(" DFSan does not find any taint value at %x\n", addr); + return; + } + + const dfsan_origin *origin_addr = __dfsan::origin_for(addr); + const dfsan_origin origin = *origin_addr; + + Decorator d; + Printf(" %sTaint value 0x%x (at 0x%x) origin tracking (%s)%s\n", d.Origin(), + label, addr, description ? 
description : "", d.Default()); + Origin o = Origin::FromRawId(origin); + while (o.isChainedOrigin()) { + StackTrace stack; + u32 origin_id = o.raw_id(); + o = o.getNextChainedOrigin(&stack); + if (o.isChainedOrigin()) + Printf(" %sOrigin value: %x, Taint value was stored to memory at%s\n", + d.Origin(), origin_id, d.Default()); + else + Printf(" %sOrigin value: %x, Taint value was created at%s\n", d.Origin(), + origin_id, d.Default()); + stack.Print(); + } +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin +dfsan_get_init_origin(const void *addr) { + if (!__dfsan_get_track_origins()) { + return 0; + } + + const dfsan_label *shadow_addr = __dfsan::shadow_for(addr); + const dfsan_label label = *shadow_addr; + if (!label) { + return 0; + } + + const dfsan_origin *origin_addr = __dfsan::origin_for(addr); + const dfsan_origin origin = *origin_addr; + + Origin o = Origin::FromRawId(origin); + while (o.isChainedOrigin()) { + StackTrace stack; + dfsan_origin origin_id = o.raw_id(); + o = o.getNextChainedOrigin(&stack); + if (!o.isChainedOrigin()) { + return origin_id; + } + } + return 0; +} + #define GET_FATAL_STACK_TRACE_PC_BP(pc, bp) \ BufferedStackTrace stack; \ stack.Unwind(pc, bp, nullptr, common_flags()->fast_unwind_on_fatal); @@ -422,7 +833,17 @@ void *context, bool request_fast, u32 max_depth) { - Unwind(max_depth, pc, bp, context, 0, 0, false); + using namespace __dfsan; + DFsanThread *t = GetCurrentThread(); + if (!t || !StackTrace::WillUseFastUnwind(request_fast)) { + // Block reports from our interceptors during _Unwind_Backtrace. 
+ // SymbolizerScope sym_scope; + return Unwind(max_depth, pc, bp, context, 0, 0, false); + } + if (StackTrace::WillUseFastUnwind(request_fast)) + Unwind(max_depth, pc, bp, nullptr, t->stack_top(), t->stack_bottom(), true); + else + Unwind(max_depth, pc, 0, context, 0, 0, false); } extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_print_stack_trace() { @@ -456,6 +877,19 @@ if (common_flags()->help) parser.PrintFlagDescriptions(); } +void ScopedThreadLocalStateBackup::Backup() {} + +void ScopedThreadLocalStateBackup::Restore() { + internal_memset(__dfsan_arg_tls, 0, sizeof(__dfsan_arg_tls)); + internal_memset(__dfsan_retval_tls, 0, sizeof(__dfsan_retval_tls)); + + if (__dfsan_get_track_origins()) { + internal_memset(__dfsan_arg_origin_tls, 0, sizeof(__dfsan_arg_origin_tls)); + internal_memset(&__dfsan_retval_origin_tls, 0, + sizeof(__dfsan_retval_origin_tls)); + } +} + static void InitializePlatformEarly() { AvoidCVE_2016_2143(); #ifdef DFSAN_RUNTIME_VMA @@ -493,7 +927,7 @@ Die(); } -static void dfsan_init(int argc, char **argv, char **envp) { +static void DFsanInit(int argc, char **argv, char **envp) { InitializeFlags(); ::InitializePlatformEarly(); @@ -508,7 +942,7 @@ // will load our executable in the middle of our unused region. This mostly // works so long as the program doesn't use too much memory. We support this // case by disabling memory protection when ASLR is disabled. 
- uptr init_addr = (uptr)&dfsan_init; + uptr init_addr = (uptr)&DFsanInit; if (!(init_addr >= UnusedAddr() && init_addr < AppAddr())) MmapFixedNoAccess(UnusedAddr(), AppAddr() - UnusedAddr()); @@ -519,10 +953,16 @@ Atexit(dfsan_fini); AddDieCallback(dfsan_fini); + DFsanTSDInit(DFsanTSDDtor); + DFsanThread *main_thread = DFsanThread::Create(nullptr, nullptr, nullptr); + SetCurrentThread(main_thread); + main_thread->ThreadStart(); + __dfsan_label_info[kInitializingLabel].desc = ""; } #if SANITIZER_CAN_USE_PREINIT_ARRAY -__attribute__((section(".preinit_array"), used)) -static void (*dfsan_init_ptr)(int, char **, char **) = dfsan_init; +__attribute__((section(".preinit_array"), + used)) static void (*dfsan_init_ptr)(int, char **, + char **) = DFsanInit; #endif diff --git a/compiler-rt/lib/dfsan/dfsan.syms.extra b/compiler-rt/lib/dfsan/dfsan.syms.extra --- a/compiler-rt/lib/dfsan/dfsan.syms.extra +++ b/compiler-rt/lib/dfsan/dfsan.syms.extra @@ -1,3 +1,4 @@ dfsan_* __dfsan_* __dfsw_* +__dfso_* \ No newline at end of file diff --git a/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h @@ -0,0 +1,27 @@ +//===-- dfsan_chained_origin_depot.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A storage for chained origins. 
+//===----------------------------------------------------------------------===// +#ifndef DFSAN_CHAINED_ORIGIN_DEPOT_H +#define DFSAN_CHAINED_ORIGIN_DEPOT_H + +#include "sanitizer_common/sanitizer_common.h" + +namespace __dfsan { + +StackDepotStats *ChainedOriginDepotGetStats(); +bool ChainedOriginDepotPut(u32 here_id, u32 prev_id, u32 *new_id); +// Retrieves a stored stack trace by the id. +u32 ChainedOriginDepotGet(u32 id, u32 *other); +void ChainedOriginDepotLockAll(); +void ChainedOriginDepotUnlockAll(); + +} // namespace __dfsan + +#endif // DFSAN_CHAINED_ORIGIN_DEPOT_H diff --git a/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.cpp b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.cpp @@ -0,0 +1,128 @@ +//===-- dfsan_chained_origin_depot.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A storage for chained origins. +//===----------------------------------------------------------------------===// + +#include "dfsan_chained_origin_depot.h" + +#include "sanitizer_common/sanitizer_stackdepotbase.h" + +namespace __dfsan { + +struct ChainedOriginDepotDesc { + u32 here_id; + u32 prev_id; +}; + +struct ChainedOriginDepotNode { + ChainedOriginDepotNode *link; + u32 id; + u32 here_id; + u32 prev_id; + + typedef ChainedOriginDepotDesc args_type; + + bool eq(u32 hash, const args_type &args) const { + return here_id == args.here_id && prev_id == args.prev_id; + } + + static uptr storage_size(const args_type &args) { + return sizeof(ChainedOriginDepotNode); + } + + /* This is murmur2 hash for the 64->32 bit case. 
+ It does not behave all that well because the keys have a very biased + distribution (I've seen 7-element buckets with the table only 14% full). + + here_id is built of + * (1 bit) Reserved, zero. + * (8 bits) Part id = bits 13..20 of the hash value of here_id's key. + * (23 bits) Sequential number (each part has its own sequence). + + prev_id has either the same distribution as here_id (but with a 4:8:20 + split), or 0 indicating the start of a chain. Either case can dominate + depending on the workload. + */ + static u32 hash(const args_type &args) { + const u32 m = 0x5bd1e995; + const u32 seed = 0x9747b28c; + const u32 r = 24; + u32 h = seed; + u32 k = args.here_id; + k *= m; + k ^= k >> r; + k *= m; + h *= m; + h ^= k; + + k = args.prev_id; + k *= m; + k ^= k >> r; + k *= m; + h *= m; + h ^= k; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + return h; + } + static bool is_valid(const args_type &args) { return true; } + void store(const args_type &args, u32 other_hash) { + here_id = args.here_id; + prev_id = args.prev_id; + } + + args_type load() const { + args_type ret = {here_id, prev_id}; + return ret; + } + + struct Handle { + ChainedOriginDepotNode *node_; + Handle() : node_(nullptr) {} + explicit Handle(ChainedOriginDepotNode *node) : node_(node) {} + bool valid() { return node_; } + u32 id() { return node_->id; } + int here_id() { return node_->here_id; } + int prev_id() { return node_->prev_id; } + }; + + Handle get_handle() { return Handle(this); } + + typedef Handle handle_type; +}; + +static StackDepotBase chainedOriginDepot; + +StackDepotStats *ChainedOriginDepotGetStats() { + return chainedOriginDepot.GetStats(); +} + +bool ChainedOriginDepotPut(u32 here_id, u32 prev_id, u32 *new_id) { + ChainedOriginDepotDesc desc = {here_id, prev_id}; + bool inserted; + ChainedOriginDepotNode::Handle h = chainedOriginDepot.Put(desc, &inserted); + *new_id = h.valid() ? h.id() : 0; + return inserted; +} + +// Retrieves a stored stack trace by the id. 
+u32 ChainedOriginDepotGet(u32 id, u32 *other) { + ChainedOriginDepotDesc desc = chainedOriginDepot.Get(id); + if (other) + *other = desc.prev_id; + return desc.here_id; +} + +void ChainedOriginDepotLockAll() { chainedOriginDepot.LockAll(); } + +void ChainedOriginDepotUnlockAll() { chainedOriginDepot.UnlockAll(); } + +} // namespace __dfsan diff --git a/compiler-rt/lib/dfsan/dfsan_custom.cpp b/compiler-rt/lib/dfsan/dfsan_custom.cpp --- a/compiler-rt/lib/dfsan/dfsan_custom.cpp +++ b/compiler-rt/lib/dfsan/dfsan_custom.cpp @@ -1,4 +1,4 @@ -//===-- dfsan.cpp ---------------------------------------------------------===// +//===-- dfsan_custom.cpp --------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -36,10 +36,15 @@ #include #include +#include + #include "dfsan/dfsan.h" +#include "dfsan/dfsan_chained_origin_depot.h" +#include "dfsan/dfsan_thread.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_internal_defs.h" #include "sanitizer_common/sanitizer_linux.h" +#include "sanitizer_common/sanitizer_stackdepot.h" using namespace __dfsan; @@ -62,6 +67,15 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_stat( + const char *path, struct stat *buf, dfsan_label path_label, + dfsan_label buf_label, dfsan_label *ret_label, dfsan_origin path_origin, + dfsan_origin buf_origin, dfsan_origin *ret_origin) { + int ret = __dfsw_stat(path, buf, path_label, buf_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_fstat(int fd, struct stat *buf, dfsan_label fd_label, dfsan_label buf_label, @@ -73,29 +87,66 @@ return ret; } -SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strchr(const char *s, int c, - dfsan_label s_label, - dfsan_label c_label, - dfsan_label *ret_label) { +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_fstat( + int fd, struct stat *buf, dfsan_label 
fd_label, dfsan_label buf_label, + dfsan_label *ret_label, dfsan_origin fd_origin, dfsan_origin buf_origin, + dfsan_origin *ret_origin) { + int ret = __dfsw_fstat(fd, buf, fd_label, buf_label, ret_label); + *ret_origin = 0; + return ret; +} + +static char *dfsan_strchr(const char *s, int c, size_t *pos) { for (size_t i = 0;; ++i) { if (s[i] == c || s[i] == 0) { - if (flags().strict_data_dependencies) { - *ret_label = s_label; - } else { - *ret_label = dfsan_union(dfsan_read_label(s, i + 1), - dfsan_union(s_label, c_label)); - } - // If s[i] is the \0 at the end of the string, and \0 is not the // character we are searching for, then return null. if (s[i] == 0 && c != 0) { + *pos = i + 1; return nullptr; } + *pos = i + 1; return const_cast(s + i); } } } +static void dfsan_strchr_label(const char *s, size_t pos, dfsan_label s_label, + dfsan_label c_label, dfsan_label *ret_label) { + if (flags().strict_data_dependencies) { + *ret_label = s_label; + } else { + *ret_label = + dfsan_union(dfsan_read_label(s, pos), dfsan_union(s_label, c_label)); + } +} + +SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strchr(const char *s, int c, + dfsan_label s_label, + dfsan_label c_label, + dfsan_label *ret_label) { + size_t pos; + char *r = dfsan_strchr(s, c, &pos); + dfsan_strchr_label(s, pos, s_label, c_label, ret_label); + return r; +} + +SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strchr( + const char *s, int c, dfsan_label s_label, dfsan_label c_label, + dfsan_label *ret_label, dfsan_origin s_origin, dfsan_origin c_origin, + dfsan_origin *ret_origin) { + size_t pos; + char *r = dfsan_strchr(s, c, &pos); + dfsan_strchr_label(s, pos, s_label, c_label, ret_label); + if (flags().strict_data_dependencies) { + *ret_origin = s_origin; + } else { + dfsan_origin o = dfsan_read_origin_of_first_taint(s, pos); + *ret_origin = o ? o : (s_label ? 
s_origin : c_origin); + } + return r; +} + SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strpbrk(const char *s, const char *accept, dfsan_label s_label, @@ -114,29 +165,84 @@ return const_cast(ret); } +SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strpbrk( + const char *s, const char *accept, dfsan_label s_label, + dfsan_label accept_label, dfsan_label *ret_label, dfsan_origin s_origin, + dfsan_origin accept_origin, dfsan_origin *ret_origin) { + const char *ret = strpbrk(s, accept); + if (flags().strict_data_dependencies) { + *ret_label = ret ? s_label : 0; + *ret_origin = ret ? s_origin : 0; + } else { + size_t s_bytes_read = (ret ? ret - s : strlen(s)) + 1; + *ret_label = + dfsan_union(dfsan_read_label(s, s_bytes_read), + dfsan_union(dfsan_read_label(accept, strlen(accept) + 1), + dfsan_union(s_label, accept_label))); + dfsan_origin o = dfsan_read_origin_of_first_taint(s, s_bytes_read); + if (o) { + *ret_origin = o; + } else { + o = dfsan_read_origin_of_first_taint(accept, strlen(accept) + 1); + *ret_origin = o ? o : (s_label ? 
s_origin : accept_origin); + } + } + return const_cast(ret); +} + static int dfsan_memcmp_bcmp(const void *s1, const void *s2, size_t n, - dfsan_label s1_label, dfsan_label s2_label, - dfsan_label n_label, dfsan_label *ret_label) { + size_t *pos) { const char *cs1 = (const char *) s1, *cs2 = (const char *) s2; for (size_t i = 0; i != n; ++i) { if (cs1[i] != cs2[i]) { - if (flags().strict_data_dependencies) { - *ret_label = 0; - } else { - *ret_label = dfsan_union(dfsan_read_label(cs1, i + 1), - dfsan_read_label(cs2, i + 1)); - } + *pos = i + 1; return cs1[i] - cs2[i]; } } + *pos = n; + return 0; +} + +static dfsan_label dfsan_get_label_of_string_prefixs(const void *s1, + const void *s2, + size_t pos) { + if (flags().strict_data_dependencies) + return 0; + return dfsan_union(dfsan_read_label((const char *)s1, pos), + dfsan_read_label((const char *)s2, pos)); +} +static void dfsan_get_origin_of_string_prefixs(const void *s1, const void *s2, + size_t pos, + dfsan_label *ret_label, + dfsan_origin *ret_origin) { + *ret_label = dfsan_get_label_of_string_prefixs(s1, s2, pos); if (flags().strict_data_dependencies) { - *ret_label = 0; - } else { - *ret_label = dfsan_union(dfsan_read_label(cs1, n), - dfsan_read_label(cs2, n)); + *ret_origin = 0; + return; } - return 0; + dfsan_origin o = dfsan_read_origin_of_first_taint(s1, pos); + if (o) + *ret_origin = o; + else + *ret_origin = dfsan_read_origin_of_first_taint(s2, pos); +} + +static int dfsan_memcmp_bcmp_label(const void *s1, const void *s2, size_t n, + dfsan_label *ret_label) { + size_t pos; + int r = dfsan_memcmp_bcmp(s1, s2, n, &pos); + *ret_label = dfsan_get_label_of_string_prefixs(s1, s2, pos); + return r; +} + +static int dfsan_memcmp_bcmp_origin(const void *s1, const void *s2, size_t n, + dfsan_label *ret_label, + dfsan_origin *ret_origin) { + size_t pos; + int r = dfsan_memcmp_bcmp(s1, s2, n, &pos); + dfsan_get_origin_of_string_prefixs(s1, s2, pos, ret_label, ret_origin); + return r; } 
DECLARE_WEAK_INTERCEPTOR_HOOK(dfsan_weak_hook_memcmp, uptr caller_pc, @@ -144,6 +250,12 @@ dfsan_label s1_label, dfsan_label s2_label, dfsan_label n_label) +DECLARE_WEAK_INTERCEPTOR_HOOK(dfsan_weak_hook_origin_memcmp, uptr caller_pc, + const void *s1, const void *s2, size_t n, + dfsan_label s1_label, dfsan_label s2_label, + dfsan_label n_label, dfsan_origin s1_origin, + dfsan_origin s2_origin, dfsan_origin n_origin) + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_memcmp(const void *s1, const void *s2, size_t n, dfsan_label s1_label, dfsan_label s2_label, @@ -151,7 +263,18 @@ dfsan_label *ret_label) { CALL_WEAK_INTERCEPTOR_HOOK(dfsan_weak_hook_memcmp, GET_CALLER_PC(), s1, s2, n, s1_label, s2_label, n_label); - return dfsan_memcmp_bcmp(s1, s2, n, s1_label, s2_label, n_label, ret_label); + return dfsan_memcmp_bcmp_label(s1, s2, n, ret_label); +} + +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_memcmp( + const void *s1, const void *s2, size_t n, dfsan_label s1_label, + dfsan_label s2_label, dfsan_label n_label, dfsan_label *ret_label, + dfsan_origin s1_origin, dfsan_origin s2_origin, dfsan_origin n_origin, + dfsan_origin *ret_origin) { + CALL_WEAK_INTERCEPTOR_HOOK(dfsan_weak_hook_origin_memcmp, GET_CALLER_PC(), s1, + s2, n, s1_label, s2_label, n_label, s1_origin, + s2_origin, n_origin); + return dfsan_memcmp_bcmp_origin(s1, s2, n, ret_label, ret_origin); } SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_bcmp(const void *s1, const void *s2, @@ -159,50 +282,106 @@ dfsan_label s2_label, dfsan_label n_label, dfsan_label *ret_label) { - return dfsan_memcmp_bcmp(s1, s2, n, s1_label, s2_label, n_label, ret_label); + return dfsan_memcmp_bcmp_label(s1, s2, n, ret_label); +} + +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_bcmp( + const void *s1, const void *s2, size_t n, dfsan_label s1_label, + dfsan_label s2_label, dfsan_label n_label, dfsan_label *ret_label, + dfsan_origin s1_origin, dfsan_origin s2_origin, dfsan_origin n_origin, + dfsan_origin *ret_origin) { + return dfsan_memcmp_bcmp_origin(s1, 
s2, n, ret_label, ret_origin); +} + +static int dfsan_strcmp(const char *s1, const char *s2, size_t *pos) { + size_t i = 0; + for (;; ++i) { + if (s1[i] != s2[i] || s1[i] == 0 || s2[i] == 0) { + *pos = i + 1; + return s1[i] - s2[i]; + } + } + *pos = i; + return 0; } DECLARE_WEAK_INTERCEPTOR_HOOK(dfsan_weak_hook_strcmp, uptr caller_pc, const char *s1, const char *s2, dfsan_label s1_label, dfsan_label s2_label) +DECLARE_WEAK_INTERCEPTOR_HOOK(dfsan_weak_hook_origin_strcmp, uptr caller_pc, + const char *s1, const char *s2, + dfsan_label s1_label, dfsan_label s2_label, + dfsan_origin s1_origin, dfsan_origin s2_origin) + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_strcmp(const char *s1, const char *s2, dfsan_label s1_label, dfsan_label s2_label, dfsan_label *ret_label) { CALL_WEAK_INTERCEPTOR_HOOK(dfsan_weak_hook_strcmp, GET_CALLER_PC(), s1, s2, s1_label, s2_label); - for (size_t i = 0;; ++i) { - if (s1[i] != s2[i] || s1[i] == 0 || s2[i] == 0) { - if (flags().strict_data_dependencies) { - *ret_label = 0; - } else { - *ret_label = dfsan_union(dfsan_read_label(s1, i + 1), - dfsan_read_label(s2, i + 1)); - } - return s1[i] - s2[i]; + size_t pos; + int r = dfsan_strcmp(s1, s2, &pos); + *ret_label = dfsan_get_label_of_string_prefixs(s1, s2, pos); + return r; +} + +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_strcmp( + const char *s1, const char *s2, dfsan_label s1_label, dfsan_label s2_label, + dfsan_label *ret_label, dfsan_origin s1_origin, dfsan_origin s2_origin, + dfsan_origin *ret_origin) { + CALL_WEAK_INTERCEPTOR_HOOK(dfsan_weak_hook_origin_strcmp, GET_CALLER_PC(), s1, + s2, s1_label, s2_label, s1_origin, s2_origin); + size_t pos; + int r = dfsan_strcmp(s1, s2, &pos); + dfsan_get_origin_of_string_prefixs(s1, s2, pos, ret_label, ret_origin); + return r; +} + +static int dfsan_strcasecmp(const char *s1, const char *s2, size_t *pos) { + size_t i = 0; + for (;; ++i) { + char s1_lower = tolower(s1[i]); + char s2_lower = tolower(s2[i]); + + if (s1_lower != s2_lower || s1[i] == 0 || 
s2[i] == 0) { + *pos = i + 1; + return s1_lower - s2_lower; } } + *pos = i; return 0; } SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_strcasecmp(const char *s1, const char *s2, dfsan_label s1_label, dfsan_label s2_label, dfsan_label *ret_label) { - for (size_t i = 0;; ++i) { - char s1_lower = tolower(s1[i]); - char s2_lower = tolower(s2[i]); - - if (s1_lower != s2_lower || s1[i] == 0 || s2[i] == 0) { - if (flags().strict_data_dependencies) { - *ret_label = 0; - } else { - *ret_label = dfsan_union(dfsan_read_label(s1, i + 1), - dfsan_read_label(s2, i + 1)); - } - return s1_lower - s2_lower; + size_t pos; + int r = dfsan_strcasecmp(s1, s2, &pos); + *ret_label = dfsan_get_label_of_string_prefixs(s1, s2, pos); + return r; +} + +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_strcasecmp( + const char *s1, const char *s2, dfsan_label s1_label, dfsan_label s2_label, + dfsan_label *ret_label, dfsan_origin s1_origin, dfsan_origin s2_origin, + dfsan_origin *ret_origin) { + size_t pos; + int r = dfsan_strcasecmp(s1, s2, &pos); + dfsan_get_origin_of_string_prefixs(s1, s2, pos, ret_label, ret_origin); + return r; +} + +static int dfsan_strncmp(const char *s1, const char *s2, size_t n, + size_t *pos) { + size_t i = 0; + for (;; ++i) { + if (s1[i] != s2[i] || s1[i] == 0 || s2[i] == 0 || i == n - 1) { + *pos = i + 1; + return s1[i] - s2[i]; } } + *pos = i; return 0; } @@ -211,6 +390,12 @@ dfsan_label s1_label, dfsan_label s2_label, dfsan_label n_label) +DECLARE_WEAK_INTERCEPTOR_HOOK(dfsan_weak_hook_origin_strncmp, uptr caller_pc, + const char *s1, const char *s2, size_t n, + dfsan_label s1_label, dfsan_label s2_label, + dfsan_label n_label, dfsan_origin s1_origin, + dfsan_origin s2_origin, dfsan_origin n_origin) + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_strncmp(const char *s1, const char *s2, size_t n, dfsan_label s1_label, dfsan_label s2_label, @@ -224,46 +409,80 @@ CALL_WEAK_INTERCEPTOR_HOOK(dfsan_weak_hook_strncmp, GET_CALLER_PC(), s1, s2, n, s1_label, s2_label, n_label); - for (size_t i 
= 0;; ++i) { - if (s1[i] != s2[i] || s1[i] == 0 || s2[i] == 0 || i == n - 1) { - if (flags().strict_data_dependencies) { - *ret_label = 0; - } else { - *ret_label = dfsan_union(dfsan_read_label(s1, i + 1), - dfsan_read_label(s2, i + 1)); - } - return s1[i] - s2[i]; - } - } - return 0; + size_t pos; + int r = dfsan_strncmp(s1, s2, n, &pos); + *ret_label = dfsan_get_label_of_string_prefixs(s1, s2, pos); + return r; } -SANITIZER_INTERFACE_ATTRIBUTE int -__dfsw_strncasecmp(const char *s1, const char *s2, size_t n, - dfsan_label s1_label, dfsan_label s2_label, - dfsan_label n_label, dfsan_label *ret_label) { +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_strncmp( + const char *s1, const char *s2, size_t n, dfsan_label s1_label, + dfsan_label s2_label, dfsan_label n_label, dfsan_label *ret_label, + dfsan_origin s1_origin, dfsan_origin s2_origin, dfsan_origin n_origin, + dfsan_origin *ret_origin) { if (n == 0) { *ret_label = 0; + *ret_origin = 0; return 0; } - for (size_t i = 0;; ++i) { + CALL_WEAK_INTERCEPTOR_HOOK(dfsan_weak_hook_origin_strncmp, GET_CALLER_PC(), + s1, s2, n, s1_label, s2_label, n_label, s1_origin, + s2_origin, n_origin); + + size_t pos; + int r = dfsan_strncmp(s1, s2, n, &pos); + dfsan_get_origin_of_string_prefixs(s1, s2, pos, ret_label, ret_origin); + return r; +} + +static int dfsan_strncasecmp(const char *s1, const char *s2, size_t n, + size_t *pos) { + size_t i = 0; + for (;; ++i) { char s1_lower = tolower(s1[i]); char s2_lower = tolower(s2[i]); if (s1_lower != s2_lower || s1[i] == 0 || s2[i] == 0 || i == n - 1) { - if (flags().strict_data_dependencies) { - *ret_label = 0; - } else { - *ret_label = dfsan_union(dfsan_read_label(s1, i + 1), - dfsan_read_label(s2, i + 1)); - } + *pos = i + 1; return s1_lower - s2_lower; } } + *pos = i; return 0; } +SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_strncasecmp( + const char *s1, const char *s2, size_t n, dfsan_label s1_label, + dfsan_label s2_label, dfsan_label n_label, dfsan_label *ret_label) { + if (n == 0) { + 
*ret_label = 0; + return 0; + } + + size_t pos; + int r = dfsan_strncasecmp(s1, s2, n, &pos); + *ret_label = dfsan_get_label_of_string_prefixs(s1, s2, pos); + return r; +} + +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_strncasecmp( + const char *s1, const char *s2, size_t n, dfsan_label s1_label, + dfsan_label s2_label, dfsan_label n_label, dfsan_label *ret_label, + dfsan_origin s1_origin, dfsan_origin s2_origin, dfsan_origin n_origin, + dfsan_origin *ret_origin) { + if (n == 0) { + *ret_label = 0; + *ret_origin = 0; + return 0; + } + + size_t pos; + int r = dfsan_strncasecmp(s1, s2, n, &pos); + dfsan_get_origin_of_string_prefixs(s1, s2, pos, ret_label, ret_origin); + return r; +} + SANITIZER_INTERFACE_ATTRIBUTE void *__dfsw_calloc(size_t nmemb, size_t size, dfsan_label nmemb_label, dfsan_label size_label, @@ -274,6 +493,15 @@ return p; } +SANITIZER_INTERFACE_ATTRIBUTE void *__dfso_calloc( + size_t nmemb, size_t size, dfsan_label nmemb_label, dfsan_label size_label, + dfsan_label *ret_label, dfsan_origin nmemb_origin, dfsan_origin size_origin, + dfsan_origin *ret_origin) { + void *p = __dfsw_calloc(nmemb, size, nmemb_label, size_label, ret_label); + *ret_origin = 0; + return p; +} + SANITIZER_INTERFACE_ATTRIBUTE size_t __dfsw_strlen(const char *s, dfsan_label s_label, dfsan_label *ret_label) { size_t ret = strlen(s); @@ -285,6 +513,31 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE size_t __dfso_strlen(const char *s, + dfsan_label s_label, + dfsan_label *ret_label, + dfsan_origin s_origin, + dfsan_origin *ret_origin) { + size_t ret = __dfsw_strlen(s, s_label, ret_label); + if (flags().strict_data_dependencies) { + *ret_origin = 0; + } else { + *ret_origin = dfsan_read_origin_of_first_taint(s, ret + 1); + } + return ret; +} + +static void *dfsan_memmove(void *dest, const void *src, size_t n) { + dfsan_label *sdest = shadow_for(dest); + const dfsan_label *ssrc = shadow_for(src); + internal_memmove((void *)sdest, (const void *)ssrc, n * sizeof(dfsan_label)); + return 
internal_memmove(dest, src, n); +} + +static void *dfsan_origin_memmove(void *dest, const void *src, size_t n) { + dfsan_mem_origin_transfer(dest, src, n); + return dfsan_memmove(dest, src, n); +} static void *dfsan_memcpy(void *dest, const void *src, size_t n) { dfsan_label *sdest = shadow_for(dest); @@ -293,11 +546,22 @@ return internal_memcpy(dest, src, n); } +static void *dfsan_origin_memcpy(void *dest, const void *src, size_t n) { + dfsan_mem_origin_transfer(dest, src, n); + return dfsan_memcpy(dest, src, n); +} + static void dfsan_memset(void *s, int c, dfsan_label c_label, size_t n) { internal_memset(s, c, n); dfsan_set_label(c_label, s, n); } +static void dfsan_origin_memset(void *s, int c, dfsan_label c_label, + dfsan_origin c_origin, size_t n) { + internal_memset(s, c, n); + dfsan_set_label_origin(c_label, c_origin, s, n); +} + SANITIZER_INTERFACE_ATTRIBUTE void *__dfsw_memcpy(void *dest, const void *src, size_t n, dfsan_label dest_label, dfsan_label src_label, @@ -306,6 +570,36 @@ return dfsan_memcpy(dest, src, n); } +SANITIZER_INTERFACE_ATTRIBUTE +void *__dfso_memcpy(void *dest, const void *src, size_t n, + dfsan_label dest_label, dfsan_label src_label, + dfsan_label n_label, dfsan_label *ret_label, + dfsan_origin dest_origin, dfsan_origin src_origin, + dfsan_origin n_origin, dfsan_origin *ret_origin) { + *ret_label = dest_label; + *ret_origin = dest_origin; + return dfsan_origin_memcpy(dest, src, n); +} + +SANITIZER_INTERFACE_ATTRIBUTE +void *__dfsw_memmove(void *dest, const void *src, size_t n, + dfsan_label dest_label, dfsan_label src_label, + dfsan_label n_label, dfsan_label *ret_label) { + *ret_label = dest_label; + return dfsan_memmove(dest, src, n); +} + +SANITIZER_INTERFACE_ATTRIBUTE +void *__dfso_memmove(void *dest, const void *src, size_t n, + dfsan_label dest_label, dfsan_label src_label, + dfsan_label n_label, dfsan_label *ret_label, + dfsan_origin dest_origin, dfsan_origin src_origin, + dfsan_origin n_origin, dfsan_origin *ret_origin) { + 
*ret_label = dest_label; + *ret_origin = dest_origin; + return dfsan_origin_memmove(dest, src, n); +} + SANITIZER_INTERFACE_ATTRIBUTE void *__dfsw_memset(void *s, int c, size_t n, dfsan_label s_label, dfsan_label c_label, @@ -315,6 +609,49 @@ return s; } +SANITIZER_INTERFACE_ATTRIBUTE +void *__dfso_memset(void *s, int c, size_t n, dfsan_label s_label, + dfsan_label c_label, dfsan_label n_label, + dfsan_label *ret_label, dfsan_origin s_origin, + dfsan_origin c_origin, dfsan_origin n_origin, + dfsan_origin *ret_origin) { + dfsan_origin_memset(s, c, c_label, c_origin, n); + *ret_label = s_label; + *ret_origin = s_origin; + return s; +} + +SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strcat(char *dest, const char *src, + dfsan_label dest_label, + dfsan_label src_label, + dfsan_label *ret_label) { + size_t dest_len = strlen(dest); + char *ret = strcat(dest, src); + dfsan_label *sdest = shadow_for(dest + dest_len); + const dfsan_label *ssrc = shadow_for(src); + internal_memcpy((void *)sdest, (const void *)ssrc, + strlen(src) * sizeof(dfsan_label)); + *ret_label = dest_label; + return ret; +} + +SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strcat( + char *dest, const char *src, dfsan_label dest_label, dfsan_label src_label, + dfsan_label *ret_label, dfsan_origin dest_origin, dfsan_origin src_origin, + dfsan_origin *ret_origin) { + size_t dest_len = strlen(dest); + char *ret = strcat(dest, src); + dfsan_label *sdest = shadow_for(dest + dest_len); + const dfsan_label *ssrc = shadow_for(src); + size_t src_len = strlen(src); + dfsan_mem_origin_transfer(dest + dest_len, src, src_len); + internal_memcpy((void *)sdest, (const void *)ssrc, + src_len * sizeof(dfsan_label)); + *ret_label = dest_label; + *ret_origin = dest_origin; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE char * __dfsw_strdup(const char *s, dfsan_label s_label, dfsan_label *ret_label) { size_t len = strlen(s); @@ -324,6 +661,19 @@ return static_cast(p); } +SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strdup(const 
char *s, + dfsan_label s_label, + dfsan_label *ret_label, + dfsan_origin s_origin, + dfsan_origin *ret_origin) { + size_t len = strlen(s); + void *p = malloc(len + 1); + dfsan_origin_memcpy(p, s, len + 1); + *ret_label = 0; + *ret_origin = 0; + return static_cast(p); +} + SANITIZER_INTERFACE_ATTRIBUTE char * __dfsw_strncpy(char *s1, const char *s2, size_t n, dfsan_label s1_label, dfsan_label s2_label, dfsan_label n_label, @@ -340,6 +690,24 @@ return s1; } +SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strncpy( + char *s1, const char *s2, size_t n, dfsan_label s1_label, + dfsan_label s2_label, dfsan_label n_label, dfsan_label *ret_label, + dfsan_origin s1_origin, dfsan_origin s2_origin, dfsan_origin n_origin, + dfsan_origin *ret_origin) { + size_t len = strlen(s2); + if (len < n) { + dfsan_origin_memcpy(s1, s2, len + 1); + dfsan_origin_memset(s1 + len + 1, 0, 0, 0, n - len - 1); + } else { + dfsan_origin_memcpy(s1, s2, n); + } + + *ret_label = s1_label; + *ret_origin = s1_origin; + return s1; +} + SANITIZER_INTERFACE_ATTRIBUTE ssize_t __dfsw_pread(int fd, void *buf, size_t count, off_t offset, dfsan_label fd_label, dfsan_label buf_label, @@ -352,11 +720,21 @@ return ret; } -SANITIZER_INTERFACE_ATTRIBUTE ssize_t -__dfsw_read(int fd, void *buf, size_t count, - dfsan_label fd_label, dfsan_label buf_label, - dfsan_label count_label, - dfsan_label *ret_label) { +// Origin-tracking wrapper for pread: delegates to __dfsw_pread and reports a zero origin for the return value. +SANITIZER_INTERFACE_ATTRIBUTE ssize_t __dfso_pread( + int fd, void *buf, size_t count, off_t offset, dfsan_label fd_label, + dfsan_label buf_label, dfsan_label count_label, dfsan_label offset_label, + dfsan_label *ret_label, dfsan_origin fd_origin, dfsan_origin buf_origin, + dfsan_origin count_origin, dfsan_origin offset_origin, + dfsan_origin *ret_origin) { + ssize_t ret = __dfsw_pread(fd, buf, count, offset, fd_label, buf_label, + count_label, offset_label, ret_label); + *ret_origin = 0; + return ret; +} + +SANITIZER_INTERFACE_ATTRIBUTE ssize_t __dfsw_read( + int fd, void *buf, size_t count, dfsan_label fd_label, +
dfsan_label buf_label, dfsan_label count_label, dfsan_label *ret_label) { ssize_t ret = read(fd, buf, count); if (ret > 0) dfsan_set_label(0, buf, ret); @@ -364,6 +742,17 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE ssize_t __dfso_read( + int fd, void *buf, size_t count, dfsan_label fd_label, + dfsan_label buf_label, dfsan_label count_label, dfsan_label *ret_label, + dfsan_origin fd_origin, dfsan_origin buf_origin, dfsan_origin count_origin, + dfsan_origin *ret_origin) { + ssize_t ret = + __dfsw_read(fd, buf, count, fd_label, buf_label, count_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_clock_gettime(clockid_t clk_id, struct timespec *tp, dfsan_label clk_id_label, @@ -376,7 +765,16 @@ return ret; } -static void unpoison(const void *ptr, uptr size) { +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_clock_gettime( + clockid_t clk_id, struct timespec *tp, dfsan_label clk_id_label, + dfsan_label tp_label, dfsan_label *ret_label, dfsan_origin clk_id_origin, + dfsan_origin tp_origin, dfsan_origin *ret_origin) { + int ret = __dfsw_clock_gettime(clk_id, tp, clk_id_label, tp_label, ret_label); + *ret_origin = 0; + return ret; +} + +static void dfsan_set_zero_label(const void *ptr, uptr size) { dfsan_set_label(0, const_cast(ptr), size); } @@ -389,23 +787,50 @@ void *handle = dlopen(filename, flag); link_map *map = GET_LINK_MAP_BY_DLOPEN_HANDLE(handle); if (map) - ForEachMappedRegion(map, unpoison); + ForEachMappedRegion(map, dfsan_set_zero_label); *ret_label = 0; return handle; } -struct pthread_create_info { - void *(*start_routine_trampoline)(void *, void *, dfsan_label, dfsan_label *); - void *start_routine; - void *arg; -}; +SANITIZER_INTERFACE_ATTRIBUTE void *__dfso_dlopen( + const char *filename, int flag, dfsan_label filename_label, + dfsan_label flag_label, dfsan_label *ret_label, + dfsan_origin filename_origin, dfsan_origin flag_origin, + dfsan_origin *ret_origin) { + void *handle = + __dfsw_dlopen(filename, flag, 
filename_label, flag_label, ret_label); + *ret_origin = 0; + return handle; +} -static void *pthread_create_cb(void *p) { - pthread_create_info pci(*(pthread_create_info *)p); - free(p); - dfsan_label ret_label; - return pci.start_routine_trampoline(pci.start_routine, pci.arg, 0, - &ret_label); +static void *DFsanThreadStartFunc(void *arg) { + DFsanThread *t = (DFsanThread *)arg; + SetCurrentThread(t); + return t->ThreadStart(); +} + +static int dfsan_pthread_create(pthread_t *thread, const pthread_attr_t *attr, + void *start_routine_trampoline, + void *start_routine, void *arg, + dfsan_label *ret_label, + bool track_origins = false) { + pthread_attr_t myattr; + if (!attr) { + pthread_attr_init(&myattr); + attr = &myattr; + } + + AdjustStackSize((void *)attr); + + DFsanThread *t = + DFsanThread::Create(start_routine_trampoline, + (thread_callback_t)start_routine, arg, track_origins); + int res = pthread_create(thread, attr, DFsanThreadStartFunc, t); + + if (attr == &myattr) + pthread_attr_destroy(&myattr); + *ret_label = 0; + return res; } SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_pthread_create( @@ -415,15 +840,23 @@ void *start_routine, void *arg, dfsan_label thread_label, dfsan_label attr_label, dfsan_label start_routine_label, dfsan_label arg_label, dfsan_label *ret_label) { - pthread_create_info *pci = - (pthread_create_info *)malloc(sizeof(pthread_create_info)); - pci->start_routine_trampoline = start_routine_trampoline; - pci->start_routine = start_routine; - pci->arg = arg; - int rv = pthread_create(thread, attr, pthread_create_cb, (void *)pci); - if (rv != 0) - free(pci); - *ret_label = 0; + return dfsan_pthread_create(thread, attr, (void *)start_routine_trampoline, + start_routine, arg, ret_label); +} + +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_pthread_create( + pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine_trampoline)(void *, void *, dfsan_label, + dfsan_label *, dfsan_origin, + dfsan_origin *), + void *start_routine, void *arg, 
dfsan_label thread_label, + dfsan_label attr_label, dfsan_label start_routine_label, + dfsan_label arg_label, dfsan_label *ret_label, dfsan_origin thread_origin, + dfsan_origin attr_origin, dfsan_origin start_routine_origin, + dfsan_origin arg_origin, dfsan_origin *ret_origin) { + int rv = dfsan_pthread_create(thread, attr, (void *)start_routine_trampoline, + start_routine, arg, ret_label, true); + *ret_origin = 0; return rv; } @@ -439,6 +872,17 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_pthread_join( + pthread_t thread, void **retval, dfsan_label thread_label, + dfsan_label retval_label, dfsan_label *ret_label, + dfsan_origin thread_origin, dfsan_origin retval_origin, + dfsan_origin *ret_origin) { + int ret = __dfsw_pthread_join(thread, retval, thread_label, retval_label, + ret_label); + *ret_origin = 0; + return ret; +} + struct dl_iterate_phdr_info { int (*callback_trampoline)(void *callback, struct dl_phdr_info *info, size_t size, void *data, dfsan_label info_label, @@ -448,6 +892,17 @@ void *data; }; +struct dl_iterate_phdr_origin_info { + int (*callback_trampoline)(void *callback, struct dl_phdr_info *info, + size_t size, void *data, dfsan_label info_label, + dfsan_label size_label, dfsan_label data_label, + dfsan_label *ret_label, dfsan_origin info_origin, + dfsan_origin size_origin, dfsan_origin data_origin, + dfsan_origin *ret_origin); + void *callback; + void *data; +}; + int dl_iterate_phdr_cb(struct dl_phdr_info *info, size_t size, void *data) { dl_iterate_phdr_info *dipi = (dl_iterate_phdr_info *)data; dfsan_set_label(0, *info); @@ -461,6 +916,21 @@ 0, &ret_label); } +int dl_iterate_phdr_origin_cb(struct dl_phdr_info *info, size_t size, + void *data) { + dl_iterate_phdr_origin_info *dipi = (dl_iterate_phdr_origin_info *)data; + dfsan_set_label(0, *info); + dfsan_set_label(0, const_cast(info->dlpi_name), + strlen(info->dlpi_name) + 1); + dfsan_set_label( + 0, const_cast(reinterpret_cast(info->dlpi_phdr)), + sizeof(*info->dlpi_phdr) * 
info->dlpi_phnum); + dfsan_label ret_label; + dfsan_origin ret_origin; + return dipi->callback_trampoline(dipi->callback, info, size, dipi->data, 0, 0, + 0, &ret_label, 0, 0, 0, &ret_origin); +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_dl_iterate_phdr( int (*callback_trampoline)(void *callback, struct dl_phdr_info *info, size_t size, void *data, dfsan_label info_label, @@ -473,6 +943,24 @@ return dl_iterate_phdr(dl_iterate_phdr_cb, &dipi); } +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_dl_iterate_phdr( + int (*callback_trampoline)(void *callback, struct dl_phdr_info *info, + size_t size, void *data, dfsan_label info_label, + dfsan_label size_label, dfsan_label data_label, + dfsan_label *ret_label, dfsan_origin info_origin, + dfsan_origin size_origin, + dfsan_origin data_origin, + dfsan_origin *ret_origin), + void *callback, void *data, dfsan_label callback_label, + dfsan_label data_label, dfsan_label *ret_label, + dfsan_origin callback_origin, dfsan_origin data_origin, + dfsan_origin *ret_origin) { + dl_iterate_phdr_origin_info dipi = {callback_trampoline, callback, data}; + *ret_label = 0; + *ret_origin = 0; + return dl_iterate_phdr(dl_iterate_phdr_origin_cb, &dipi); +} + // This function is only available for glibc 2.27 or newer. Mark it weak so // linking succeeds with older glibcs. 
SANITIZER_WEAK_ATTRIBUTE void _dl_get_tls_static_info(size_t *sizep, @@ -487,6 +975,13 @@ dfsan_set_label(0, alignp, sizeof(*alignp)); } +SANITIZER_INTERFACE_ATTRIBUTE void __dfso__dl_get_tls_static_info( + size_t *sizep, size_t *alignp, dfsan_label sizep_label, + dfsan_label alignp_label, dfsan_origin sizep_origin, + dfsan_origin alignp_origin) { + __dfsw__dl_get_tls_static_info(sizep, alignp, sizep_label, alignp_label); +} + SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_ctime_r(const time_t *timep, char *buf, dfsan_label timep_label, dfsan_label buf_label, dfsan_label *ret_label) { @@ -501,6 +996,26 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +char *__dfso_ctime_r(const time_t *timep, char *buf, dfsan_label timep_label, + dfsan_label buf_label, dfsan_label *ret_label, + dfsan_origin timep_origin, dfsan_origin buf_origin, + dfsan_origin *ret_origin) { + char *ret = ctime_r(timep, buf); + if (ret) { + dfsan_set_label_origin( + dfsan_read_label(timep, sizeof(time_t)), + dfsan_read_origin_of_first_taint(timep, sizeof(time_t)), buf, + strlen(buf) + 1); + *ret_label = buf_label; + *ret_origin = buf_origin; + } else { + *ret_label = 0; + *ret_origin = 0; + } + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_fgets(char *s, int size, FILE *stream, dfsan_label s_label, dfsan_label size_label, dfsan_label stream_label, @@ -515,6 +1030,22 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +char *__dfso_fgets(char *s, int size, FILE *stream, dfsan_label s_label, + dfsan_label size_label, dfsan_label stream_label, + dfsan_label *ret_label, dfsan_origin s_origin, + dfsan_origin size_origin, dfsan_origin stream_origin, + dfsan_origin *ret_origin) { + char *ret = __dfsw_fgets(s, size, stream, s_label, size_label, stream_label, + ret_label); + if (ret) { + *ret_origin = s_origin; + } else { + *ret_origin = 0; + } + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_getcwd(char *buf, size_t size, dfsan_label buf_label, dfsan_label size_label, dfsan_label 
*ret_label) { @@ -528,6 +1059,20 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +char *__dfso_getcwd(char *buf, size_t size, dfsan_label buf_label, + dfsan_label size_label, dfsan_label *ret_label, + dfsan_origin buf_origin, dfsan_origin size_origin, + dfsan_origin *ret_origin) { + char *ret = __dfsw_getcwd(buf, size, buf_label, size_label, ret_label); + if (ret) { + *ret_origin = buf_origin; + } else { + *ret_origin = 0; + } + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_get_current_dir_name(dfsan_label *ret_label) { char *ret = get_current_dir_name(); @@ -538,6 +1083,14 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +char *__dfso_get_current_dir_name(dfsan_label *ret_label, + dfsan_origin *ret_origin) { + char *ret = __dfsw_get_current_dir_name(ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_gethostname(char *name, size_t len, dfsan_label name_label, dfsan_label len_label, dfsan_label *ret_label) { @@ -549,6 +1102,16 @@ return ret; } +// Origin-tracking wrapper for gethostname: delegates to __dfsw_gethostname and zeroes the full 32-bit return origin. +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_gethostname(char *name, size_t len, dfsan_label name_label, + dfsan_label len_label, dfsan_label *ret_label, + dfsan_origin name_origin, dfsan_origin len_origin, + dfsan_origin *ret_origin) { + int ret = __dfsw_gethostname(name, len, name_label, len_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_getrlimit(int resource, struct rlimit *rlim, dfsan_label resource_label, dfsan_label rlim_label, @@ -561,6 +1124,17 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_getrlimit(int resource, struct rlimit *rlim, + dfsan_label resource_label, dfsan_label rlim_label, + dfsan_label *ret_label, dfsan_origin resource_origin, + dfsan_origin rlim_origin, dfsan_origin *ret_origin) { + int ret = + __dfsw_getrlimit(resource, rlim, resource_label, rlim_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_getrusage(int who, struct rusage *usage,
dfsan_label who_label, dfsan_label usage_label, dfsan_label *ret_label) { @@ -572,6 +1146,16 @@ return ret; } +// Origin-tracking wrapper for getrusage: delegates to __dfsw_getrusage and zeroes the full 32-bit return origin. +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_getrusage(int who, struct rusage *usage, dfsan_label who_label, + dfsan_label usage_label, dfsan_label *ret_label, + dfsan_origin who_origin, dfsan_origin usage_origin, + dfsan_origin *ret_origin) { + int ret = __dfsw_getrusage(who, usage, who_label, usage_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strcpy(char *dest, const char *src, dfsan_label dst_label, dfsan_label src_label, dfsan_label *ret_label) { @@ -585,14 +1169,34 @@ } SANITIZER_INTERFACE_ATTRIBUTE -long int __dfsw_strtol(const char *nptr, char **endptr, int base, - dfsan_label nptr_label, dfsan_label endptr_label, - dfsan_label base_label, dfsan_label *ret_label) { - char *tmp_endptr; - long int ret = strtol(nptr, &tmp_endptr, base); +char *__dfso_strcpy(char *dest, const char *src, dfsan_label dst_label, + dfsan_label src_label, dfsan_label *ret_label, + dfsan_origin dst_origin, dfsan_origin src_origin, + dfsan_origin *ret_origin) { + char *ret = strcpy(dest, src); // NOLINT + if (ret) { + size_t str_len = strlen(src) + 1; + dfsan_mem_origin_transfer(dest, src, str_len); + internal_memcpy(shadow_for(dest), shadow_for(src), + sizeof(dfsan_label) * str_len); + } + *ret_label = dst_label; + *ret_origin = dst_origin; + return ret; +} + +static long int dfsan_strtol(const char *nptr, char **endptr, int base, + char **tmp_endptr) { + long int ret = strtol(nptr, tmp_endptr, base); if (endptr) { - *endptr = tmp_endptr; + *endptr = *tmp_endptr; } + return ret; +} + +static void dfsan_strtoxl_label(const char *nptr, const char *tmp_endptr, + dfsan_label base_label, + dfsan_label *ret_label) { if (tmp_endptr > nptr) { // If *tmp_endptr is '\0' include its label as well.
*ret_label = dfsan_union( @@ -601,18 +1205,57 @@ } else { *ret_label = 0; } +} + +// Computes the return origin for the strtol family: base_origin when base is tainted, otherwise the first tainted origin in the parsed prefix of nptr. +static void dfsan_strtoxl_origin(const char *nptr, const char *tmp_endptr, + dfsan_label base_label, dfsan_label *ret_label, + dfsan_origin base_origin, + dfsan_origin *ret_origin) { + if (tmp_endptr > nptr) { + // If *tmp_endptr is '\0' include its origin as well. + *ret_origin = base_label + ? base_origin + : dfsan_read_origin_of_first_taint( + nptr, tmp_endptr - nptr + (*tmp_endptr ? 0 : 1)); + } else { + // Nothing was parsed, so the result carries no origin. + *ret_origin = 0; + } +} + +SANITIZER_INTERFACE_ATTRIBUTE +long int __dfsw_strtol(const char *nptr, char **endptr, int base, + dfsan_label nptr_label, dfsan_label endptr_label, + dfsan_label base_label, dfsan_label *ret_label) { + char *tmp_endptr; + long int ret = dfsan_strtol(nptr, endptr, base, &tmp_endptr); + dfsan_strtoxl_label(nptr, tmp_endptr, base_label, ret_label); return ret; } SANITIZER_INTERFACE_ATTRIBUTE -double __dfsw_strtod(const char *nptr, char **endptr, +long int __dfso_strtol(const char *nptr, char **endptr, int base, dfsan_label nptr_label, dfsan_label endptr_label, - dfsan_label *ret_label) { + dfsan_label base_label, dfsan_label *ret_label, + dfsan_origin nptr_origin, dfsan_origin endptr_origin, + dfsan_origin base_origin, dfsan_origin *ret_origin) { char *tmp_endptr; - double ret = strtod(nptr, &tmp_endptr); + long int ret = dfsan_strtol(nptr, endptr, base, &tmp_endptr); + dfsan_strtoxl_label(nptr, tmp_endptr, base_label, ret_label); + dfsan_strtoxl_origin(nptr, tmp_endptr, base_label, ret_label, base_origin, + ret_origin); + return ret; +} + +static double dfsan_strtod(const char *nptr, char **endptr, char **tmp_endptr) { + double ret = strtod(nptr, tmp_endptr); if (endptr) { - *endptr = tmp_endptr; + *endptr = *tmp_endptr; } + return ret; +} + +static void dfsan_strtod_lable(const char *nptr, const char *tmp_endptr, + dfsan_label *ret_label) { if (tmp_endptr > nptr) { // If *tmp_endptr is '\0' include its label as well.
*ret_label = dfsan_read_label( @@ -621,25 +1264,75 @@ } else { *ret_label = 0; } +} + +SANITIZER_INTERFACE_ATTRIBUTE +double __dfsw_strtod(const char *nptr, char **endptr, dfsan_label nptr_label, + dfsan_label endptr_label, dfsan_label *ret_label) { + char *tmp_endptr; + double ret = dfsan_strtod(nptr, endptr, &tmp_endptr); + dfsan_strtod_lable(nptr, tmp_endptr, ret_label); return ret; } SANITIZER_INTERFACE_ATTRIBUTE -long long int __dfsw_strtoll(const char *nptr, char **endptr, int base, - dfsan_label nptr_label, dfsan_label endptr_label, - dfsan_label base_label, dfsan_label *ret_label) { +double __dfso_strtod(const char *nptr, char **endptr, dfsan_label nptr_label, + dfsan_label endptr_label, dfsan_label *ret_label, + dfsan_origin nptr_origin, dfsan_origin endptr_origin, + dfsan_origin *ret_origin) { char *tmp_endptr; - long long int ret = strtoll(nptr, &tmp_endptr, base); - if (endptr) { - *endptr = tmp_endptr; - } + double ret = dfsan_strtod(nptr, endptr, &tmp_endptr); + dfsan_strtod_lable(nptr, tmp_endptr, ret_label); if (tmp_endptr > nptr) { // If *tmp_endptr is '\0' include its label as well. - *ret_label = dfsan_union( - base_label, - dfsan_read_label(nptr, tmp_endptr - nptr + (*tmp_endptr ? 0 : 1))); + *ret_origin = dfsan_read_origin_of_first_taint( + nptr, tmp_endptr - nptr + (*tmp_endptr ? 
0 : 1)); } else { - *ret_label = 0; + *ret_origin = 0; + } + return ret; +} + +static long long int dfsan_strtoll(const char *nptr, char **endptr, int base, + char **tmp_endptr) { + long long int ret = strtoll(nptr, tmp_endptr, base); + if (endptr) { + *endptr = *tmp_endptr; + } + return ret; +} + +SANITIZER_INTERFACE_ATTRIBUTE +long long int __dfsw_strtoll(const char *nptr, char **endptr, int base, + dfsan_label nptr_label, dfsan_label endptr_label, + dfsan_label base_label, dfsan_label *ret_label) { + char *tmp_endptr; + long long int ret = dfsan_strtoll(nptr, endptr, base, &tmp_endptr); + dfsan_strtoxl_label(nptr, tmp_endptr, base_label, ret_label); + return ret; +} + +SANITIZER_INTERFACE_ATTRIBUTE +long long int __dfso_strtoll(const char *nptr, char **endptr, int base, + dfsan_label nptr_label, dfsan_label endptr_label, + dfsan_label base_label, dfsan_label *ret_label, + dfsan_origin nptr_origin, + dfsan_origin endptr_origin, + dfsan_origin base_origin, + dfsan_origin *ret_origin) { + char *tmp_endptr; + long long int ret = dfsan_strtoll(nptr, endptr, base, &tmp_endptr); + dfsan_strtoxl_label(nptr, tmp_endptr, base_label, ret_label); + dfsan_strtoxl_origin(nptr, tmp_endptr, base_label, ret_label, base_origin, + ret_origin); + return ret; +} + +static unsigned long int dfsan_strtoul(const char *nptr, char **endptr, + int base, char **tmp_endptr) { + unsigned long int ret = strtoul(nptr, tmp_endptr, base); + if (endptr) { + *endptr = *tmp_endptr; } return ret; } @@ -649,17 +1342,30 @@ dfsan_label nptr_label, dfsan_label endptr_label, dfsan_label base_label, dfsan_label *ret_label) { char *tmp_endptr; - unsigned long int ret = strtoul(nptr, &tmp_endptr, base); + unsigned long int ret = dfsan_strtoul(nptr, endptr, base, &tmp_endptr); + dfsan_strtoxl_label(nptr, tmp_endptr, base_label, ret_label); + return ret; +} + +SANITIZER_INTERFACE_ATTRIBUTE +unsigned long int __dfso_strtoul( + const char *nptr, char **endptr, int base, dfsan_label nptr_label, + dfsan_label 
endptr_label, dfsan_label base_label, dfsan_label *ret_label, + dfsan_origin nptr_origin, dfsan_origin endptr_origin, + dfsan_origin base_origin, dfsan_origin *ret_origin) { + char *tmp_endptr; + unsigned long int ret = dfsan_strtoul(nptr, endptr, base, &tmp_endptr); + dfsan_strtoxl_label(nptr, tmp_endptr, base_label, ret_label); + dfsan_strtoxl_origin(nptr, tmp_endptr, base_label, ret_label, base_origin, + ret_origin); + return ret; +} + +static long long unsigned int dfsan_strtoull(const char *nptr, char **endptr, + int base, char **tmp_endptr) { + long long unsigned int ret = strtoull(nptr, tmp_endptr, base); if (endptr) { - *endptr = tmp_endptr; - } - if (tmp_endptr > nptr) { - // If *tmp_endptr is '\0' include its label as well. - *ret_label = dfsan_union( - base_label, - dfsan_read_label(nptr, tmp_endptr - nptr + (*tmp_endptr ? 0 : 1))); - } else { - *ret_label = 0; + *endptr = *tmp_endptr; } return ret; } @@ -671,18 +1377,22 @@ dfsan_label base_label, dfsan_label *ret_label) { char *tmp_endptr; - long long unsigned int ret = strtoull(nptr, &tmp_endptr, base); - if (endptr) { - *endptr = tmp_endptr; - } - if (tmp_endptr > nptr) { - // If *tmp_endptr is '\0' include its label as well. - *ret_label = dfsan_union( - base_label, - dfsan_read_label(nptr, tmp_endptr - nptr + (*tmp_endptr ? 
0 : 1))); - } else { - *ret_label = 0; - } + long long unsigned int ret = dfsan_strtoull(nptr, endptr, base, &tmp_endptr); + dfsan_strtoxl_label(nptr, tmp_endptr, base_label, ret_label); + return ret; +} + +SANITIZER_INTERFACE_ATTRIBUTE +long long unsigned int __dfso_strtoull( + const char *nptr, char **endptr, int base, dfsan_label nptr_label, + dfsan_label endptr_label, dfsan_label base_label, dfsan_label *ret_label, + dfsan_origin nptr_origin, dfsan_origin endptr_origin, + dfsan_origin base_origin, dfsan_origin *ret_origin) { + char *tmp_endptr; + long long unsigned int ret = dfsan_strtoull(nptr, endptr, base, &tmp_endptr); + dfsan_strtoxl_label(nptr, tmp_endptr, base_label, ret_label); + dfsan_strtoxl_origin(nptr, tmp_endptr, base_label, ret_label, base_origin, + ret_origin); return ret; } @@ -696,6 +1406,14 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +time_t __dfso_time(time_t *t, dfsan_label t_label, dfsan_label *ret_label, + dfsan_origin t_origin, dfsan_origin *ret_origin) { + time_t ret = __dfsw_time(t, t_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_inet_pton(int af, const char *src, void *dst, dfsan_label af_label, dfsan_label src_label, dfsan_label dst_label, @@ -709,6 +1427,25 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_inet_pton(int af, const char *src, void *dst, dfsan_label af_label, + dfsan_label src_label, dfsan_label dst_label, + dfsan_label *ret_label, dfsan_origin af_origin, + dfsan_origin src_origin, dfsan_origin dst_origin, + dfsan_origin *ret_origin) { + int ret = inet_pton(af, src, dst); + if (ret == 1) { + int src_len = strlen(src) + 1; + dfsan_set_label_origin( + dfsan_read_label(src, src_len), + dfsan_read_origin_of_first_taint(src, src_len), dst, + af == AF_INET ? 
sizeof(struct in_addr) : sizeof(in6_addr)); + } + *ret_label = 0; + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE struct tm *__dfsw_localtime_r(const time_t *timep, struct tm *result, dfsan_label timep_label, dfsan_label result_label, @@ -724,6 +1461,27 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +struct tm *__dfso_localtime_r(const time_t *timep, struct tm *result, + dfsan_label timep_label, dfsan_label result_label, + dfsan_label *ret_label, dfsan_origin timep_origin, + dfsan_origin result_origin, + dfsan_origin *ret_origin) { + struct tm *ret = localtime_r(timep, result); + if (ret) { + dfsan_set_label_origin( + dfsan_read_label(timep, sizeof(time_t)), + dfsan_read_origin_of_first_taint(timep, sizeof(time_t)), result, + sizeof(struct tm)); + *ret_label = result_label; + *ret_origin = result_origin; + } else { + *ret_label = 0; + *ret_origin = 0; + } + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_getpwuid_r(id_t uid, struct passwd *pwd, char *buf, size_t buflen, struct passwd **result, @@ -742,6 +1500,22 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_getpwuid_r(id_t uid, struct passwd *pwd, char *buf, size_t buflen, + struct passwd **result, dfsan_label uid_label, + dfsan_label pwd_label, dfsan_label buf_label, + dfsan_label buflen_label, dfsan_label result_label, + dfsan_label *ret_label, dfsan_origin uid_origin, + dfsan_origin pwd_origin, dfsan_origin buf_origin, + dfsan_origin buflen_origin, dfsan_origin result_origin, + dfsan_origin *ret_origin) { + int ret = + __dfsw_getpwuid_r(uid, pwd, buf, buflen, result, uid_label, pwd_label, + buf_label, buflen_label, result_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout, dfsan_label epfd_label, @@ -754,6 +1528,21 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_epoll_wait(int epfd, struct epoll_event *events, int maxevents, + int 
timeout, dfsan_label epfd_label, + dfsan_label events_label, dfsan_label maxevents_label, + dfsan_label timeout_label, dfsan_label *ret_label, + dfsan_origin epfd_origin, dfsan_origin events_origin, + dfsan_origin maxevents_origin, + dfsan_origin timeout_origin, dfsan_origin *ret_origin) { + int ret = __dfsw_epoll_wait(epfd, events, maxevents, timeout, epfd_label, + events_label, maxevents_label, timeout_label, + ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_poll(struct pollfd *fds, nfds_t nfds, int timeout, dfsan_label dfs_label, dfsan_label nfds_label, @@ -768,6 +1557,18 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_poll(struct pollfd *fds, nfds_t nfds, int timeout, + dfsan_label dfs_label, dfsan_label nfds_label, + dfsan_label timeout_label, dfsan_label *ret_label, + dfsan_origin dfs_origin, dfsan_origin nfds_origin, + dfsan_origin timeout_origin, dfsan_origin *ret_origin) { + int ret = __dfsw_poll(fds, nfds, timeout, dfs_label, nfds_label, + timeout_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout, @@ -791,6 +1592,22 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_select(int nfds, fd_set *readfds, fd_set *writefds, + fd_set *exceptfds, struct timeval *timeout, + dfsan_label nfds_label, dfsan_label readfds_label, + dfsan_label writefds_label, dfsan_label exceptfds_label, + dfsan_label timeout_label, dfsan_label *ret_label, + dfsan_origin nfds_origin, dfsan_origin readfds_origin, + dfsan_origin writefds_origin, dfsan_origin exceptfds_origin, + dfsan_origin timeout_origin, dfsan_origin *ret_origin) { + int ret = __dfsw_select(nfds, readfds, writefds, exceptfds, timeout, + nfds_label, readfds_label, writefds_label, + exceptfds_label, timeout_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int 
__dfsw_sched_getaffinity(pid_t pid, size_t cpusetsize, cpu_set_t *mask, dfsan_label pid_label, @@ -804,20 +1621,127 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_sched_getaffinity(pid_t pid, size_t cpusetsize, cpu_set_t *mask, + dfsan_label pid_label, + dfsan_label cpusetsize_label, + dfsan_label mask_label, dfsan_label *ret_label, + dfsan_origin pid_origin, + dfsan_origin cpusetsize_origin, + dfsan_origin mask_origin, + dfsan_origin *ret_origin) { + int ret = __dfsw_sched_getaffinity(pid, cpusetsize, mask, pid_label, + cpusetsize_label, mask_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_sigemptyset(sigset_t *set, dfsan_label set_label, dfsan_label *ret_label) { int ret = sigemptyset(set); dfsan_set_label(0, set, sizeof(sigset_t)); + *ret_label = 0; + return ret; +} + +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_sigemptyset(sigset_t *set, dfsan_label set_label, + dfsan_label *ret_label, dfsan_origin set_origin, + dfsan_origin *ret_origin) { + int ret = __dfsw_sigemptyset(set, set_label, ret_label); + *ret_origin = 0; return ret; } +class SignalHandlerScope { + public: + SignalHandlerScope() { + if (DFsanThread *t = GetCurrentThread()) + t->EnterSignalHandler(); + } + ~SignalHandlerScope() { + if (DFsanThread *t = GetCurrentThread()) + t->LeaveSignalHandler(); + } +}; + +// sigactions_mu guarantees atomicity of sigaction() and signal() calls. +// Access to sigactions[] is done with relaxed atomics to avoid data race with +// the signal handler. +const int kMaxSignals = 1024; +static atomic_uintptr_t sigactions[kMaxSignals]; +static StaticSpinMutex sigactions_mu; + +static void SignalHandler(int signo) { + SignalHandlerScope signal_handler_scope; + ScopedThreadLocalStateBackup stlsb; + + // Clear shadows for all inputs provided by system. This is why DFSan + // instrumentation generates a trampoline function to each function pointer, + // and uses the trampoline to clear shadows. 
However sigaction does not use + // a function pointer directly, so we have to do this manually. + dfsan_set_label(0, &signo, sizeof(signo)); + + typedef void (*signal_cb)(int x); + signal_cb cb = + (signal_cb)atomic_load(&sigactions[signo], memory_order_relaxed); + cb(signo); +} + +static void SignalAction(int signo, siginfo_t *si, void *uc) { + SignalHandlerScope signal_handler_scope; + ScopedThreadLocalStateBackup stlsb; + + // Clear shadows for all inputs provided by system. Similar to SignalHandler. + dfsan_set_label(0, &signo, sizeof(signo)); + dfsan_set_label(0, &si, sizeof(si)); + dfsan_set_label(0, &uc, sizeof(uc)); + dfsan_set_label(0, si, sizeof(__sanitizer_sigaction)); + dfsan_set_label(0, uc, __sanitizer::ucontext_t_sz); + + typedef void (*sigaction_cb)(int, siginfo_t *, void *); + sigaction_cb cb = + (sigaction_cb)atomic_load(&sigactions[signo], memory_order_relaxed); + cb(signo, si, uc); +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_sigaction(int signum, const struct sigaction *act, struct sigaction *oldact, dfsan_label signum_label, dfsan_label act_label, dfsan_label oldact_label, dfsan_label *ret_label) { - int ret = sigaction(signum, act, oldact); + SpinMutexLock lock(&sigactions_mu); + CHECK_LT(signum, kMaxSignals); + uptr old_cb = atomic_load(&sigactions[signum], memory_order_relaxed); + struct sigaction new_act; + struct sigaction *pnew_act = act ? 
&new_act : nullptr; + if (act) { + internal_memcpy(pnew_act, act, sizeof(struct sigaction)); + if (pnew_act->sa_flags & __sanitizer::sa_siginfo) { + uptr cb = (uptr)(pnew_act->sa_sigaction); + if (cb != __sanitizer::sig_ign && cb != __sanitizer::sig_dfl) { + atomic_store(&sigactions[signum], cb, memory_order_relaxed); + pnew_act->sa_sigaction = (decltype(pnew_act->sa_sigaction))SignalAction; + } + } else { + uptr cb = (uptr)(pnew_act->sa_handler); + if (cb != __sanitizer::sig_ign && cb != __sanitizer::sig_dfl) { + atomic_store(&sigactions[signum], cb, memory_order_relaxed); + pnew_act->sa_handler = (decltype(pnew_act->sa_handler))SignalHandler; + } + } + } + + int ret = sigaction(signum, pnew_act, oldact); + + if (ret == 0 && oldact) { + uptr cb = (uptr)oldact->sa_sigaction; + if (cb == (uptr)SignalAction || cb == (uptr)SignalHandler) { + oldact->sa_sigaction = (decltype(oldact->sa_sigaction))old_cb; + } + } + if (oldact) { dfsan_set_label(0, oldact, sizeof(struct sigaction)); } @@ -825,6 +1749,55 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_sigaction(int signum, const struct sigaction *act, + struct sigaction *oldact, dfsan_label signum_label, + dfsan_label act_label, dfsan_label oldact_label, + dfsan_label *ret_label, dfsan_origin signum_origin, + dfsan_origin act_origin, dfsan_origin oldact_origin, + dfsan_origin *ret_origin) { + int ret = __dfsw_sigaction(signum, act, oldact, signum_label, act_label, + oldact_label, ret_label); + *ret_origin = 0; + return ret; +} + +static sighandler_t dfsan_signal(int signum, sighandler_t handler, + dfsan_label *ret_label) { + CHECK_LT(signum, kMaxSignals); + SpinMutexLock lock(&sigactions_mu); + uptr cb = (uptr)handler; + if (cb != __sanitizer::sig_ign && cb != __sanitizer::sig_dfl) { + atomic_store(&sigactions[signum], cb, memory_order_relaxed); + cb = (uptr)&SignalHandler; + } + sighandler_t ret = signal(signum, (sighandler_t)cb); + *ret_label = 0; + return ret; +} + +SANITIZER_INTERFACE_ATTRIBUTE 
+sighandler_t __dfsw_signal(int signum, + void *(*handler_trampoline)(void *, int, dfsan_label, + dfsan_label *), + sighandler_t handler, dfsan_label signum_label, + dfsan_label handler_label, dfsan_label *ret_label) { + return dfsan_signal(signum, handler, ret_label); +} + +SANITIZER_INTERFACE_ATTRIBUTE +sighandler_t __dfso_signal( + int signum, + void *(*handler_trampoline)(void *, int, dfsan_label, dfsan_label *, + dfsan_origin, dfsan_origin *), + sighandler_t handler, dfsan_label signum_label, dfsan_label handler_label, + dfsan_label *ret_label, dfsan_origin signum_origin, + dfsan_origin handler_origin, dfsan_origin *ret_origin) { + sighandler_t ret = dfsan_signal(signum, handler, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_sigaltstack(const stack_t *ss, stack_t *old_ss, dfsan_label ss_label, dfsan_label old_ss_label, dfsan_label *ret_label) { @@ -835,6 +1808,16 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_sigaltstack(const stack_t *ss, stack_t *old_ss, dfsan_label ss_label, + dfsan_label old_ss_label, dfsan_label *ret_label, + dfsan_origin ss_origin, dfsan_origin old_ss_origin, + dfsan_origin *ret_origin) { + int ret = __dfsw_sigaltstack(ss, old_ss, ss_label, old_ss_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_gettimeofday(struct timeval *tv, struct timezone *tz, dfsan_label tv_label, dfsan_label tz_label, @@ -850,6 +1833,16 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_gettimeofday(struct timeval *tv, struct timezone *tz, + dfsan_label tv_label, dfsan_label tz_label, + dfsan_label *ret_label, dfsan_origin tv_origin, + dfsan_origin tz_origin, dfsan_origin *ret_origin) { + int ret = __dfsw_gettimeofday(tv, tz, tv_label, tz_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE void *__dfsw_memchr(void *s, int c, size_t n, dfsan_label s_label, dfsan_label c_label, @@ -868,6 +1861,23 @@ return ret; } 
+SANITIZER_INTERFACE_ATTRIBUTE void *__dfso_memchr( + void *s, int c, size_t n, dfsan_label s_label, dfsan_label c_label, + dfsan_label n_label, dfsan_label *ret_label, dfsan_origin s_origin, + dfsan_origin c_origin, dfsan_origin n_origin, dfsan_origin *ret_origin) { + void *ret = __dfsw_memchr(s, c, n, s_label, c_label, n_label, ret_label); + if (flags().strict_data_dependencies) { + *ret_origin = ret ? s_origin : 0; + } else { + size_t len = + ret ? reinterpret_cast(ret) - reinterpret_cast(s) + 1 + : n; + dfsan_origin o = dfsan_read_origin_of_first_taint(s, len); + *ret_origin = o ? o : (s_label ? s_origin : c_origin); + } + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strrchr(char *s, int c, dfsan_label s_label, dfsan_label c_label, @@ -884,6 +1894,25 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strrchr( + char *s, int c, dfsan_label s_label, dfsan_label c_label, + dfsan_label *ret_label, dfsan_origin s_origin, dfsan_origin c_origin, + dfsan_origin *ret_origin) { + char *ret = strrchr(s, c); + if (flags().strict_data_dependencies) { + *ret_label = ret ? s_label : 0; + *ret_origin = ret ? s_origin : 0; + } else { + size_t s_len = strlen(s) + 1; + *ret_label = + dfsan_union(dfsan_read_label(s, s_len), dfsan_union(s_label, c_label)); + dfsan_origin o = dfsan_read_origin_of_first_taint(s, s_len); + *ret_origin = o ? o : (s_label ? s_origin : c_origin); + } + + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strstr(char *haystack, char *needle, dfsan_label haystack_label, dfsan_label needle_label, @@ -902,6 +1931,36 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strstr(char *haystack, char *needle, + dfsan_label haystack_label, + dfsan_label needle_label, + dfsan_label *ret_label, + dfsan_origin haystack_origin, + dfsan_origin needle_origin, + dfsan_origin *ret_origin) { + char *ret = strstr(haystack, needle); + if (flags().strict_data_dependencies) { + *ret_label = ret ? 
haystack_label : 0; + *ret_origin = ret ? haystack_origin : 0; + } else { + size_t needle_len = strlen(needle); + size_t len = ret ? ret + needle_len - haystack : strlen(haystack) + 1; + *ret_label = + dfsan_union(dfsan_read_label(haystack, len), + dfsan_union(dfsan_read_label(needle, needle_len + 1), + dfsan_union(haystack_label, needle_label))); + dfsan_origin o = dfsan_read_origin_of_first_taint(haystack, len); + if (o) { + *ret_origin = o; + } else { + o = dfsan_read_origin_of_first_taint(needle, needle_len + 1); + *ret_origin = o ? o : (haystack_label ? haystack_origin : needle_origin); + } + } + + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_nanosleep(const struct timespec *req, struct timespec *rem, dfsan_label req_label, @@ -916,6 +1975,15 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_nanosleep( + const struct timespec *req, struct timespec *rem, dfsan_label req_label, + dfsan_label rem_label, dfsan_label *ret_label, dfsan_origin req_origin, + dfsan_origin rem_origin, dfsan_origin *ret_origin) { + int ret = __dfsw_nanosleep(req, rem, req_label, rem_label, ret_label); + *ret_origin = 0; + return ret; +} + static void clear_msghdr_labels(size_t bytes_written, struct msghdr *msg) { dfsan_set_label(0, msg, sizeof(*msg)); dfsan_set_label(0, msg->msg_name, msg->msg_namelen); @@ -944,6 +2012,21 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_recvmmsg( + int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags, + struct timespec *timeout, dfsan_label sockfd_label, + dfsan_label msgvec_label, dfsan_label vlen_label, dfsan_label flags_label, + dfsan_label timeout_label, dfsan_label *ret_label, + dfsan_origin sockfd_origin, dfsan_origin msgvec_origin, + dfsan_origin vlen_origin, dfsan_origin flags_origin, + dfsan_origin timeout_origin, dfsan_origin *ret_origin) { + int ret = __dfsw_recvmmsg(sockfd, msgvec, vlen, flags, timeout, sockfd_label, + msgvec_label, vlen_label, flags_label, + timeout_label, ret_label); + *ret_origin 
= 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE ssize_t __dfsw_recvmsg( int sockfd, struct msghdr *msg, int flags, dfsan_label sockfd_label, dfsan_label msg_label, dfsan_label flags_label, dfsan_label *ret_label) { @@ -954,6 +2037,17 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE ssize_t __dfso_recvmsg( + int sockfd, struct msghdr *msg, int flags, dfsan_label sockfd_label, + dfsan_label msg_label, dfsan_label flags_label, dfsan_label *ret_label, + dfsan_origin sockfd_origin, dfsan_origin msg_origin, + dfsan_origin flags_origin, dfsan_origin *ret_origin) { + ssize_t ret = __dfsw_recvmsg(sockfd, msg, flags, sockfd_label, msg_label, + flags_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_socketpair(int domain, int type, int protocol, int sv[2], dfsan_label domain_label, dfsan_label type_label, @@ -967,6 +2061,18 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_socketpair( + int domain, int type, int protocol, int sv[2], dfsan_label domain_label, + dfsan_label type_label, dfsan_label protocol_label, dfsan_label sv_label, + dfsan_label *ret_label, dfsan_origin domain_origin, + dfsan_origin type_origin, dfsan_origin protocol_origin, + dfsan_origin sv_origin, dfsan_origin *ret_origin) { + int ret = __dfsw_socketpair(domain, type, protocol, sv, domain_label, + type_label, protocol_label, sv_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_getsockopt( int sockfd, int level, int optname, void *optval, socklen_t *optlen, dfsan_label sockfd_label, dfsan_label level_label, @@ -981,6 +2087,21 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_getsockopt( + int sockfd, int level, int optname, void *optval, socklen_t *optlen, + dfsan_label sockfd_label, dfsan_label level_label, + dfsan_label optname_label, dfsan_label optval_label, + dfsan_label optlen_label, dfsan_label *ret_label, + dfsan_origin sockfd_origin, dfsan_origin level_origin, + dfsan_origin 
optname_origin, dfsan_origin optval_origin, + dfsan_origin optlen_origin, dfsan_origin *ret_origin) { + int ret = __dfsw_getsockopt(sockfd, level, optname, optval, optlen, + sockfd_label, level_label, optname_label, + optval_label, optlen_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_getsockname( int sockfd, struct sockaddr *addr, socklen_t *addrlen, dfsan_label sockfd_label, dfsan_label addr_label, dfsan_label addrlen_label, @@ -996,6 +2117,18 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_getsockname( + int sockfd, struct sockaddr *addr, socklen_t *addrlen, + dfsan_label sockfd_label, dfsan_label addr_label, dfsan_label addrlen_label, + dfsan_label *ret_label, dfsan_origin sockfd_origin, + dfsan_origin addr_origin, dfsan_origin addrlen_origin, + dfsan_origin *ret_origin) { + int ret = __dfsw_getsockname(sockfd, addr, addrlen, sockfd_label, addr_label, + addrlen_label, ret_label); + *ret_origin = 0; + return ret; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_getpeername( int sockfd, struct sockaddr *addr, socklen_t *addrlen, dfsan_label sockfd_label, dfsan_label addr_label, dfsan_label addrlen_label, @@ -1011,6 +2144,18 @@ return ret; } +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_getpeername( + int sockfd, struct sockaddr *addr, socklen_t *addrlen, + dfsan_label sockfd_label, dfsan_label addr_label, dfsan_label addrlen_label, + dfsan_label *ret_label, dfsan_origin sockfd_origin, + dfsan_origin addr_origin, dfsan_origin addrlen_origin, + dfsan_origin *ret_origin) { + int ret = __dfsw_getpeername(sockfd, addr, addrlen, sockfd_label, addr_label, + addrlen_label, ret_label); + *ret_origin = 0; + return ret; +} + // Type of the trampoline function passed to the custom version of // dfsan_set_write_callback. 
typedef void (*write_trampoline_t)( @@ -1018,6 +2163,11 @@ int fd, const void *buf, ssize_t count, dfsan_label fd_label, dfsan_label buf_label, dfsan_label count_label); +typedef void (*write_origin_trampoline_t)( + void *callback, int fd, const void *buf, ssize_t count, + dfsan_label fd_label, dfsan_label buf_label, dfsan_label count_label, + dfsan_origin fd_origin, dfsan_origin buf_origin, dfsan_origin count_origin); + // Calls to dfsan_set_write_callback() set the values in this struct. // Calls to the custom version of write() read (and invoke) them. static struct { @@ -1025,6 +2175,11 @@ void *write_callback = nullptr; } write_callback_info; +static struct { + write_origin_trampoline_t write_callback_trampoline = nullptr; + void *write_callback = nullptr; +} write_origin_callback_info; + SANITIZER_INTERFACE_ATTRIBUTE void __dfsw_dfsan_set_write_callback( write_trampoline_t write_callback_trampoline, @@ -1035,6 +2190,15 @@ write_callback_info.write_callback = write_callback; } +SANITIZER_INTERFACE_ATTRIBUTE void __dfso_dfsan_set_write_callback( + write_origin_trampoline_t write_callback_trampoline, void *write_callback, + dfsan_label write_callback_label, dfsan_label *ret_label, + dfsan_origin write_callback_origin, dfsan_origin *ret_origin) { + write_origin_callback_info.write_callback_trampoline = + write_callback_trampoline; + write_origin_callback_info.write_callback = write_callback; +} + SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_write(int fd, const void *buf, size_t count, dfsan_label fd_label, dfsan_label buf_label, @@ -1049,6 +2213,23 @@ *ret_label = 0; return write(fd, buf, count); } + +SANITIZER_INTERFACE_ATTRIBUTE int __dfso_write( + int fd, const void *buf, size_t count, dfsan_label fd_label, + dfsan_label buf_label, dfsan_label count_label, dfsan_label *ret_label, + dfsan_origin fd_origin, dfsan_origin buf_origin, dfsan_origin count_origin, + dfsan_origin *ret_origin) { + if (write_origin_callback_info.write_callback) { + 
write_origin_callback_info.write_callback_trampoline( + write_origin_callback_info.write_callback, fd, buf, count, fd_label, + buf_label, count_label, fd_origin, buf_origin, count_origin); + } + + *ret_label = 0; + *ret_origin = 0; + return write(fd, buf, count); +} + } // namespace __dfsan // Type used to extract a dfsan_label with va_arg() @@ -1138,6 +2319,7 @@ // positional arguments. static int format_buffer(char *str, size_t size, const char *fmt, dfsan_label *va_labels, dfsan_label *ret_label, + dfsan_origin *va_origins, dfsan_origin *ret_origin, va_list ap) { Formatter formatter(str, fmt, size); @@ -1193,8 +2375,13 @@ default: retval = formatter.format(va_arg(ap, int)); } - dfsan_set_label(*va_labels++, formatter.str_cur(), - formatter.num_written_bytes(retval)); + if (va_origins == nullptr) + dfsan_set_label(*va_labels++, formatter.str_cur(), + formatter.num_written_bytes(retval)); + else + dfsan_set_label_origin(*va_labels++, *va_origins++, + formatter.str_cur(), + formatter.num_written_bytes(retval)); end_fmt = true; break; @@ -1211,21 +2398,36 @@ } else { retval = formatter.format(va_arg(ap, double)); } - dfsan_set_label(*va_labels++, formatter.str_cur(), - formatter.num_written_bytes(retval)); + if (va_origins == nullptr) + dfsan_set_label(*va_labels++, formatter.str_cur(), + formatter.num_written_bytes(retval)); + else + dfsan_set_label_origin(*va_labels++, *va_origins++, + formatter.str_cur(), + formatter.num_written_bytes(retval)); end_fmt = true; break; case 'c': retval = formatter.format(va_arg(ap, int)); - dfsan_set_label(*va_labels++, formatter.str_cur(), - formatter.num_written_bytes(retval)); + if (va_origins == nullptr) + dfsan_set_label(*va_labels++, formatter.str_cur(), + formatter.num_written_bytes(retval)); + else + dfsan_set_label_origin(*va_labels++, *va_origins++, + formatter.str_cur(), + formatter.num_written_bytes(retval)); end_fmt = true; break; case 's': { char *arg = va_arg(ap, char *); retval = formatter.format(arg); + if 
(va_origins) { + va_origins++; + dfsan_mem_origin_transfer(formatter.str_cur(), arg, + formatter.num_written_bytes(retval)); + } va_labels++; internal_memcpy(shadow_for(formatter.str_cur()), shadow_for(arg), sizeof(dfsan_label) * @@ -1236,8 +2438,13 @@ case 'p': retval = formatter.format(va_arg(ap, void *)); - dfsan_set_label(*va_labels++, formatter.str_cur(), - formatter.num_written_bytes(retval)); + if (va_origins == nullptr) + dfsan_set_label(*va_labels++, formatter.str_cur(), + formatter.num_written_bytes(retval)); + else + dfsan_set_label_origin(*va_labels++, *va_origins++, + formatter.str_cur(), + formatter.num_written_bytes(retval)); end_fmt = true; break; @@ -1245,6 +2452,8 @@ int *ptr = va_arg(ap, int *); *ptr = (int)formatter.str_off; va_labels++; + if (va_origins) + va_origins++; dfsan_set_label(0, ptr, sizeof(ptr)); end_fmt = true; break; @@ -1260,6 +2469,8 @@ case '*': formatter.width = va_arg(ap, int); va_labels++; + if (va_origins) + va_origins++; break; default: @@ -1277,6 +2488,8 @@ } *ret_label = 0; + if (ret_origin) + *ret_origin = 0; // Number of bytes written in total. return formatter.str_off; @@ -1289,7 +2502,22 @@ dfsan_label *ret_label, ...) { va_list ap; va_start(ap, ret_label); - int ret = format_buffer(str, ~0ul, format, va_labels, ret_label, ap); + int ret = format_buffer(str, ~0ul, format, va_labels, ret_label, nullptr, + nullptr, ap); + va_end(ap); + return ret; +} + +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_sprintf(char *str, const char *format, dfsan_label str_label, + dfsan_label format_label, dfsan_label *va_labels, + dfsan_label *ret_label, dfsan_origin str_origin, + dfsan_origin format_origin, dfsan_origin *va_origins, + dfsan_origin *ret_origin, ...) { + va_list ap; + va_start(ap, ret_origin); + int ret = format_buffer(str, ~0ul, format, va_labels, ret_label, va_origins, + ret_origin, ap); va_end(ap); return ret; } @@ -1301,11 +2529,53 @@ dfsan_label *ret_label, ...) 
{ va_list ap; va_start(ap, ret_label); - int ret = format_buffer(str, size, format, va_labels, ret_label, ap); + int ret = format_buffer(str, size, format, va_labels, ret_label, nullptr, + nullptr, ap); + va_end(ap); + return ret; +} + +SANITIZER_INTERFACE_ATTRIBUTE +int __dfso_snprintf(char *str, size_t size, const char *format, + dfsan_label str_label, dfsan_label size_label, + dfsan_label format_label, dfsan_label *va_labels, + dfsan_label *ret_label, dfsan_origin str_origin, + dfsan_origin size_origin, dfsan_origin format_origin, + dfsan_origin *va_origins, dfsan_origin *ret_origin, ...) { + va_list ap; + va_start(ap, ret_origin); + int ret = format_buffer(str, size, format, va_labels, ret_label, va_origins, + ret_origin, ap); va_end(ap); return ret; } +static void BeforeFork() { + StackDepotLockAll(); + ChainedOriginDepotLockAll(); +} + +static void AfterFork() { + ChainedOriginDepotUnlockAll(); + StackDepotUnlockAll(); +} + +SANITIZER_INTERFACE_ATTRIBUTE +pid_t __dfsw_fork(dfsan_label *ret_label) { + pid_t pid = fork(); + *ret_label = 0; + return pid; +} + +SANITIZER_INTERFACE_ATTRIBUTE +pid_t __dfso_fork(dfsan_label *ret_label, dfsan_origin *ret_origin) { + BeforeFork(); + pid_t pid = __dfsw_fork(ret_label); + AfterFork(); + *ret_origin = 0; + return pid; +} + // Default empty implementations (weak). Users should redefine them. 
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard, u32 *) {} SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard_init, u32 *, diff --git a/compiler-rt/lib/dfsan/dfsan_flags.inc b/compiler-rt/lib/dfsan/dfsan_flags.inc --- a/compiler-rt/lib/dfsan/dfsan_flags.inc +++ b/compiler-rt/lib/dfsan/dfsan_flags.inc @@ -29,3 +29,10 @@ DFSAN_FLAG(const char *, dump_labels_at_exit, "", "The path of the file where " "to dump the labels when the " "program terminates.") +DFSAN_FLAG(int, origin_history_size, Origin::kMaxDepth, "") +DFSAN_FLAG(int, origin_history_per_stack_limit, 20000, "") +DFSAN_FLAG(int, store_context_size, 20, + "Like malloc_context_size, but for taint stores.") +DFSAN_FLAG(bool, check_origin_invariant, false, + "Whether to check if the origin invariant holds.") + diff --git a/compiler-rt/lib/dfsan/dfsan_origin.h b/compiler-rt/lib/dfsan/dfsan_origin.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/dfsan/dfsan_origin.h @@ -0,0 +1,124 @@ +//===-- dfsan_origin.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Origin id utils. +//===----------------------------------------------------------------------===// +#ifndef DFSAN_ORIGIN_H +#define DFSAN_ORIGIN_H + +#include "dfsan_chained_origin_depot.h" +#include "sanitizer_common/sanitizer_stackdepot.h" + +namespace __dfsan { + +// Origin handling. +// +// Origin is a 32-bit identifier that is attached to any taint value in the +// program and describes, more or less exactly, how this memory came to be +// tainted. +// +// Chained origin id is like: +// zzzz xxxx xxxx xxxx +// +// Chained origin id describes an event of storing a taint value to +// memory. 
The xxx part is a value of ChainedOriginDepot, which is a mapping of +// (stack_id, prev_id) -> id, where +// * stack_id describes the event. +// StackDepot keeps a mapping between those and corresponding stack traces. +// * prev_id is another origin id that describes the earlier part of the +// taint value history. 0 prev_id indicates the start of a chain. +// Following a chain of prev_id provides the full recorded history of a taint +// value. +// +// This, effectively, defines a tree (or 2 trees, see below) where nodes are +// points in value history marked with origin ids, and edges are events that are +// marked with stack_id. +// +// The "zzzz" bits of chained origin id are used to store the length (or depth) +// of the origin chain. + +class Origin { + public: + static bool isValidId(u32 id) { return id != 0; } + + u32 raw_id() const { return raw_id_; } + + bool isChainedOrigin() const { return Origin::isValidId(raw_id_); } + + u32 getChainedId() const { + CHECK(Origin::isValidId(raw_id_)); + return raw_id_ & kChainedIdMask; + } + + // Returns the next origin in the chain and the current stack trace. + Origin getNextChainedOrigin(StackTrace *stack) const { + CHECK(Origin::isValidId(raw_id_)); + u32 prev_id; + u32 stack_id = ChainedOriginDepotGet(getChainedId(), &prev_id); + if (stack) + *stack = StackDepotGet(stack_id); + return Origin(prev_id); + } + + static Origin CreateChainedOrigin(Origin prev, StackTrace *stack) { + int depth = prev.isChainedOrigin() ? prev.depth() : -1; + // depth is the length of the chain minus 1. + // origin_history_size of 0 means unlimited depth. 
+ if (flags().origin_history_size > 0) { + if (depth + 1 >= flags().origin_history_size) { + return prev; + } else { + ++depth; + CHECK(depth < (1 << kDepthBits)); + } + } + + StackDepotHandle h = StackDepotPut_WithHandle(*stack); + if (!h.valid()) + return prev; + + if (flags().origin_history_per_stack_limit > 0) { + int use_count = h.use_count(); + if (use_count > flags().origin_history_per_stack_limit) + return prev; + } + + u32 chained_id; + bool inserted = ChainedOriginDepotPut(h.id(), prev.raw_id(), &chained_id); + CHECK((chained_id & kChainedIdMask) == chained_id); + + if (inserted && flags().origin_history_per_stack_limit > 0) + h.inc_use_count_unsafe(); + + return Origin((depth << kDepthShift) | chained_id); + } + + static Origin FromRawId(u32 id) { return Origin(id); } + + private: + static const int kDepthBits = 4; + static const int kDepthShift = 32 - kDepthBits; + + static const u32 kChainedIdMask = ((u32)-1) >> (32 - kDepthShift); + + u32 raw_id_; + + explicit Origin(u32 raw_id) : raw_id_(raw_id) {} + + int depth() const { + CHECK(isChainedOrigin()); + return (raw_id_ >> kDepthShift) & ((1 << kDepthBits) - 1); + } + + public: + static const int kMaxDepth = (1 << kDepthBits) - 1; +}; + +} // namespace __dfsan + +#endif // DFSAN_ORIGIN_H diff --git a/compiler-rt/lib/dfsan/dfsan_platform.h b/compiler-rt/lib/dfsan/dfsan_platform.h --- a/compiler-rt/lib/dfsan/dfsan_platform.h +++ b/compiler-rt/lib/dfsan/dfsan_platform.h @@ -19,7 +19,8 @@ #if defined(__x86_64__) struct Mapping { static const uptr kShadowAddr = 0x10000; - static const uptr kUnionTableAddr = 0x200000000000; + static const uptr kOriginAddr = 0x200000000000; + static const uptr kUnionTableAddr = 0x300000000000; static const uptr kAppAddr = 0x700000008000; static const uptr kShadowMask = ~0x700000000000; }; @@ -60,6 +61,9 @@ enum MappingType { MAPPING_SHADOW_ADDR, +#if defined(__x86_64__) + MAPPING_ORIGIN_ADDR, +#endif MAPPING_UNION_TABLE_ADDR, MAPPING_APP_ADDR, MAPPING_SHADOW_MASK @@ -69,6 
+73,10 @@ uptr MappingImpl(void) { switch (Type) { case MAPPING_SHADOW_ADDR: return Mapping::kShadowAddr; +#if defined(__x86_64__) + case MAPPING_ORIGIN_ADDR: + return Mapping::kOriginAddr; +#endif case MAPPING_UNION_TABLE_ADDR: return Mapping::kUnionTableAddr; case MAPPING_APP_ADDR: return Mapping::kAppAddr; case MAPPING_SHADOW_MASK: return Mapping::kShadowMask; @@ -95,6 +103,11 @@ return MappingArchImpl(); } +#if defined(__x86_64__) +ALWAYS_INLINE +uptr OriginAddr() { return MappingArchImpl(); } +#endif + ALWAYS_INLINE uptr UnionTableAddr() { return MappingArchImpl(); diff --git a/compiler-rt/lib/dfsan/dfsan_thread.h b/compiler-rt/lib/dfsan/dfsan_thread.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/dfsan/dfsan_thread.h @@ -0,0 +1,77 @@ +//===-- dfsan_thread.h ------------------------------------------*- C++ -*-===// +// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of DataFlowSanitizer. +// +//===----------------------------------------------------------------------===// + +#ifndef DFSAN_THREAD_H +#define DFSAN_THREAD_H + +#include "sanitizer_common/sanitizer_common.h" + +namespace __dfsan { + +class DFsanThread { + public: + static DFsanThread *Create(void *start_routine_trampoline, + thread_callback_t start_routine, void *arg, + bool track_origins = false); + static void TSDDtor(void *tsd); + void Destroy(); + + void Init(); // Should be called from the thread itself.
+ thread_return_t ThreadStart(); + + uptr stack_top(); + uptr stack_bottom(); + bool IsMainThread() { return start_routine_ == nullptr; } + + bool AddrIsInStack(uptr addr); + + bool InSignalHandler() { return in_signal_handler_; } + void EnterSignalHandler() { in_signal_handler_++; } + void LeaveSignalHandler() { in_signal_handler_--; } + + void StartSwitchFiber(uptr bottom, uptr size); + void FinishSwitchFiber(uptr *bottom_old, uptr *size_old); + + int destructor_iterations_; + + private: + // NOTE: There is no DFsanThread constructor. It is allocated + // via mmap() and *must* be valid in zero-initialized state. + void SetThreadStackAndTls(); + struct StackBounds { + uptr bottom; + uptr top; + }; + StackBounds GetStackBounds() const; + + void *start_routine_trampoline_; + thread_callback_t start_routine_; + void *arg_; + bool track_origins_; + + bool stack_switching_; + + StackBounds stack_; + StackBounds next_stack_; + + unsigned in_signal_handler_; +}; + +DFsanThread *GetCurrentThread(); +void SetCurrentThread(DFsanThread *t); +void DFsanTSDInit(void (*destructor)(void *tsd)); +void DFsanTSDDtor(void *tsd); + +} // namespace __dfsan + +#endif // DFSAN_THREAD_H diff --git a/compiler-rt/lib/dfsan/dfsan_thread.cpp b/compiler-rt/lib/dfsan/dfsan_thread.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/dfsan/dfsan_thread.cpp @@ -0,0 +1,158 @@ + +#include "dfsan_thread.h" + +#include + +#include "dfsan.h" + +namespace __dfsan { + +DFsanThread *DFsanThread::Create(void *start_routine_trampoline, + thread_callback_t start_routine, void *arg, + bool track_origins) { + uptr PageSize = GetPageSizeCached(); + uptr size = RoundUpTo(sizeof(DFsanThread), PageSize); + DFsanThread *thread = (DFsanThread *)MmapOrDie(size, __func__); + thread->start_routine_trampoline_ = start_routine_trampoline; + thread->start_routine_ = start_routine; + thread->arg_ = arg; + thread->track_origins_ = track_origins; + thread->destructor_iterations_ = GetPthreadDestructorIterations(); + 
+ return thread; +} + +void DFsanThread::SetThreadStackAndTls() { + uptr tls_size = 0; + uptr stack_size = 0; + uptr tls_begin_; + GetThreadStackAndTls(IsMainThread(), &stack_.bottom, &stack_size, &tls_begin_, + &tls_size); + stack_.top = stack_.bottom + stack_size; + + int local; + CHECK(AddrIsInStack((uptr)&local)); +} + +void DFsanThread::Init() { + SetThreadStackAndTls(); + // CHECK(MEM_IS_APP(stack_.bottom)); + // CHECK(MEM_IS_APP(stack_.top - 1)); +} + +void DFsanThread::TSDDtor(void *tsd) { + DFsanThread *t = (DFsanThread *)tsd; + t->Destroy(); +} + +void DFsanThread::Destroy() { + uptr size = RoundUpTo(sizeof(DFsanThread), GetPageSizeCached()); + UnmapOrDie(this, size); +} + +thread_return_t DFsanThread::ThreadStart() { + Init(); + + if (!start_routine_) { + // start_routine_ == 0 if we're on the main thread or on one of the + // OS X libdispatch worker threads. But nobody is supposed to call + // ThreadStart() for the worker threads. + return 0; + } + + CHECK(start_routine_trampoline_); + + typedef void *(*thread_callback_trampoline_t)(void *, void *, dfsan_label, + dfsan_label *); + typedef void *(*thread_callback_origin_trampoline_t)( + void *, void *, dfsan_label, dfsan_label *, dfsan_origin, dfsan_origin *); + + dfsan_label ret_label; + if (!track_origins_) + return ((thread_callback_trampoline_t) + start_routine_trampoline_)((void *)start_routine_, arg_, 0, + &ret_label); + + dfsan_origin ret_origin; + return ((thread_callback_origin_trampoline_t) + start_routine_trampoline_)((void *)start_routine_, arg_, 0, + &ret_label, 0, &ret_origin); +} + +DFsanThread::StackBounds DFsanThread::GetStackBounds() const { + if (!stack_switching_) + return {stack_.bottom, stack_.top}; + const uptr cur_stack = GET_CURRENT_FRAME(); + // Note: need to check next stack first, because FinishSwitchFiber + // may be in process of overwriting stack_.top/bottom_. But in such case + // we are already on the next stack. 
+ if (cur_stack >= next_stack_.bottom && cur_stack < next_stack_.top) + return {next_stack_.bottom, next_stack_.top}; + return {stack_.bottom, stack_.top}; +} + +uptr DFsanThread::stack_top() { return GetStackBounds().top; } + +uptr DFsanThread::stack_bottom() { return GetStackBounds().bottom; } + +bool DFsanThread::AddrIsInStack(uptr addr) { + const auto bounds = GetStackBounds(); + return addr >= bounds.bottom && addr < bounds.top; +} + +void DFsanThread::StartSwitchFiber(uptr bottom, uptr size) { + CHECK(!stack_switching_); + next_stack_.bottom = bottom; + next_stack_.top = bottom + size; + stack_switching_ = true; +} + +void DFsanThread::FinishSwitchFiber(uptr *bottom_old, uptr *size_old) { + CHECK(stack_switching_); + if (bottom_old) + *bottom_old = stack_.bottom; + if (size_old) + *size_old = stack_.top - stack_.bottom; + stack_.bottom = next_stack_.bottom; + stack_.top = next_stack_.top; + stack_switching_ = false; + next_stack_.top = 0; + next_stack_.bottom = 0; +} + +static pthread_key_t tsd_key; +static bool tsd_key_inited = false; + +void DFsanTSDInit(void (*destructor)(void *tsd)) { + CHECK(!tsd_key_inited); + tsd_key_inited = true; + CHECK_EQ(0, pthread_key_create(&tsd_key, destructor)); +} + +static THREADLOCAL DFsanThread *dfsan_current_thread; + +DFsanThread *GetCurrentThread() { return dfsan_current_thread; } + +void SetCurrentThread(DFsanThread *t) { + // Make sure we do not reset the current DFsanThread. + CHECK_EQ(0, dfsan_current_thread); + dfsan_current_thread = t; + // Make sure that DFsanTSDDtor gets called at the end. + CHECK(tsd_key_inited); + pthread_setspecific(tsd_key, (void *)t); +} + +void DFsanTSDDtor(void *tsd) { + DFsanThread *t = (DFsanThread *)tsd; + if (t->destructor_iterations_ > 1) { + t->destructor_iterations_--; + CHECK_EQ(0, pthread_setspecific(tsd_key, tsd)); + return; + } + dfsan_current_thread = nullptr; + // Make sure that signal handler can not see a stale current thread pointer. 
+ atomic_signal_fence(memory_order_seq_cst); + DFsanThread::TSDDtor(tsd); +} + +} // namespace __dfsan diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt --- a/compiler-rt/lib/dfsan/done_abilist.txt +++ b/compiler-rt/lib/dfsan/done_abilist.txt @@ -28,6 +28,12 @@ fun:dfsan_set_write_callback=custom fun:dfsan_flush=uninstrumented fun:dfsan_flush=discard +fun:dfsan_print_origin_trace=uninstrumented +fun:dfsan_print_origin_trace=discard +fun:dfsan_get_origin=uninstrumented +fun:dfsan_get_origin=custom +fun:dfsan_get_init_origin=uninstrumented +fun:dfsan_get_init_origin=discard ############################################################################### # glibc @@ -214,6 +220,7 @@ fun:inet_pton=custom fun:localtime_r=custom fun:memcpy=custom +fun:memmove=custom fun:memset=custom fun:strcpy=custom fun:strdup=custom @@ -223,6 +230,7 @@ fun:strtoll=custom fun:strtoul=custom fun:strtoull=custom +fun:strcat=custom # Functions that produce an output that is computed from the input, but is not # necessarily data dependent. 
@@ -252,6 +260,7 @@ fun:select=custom fun:sigemptyset=custom fun:sigaction=custom +fun:signal=custom fun:gettimeofday=custom # sprintf-like @@ -262,6 +271,9 @@ fun:asprintf=discard fun:qsort=discard +# fork +fun:fork=custom + ############################################################################### # pthread ############################################################################### diff --git a/compiler-rt/test/dfsan/custom.cpp b/compiler-rt/test/dfsan/custom.cpp --- a/compiler-rt/test/dfsan/custom.cpp +++ b/compiler-rt/test/dfsan/custom.cpp @@ -118,6 +118,16 @@ ASSERT_LABEL(str2[3], i_label); } +void test_memmove() { + char str[] = "str1xx"; + dfsan_set_label(i_label, &str[3], 1); + + ASSERT_ZERO_LABEL(memmove(str + 2, str, 4)); + assert(0 == memcmp(str + 2, "str1", 4)); + ASSERT_ZERO_LABEL(str[4]); + ASSERT_LABEL(str[5], i_label); +} + void test_memset() { char buf[8]; int j = 'a'; @@ -564,6 +574,7 @@ ret = fgets(buf, sizeof(buf), f); ASSERT_LABEL(ret, j_label); fclose(f); + free(buf); } void test_getcwd() { @@ -607,6 +618,27 @@ ASSERT_READ_ZERO_LABEL(&usage, sizeof(usage)); } +void test_strcat() { + char src[] = "world"; + char dst[] = "hello \0 "; + char *p = dst; + dfsan_set_label(k_label, &p, sizeof(p)); + dfsan_set_label(i_label, src, sizeof(src) + 1); + dfsan_set_label(j_label, dst, sizeof(dst) + 1); + char *ret = strcat(p, src); + ASSERT_LABEL(ret, k_label); + assert(ret == dst); + assert(strcmp(src, dst + 6) == 0); + for (int i = 0; i < 6; ++i) { + ASSERT_LABEL(dst[i], j_label); + } + for (int i = 6; i < strlen(dst); ++i) { + ASSERT_LABEL(dst[i], i_label); + assert(dfsan_get_label(dst[i]) == dfsan_get_label(src[i - 6])); + } + ASSERT_LABEL(dst[11], j_label); +} + void test_strcpy() { char src[] = "hello world"; char dst[sizeof(src) + 2]; @@ -820,6 +852,14 @@ ASSERT_READ_ZERO_LABEL(&oldact, sizeof(oldact)); } +static void SignalHandler(int signo) {} + +void test_signal() { + sighandler_t old_signal_handler = signal(SIGHUP, SignalHandler); + 
ASSERT_ZERO_LABEL(old_signal_handler); + (void)signal(SIGHUP, old_signal_handler); +} + void test_sigaltstack() { stack_t old_altstack = {}; dfsan_set_label(j_label, &old_altstack, sizeof(old_altstack)); @@ -1236,6 +1276,10 @@ ASSERT_LABEL(r, 0); } +// Tested by a separate source file. This empty function is here to appease the +// check-wrappers script. +void test_fork() {} + int main(void) { #ifdef FAST_16_LABELS i_label = 1; @@ -1277,6 +1321,7 @@ test_memchr(); test_memcmp(); test_memcpy(); + test_memmove(); test_memset(); test_nanosleep(); test_poll(); @@ -1289,6 +1334,7 @@ test_sched_getaffinity(); test_select(); test_sigaction(); + test_signal(); test_sigaltstack(); test_sigemptyset(); test_snprintf(); @@ -1298,6 +1344,7 @@ test_strcasecmp(); test_strchr(); test_strcmp(); + test_strcat(); test_strcpy(); test_strdup(); test_strlen(); @@ -1314,4 +1361,5 @@ test_strtoull(); test_time(); test_write(); + test_fork(); } diff --git a/compiler-rt/test/dfsan/origin_add_label.c b/compiler-rt/test/dfsan/origin_add_label.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_add_label.c @@ -0,0 +1,35 @@ +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true -mllvm -dfsan-instrumentation-with-call-threshold=0 %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +#include + +__attribute__((noinline)) uint64_t foo(uint64_t a, uint64_t b) { return a + b; } + +int main(int argc, char *argv[]) { + uint64_t a = 10; + uint64_t b = 20; + dfsan_add_label(4, &a, sizeof(a)); + dfsan_add_label(8, &a, sizeof(a)); + uint64_t c = foo(a, b); + dfsan_print_origin_trace((int*)(&c), NULL); + dfsan_print_origin_trace((int*)(&c) + 1, NULL); +} + +// CHECK: Taint value 0xc {{.*}} origin tracking () +// CHECK: Origin
value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_add_label.c:[[@LINE-7]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_add_label.c:[[@LINE-11]] + +// CHECK: Taint value 0xc {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_add_label.c:[[@LINE-14]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_add_label.c:[[@LINE-18]] diff --git a/compiler-rt/test/dfsan/custom.cpp b/compiler-rt/test/dfsan/origin_custom.cpp copy from compiler-rt/test/dfsan/custom.cpp copy to compiler-rt/test/dfsan/origin_custom.cpp --- a/compiler-rt/test/dfsan/custom.cpp +++ b/compiler-rt/test/dfsan/origin_custom.cpp @@ -1,8 +1,5 @@ -// RUN: %clang_dfsan %s -o %t && DFSAN_OPTIONS="strict_data_dependencies=0" %run %t -// RUN: %clang_dfsan -mllvm -dfsan-args-abi %s -o %t && DFSAN_OPTIONS="strict_data_dependencies=0" %run %t -// RUN: %clang_dfsan -DFAST_16_LABELS -mllvm -dfsan-fast-16-labels %s -o %t && DFSAN_OPTIONS="strict_data_dependencies=0" %run %t -// RUN: %clang_dfsan -DSTRICT_DATA_DEPENDENCIES %s -o %t && %run %t -// RUN: %clang_dfsan -DSTRICT_DATA_DEPENDENCIES -mllvm -dfsan-args-abi %s -o %t && %run %t +// RUN: %clang_dfsan -DFAST_16_LABELS -mllvm -dfsan-fast-16-labels -mllvm -dfsan-track-origins=1 -mllvm -dfsan-combine-pointer-labels-on-load=false -DSTRICT_DATA_DEPENDENCIES %s -o %t && %run %t +// RUN: %clang_dfsan -DFAST_16_LABELS -mllvm -dfsan-fast-16-labels -mllvm -dfsan-track-origins=1 -mllvm -dfsan-combine-pointer-labels-on-load=false %s -o %t && DFSAN_OPTIONS="strict_data_dependencies=0" %run %t // Tests custom implementations of various glibc functions. 
@@ -35,32 +32,96 @@ dfsan_label i_label = 0; dfsan_label j_label = 0; dfsan_label k_label = 0; +dfsan_label m_label = 0; +dfsan_label n_label = 0; dfsan_label i_j_label = 0; #define ASSERT_ZERO_LABEL(data) \ - assert(0 == dfsan_get_label((long) (data))) + assert(0 == dfsan_get_label((long)(data))) #define ASSERT_READ_ZERO_LABEL(ptr, size) \ assert(0 == dfsan_read_label(ptr, size)) #define ASSERT_LABEL(data, label) \ - assert(label == dfsan_get_label((long) (data))) + assert(label == dfsan_get_label((long)(data))) #define ASSERT_READ_LABEL(ptr, size, label) \ assert(label == dfsan_read_label(ptr, size)) +#define ASSERT_ZERO_ORIGIN(data) \ + assert(0 == dfsan_get_origin((long)(data))) + +#define ASSERT_ZERO_ORIGINS(ptr, size) \ + for (int i = 0; i < size; ++i) { \ + assert(0 == dfsan_get_origin((long)(((char *)ptr)[i]))); \ + } + +#define ASSERT_ORIGIN(data, origin) \ + assert(origin == dfsan_get_origin((long)(data))) + +#define ASSERT_ORIGINS(ptr, size, origin) \ + for (int i = 0; i < size; ++i) { \ + assert(origin == dfsan_get_origin((long)(((char *)ptr)[i]))); \ + } + +#define ASSERT_INIT_ORIGIN(ptr, origin) \ + assert(origin == dfsan_get_init_origin(ptr)) + +#define ASSERT_INIT_ORIGIN_EQ_ORIGIN(ptr, data) \ + assert(dfsan_get_origin((long)(data)) == dfsan_get_init_origin(ptr)) + +#define ASSERT_INIT_ORIGINS(ptr, size, origin) \ + for (int i = 0; i < size; ++i) { \ + assert(origin == dfsan_get_init_origin(&((char *)ptr)[i])); \ + } + +#define ASSERT_EQ_ORIGIN(data1, data2) \ + assert(dfsan_get_origin((long)(data1)) == dfsan_get_origin((long)(data2))) + +#define DEFINE_AND_SAVE_ORIGINS(val) \ + dfsan_origin val##_o[sizeof(val)]; \ + for (int i = 0; i < sizeof(val); ++i) \ + val##_o[i] = dfsan_get_origin((long)(((char *)(&val))[i])); + +#define SAVE_ORIGINS(val) \ + for (int i = 0; i < sizeof(val); ++i) \ + val##_o[i] = dfsan_get_origin((long)(((char *)(&val))[i])); + +#define ASSERT_SAVED_ORIGINS(val) \ + for (int i = 0; i < sizeof(val); ++i) \ + 
ASSERT_ORIGIN(((char *)(&val))[i], val##_o[i]); + +#define DEFINE_AND_SAVE_N_ORIGINS(val, n) \ + dfsan_origin val##_o[n]; \ + for (int i = 0; i < n; ++i) \ + val##_o[i] = dfsan_get_origin((long)(val[i])); + +#define ASSERT_SAVED_N_ORIGINS(val, n) \ + for (int i = 0; i < n; ++i) \ + ASSERT_ORIGIN(val[i], val##_o[i]); + void test_stat() { int i = 1; dfsan_set_label(i_label, &i, sizeof(i)); struct stat s; s.st_dev = i; - assert(0 == stat("/", &s)); + DEFINE_AND_SAVE_ORIGINS(s) + int ret = stat("/", &s); + assert(0 == ret); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_ZERO_LABEL(s.st_dev); + ASSERT_SAVED_ORIGINS(s) s.st_dev = i; - assert(-1 == stat("/nonexistent", &s)); + SAVE_ORIGINS(s) + ret = stat("/nonexistent", &s); + assert(-1 == ret); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_LABEL(s.st_dev, i_label); + ASSERT_SAVED_ORIGINS(s) } void test_fstat() { @@ -70,9 +131,13 @@ struct stat s; int fd = open("/dev/zero", O_RDONLY); s.st_dev = i; + DEFINE_AND_SAVE_ORIGINS(s) int rv = fstat(fd, &s); assert(0 == rv); + ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); ASSERT_ZERO_LABEL(s.st_dev); + ASSERT_SAVED_ORIGINS(s) } void test_memcmp() { @@ -84,9 +149,16 @@ assert(rv < 0); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); #else ASSERT_LABEL(rv, i_j_label); + ASSERT_EQ_ORIGIN(rv, str1[3]); #endif + + rv = memcmp(str1, str2, sizeof(str1) - 2); + assert(rv == 0); + ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); } void test_bcmp() { @@ -98,13 +170,16 @@ assert(rv != 0); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); #else ASSERT_LABEL(rv, i_j_label); + ASSERT_EQ_ORIGIN(rv, str1[3]); #endif rv = bcmp(str1, str2, sizeof(str1) - 2); assert(rv == 0); ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); } void test_memcpy() { @@ -112,20 +187,60 @@ char str2[sizeof(str1)]; dfsan_set_label(i_label, &str1[3], 1); - ASSERT_ZERO_LABEL(memcpy(str2, str1, sizeof(str1))); + 
DEFINE_AND_SAVE_N_ORIGINS(str1, sizeof(str1)) + + char *ptr2 = str2; + dfsan_set_label(j_label, &ptr2, sizeof(ptr2)); + + void *r = memcpy(ptr2, str1, sizeof(str1)); + ASSERT_LABEL(r, j_label); + ASSERT_EQ_ORIGIN(r, ptr2); assert(0 == memcmp(str2, str1, sizeof(str1))); ASSERT_ZERO_LABEL(str2[0]); ASSERT_LABEL(str2[3], i_label); + + for (int i = 0; i < sizeof(str2); ++i) { + if (!dfsan_get_label(str2[i])) + continue; + ASSERT_INIT_ORIGIN(&(str2[i]), str1_o[i]); + } +} + +void test_memmove() { + char str[] = "str1xx"; + dfsan_set_label(i_label, &str[3], 1); + + DEFINE_AND_SAVE_N_ORIGINS(str, sizeof(str)) + + char *ptr = str + 2; + dfsan_set_label(j_label, &ptr, sizeof(ptr)); + + void *r = memmove(ptr, str, 4); + ASSERT_LABEL(r, j_label); + ASSERT_EQ_ORIGIN(r, ptr); + assert(0 == memcmp(str + 2, "str1", 4)); + ASSERT_ZERO_LABEL(str[4]); + ASSERT_LABEL(str[5], i_label); + + for (int i = 0; i < 4; ++i) { + if (!dfsan_get_label(ptr[i])) + continue; + ASSERT_INIT_ORIGIN(&(ptr[i]), str_o[i]); + } } void test_memset() { char buf[8]; int j = 'a'; + char *ptr = buf; dfsan_set_label(j_label, &j, sizeof(j)); - - ASSERT_ZERO_LABEL(memset(&buf, j, sizeof(buf))); + dfsan_set_label(k_label, &ptr, sizeof(ptr)); + void *ret = memset(ptr, j, sizeof(buf)); + ASSERT_LABEL(ret, k_label); + ASSERT_EQ_ORIGIN(ret, ptr); for (int i = 0; i < 8; ++i) { ASSERT_LABEL(buf[i], j_label); + ASSERT_EQ_ORIGIN(buf[i], j); assert(buf[i] == 'a'); } } @@ -139,8 +254,20 @@ assert(rv < 0); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); #else ASSERT_LABEL(rv, i_j_label); + ASSERT_EQ_ORIGIN(rv, str1[3]); +#endif + + rv = strcmp(str1, str1); + assert(rv == 0); +#ifdef STRICT_DATA_DEPENDENCIES + ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); +#else + ASSERT_LABEL(rv, i_label); + ASSERT_EQ_ORIGIN(rv, str1[3]); #endif } @@ -152,18 +279,30 @@ assert(rv == 4); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); #else ASSERT_LABEL(rv, i_label); + 
ASSERT_EQ_ORIGIN(rv, str1[3]); #endif } void test_strdup() { char str1[] = "str1"; dfsan_set_label(i_label, &str1[3], 1); + DEFINE_AND_SAVE_N_ORIGINS(str1, sizeof(str1)) char *strd = strdup(str1); + ASSERT_ZERO_LABEL(strd); + ASSERT_ZERO_ORIGIN(strd); ASSERT_ZERO_LABEL(strd[0]); ASSERT_LABEL(strd[3], i_label); + + for (int i = 0; i < sizeof(str1); ++i) { + if (!dfsan_get_label(strd[i])) + continue; + ASSERT_INIT_ORIGIN(&(strd[i]), str1_o[i]); + } + free(strd); } @@ -176,15 +315,21 @@ assert(strd == str2); assert(strcmp(str1, str2) == 0); ASSERT_ZERO_LABEL(strd); + ASSERT_ZERO_ORIGIN(strd); ASSERT_ZERO_LABEL(strd[0]); ASSERT_ZERO_LABEL(strd[1]); ASSERT_ZERO_LABEL(strd[2]); ASSERT_LABEL(strd[3], i_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&(strd[3]), str1[3]); + + char *p2 = str2; + dfsan_set_label(j_label, &p2, sizeof(p2)); - strd = strncpy(str2, str1, 3); + strd = strncpy(p2, str1, 3); assert(strd == str2); assert(strncmp(str1, str2, 3) == 0); - ASSERT_ZERO_LABEL(strd); + ASSERT_LABEL(strd, j_label); + ASSERT_EQ_ORIGIN(strd, p2); ASSERT_ZERO_LABEL(strd[0]); ASSERT_ZERO_LABEL(strd[1]); ASSERT_ZERO_LABEL(strd[2]); @@ -199,13 +344,30 @@ assert(rv < 0); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); #else ASSERT_LABEL(rv, dfsan_union(i_label, j_label)); + ASSERT_EQ_ORIGIN(rv, str1[3]); #endif + rv = strncmp(str1, str2, 0); + assert(rv == 0); + ASSERT_ZERO_LABEL(rv); + rv = strncmp(str1, str2, 3); assert(rv == 0); ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); + + rv = strncmp(str1, str1, 4); + assert(rv == 0); +#ifdef STRICT_DATA_DEPENDENCIES + ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); +#else + ASSERT_LABEL(rv, i_label); + ASSERT_EQ_ORIGIN(rv, str1[3]); +#endif } void test_strcasecmp() { @@ -218,16 +380,20 @@ assert(rv < 0); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); #else ASSERT_LABEL(rv, dfsan_union(i_label, j_label)); + ASSERT_EQ_ORIGIN(rv, str1[3]); #endif rv = strcasecmp(str1, str3);
assert(rv == 0); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); #else ASSERT_LABEL(rv, dfsan_union(i_label, j_label)); + ASSERT_EQ_ORIGIN(rv, str1[3]); #endif char s1[] = "AbZ"; @@ -239,8 +405,10 @@ assert(rv > 0); // 'Z' > 'y' #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); #else ASSERT_LABEL(rv, dfsan_union(i_label, j_label)); + ASSERT_EQ_ORIGIN(rv, s1[2]); #endif } @@ -253,37 +421,46 @@ assert(rv < 0); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); #else ASSERT_LABEL(rv, dfsan_union(i_label, j_label)); + ASSERT_EQ_ORIGIN(rv, str1[3]); #endif rv = strncasecmp(str1, str2, 3); assert(rv == 0); ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); char s1[] = "AbZ"; char s2[] = "aBy"; + dfsan_set_label(i_label, &s1[2], 1); dfsan_set_label(j_label, &s2[2], 1); rv = strncasecmp(s1, s2, 0); assert(rv == 0); // Compare zero chars. ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); rv = strncasecmp(s1, s2, 1); assert(rv == 0); // 'A' == 'a' ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); rv = strncasecmp(s1, s2, 2); assert(rv == 0); // 'b' == 'B' ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); rv = strncasecmp(s1, s2, 3); assert(rv > 0); // 'Z' > 'y' #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); #else ASSERT_LABEL(rv, dfsan_union(i_label, j_label)); + ASSERT_EQ_ORIGIN(rv, s1[2]); #endif } @@ -291,35 +468,56 @@ char str1[] = "str1"; dfsan_set_label(i_label, &str1[3], 1); - char *crv = strchr(str1, 'r'); + char *p1 = str1; + char c = 'r'; + dfsan_set_label(k_label, &c, sizeof(c)); + + char *crv = strchr(p1, c); assert(crv == &str1[2]); +#ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(crv); +#else + ASSERT_LABEL(crv, k_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&crv, c); +#endif + + dfsan_set_label(j_label, &p1, sizeof(p1)); + crv = strchr(p1, 'r'); + assert(crv == &str1[2]); + ASSERT_LABEL(crv, j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&crv, p1); 
- crv = strchr(str1, '1'); + crv = strchr(p1, '1'); assert(crv == &str1[3]); #ifdef STRICT_DATA_DEPENDENCIES - ASSERT_ZERO_LABEL(crv); + ASSERT_LABEL(crv, j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&crv, p1); #else - ASSERT_LABEL(crv, i_label); + ASSERT_LABEL(crv, i_j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&crv, str1[3]); #endif - crv = strchr(str1, 'x'); + crv = strchr(p1, 'x'); assert(!crv); #ifdef STRICT_DATA_DEPENDENCIES - ASSERT_ZERO_LABEL(crv); + ASSERT_LABEL(crv, j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&crv, p1); #else - ASSERT_LABEL(crv, i_label); + ASSERT_LABEL(crv, i_j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&crv, str1[3]); #endif // `man strchr` says: // The terminating null byte is considered part of the string, so that if c // is specified as '\0', these functions return a pointer to the terminator. - crv = strchr(str1, '\0'); + crv = strchr(p1, '\0'); assert(crv == &str1[4]); #ifdef STRICT_DATA_DEPENDENCIES - ASSERT_ZERO_LABEL(crv); + ASSERT_LABEL(crv, j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&crv, p1); #else - ASSERT_LABEL(crv, i_label); + ASSERT_LABEL(crv, i_j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&crv, str1[3]); #endif } @@ -327,13 +525,15 @@ // With any luck this sequence of calls will cause calloc to return the same // pointer both times. This is probably the best we can do to test this // function. - char *crv = (char *) calloc(4096, 1); + char *crv = (char *)calloc(4096, 1); ASSERT_ZERO_LABEL(crv[0]); + ASSERT_ZERO_ORIGIN(crv); dfsan_set_label(i_label, crv, 100); free(crv); - crv = (char *) calloc(4096, 1); + crv = (char *)calloc(4096, 1); ASSERT_ZERO_LABEL(crv[0]); + ASSERT_ZERO_ORIGIN(crv); free(crv); } @@ -374,6 +574,9 @@ dfsan_set_label(i_label, &rmmsg[1].msg_len, sizeof(rmmsg[1].msg_len)); dfsan_set_label(i_label, &timeout, sizeof(timeout)); + dfsan_origin msg_len0_o = dfsan_get_origin((long)(rmmsg[0].msg_len)); + dfsan_origin msg_len1_o = dfsan_get_origin((long)(rmmsg[1].msg_len)); + // Receive messages and check labels. 
int received_msgs = recvmmsg(sockfds[1], rmmsg, 2, 0, &timeout); assert(received_msgs == sent_msgs); @@ -381,6 +584,7 @@ assert(rmmsg[1].msg_len == smmsg[1].msg_len); assert(memcmp(sbuf0, rbuf0, 8) == 0); assert(memcmp(sbuf1, rbuf1, 7) == 0); + ASSERT_ZERO_ORIGIN(received_msgs); ASSERT_ZERO_LABEL(received_msgs); ASSERT_ZERO_LABEL(rmmsg[0].msg_len); ASSERT_ZERO_LABEL(rmmsg[1].msg_len); @@ -391,6 +595,9 @@ ASSERT_LABEL(timeout.tv_sec, i_label); ASSERT_LABEL(timeout.tv_nsec, i_label); + ASSERT_ORIGIN((long)(rmmsg[0].msg_len), msg_len0_o); + ASSERT_ORIGIN((long)(rmmsg[1].msg_len), msg_len1_o); + close(sockfds[0]); close(sockfds[1]); } @@ -418,14 +625,19 @@ dfsan_set_label(i_label, rbuf, sizeof(rbuf)); dfsan_set_label(i_label, &rmsg, sizeof(rmsg)); + DEFINE_AND_SAVE_ORIGINS(rmsg) + ssize_t received = recvmsg(sockfds[1], &rmsg, 0); assert(received == sent); assert(memcmp(sbuf, rbuf, 8) == 0); + ASSERT_ZERO_ORIGIN(received); ASSERT_ZERO_LABEL(received); ASSERT_READ_ZERO_LABEL(&rmsg, sizeof(rmsg)); ASSERT_READ_ZERO_LABEL(&rbuf[0], 8); ASSERT_READ_LABEL(&rbuf[8], 1, i_label); + ASSERT_SAVED_ORIGINS(rmsg) + close(sockfds[0]); close(sockfds[1]); } @@ -435,6 +647,7 @@ dfsan_set_label(i_label, buf, 1); dfsan_set_label(j_label, buf + 15, 1); + DEFINE_AND_SAVE_ORIGINS(buf) ASSERT_LABEL(buf[0], i_label); ASSERT_LABEL(buf[15], j_label); @@ -444,6 +657,9 @@ ASSERT_ZERO_LABEL(rv); ASSERT_ZERO_LABEL(buf[0]); ASSERT_ZERO_LABEL(buf[15]); + ASSERT_ZERO_ORIGIN(rv); + ASSERT_SAVED_ORIGINS(buf) + close(fd); } @@ -452,6 +668,7 @@ dfsan_set_label(i_label, buf, 1); dfsan_set_label(j_label, buf + 15, 1); + DEFINE_AND_SAVE_ORIGINS(buf) ASSERT_LABEL(buf[0], i_label); ASSERT_LABEL(buf[15], j_label); @@ -461,6 +678,9 @@ ASSERT_ZERO_LABEL(rv); ASSERT_ZERO_LABEL(buf[0]); ASSERT_ZERO_LABEL(buf[15]); + ASSERT_ZERO_ORIGIN(rv); + ASSERT_SAVED_ORIGINS(buf) + close(fd); } @@ -468,40 +688,59 @@ void *map = dlopen(NULL, RTLD_NOW); assert(map); ASSERT_ZERO_LABEL(map); + ASSERT_ZERO_ORIGIN(map); dlclose(map); 
map = dlopen("/nonexistent", RTLD_NOW); assert(!map); ASSERT_ZERO_LABEL(map); + ASSERT_ZERO_ORIGIN(map); } void test_clock_gettime() { struct timespec tp; dfsan_set_label(j_label, ((char *)&tp) + 3, 1); + dfsan_origin origin = dfsan_get_origin((long)(((char *)&tp)[3])); int t = clock_gettime(CLOCK_REALTIME, &tp); assert(t == 0); ASSERT_ZERO_LABEL(t); ASSERT_ZERO_LABEL(((char *)&tp)[3]); + ASSERT_ZERO_ORIGIN(t); + ASSERT_ORIGIN(((char *)&tp)[3], origin); } void test_ctime_r() { - char *buf = (char*) malloc(64); + char *buf = (char *)malloc(64); time_t t = 0; + DEFINE_AND_SAVE_ORIGINS(buf) + dfsan_origin buf_ptr_o = dfsan_get_origin((long)buf); + dfsan_origin t_o = dfsan_get_origin((long)t); + char *ret = ctime_r(&t, buf); ASSERT_ZERO_LABEL(ret); + ASSERT_ORIGIN(ret, buf_ptr_o); assert(buf == ret); ASSERT_READ_ZERO_LABEL(buf, strlen(buf) + 1); + ASSERT_SAVED_ORIGINS(buf) dfsan_set_label(i_label, &t, sizeof(t)); + t_o = dfsan_get_origin((long)t); ret = ctime_r(&t, buf); ASSERT_ZERO_LABEL(ret); + ASSERT_ORIGIN(ret, buf_ptr_o); ASSERT_READ_LABEL(buf, strlen(buf) + 1, i_label); + for (int i = 0; i < strlen(buf) + 1; ++i) + ASSERT_ORIGIN(buf[i], t_o); t = 0; dfsan_set_label(j_label, &buf, sizeof(&buf)); + buf_ptr_o = dfsan_get_origin((long)buf); ret = ctime_r(&t, buf); ASSERT_LABEL(ret, j_label); + ASSERT_ORIGIN(ret, buf_ptr_o); ASSERT_READ_ZERO_LABEL(buf, strlen(buf) + 1); + for (int i = 0; i < strlen(buf) + 1; ++i) + ASSERT_ORIGIN(buf[i], t_o); } static int write_callback_count = 0; @@ -513,7 +752,7 @@ write_callback_count++; last_fd = fd; - last_buf = (const unsigned char*) buf; + last_buf = (const unsigned char *)buf; last_count = count; } @@ -527,6 +766,8 @@ write_callback_count = 0; + DEFINE_AND_SAVE_ORIGINS(buf) + // Callback should be invoked on every call to write(). 
int res = write(fd, buf, buf_len); assert(write_callback_count == 1); @@ -534,6 +775,13 @@ ASSERT_READ_ZERO_LABEL(&last_fd, sizeof(last_fd)); ASSERT_READ_ZERO_LABEL(last_buf, sizeof(last_buf)); ASSERT_READ_ZERO_LABEL(&last_count, sizeof(last_count)); + ASSERT_ZERO_ORIGINS(&res, sizeof(res)); + ASSERT_ZERO_ORIGINS(&last_fd, sizeof(last_fd)); + + for (int i = 0; i < sizeof(last_buf); ++i) + ASSERT_ORIGIN(last_buf[i], buf_o[i]); + + ASSERT_ZERO_ORIGINS(&last_count, sizeof(last_count)); // Add a label to write() arguments. Check that the labels are readable from // the values passed to the callback. @@ -541,6 +789,10 @@ dfsan_set_label(j_label, &(buf[3]), 1); dfsan_set_label(k_label, &buf_len, sizeof(buf_len)); + dfsan_origin fd_o = dfsan_get_origin((long)fd); + dfsan_origin buf3_o = dfsan_get_origin((long)(buf[3])); + dfsan_origin buf_len_o = dfsan_get_origin((long)buf_len); + res = write(fd, buf, buf_len); assert(write_callback_count == 2); ASSERT_READ_ZERO_LABEL(&res, sizeof(res)); @@ -548,79 +800,166 @@ ASSERT_READ_LABEL(&last_buf[3], sizeof(last_buf[3]), j_label); ASSERT_READ_LABEL(last_buf, sizeof(last_buf), j_label); ASSERT_READ_LABEL(&last_count, sizeof(last_count), k_label); + ASSERT_ZERO_ORIGINS(&res, sizeof(res)); + ASSERT_INIT_ORIGINS(&last_fd, sizeof(last_fd), fd_o); + ASSERT_INIT_ORIGINS(&last_buf[3], sizeof(last_buf[3]), buf3_o); + + for (int i = 0; i < sizeof(last_buf); ++i) { + size_t i_addr = size_t(&last_buf[i]); + if (((size_t(&last_buf[3]) & ~3UL) > i_addr) || + (((size_t(&last_buf[3]) + 4) & ~3UL) <= i_addr)) + ASSERT_ORIGIN(last_buf[i], buf_o[i]); + } + + ASSERT_INIT_ORIGINS(&last_count, sizeof(last_count), buf_len_o); dfsan_set_write_callback(NULL); } void test_fgets() { - char *buf = (char*) malloc(128); + char *buf = (char *)malloc(128); + FILE *f = fopen("/etc/passwd", "r"); dfsan_set_label(j_label, buf, 1); + DEFINE_AND_SAVE_N_ORIGINS(buf, 128) + char *ret = fgets(buf, sizeof(buf), f); assert(ret == buf); ASSERT_ZERO_LABEL(ret); + 
ASSERT_EQ_ORIGIN(ret, buf); ASSERT_READ_ZERO_LABEL(buf, 128); + ASSERT_SAVED_N_ORIGINS(buf, 128) + dfsan_set_label(j_label, &buf, sizeof(&buf)); ret = fgets(buf, sizeof(buf), f); ASSERT_LABEL(ret, j_label); + ASSERT_EQ_ORIGIN(ret, buf); + ASSERT_SAVED_N_ORIGINS(buf, 128) + fclose(f); + free(buf); } void test_getcwd() { char buf[1024]; char *ptr = buf; + dfsan_set_label(i_label, buf + 2, 2); - char* ret = getcwd(buf, sizeof(buf)); + DEFINE_AND_SAVE_ORIGINS(buf) + + char *ret = getcwd(buf, sizeof(buf)); assert(ret == buf); assert(ret[0] == '/'); + ASSERT_ZERO_LABEL(ret); + ASSERT_EQ_ORIGIN(ret, buf); ASSERT_READ_ZERO_LABEL(buf + 2, 2); + ASSERT_SAVED_ORIGINS(buf) + dfsan_set_label(i_label, &ptr, sizeof(ptr)); ret = getcwd(ptr, sizeof(buf)); ASSERT_LABEL(ret, i_label); + ASSERT_EQ_ORIGIN(ret, ptr); + ASSERT_SAVED_ORIGINS(buf) } void test_get_current_dir_name() { - char* ret = get_current_dir_name(); + char *ret = get_current_dir_name(); assert(ret); assert(ret[0] == '/'); ASSERT_READ_ZERO_LABEL(ret, strlen(ret) + 1); + ASSERT_ZERO_ORIGIN(ret); } void test_gethostname() { char buf[1024]; dfsan_set_label(i_label, buf + 2, 2); - assert(gethostname(buf, sizeof(buf)) == 0); + DEFINE_AND_SAVE_ORIGINS(buf) + int ret = gethostname(buf, sizeof(buf)); + assert(ret == 0); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_READ_ZERO_LABEL(buf + 2, 2); + ASSERT_SAVED_ORIGINS(buf) } void test_getrlimit() { struct rlimit rlim; dfsan_set_label(i_label, &rlim, sizeof(rlim)); - assert(getrlimit(RLIMIT_CPU, &rlim) == 0); + DEFINE_AND_SAVE_ORIGINS(rlim); + int ret = getrlimit(RLIMIT_CPU, &rlim); + assert(ret == 0); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_READ_ZERO_LABEL(&rlim, sizeof(rlim)); + ASSERT_SAVED_ORIGINS(rlim) } void test_getrusage() { struct rusage usage; dfsan_set_label(i_label, &usage, sizeof(usage)); - assert(getrusage(RUSAGE_SELF, &usage) == 0); + DEFINE_AND_SAVE_ORIGINS(usage); + int ret = getrusage(RUSAGE_SELF, &usage); + assert(ret == 0); + 
ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_READ_ZERO_LABEL(&usage, sizeof(usage)); + ASSERT_SAVED_ORIGINS(usage) +} + +void test_strcat() { + char src[] = "world"; + int volatile x = 0; + char dst[] = "hello \0 "; + int volatile y = 0; + char *p = dst; + dfsan_set_label(k_label, &p, sizeof(p)); + dfsan_set_label(i_label, src, sizeof(src) + 1); + dfsan_set_label(j_label, dst, sizeof(dst) + 1); + dfsan_origin dst_o = dfsan_get_origin((long)(dst[0])); + char *ret = strcat(p, src); + ASSERT_LABEL(ret, k_label); + ASSERT_EQ_ORIGIN(ret, p); + assert(ret == dst); + assert(strcmp(src, dst + 6) == 0); + char *start = (char *)(((size_t)(dst + 6)) & ~3UL); + char *end = (char *)(((size_t)(dst + 15)) & ~3UL); + for (int i = 0; i < 12; ++i) { + if (dst + i < start || dst + i >= end) { + ASSERT_INIT_ORIGIN(&dst[i], dst_o); + } else { + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&dst[i], src[0]); + } + } + for (int i = 0; i < 6; ++i) { + ASSERT_LABEL(dst[i], j_label); + } + for (int i = 6; i < strlen(dst); ++i) { + ASSERT_LABEL(dst[i], i_label); + assert(dfsan_get_label(dst[i]) == dfsan_get_label(src[i - 6])); + } + ASSERT_LABEL(dst[11], j_label); } void test_strcpy() { char src[] = "hello world"; char dst[sizeof(src) + 2]; + char *p_dst = dst; dfsan_set_label(0, src, sizeof(src)); dfsan_set_label(0, dst, sizeof(dst)); + dfsan_set_label(k_label, &p_dst, sizeof(p_dst)); dfsan_set_label(i_label, src + 2, 1); dfsan_set_label(j_label, src + 3, 1); dfsan_set_label(j_label, dst + 4, 1); dfsan_set_label(i_label, dst + 12, 1); - char *ret = strcpy(dst, src); + char *ret = strcpy(p_dst, src); assert(ret == dst); assert(strcmp(src, dst) == 0); + ASSERT_LABEL(ret, k_label); + ASSERT_EQ_ORIGIN(ret, p_dst); for (int i = 0; i < strlen(src) + 1; ++i) { assert(dfsan_get_label(dst[i]) == dfsan_get_label(src[i])); + if (dfsan_get_label(dst[i])) + assert(dfsan_get_init_origin(&(dst[i])) == dfsan_get_origin(src[i])); } // Note: if strlen(src) + 1 were used instead to compute the first untouched // 
byte of dest, the label would be I|J. This is because strlen() might @@ -630,67 +969,143 @@ } void test_strtol() { - char buf[] = "1234578910"; + char non_number_buf[] = "ab "; char *endptr = NULL; + long int ret = strtol(non_number_buf, &endptr, 10); + assert(ret == 0); + assert(endptr == non_number_buf); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); + + char buf[] = "1234578910"; + int base = 10; + dfsan_set_label(k_label, &base, sizeof(base)); + ret = strtol(buf, &endptr, base); + assert(ret == 1234578910); + assert(endptr == buf + 10); + ASSERT_LABEL(ret, k_label); + ASSERT_EQ_ORIGIN(ret, base); + dfsan_set_label(i_label, buf + 1, 1); dfsan_set_label(j_label, buf + 10, 1); - long int ret = strtol(buf, &endptr, 10); + ret = strtol(buf, &endptr, 10); assert(ret == 1234578910); assert(endptr == buf + 10); ASSERT_LABEL(ret, i_j_label); + ASSERT_EQ_ORIGIN(ret, buf[1]); } void test_strtoll() { - char buf[] = "1234578910 "; + char non_number_buf[] = "ab "; char *endptr = NULL; + long long int ret = strtoll(non_number_buf, &endptr, 10); + assert(ret == 0); + assert(endptr == non_number_buf); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); + + char buf[] = "1234578910 "; + int base = 10; + dfsan_set_label(k_label, &base, sizeof(base)); + ret = strtoll(buf, &endptr, base); + assert(ret == 1234578910); + assert(endptr == buf + 10); + ASSERT_LABEL(ret, k_label); + ASSERT_EQ_ORIGIN(ret, base); + dfsan_set_label(i_label, buf + 1, 1); dfsan_set_label(j_label, buf + 2, 1); - long long int ret = strtoll(buf, &endptr, 10); + ret = strtoll(buf, &endptr, 10); assert(ret == 1234578910); assert(endptr == buf + 10); ASSERT_LABEL(ret, i_j_label); + ASSERT_EQ_ORIGIN(ret, buf[1]); } void test_strtoul() { - char buf[] = "ffffffffffffaa"; + char non_number_buf[] = "xy "; char *endptr = NULL; + long unsigned int ret = strtoul(non_number_buf, &endptr, 16); + assert(ret == 0); + assert(endptr == non_number_buf); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); + + char buf[] 
= "ffffffffffffaa"; + int base = 16; + dfsan_set_label(k_label, &base, sizeof(base)); + ret = strtoul(buf, &endptr, base); + assert(ret == 72057594037927850); + assert(endptr == buf + 14); + ASSERT_LABEL(ret, k_label); + ASSERT_EQ_ORIGIN(ret, base); + dfsan_set_label(i_label, buf + 1, 1); dfsan_set_label(j_label, buf + 2, 1); - long unsigned int ret = strtol(buf, &endptr, 16); + ret = strtoul(buf, &endptr, 16); assert(ret == 72057594037927850); assert(endptr == buf + 14); ASSERT_LABEL(ret, i_j_label); + ASSERT_EQ_ORIGIN(ret, buf[1]); } void test_strtoull() { - char buf[] = "ffffffffffffffaa"; + char non_number_buf[] = "xy "; char *endptr = NULL; + long unsigned int ret = strtoull(non_number_buf, &endptr, 16); + assert(ret == 0); + assert(endptr == non_number_buf); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); + + char buf[] = "ffffffffffffffaa"; + int base = 16; + dfsan_set_label(k_label, &base, sizeof(base)); + ret = strtoull(buf, &endptr, base); + assert(ret == 0xffffffffffffffaa); + assert(endptr == buf + 16); + ASSERT_LABEL(ret, k_label); + ASSERT_EQ_ORIGIN(ret, base); + dfsan_set_label(i_label, buf + 1, 1); dfsan_set_label(j_label, buf + 2, 1); - long long unsigned int ret = strtoull(buf, &endptr, 16); + ret = strtoull(buf, &endptr, 16); assert(ret == 0xffffffffffffffaa); assert(endptr == buf + 16); ASSERT_LABEL(ret, i_j_label); + ASSERT_EQ_ORIGIN(ret, buf[1]); } void test_strtod() { - char buf[] = "12345.76 foo"; + char non_number_buf[] = "ab "; char *endptr = NULL; + double ret = strtod(non_number_buf, &endptr); + assert(ret == 0); + assert(endptr == non_number_buf); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); + + char buf[] = "12345.76 foo"; dfsan_set_label(i_label, buf + 1, 1); dfsan_set_label(j_label, buf + 6, 1); - double ret = strtod(buf, &endptr); + ret = strtod(buf, &endptr); assert(ret == 12345.76); assert(endptr == buf + 8); ASSERT_LABEL(ret, i_j_label); + ASSERT_EQ_ORIGIN(ret, buf[1]); } void test_time() { time_t t = 0; 
dfsan_set_label(i_label, &t, 1); + DEFINE_AND_SAVE_ORIGINS(t) time_t ret = time(&t); assert(ret == t); assert(ret > 0); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_ZERO_LABEL(t); + ASSERT_SAVED_ORIGINS(t) } void test_inet_pton() { @@ -699,7 +1114,10 @@ struct in_addr in4; int ret4 = inet_pton(AF_INET, addr4, &in4); assert(ret4 == 1); + ASSERT_ZERO_LABEL(ret4); + ASSERT_ZERO_ORIGIN(ret4); ASSERT_READ_LABEL(&in4, sizeof(in4), i_label); + ASSERT_ORIGINS(&in4, sizeof(in4), dfsan_get_origin((long)(addr4[3]))) assert(in4.s_addr == htonl(0x7f000001)); char addr6[] = "::1"; @@ -707,17 +1125,28 @@ struct in6_addr in6; int ret6 = inet_pton(AF_INET6, addr6, &in6); assert(ret6 == 1); - ASSERT_READ_LABEL(((char *) &in6) + sizeof(in6) - 1, 1, j_label); + ASSERT_ZERO_LABEL(ret6); + ASSERT_ZERO_ORIGIN(ret6); + ASSERT_READ_LABEL(((char *)&in6) + sizeof(in6) - 1, 1, j_label); + ASSERT_ORIGINS(&in6, sizeof(in6), dfsan_get_origin((long)(addr6[3]))) } void test_localtime_r() { time_t t0 = 1384800998; struct tm t1; dfsan_set_label(i_label, &t0, sizeof(t0)); - struct tm* ret = localtime_r(&t0, &t1); + dfsan_origin t0_o = dfsan_get_origin((long)t0); + struct tm *pt1 = &t1; + dfsan_set_label(j_label, &pt1, sizeof(pt1)); + dfsan_origin pt1_o = dfsan_get_origin((long)pt1); + struct tm *ret = localtime_r(&t0, pt1); assert(ret == &t1); assert(t1.tm_min == 56); + ASSERT_LABEL(ret, j_label); + ASSERT_INIT_ORIGIN(&ret, pt1_o); + ASSERT_READ_LABEL(&ret, sizeof(ret), j_label); ASSERT_LABEL(t1.tm_mon, i_label); + ASSERT_ORIGIN(t1.tm_mon, t0_o); } void test_getpwuid_r() { @@ -726,11 +1155,17 @@ struct passwd *result; dfsan_set_label(i_label, &pwd, 4); + DEFINE_AND_SAVE_ORIGINS(pwd) + DEFINE_AND_SAVE_ORIGINS(buf) int ret = getpwuid_r(0, &pwd, buf, sizeof(buf), &result); assert(ret == 0); assert(strcmp(pwd.pw_name, "root") == 0); assert(result == &pwd); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_READ_ZERO_LABEL(&pwd, 4); + ASSERT_SAVED_ORIGINS(pwd) + 
ASSERT_SAVED_ORIGINS(buf) } void test_epoll_wait() { @@ -751,12 +1186,15 @@ // Test epoll_wait when no events have occurred. event = {}; dfsan_set_label(i_label, &event, sizeof(event)); + DEFINE_AND_SAVE_ORIGINS(event) ret = epoll_wait(epfd, &event, /*maxevents=*/1, /*timeout=*/0); assert(ret == 0); assert(event.events == 0); assert(event.data.fd == 0); ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_READ_LABEL(&event, sizeof(event), i_label); + ASSERT_SAVED_ORIGINS(event) // Test epoll_wait when an event occurs. write(pipe_fds[1], "x", 1); @@ -765,7 +1203,9 @@ assert(event.events == EPOLLIN); assert(event.data.fd == pipe_fds[0]); ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_READ_ZERO_LABEL(&event, sizeof(event)); + ASSERT_SAVED_ORIGINS(event) // Clean up. close(epfd); @@ -778,8 +1218,12 @@ fd.fd = 0; fd.events = POLLIN; dfsan_set_label(i_label, &fd.revents, sizeof(fd.revents)); + DEFINE_AND_SAVE_ORIGINS(fd) int ret = poll(&fd, 1, 1); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_ZERO_LABEL(fd.revents); + ASSERT_SAVED_ORIGINS(fd) assert(ret >= 0); } @@ -790,43 +1234,73 @@ FD_SET(0, &fds); dfsan_set_label(i_label, &fds, sizeof(fds)); dfsan_set_label(j_label, &t, sizeof(t)); + DEFINE_AND_SAVE_ORIGINS(fds) + DEFINE_AND_SAVE_ORIGINS(t) int ret = select(1, &fds, NULL, NULL, &t); assert(ret >= 0); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_ZERO_LABEL(t.tv_sec); ASSERT_READ_ZERO_LABEL(&fds, sizeof(fds)); + ASSERT_SAVED_ORIGINS(fds) + ASSERT_SAVED_ORIGINS(t) } void test_sched_getaffinity() { cpu_set_t mask; dfsan_set_label(j_label, &mask, 1); + DEFINE_AND_SAVE_ORIGINS(mask) int ret = sched_getaffinity(0, sizeof(mask), &mask); assert(ret == 0); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_READ_ZERO_LABEL(&mask, sizeof(mask)); + ASSERT_SAVED_ORIGINS(mask) } void test_sigemptyset() { sigset_t set; dfsan_set_label(j_label, &set, 1); + DEFINE_AND_SAVE_ORIGINS(set) int ret = sigemptyset(&set); assert(ret == 0); 
+ ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_READ_ZERO_LABEL(&set, sizeof(set)); + ASSERT_SAVED_ORIGINS(set) } void test_sigaction() { struct sigaction oldact; dfsan_set_label(j_label, &oldact, 1); + DEFINE_AND_SAVE_ORIGINS(oldact) int ret = sigaction(SIGUSR1, NULL, &oldact); assert(ret == 0); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_READ_ZERO_LABEL(&oldact, sizeof(oldact)); + ASSERT_SAVED_ORIGINS(oldact) +} + +static void SignalHandler(int signo) {} + +void test_signal() { + sighandler_t old_signal_handler = signal(SIGHUP, SignalHandler); + ASSERT_ZERO_LABEL(old_signal_handler); + ASSERT_ZERO_ORIGIN(old_signal_handler); + (void)signal(SIGHUP, old_signal_handler); } void test_sigaltstack() { stack_t old_altstack = {}; dfsan_set_label(j_label, &old_altstack, sizeof(old_altstack)); + DEFINE_AND_SAVE_ORIGINS(old_altstack) int ret = sigaltstack(NULL, &old_altstack); assert(ret == 0); ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_READ_ZERO_LABEL(&old_altstack, sizeof(old_altstack)); + ASSERT_SAVED_ORIGINS(old_altstack) } void test_gettimeofday() { @@ -834,10 +1308,16 @@ struct timezone tz; dfsan_set_label(i_label, &tv, sizeof(tv)); dfsan_set_label(j_label, &tz, sizeof(tz)); + DEFINE_AND_SAVE_ORIGINS(tv) + DEFINE_AND_SAVE_ORIGINS(tz) int ret = gettimeofday(&tv, &tz); assert(ret == 0); + ASSERT_ZERO_LABEL(ret); + ASSERT_ZERO_ORIGIN(ret); ASSERT_READ_ZERO_LABEL(&tv, sizeof(tv)); ASSERT_READ_ZERO_LABEL(&tz, sizeof(tz)); + ASSERT_SAVED_ORIGINS(tv) + ASSERT_SAVED_ORIGINS(tz) } void *pthread_create_test_cb(void *p) { @@ -851,11 +1331,14 @@ pthread_create(&pt, 0, pthread_create_test_cb, (void *)1); void *cbrv; dfsan_set_label(i_label, &cbrv, sizeof(cbrv)); + DEFINE_AND_SAVE_ORIGINS(cbrv) int ret = pthread_join(pt, &cbrv); assert(ret == 0); assert(cbrv == (void *)2); ASSERT_ZERO_LABEL(ret); ASSERT_ZERO_LABEL(cbrv); + ASSERT_ZERO_ORIGIN(ret); + ASSERT_SAVED_ORIGINS(cbrv); } // Tested by test_pthread_create(). 
This empty function is here to appease the @@ -886,35 +1369,101 @@ size_t sizep = 0, alignp = 0; dfsan_set_label(i_label, &sizep, sizeof(sizep)); dfsan_set_label(i_label, &alignp, sizeof(alignp)); + dfsan_origin sizep_o = dfsan_get_origin(sizep); + dfsan_origin alignp_o = dfsan_get_origin(alignp); _dl_get_tls_static_info(&sizep, &alignp); ASSERT_ZERO_LABEL(sizep); ASSERT_ZERO_LABEL(alignp); + ASSERT_ORIGIN(sizep, sizep_o); + ASSERT_ORIGIN(alignp, alignp_o); } void test_strrchr() { char str1[] = "str1str1"; + + char *p = str1; + dfsan_set_label(j_label, &p, sizeof(p)); + + char *rv = strrchr(p, 'r'); + assert(rv == &str1[6]); +#ifdef STRICT_DATA_DEPENDENCIES + ASSERT_LABEL(rv, j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, p); +#else + ASSERT_LABEL(rv, j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, p); +#endif + + char c = 'r'; + dfsan_set_label(k_label, &c, sizeof(c)); + rv = strrchr(str1, c); + assert(rv == &str1[6]); +#ifdef STRICT_DATA_DEPENDENCIES + ASSERT_ZERO_LABEL(rv); +#else + ASSERT_LABEL(rv, k_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, c); +#endif + dfsan_set_label(i_label, &str1[7], 1); - char *rv = strrchr(str1, 'r'); + rv = strrchr(str1, 'r'); assert(rv == &str1[6]); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); #else ASSERT_LABEL(rv, i_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, str1[7]); #endif } void test_strstr() { char str1[] = "str1str1"; + + char *p1 = str1; + dfsan_set_label(k_label, &p1, sizeof(p1)); + char *rv = strstr(p1, "1s"); + assert(rv == &str1[3]); +#ifdef STRICT_DATA_DEPENDENCIES + ASSERT_LABEL(rv, k_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, p1); +#else + ASSERT_LABEL(rv, k_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, p1); +#endif + + char str2[] = "1s"; + char *p2 = str2; + dfsan_set_label(m_label, &p2, sizeof(p2)); + rv = strstr(str1, p2); + assert(rv == &str1[3]); +#ifdef STRICT_DATA_DEPENDENCIES + ASSERT_ZERO_LABEL(rv); +#else + ASSERT_LABEL(rv, m_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, p2); +#endif + + 
dfsan_set_label(n_label, &str2[0], 1); + rv = strstr(str1, str2); + assert(rv == &str1[3]); +#ifdef STRICT_DATA_DEPENDENCIES + ASSERT_ZERO_LABEL(rv); +#else + ASSERT_LABEL(rv, n_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, str2[0]); +#endif + dfsan_set_label(i_label, &str1[3], 1); dfsan_set_label(j_label, &str1[5], 1); - char *rv = strstr(str1, "1s"); + rv = strstr(str1, "1s"); assert(rv == &str1[3]); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); #else ASSERT_LABEL(rv, i_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, str1[3]); #endif rv = strstr(str1, "2s"); @@ -923,21 +1472,50 @@ ASSERT_ZERO_LABEL(rv); #else ASSERT_LABEL(rv, i_j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, str1[3]); #endif } void test_strpbrk() { char s[] = "abcdefg"; char accept[] = "123fd"; + + char *p_s = s; + char *p_accept = accept; + + dfsan_set_label(n_label, &p_accept, sizeof(p_accept)); + + char *rv = strpbrk(p_s, p_accept); + assert(rv == &s[3]); +#ifdef STRICT_DATA_DEPENDENCIES + ASSERT_ZERO_LABEL(rv); +#else + ASSERT_LABEL(rv, n_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, p_accept); +#endif + + dfsan_set_label(m_label, &p_s, sizeof(p_s)); + + rv = strpbrk(p_s, p_accept); + assert(rv == &s[3]); +#ifdef STRICT_DATA_DEPENDENCIES + ASSERT_LABEL(rv, m_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, p_s); +#else + ASSERT_LABEL(rv, dfsan_union(m_label, n_label)); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, p_s); +#endif + dfsan_set_label(i_label, &s[5], 1); dfsan_set_label(j_label, &accept[1], 1); - char *rv = strpbrk(s, accept); + rv = strpbrk(s, accept); assert(rv == &s[3]); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(rv); #else ASSERT_LABEL(rv, j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, accept[1]); #endif char *ps = s; @@ -949,6 +1527,7 @@ ASSERT_LABEL(rv, j_label); #else ASSERT_LABEL(rv, i_j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, s[5]); #endif rv = strpbrk(ps, "123"); @@ -957,6 +1536,7 @@ ASSERT_ZERO_LABEL(rv); #else ASSERT_LABEL(rv, i_j_label); + ASSERT_INIT_ORIGIN_EQ_ORIGIN(&rv, 
s[5]); #endif } @@ -965,24 +1545,53 @@ dfsan_set_label(i_label, &str1[3], 1); dfsan_set_label(j_label, &str1[4], 1); - char *crv = (char *) memchr(str1, 'r', sizeof(str1)); + char *crv = (char *)memchr(str1, 'r', sizeof(str1)); assert(crv == &str1[2]); ASSERT_ZERO_LABEL(crv); + ASSERT_ZERO_ORIGIN(crv); - crv = (char *) memchr(str1, '1', sizeof(str1)); + char c = 'r'; + dfsan_set_label(k_label, &c, sizeof(c)); + crv = (char *)memchr(str1, c, sizeof(str1)); + assert(crv == &str1[2]); +#ifdef STRICT_DATA_DEPENDENCIES + ASSERT_ZERO_LABEL(crv); + ASSERT_ZERO_ORIGIN(crv); +#else + ASSERT_LABEL(crv, k_label); + ASSERT_EQ_ORIGIN(crv, c); +#endif + + char *ptr = str1; + dfsan_set_label(k_label, &ptr, sizeof(ptr)); + crv = (char *)memchr(ptr, 'r', sizeof(str1)); + assert(crv == &str1[2]); +#ifdef STRICT_DATA_DEPENDENCIES + ASSERT_LABEL(crv, k_label); + ASSERT_EQ_ORIGIN(crv, ptr); +#else + ASSERT_LABEL(crv, k_label); + ASSERT_EQ_ORIGIN(crv, ptr); +#endif + + crv = (char *)memchr(str1, '1', sizeof(str1)); assert(crv == &str1[3]); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(crv); + ASSERT_ZERO_ORIGIN(crv); #else ASSERT_LABEL(crv, i_label); + ASSERT_EQ_ORIGIN(crv, str1[3]); #endif - crv = (char *) memchr(str1, 'x', sizeof(str1)); + crv = (char *)memchr(str1, 'x', sizeof(str1)); assert(!crv); #ifdef STRICT_DATA_DEPENDENCIES ASSERT_ZERO_LABEL(crv); + ASSERT_ZERO_ORIGIN(crv); #else ASSERT_LABEL(crv, i_j_label); + ASSERT_EQ_ORIGIN(crv, str1[3]); #endif } @@ -995,12 +1604,15 @@ req.tv_sec = 1; req.tv_nsec = 0; dfsan_set_label(i_label, &rem, sizeof(rem)); + DEFINE_AND_SAVE_ORIGINS(rem) // non interrupted int rv = nanosleep(&req, &rem); assert(rv == 0); ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); ASSERT_READ_LABEL(&rem, 1, i_label); + ASSERT_SAVED_ORIGINS(rem) // interrupted by an alarm signal(SIGALRM, alarm_handler); @@ -1009,17 +1621,25 @@ rv = nanosleep(&req, &rem); assert(rv == -1); ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); ASSERT_READ_ZERO_LABEL(&rem, 
sizeof(rem)); + ASSERT_SAVED_ORIGINS(rem) } void test_socketpair() { int fd[2]; + dfsan_origin fd_o[2]; dfsan_set_label(i_label, fd, sizeof(fd)); + fd_o[0] = dfsan_get_origin((long)(fd[0])); + fd_o[1] = dfsan_get_origin((long)(fd[1])); int rv = socketpair(PF_LOCAL, SOCK_STREAM, 0, fd); assert(rv == 0); ASSERT_ZERO_LABEL(rv); + ASSERT_ZERO_ORIGIN(rv); ASSERT_READ_ZERO_LABEL(fd, sizeof(fd)); + ASSERT_ORIGIN(fd[0], fd_o[0]); + ASSERT_ORIGIN(fd[1], fd_o[1]); } void test_getpeername() { @@ -1031,14 +1651,19 @@ socklen_t addrlen = sizeof(addr); dfsan_set_label(i_label, &addr, addrlen); dfsan_set_label(i_label, &addrlen, sizeof(addrlen)); + DEFINE_AND_SAVE_ORIGINS(addr) + DEFINE_AND_SAVE_ORIGINS(addrlen) ret = getpeername(sockfds[0], &addr, &addrlen); assert(ret != -1); + ASSERT_ZERO_ORIGIN(ret); ASSERT_ZERO_LABEL(ret); ASSERT_ZERO_LABEL(addrlen); assert(addrlen < sizeof(addr)); ASSERT_READ_ZERO_LABEL(&addr, addrlen); ASSERT_READ_LABEL(((char *)&addr) + addrlen, 1, i_label); + ASSERT_SAVED_ORIGINS(addr) + ASSERT_SAVED_ORIGINS(addrlen) close(sockfds[0]); close(sockfds[1]); @@ -1052,14 +1677,18 @@ socklen_t addrlen = sizeof(addr); dfsan_set_label(i_label, &addr, addrlen); dfsan_set_label(i_label, &addrlen, sizeof(addrlen)); - + DEFINE_AND_SAVE_ORIGINS(addr) + DEFINE_AND_SAVE_ORIGINS(addrlen) int ret = getsockname(sockfd, &addr, &addrlen); assert(ret != -1); + ASSERT_ZERO_ORIGIN(ret); ASSERT_ZERO_LABEL(ret); ASSERT_ZERO_LABEL(addrlen); assert(addrlen < sizeof(addr)); ASSERT_READ_ZERO_LABEL(&addr, addrlen); ASSERT_READ_LABEL(((char *)&addr) + addrlen, 1, i_label); + ASSERT_SAVED_ORIGINS(addr) + ASSERT_SAVED_ORIGINS(addrlen) close(sockfd); } @@ -1072,6 +1701,8 @@ socklen_t optlen = sizeof(optval); dfsan_set_label(i_label, &optval, sizeof(optval)); dfsan_set_label(i_label, &optlen, sizeof(optlen)); + DEFINE_AND_SAVE_ORIGINS(optval) + DEFINE_AND_SAVE_ORIGINS(optlen) int ret = getsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &optval, &optlen); assert(ret != -1); assert(optlen == 
sizeof(int)); @@ -1081,6 +1712,9 @@ ASSERT_ZERO_LABEL(optlen); ASSERT_ZERO_LABEL(optval[0]); ASSERT_LABEL(optval[1], i_label); + ASSERT_ZERO_ORIGIN(ret); + ASSERT_SAVED_ORIGINS(optval) + ASSERT_SAVED_ORIGINS(optlen) close(sockfd); } @@ -1104,12 +1738,13 @@ // The value returned by write() should have no label. res = write(fd, buf, len); ASSERT_ZERO_LABEL(res); + ASSERT_ZERO_ORIGIN(res); close(fd); } template -void test_sprintf_chunk(const char* expected, const char* format, T arg) { +void test_sprintf_chunk(const char *expected, const char *format, T arg) { char buf[512]; memset(buf, 'a', sizeof(buf)); @@ -1124,17 +1759,19 @@ strcat(padded_format, " bar"); // Non labelled arg. - assert(sprintf(buf, padded_format, arg) == strlen(padded_expected)); + assert(sprintf(buf, padded_format, arg) == strlen(padded_expected)); assert(strcmp(buf, padded_expected) == 0); ASSERT_READ_LABEL(buf, strlen(padded_expected), 0); memset(buf, 'a', sizeof(buf)); // Labelled arg. dfsan_set_label(i_label, &arg, sizeof(arg)); - assert(sprintf(buf, padded_format, arg) == strlen(padded_expected)); + dfsan_origin a_o = dfsan_get_origin((long)(arg)); + assert(sprintf(buf, padded_format, arg) == strlen(padded_expected)); assert(strcmp(buf, padded_expected) == 0); ASSERT_READ_LABEL(buf, 4, 0); ASSERT_READ_LABEL(buf + 4, strlen(padded_expected) - 8, i_label); + ASSERT_INIT_ORIGINS(buf + 4, strlen(padded_expected) - 8, a_o); ASSERT_READ_LABEL(buf + (strlen(padded_expected) - 4), 4, 0); } @@ -1154,12 +1791,15 @@ // Test formatting & label propagation (multiple conversion specifiers): %s, // %d, %n, %f, and %%. 
- const char* s = "world"; + const char *s = "world"; int m = 8; int d = 27; - dfsan_set_label(k_label, (void *) (s + 1), 2); + dfsan_set_label(k_label, (void *)(s + 1), 2); + dfsan_origin s_o = dfsan_get_origin((long)(s[1])); dfsan_set_label(i_label, &m, sizeof(m)); + dfsan_origin m_o = dfsan_get_origin((long)m); dfsan_set_label(j_label, &d, sizeof(d)); + dfsan_origin d_o = dfsan_get_origin((long)d); int n; int r = sprintf(buf, "hello %s, %-d/%d/%d %f %% %n%d", s, 2014, m, d, 12345.6781234, &n, 1000); @@ -1167,12 +1807,16 @@ assert(strcmp(buf, "hello world, 2014/8/27 12345.678123 % 1000") == 0); ASSERT_READ_LABEL(buf, 7, 0); ASSERT_READ_LABEL(buf + 7, 2, k_label); + ASSERT_INIT_ORIGINS(buf + 7, 2, s_o); ASSERT_READ_LABEL(buf + 9, 9, 0); ASSERT_READ_LABEL(buf + 18, 1, i_label); + ASSERT_INIT_ORIGINS(buf + 18, 1, m_o); ASSERT_READ_LABEL(buf + 19, 1, 0); ASSERT_READ_LABEL(buf + 20, 2, j_label); + ASSERT_INIT_ORIGINS(buf + 20, 2, d_o); ASSERT_READ_LABEL(buf + 22, 15, 0); ASSERT_LABEL(r, 0); + ASSERT_ZERO_ORIGIN(r); assert(n == 38); // Test formatting & label propagation (single conversion specifier, with @@ -1187,10 +1831,10 @@ test_sprintf_chunk("-16657", "%hd", 0xdeadbeef); test_sprintf_chunk("deadbeefdeadbeef", "%lx", 0xdeadbeefdeadbeef); test_sprintf_chunk("0xdeadbeefdeadbeef", "%p", - (void *) 0xdeadbeefdeadbeef); - test_sprintf_chunk("18446744073709551615", "%ju", (intmax_t) -1); - test_sprintf_chunk("18446744073709551615", "%zu", (size_t) -1); - test_sprintf_chunk("18446744073709551615", "%tu", (size_t) -1); + (void *)0xdeadbeefdeadbeef); + test_sprintf_chunk("18446744073709551615", "%ju", (intmax_t)-1); + test_sprintf_chunk("18446744073709551615", "%zu", (size_t)-1); + test_sprintf_chunk("18446744073709551615", "%tu", (size_t)-1); test_sprintf_chunk("0x1.f9acffa7eb6bfp-4", "%a", 0.123456); test_sprintf_chunk("0X1.F9ACFFA7EB6BFP-4", "%A", 0.123456); @@ -1199,7 +1843,7 @@ test_sprintf_chunk("1.234560e-01", "%e", 0.123456); test_sprintf_chunk("1.234560E-01", 
"%E", 0.123456); test_sprintf_chunk("0.1234567891234560", "%.16Lf", - (long double) 0.123456789123456); + (long double)0.123456789123456); test_sprintf_chunk("z", "%c", 'z'); @@ -1215,36 +1859,50 @@ char buf[2048]; memset(buf, 'a', sizeof(buf)); dfsan_set_label(0, buf, sizeof(buf)); - const char* s = "world"; + const char *s = "world"; int y = 2014; int m = 8; int d = 27; - dfsan_set_label(k_label, (void *) (s + 1), 2); + dfsan_set_label(k_label, (void *)(s + 1), 2); + dfsan_origin s_o = dfsan_get_origin((long)(s[1])); dfsan_set_label(i_label, &y, sizeof(y)); + dfsan_origin y_o = dfsan_get_origin((long)y); dfsan_set_label(j_label, &m, sizeof(m)); - int r = snprintf(buf, 19, "hello %s, %-d/%d/%d %f", s, y, m, d, + dfsan_origin m_o = dfsan_get_origin((long)m); + int r = snprintf(buf, 19, "hello %s, %-d/ %d/%d %f", s, y, m, d, 12345.6781234); // The return value is the number of bytes that would have been written to // the final string if enough space had been available. - assert(r == 35); + assert(r == 38); assert(memcmp(buf, "hello world, 2014/", 19) == 0); ASSERT_READ_LABEL(buf, 7, 0); ASSERT_READ_LABEL(buf + 7, 2, k_label); + ASSERT_INIT_ORIGINS(buf + 7, 2, s_o); ASSERT_READ_LABEL(buf + 9, 4, 0); ASSERT_READ_LABEL(buf + 13, 4, i_label); + ASSERT_INIT_ORIGINS(buf + 13, 4, y_o); ASSERT_READ_LABEL(buf + 17, 2, 0); ASSERT_LABEL(r, 0); + ASSERT_ZERO_ORIGIN(r); } +// Tested by a seperate source file. This empty function is here to appease the +// check-wrappers script. 
+void test_fork() {} + int main(void) { #ifdef FAST_16_LABELS i_label = 1; j_label = 2; k_label = 4; + m_label = 8; + n_label = 16; #else i_label = dfsan_create_label("i", 0); j_label = dfsan_create_label("j", 0); k_label = dfsan_create_label("k", 0); + m_label = dfsan_create_label("m", 0); + n_label = dfsan_create_label("n", 0); #endif i_j_label = dfsan_union(i_label, j_label); assert(i_j_label != i_label); @@ -1277,6 +1935,7 @@ test_memchr(); test_memcmp(); test_memcpy(); + test_memmove(); test_memset(); test_nanosleep(); test_poll(); @@ -1289,6 +1948,7 @@ test_sched_getaffinity(); test_select(); test_sigaction(); + test_signal(); test_sigaltstack(); test_sigemptyset(); test_snprintf(); @@ -1298,6 +1958,7 @@ test_strcasecmp(); test_strchr(); test_strcmp(); + test_strcat(); test_strcpy(); test_strdup(); test_strlen(); @@ -1314,4 +1975,5 @@ test_strtoull(); test_time(); test_write(); + test_fork(); } diff --git a/compiler-rt/test/dfsan/origin_fork.cpp b/compiler-rt/test/dfsan/origin_fork.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_fork.cpp @@ -0,0 +1,126 @@ +// Test that chained origins are fork-safe. +// Run a number of threads that create new chained origins, then fork +// and verify that origin reads do not deadlock in the child process. 
+ +// RUN: %clangxx_dfsan -std=c++11 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t +// RUN: DFSAN_OPTIONS=store_context_size=1000,origin_history_size=0,origin_history_per_stack_limit=0 %run %t 2>&1 | FileCheck %s + +// Fun fact: if test output is redirected to a file (as opposed to +// being piped directly to FileCheck), we may lose some "done"s due to +// a kernel bug: +// https://lkml.org/lkml/2014/2/17/324 + +// Sometimes hangs + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +int done; + +void copy_uninit_thread2() { + volatile int x; + volatile int v; + dfsan_set_label(8, (void *)&x, sizeof(x)); + while (true) { + v = x; + x = v; + if (__atomic_load_n(&done, __ATOMIC_RELAXED)) + return; + } +} + +void copy_uninit_thread1(int level) { + if (!level) + copy_uninit_thread2(); + else + copy_uninit_thread1(level - 1); +} + +void *copy_uninit_thread(void *id) { + copy_uninit_thread1((long)id); + return 0; +} + +// Run through stackdepot in the child process. +// If any of the hash table cells are locked, this may deadlock. 
+void child() { + volatile int x; + volatile int v; + dfsan_set_label(16, (void *)&x, sizeof(x)); + for (int i = 0; i < 10000; ++i) { + v = x; + x = v; + } + write(2, "done\n", 5); +} + +void test() { + const int kThreads = 10; + pthread_t t[kThreads]; + for (int i = 0; i < kThreads; ++i) + pthread_create(&t[i], NULL, copy_uninit_thread, (void *)(long)i); + usleep(100000); + pid_t pid = fork(); + if (pid) { + // parent + __atomic_store_n(&done, 1, __ATOMIC_RELAXED); + pid_t p; + while ((p = wait(NULL)) == -1) { + } + } else { + // child + child(); + } +} + +int main() { + const int kChildren = 20; + for (int i = 0; i < kChildren; ++i) { + pid_t pid = fork(); + if (pid) { + // parent + } else { + test(); + exit(0); + } + } + + for (int i = 0; i < kChildren; ++i) { + pid_t p; + while ((p = wait(NULL)) == -1) { + } + } + + return 0; +} + +// Expect 20 (== kChildren) "done" messages. +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done +// CHECK: done diff --git a/compiler-rt/test/dfsan/origin_ld_lost.c b/compiler-rt/test/dfsan/origin_ld_lost.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_ld_lost.c @@ -0,0 +1,21 @@ +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// Test origin tracking can lost origins at 2-byte load with addr % 4 == 3. 
+ +#include + +__attribute__((noinline)) uint16_t foo(uint16_t a, uint16_t b) { return a + b; } + +int main(int argc, char *argv[]) { + uint64_t a __attribute__((aligned(4))) = 1; + uint32_t b = 10; + dfsan_set_label(4, (uint8_t *)&a + 4, sizeof(uint8_t)); + uint16_t c = foo(*(uint16_t *)((uint8_t *)&a + 3), b); + dfsan_print_origin_trace(&c, "foo"); +} + +// CHECK: Taint value 0x4 {{.*}} origin tracking (foo) +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_ld_lost.c:[[@LINE-6]] diff --git a/compiler-rt/test/dfsan/origin_ldst.c b/compiler-rt/test/dfsan/origin_ldst.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_ldst.c @@ -0,0 +1,76 @@ +// RUN: %clang_dfsan -DTEST64 -DALIGN=8 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: %clang_dfsan -DTEST32 -DALIGN=4 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: %clang_dfsan -DALIGN=2 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: %clang_dfsan -DTEST64 -DALIGN=5 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: %clang_dfsan -DTEST32 -DALIGN=3 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: %clang_dfsan -DALIGN=1 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// Test origin tracking is accurate in terms of partial 
store/load, and +// different alignments. + +#include + +#ifdef TEST64 +#define FULL_TYPE uint64_t +#define HALF_TYPE uint32_t +#elif defined(TEST32) +#define FULL_TYPE uint32_t +#define HALF_TYPE uint16_t +#else +#define FULL_TYPE uint16_t +#define HALF_TYPE uint8_t +#endif + +__attribute__((noinline)) FULL_TYPE foo(FULL_TYPE a, FULL_TYPE b) { return a + b; } + +int main(int argc, char *argv[]) { + char x __attribute__((aligned(ALIGN))) = 1, y = 2; + dfsan_set_label(8, &x, sizeof(x)); + char z __attribute__((aligned(ALIGN))) = x + y; + dfsan_print_origin_trace(&z, NULL); + + FULL_TYPE a __attribute__((aligned(ALIGN))) = 1; + FULL_TYPE b = 10; + dfsan_set_label(4, (HALF_TYPE *)&a + 1, sizeof(HALF_TYPE)); + FULL_TYPE c __attribute__((aligned(ALIGN))) = foo(a, b); + dfsan_print_origin_trace(&c, NULL); + dfsan_print_origin_trace((HALF_TYPE *)&c + 1, NULL); +} + +// CHECK: Taint value 0x8 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-13]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-17]] + +// CHECK: Taint value 0x4 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-14]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-18]] + +// CHECK: Taint value 0x4 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-21]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_ldst.c:[[@LINE-25]] diff --git a/compiler-rt/test/dfsan/origin_limit.c b/compiler-rt/test/dfsan/origin_limit.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_limit.c @@ -0,0 +1,67 @@ +// RUN:
%clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t +// +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out +// +// RUN: DFSAN_OPTIONS=origin_history_size=2 %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK2 < %t.out +// +// RUN: DFSAN_OPTIONS=origin_history_size=0 %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK0 < %t.out + +#include + +#include + +__attribute__((noinline)) int foo(int a, int b) { return a + b; } + +int main(int argc, char *argv[]) { + int a = 10; + dfsan_set_label(8, &a, sizeof(a)); + int c = 0; + for (int i = 0; i < 17; ++i) { + c = foo(a, c); + printf("%lx", (unsigned long)&c); + } + dfsan_print_origin_trace(&c, NULL); +} + +// CHECK: Taint value 0x8 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: Origin value: {{.*}}, Taint value was created at + +// CHECK2: Taint value 0x8 {{.*}} origin tracking () +// CHECK2: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK2: Origin value: {{.*}}, Taint value 
was created at + +// CHECK0: Taint value 0x8 {{.*}} origin tracking () +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK0: Origin value: {{.*}}, Taint value was created at diff --git a/compiler-rt/test/dfsan/origin_memcpy.c b/compiler-rt/test/dfsan/origin_memcpy.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_memcpy.c @@ -0,0 +1,61 @@ +// RUN: %clang_dfsan -DOFFSET=0 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK0 < %t.out + +// RUN: %clang_dfsan -DOFFSET=10 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK10 < %t.out + +#include + +#include + +int xx[10000]; +int yy[10000]; +volatile int idx = 30; + +__attribute__((noinline)) +void fn_g(int a, int b) { + xx[idx] = a; xx[idx + 10] 
= b; +} + +__attribute__((noinline)) +void fn_f(int a, int b) { + fn_g(a, b); +} + +__attribute__((noinline)) +void fn_h() { + memcpy(&yy, &xx, sizeof(xx)); +} + +int main(int argc, char *argv[]) { + int volatile z1; + int volatile z2; + dfsan_set_label(8, (void *)&z1, sizeof(z1)); + dfsan_set_label(16, (void *)&z2, sizeof(z2)); + fn_f(z1, z2); + fn_h(); + dfsan_print_origin_trace(&yy[idx + OFFSET], NULL); + return 0; +} + +// CHECK0: Taint value 0x8 {{.*}} origin tracking () +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at + +// CHECK0: #0 {{.*}} in dfs$fn_h {{.*}}origin_memcpy.c:[[@LINE-17]] +// CHECK0: #1 {{.*}} in main {{.*}}origin_memcpy.c:[[@LINE-9]] + +// CHECK0: Origin value: {{.*}}, Taint value was created at + +// CHECK0: #0 {{.*}} in main {{.*}}origin_memcpy.c:[[@LINE-16]] + +// CHECK10: Taint value 0x10 {{.*}} origin tracking () +// CHECK10: Origin value: {{.*}}, Taint value was stored to memory at + +// CHECK10: #0 {{.*}} in dfs$fn_h {{.*}}origin_memcpy.c:[[@LINE-27]] +// CHECK10: #1 {{.*}} in main {{.*}}origin_memcpy.c:[[@LINE-19]] + +// CHECK10: Origin value: {{.*}}, Taint value was created at + +// CHECK10: #0 {{.*}} in main {{.*}}origin_memcpy.c:[[@LINE-25]] diff --git a/compiler-rt/test/dfsan/origin_memmove.c b/compiler-rt/test/dfsan/origin_memmove.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_memmove.c @@ -0,0 +1,60 @@ +// RUN: %clang_dfsan -DOFFSET=0 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK0 < %t.out + +// RUN: %clang_dfsan -DOFFSET=10 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK10 < %t.out + +#include + +#include + +int xx[10000]; + +volatile int idx = 30; + +__attribute__((noinline)) +void fn_g(int a, int b) { + xx[idx] = a; xx[idx + 10] = b; +} + +__attribute__((noinline)) +void 
fn_f(int a, int b) { + fn_g(a, b); +} + +__attribute__((noinline)) void fn_h() { + memmove(&xx[25], &xx, 7500); +} + +int main(int argc, char *argv[]) { + int volatile z1; + int volatile z2; + dfsan_set_label(8, (void *)&z1, sizeof(z1)); + dfsan_set_label(16, (void *)&z2, sizeof(z2)); + fn_f(z1, z2); + fn_h(); + dfsan_print_origin_trace(&xx[25 + idx + OFFSET], NULL); + return 0; +} + +// CHECK0: Taint value 0x8 {{.*}} origin tracking () +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at + +// CHECK0: #0 {{.*}} in dfs$fn_h {{.*}}origin_memmove.c:[[@LINE-17]] +// CHECK0: #1 {{.*}} in main {{.*}}origin_memmove.c:[[@LINE-9]] + +// CHECK0: Origin value: {{.*}}, Taint value was created at + +// CHECK0: #0 {{.*}} in main {{.*}}origin_memmove.c:[[@LINE-16]] + +// CHECK10: Taint value 0x10 {{.*}} origin tracking () +// CHECK10: Origin value: {{.*}}, Taint value was stored to memory at + +// CHECK10: #0 {{.*}} in dfs$fn_h {{.*}}origin_memmove.c:[[@LINE-27]] +// CHECK10: #1 {{.*}} in main {{.*}}origin_memmove.c:[[@LINE-19]] + +// CHECK10: Origin value: {{.*}}, Taint value was created at + +// CHECK10: #0 {{.*}} in main {{.*}}origin_memmove.c:[[@LINE-25]] diff --git a/compiler-rt/test/dfsan/origin_memset.c b/compiler-rt/test/dfsan/origin_memset.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_memset.c @@ -0,0 +1,46 @@ +// RUN: %clang_dfsan -DOFFSET=0 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK0 < %t.out + +// RUN: %clang_dfsan -DOFFSET=10 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK10 < %t.out + +#include + +#include + +int xx[10000]; + +volatile int idx = 30; + +__attribute__((noinline)) +void fn_g(int a, int b) { + memset(&xx[idx], a, sizeof(a)); + memset(&xx[idx + 10], b, sizeof(b)); +} + +__attribute__((noinline)) +void 
fn_f(int a, int b) { + fn_g(a, b); +} + +int main(int argc, char *argv[]) { + int volatile z1; + int volatile z2; + dfsan_set_label(8, (void *)&z1, sizeof(z1)); + dfsan_set_label(16, (void *)&z2, sizeof(z2)); + fn_f(z1, z2); + dfsan_print_origin_trace(&xx[idx + OFFSET], NULL); + return 0; +} + +// CHECK0: Taint value 0x8 {{.*}} origin tracking () +// CHECK0: Origin value: {{.*}}, Taint value was created at + +// CHECK0: #0 {{.*}} in main {{.*}}origin_memset.c:[[@LINE-10]] + +// CHECK10: Taint value 0x10 {{.*}} origin tracking () +// CHECK10: Origin value: {{.*}}, Taint value was created at + +// CHECK10: #0 {{.*}} in main {{.*}}origin_memset.c:[[@LINE-14]] diff --git a/compiler-rt/test/dfsan/origin_overlapped.c b/compiler-rt/test/dfsan/origin_overlapped.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_overlapped.c @@ -0,0 +1,23 @@ +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +#include + +int main(int argc, char *argv[]) { + char volatile z1; + char volatile z2; + dfsan_set_label(8, (void *)&z1, sizeof(z1)); + dfsan_set_label(16, (void *)&z2, sizeof(z2)); // overwriting the old origin.
+ char c = z1; + dfsan_print_origin_trace(&c, "bar"); + return 0; +} + +// CHECK: Taint value 0x8 {{.*}} origin tracking (bar) +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_overlapped.c:[[@LINE-7]] + +// CHECK: Origin value: {{.*}}, Taint value was created at + +// CHECK: #0 {{.*}} in main {{.*}}origin_overlapped.c:[[@LINE-12]] diff --git a/compiler-rt/test/dfsan/origin_pthread.c b/compiler-rt/test/dfsan/origin_pthread.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_pthread.c @@ -0,0 +1,61 @@ +// RUN: %clang_dfsan -DCHECK8 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK8 < %t.out + +// RUN: %clang_dfsan -DCHECK16 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true -mllvm -dfsan-instrumentation-with-call-threshold=0 %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK16 < %t.out + +#include + +#include +#include + +int volatile x; +int __thread y, z; + +static void *ThreadFn(void *a) { + y = x; + memcpy((void *)&z, (void *)&y, sizeof(y)); +#if defined(CHECK8) + if ((int)a == 8) + dfsan_print_origin_trace(&z, NULL); +#elif defined(CHECK16) + if ((int)a == 16) + dfsan_print_origin_trace(&z, NULL); +#endif + return 0; +} + +int main(void) { + dfsan_set_label(8, (void *)&x, sizeof(x)); + + pthread_t t[24]; + for (int i = 0; i < 24; ++i) { + pthread_create(&t[i], 0, ThreadFn, (void *)i); + } + for (int i = 0; i < 24; ++i) { + pthread_join(t[i], 0); + } + return 0; +} + +// CHECK8: Taint value 0x8 {{.*}} origin tracking () +// CHECK8: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK8: #0 {{.*}} in dfs$ThreadFn {{.*}}origin_pthread.c:[[@LINE-26]] + +// CHECK8: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK8: #0 {{.*}} in dfs$ThreadFn {{.*}}origin_pthread.c:[[@LINE-30]] + +// CHECK8: Origin value: 
{{.*}}, Taint value was created at +// CHECK8: #0 {{.*}} in main {{.*}}origin_pthread.c:[[@LINE-20]] + +// CHECK16: Taint value 0x8 {{.*}} origin tracking () +// CHECK16: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK16: #0 {{.*}} in dfs$ThreadFn {{.*}}origin_pthread.c:[[@LINE-36]] + +// CHECK16: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK16: #0 {{.*}} in dfs$ThreadFn {{.*}}origin_pthread.c:[[@LINE-40]] + +// CHECK16: Origin value: {{.*}}, Taint value was created at +// CHECK16: #0 {{.*}} in main {{.*}}origin_pthread.c:[[@LINE-30]] diff --git a/compiler-rt/test/dfsan/origin_set_label.c b/compiler-rt/test/dfsan/origin_set_label.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_set_label.c @@ -0,0 +1,34 @@ +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true -mllvm -dfsan-instrumentation-with-call-threshold=0 %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK < %t.out + +#include + +__attribute__((noinline)) uint64_t foo(uint64_t a, uint64_t b) { return a + b; } + +int main(int argc, char *argv[]) { + uint64_t a = 10; + uint64_t b = 20; + dfsan_set_label(8, &a, sizeof(a)); + uint64_t c = foo(a, b); + dfsan_print_origin_trace(&c, NULL); + dfsan_print_origin_trace((int*)(&c) + 1, NULL); +} + +// CHECK: Taint value 0x8 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_set_label.c:[[@LINE-7]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_set_label.c:[[@LINE-11]] + +// CHECK: Taint value 0x8 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main 
{{.*}}origin_set_label.c:[[@LINE-14]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_set_label.c:[[@LINE-18]] diff --git a/compiler-rt/test/dfsan/origin_signal_stress_test.cpp b/compiler-rt/test/dfsan/origin_signal_stress_test.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_signal_stress_test.cpp @@ -0,0 +1,63 @@ +// RUN: %clangxx_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true -O0 %s -o %t && %run %t +// RUN: %clangxx_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true -mllvm -dfsan-instrumentation-with-call-threshold=0 -O0 %s -o %t && %run %t +// +// Test that the state of shadows and origins from a signal handler are consistent. + +#include +#include +#include +#include +#include +#include + +const int kSigCnt = 200; +int x; + +__attribute__((noinline)) int f(int a) { + return a; +} + +__attribute__((noinline)) void g(const dfsan_origin origin) { + int r = f(x); + const dfsan_label r_label = dfsan_get_label(r); + assert(r_label == 8 || r_label == 0); + const dfsan_origin r_origin = dfsan_get_init_origin(&r); + assert(r_origin == origin || r_origin == 0); + return; +} + +int sigcnt; + +void SignalHandler(int signo) { + assert(signo == SIGPROF); + int a = 0; + dfsan_set_label(4, &a, sizeof(a)); + (void)f(a); + ++sigcnt; +} + +int main() { + signal(SIGPROF, SignalHandler); + + itimerval itv; + itv.it_interval.tv_sec = 0; + itv.it_interval.tv_usec = 100; + itv.it_value.tv_sec = 0; + itv.it_value.tv_usec = 100; + setitimer(ITIMER_PROF, &itv, NULL); + + dfsan_set_label(8, &x, sizeof(x)); + const dfsan_origin origin = dfsan_get_origin(x); + do { + g(origin); + } while (sigcnt < kSigCnt); + + itv.it_interval.tv_sec = 0; + itv.it_interval.tv_usec = 0; + itv.it_value.tv_sec = 0; + itv.it_value.tv_usec = 0; + setitimer(ITIMER_PROF, &itv, NULL); + + signal(SIGPROF, SIG_DFL); + return 0; +} diff --git a/compiler-rt/test/dfsan/origin_unaligned_memtrans.c 
b/compiler-rt/test/dfsan/origin_unaligned_memtrans.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_unaligned_memtrans.c @@ -0,0 +1,74 @@ +// RUN: %clang_dfsan -DOFFSET=0 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK0 < %t.out + +// RUN: %clang_dfsan -DOFFSET=10 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK10 < %t.out + +#include + +#include + +char xx[10000]; +char yy[10000]; +volatile int idx = 30; + +__attribute__((noinline)) void fn_g(char a, char b) { + xx[idx] = a; xx[idx + 10] = b; +} + +__attribute__((noinline)) void fn_f(char a, char b) { + fn_g(a, b); +} + +__attribute__((noinline)) void fn_h() { + memcpy(&yy[2], &xx[2], sizeof(xx) - 4); +} + +__attribute__((noinline)) void fn_i() { + memmove(&yy[25], &yy, 7500); +} + +int main(int argc, char *argv[]) { + char volatile z1; + int volatile buffer; + char volatile z2; + dfsan_set_label(8, (void *)&z1, sizeof(z1)); + dfsan_set_label(16, (void *)&z2, sizeof(z2)); + fn_f(z1, z2); + fn_h(); + fn_i(); + dfsan_print_origin_trace(&yy[25 + idx + OFFSET], NULL); + return 0; +} + +// CHECK0: Taint value 0x8 {{.*}} origin tracking () +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at + +// CHECK0: #0 {{.*}} in dfs$fn_i {{.*}}origin_unaligned_memtrans.c:[[@LINE-19]] +// CHECK0: #1 {{.*}} in main {{.*}}origin_unaligned_memtrans.c:[[@LINE-9]] + +// CHECK0: Origin value: {{.*}}, Taint value was stored to memory at + +// CHECK0: #0 {{.*}} in dfs$fn_h {{.*}}origin_unaligned_memtrans.c:[[@LINE-28]] +// CHECK0: #1 {{.*}} in main {{.*}}origin_unaligned_memtrans.c:[[@LINE-15]] + +// CHECK0: Origin value: {{.*}}, Taint value was created at + +// CHECK0: #0 {{.*}} in main {{.*}}origin_unaligned_memtrans.c:[[@LINE-22]] + +// CHECK10: Taint value 0x10 {{.*}} origin tracking +// 
CHECK10: Origin value: {{.*}}, Taint value was stored to memory at + +// CHECK10: #0 {{.*}} in dfs$fn_i {{.*}}origin_unaligned_memtrans.c:[[@LINE-34]] +// CHECK10: #1 {{.*}} in main {{.*}}origin_unaligned_memtrans.c:[[@LINE-24]] + +// CHECK10: Origin value: {{.*}}, Taint value was stored to memory at + +// CHECK10: #0 {{.*}} in dfs$fn_h {{.*}}origin_unaligned_memtrans.c:[[@LINE-43]] +// CHECK10: #1 {{.*}} in main {{.*}}origin_unaligned_memtrans.c:[[@LINE-30]] + +// CHECK10: Origin value: {{.*}}, Taint value was created at + +// CHECK10: #0 {{.*}} in main {{.*}}origin_unaligned_memtrans.c:[[@LINE-36]] diff --git a/compiler-rt/test/dfsan/origin_with_sigactions.c b/compiler-rt/test/dfsan/origin_with_sigactions.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_with_sigactions.c @@ -0,0 +1,90 @@ +// Check that stores in signal handlers are not recorded in origin history. +// This is, in fact, undesired behavior caused by our chained origins +// implementation being not async-signal-safe. 
+ +// RUN: %clang_dfsan -DUSE_SIGNAL_ACTION -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: not %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK_ACTION < %t.out + +// RUN: %clang_dfsan -DUSE_SIGNAL_ACTION -mllvm -msan-instrumentation-with-call-threshold=0 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: not %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK_ACTION < %t.out + +// RUN: %clang_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: not %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK_HANDLE < %t.out + +// RUN: %clang_dfsan -mllvm -msan-instrumentation-with-call-threshold=0 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: not %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefix=CHECK_HANDLE < %t.out + +#include + +#include +#include +#include +#include +#include + +volatile int x, y, u; + +void SignalHandler(int signo) { + y = x; + memcpy((void *)&u, (void *)&y, sizeof(int)); +} + +void SignalAction(int signo, siginfo_t *si, void *uc) { + y = x; + memcpy((void *)&u, (void *)&y, sizeof(int)); +} + +int main(int argc, char *argv[]) { + int volatile z = 1; + dfsan_set_label(8, (void *)&z, sizeof(z)); + x = z; + + struct sigaction psa; +#ifdef USE_SIGNAL_ACTION + psa.sa_flags = SA_SIGINFO; + psa.sa_sigaction = SignalAction; +#else + psa.sa_flags = 0; + psa.sa_handler = SignalHandler; +#endif + sigaction(SIGHUP, &psa, NULL); + kill(getpid(), SIGHUP); + signal(SIGHUP, SIG_DFL); + + assert(x == 1); + assert(y == 1); + assert(u == 1); + + dfsan_print_origin_trace((void *)&u, NULL); + return u; +} + +// CHECK_HANDLE: Taint value 0x8 {{.*}} origin tracking () +// CHECK_HANDLE: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK_HANDLE-NOT: {{.*}} in dfs$SignalHandler {{.*}}origin_with_sigactions.c{{.*}} + + + + + +// CHECK_HANDLE: #0 {{.*}} in main 
{{.*}}origin_with_sigactions.c:[[@LINE-30]] + +// CHECK_HANDLE: Origin value: {{.*}}, Taint value was created at +// CHECK_HANDLE: #0 {{.*}} in main {{.*}}origin_with_sigactions.c:[[@LINE-34]] + +// CHECK_ACTION: Taint value 0x8 {{.*}} origin tracking +// CHECK_ACTION: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK_ACTION-NOT: {{.*}} in dfs$SignalAction {{.*}}origin_with_sigactions.c{{.*}} + + + + + +// CHECK_ACTION: #0 {{.*}} in main {{.*}}origin_with_sigactions.c:[[@LINE-43]] + +// CHECK_ACTION: Origin value: {{.*}}, Taint value was created at +// CHECK_ACTION: #0 {{.*}} in main {{.*}}origin_with_sigactions.c:[[@LINE-47]] diff --git a/compiler-rt/test/dfsan/origin_with_signals.cpp b/compiler-rt/test/dfsan/origin_with_signals.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_with_signals.cpp @@ -0,0 +1,47 @@ +// Check that stores in signal handlers are not recorded in origin history. +// This is, in fact, undesired behavior caused by our chained origins +// implementation being not async-signal-safe.
+ +// RUN: %clangxx_dfsan -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: not %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +// RUN: %clangxx_dfsan -mllvm -msan-instrumentation-with-call-threshold=0 -mllvm -dfsan-track-origins=1 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: not %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include + +#include +#include +#include +#include + +volatile int x, y, u; + +void SignalHandler(int signo) { + y = x; + memcpy((void *)&u, (void *)&y, sizeof(int)); +} + +int main(int argc, char *argv[]) { + int volatile z = 2; + dfsan_set_label(8, (void *)&z, sizeof(z)); + x = z; + + signal(SIGHUP, SignalHandler); + kill(getpid(), SIGHUP); + signal(SIGHUP, SIG_DFL); + + dfsan_print_origin_trace((void *)&u, nullptr); + return u; +} + +// CHECK: Taint value 0x8 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK-NOT: {{.*}} in dfs$SignalHandler {{.*}}origin_with_signals.cpp{{.*}} + +// CHECK: #0 {{.*}} in main {{.*}}origin_with_signals.cpp:[[@LINE-18]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_with_signals.cpp:[[@LINE-22]] diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -26,9 +26,11 @@ /// | | /// | unused | /// | | -/// +--------------------+ 0x200200000000 (kUnusedAddr) +/// +--------------------+ 0x300200000000 (kUnusedAddr) /// | union table | -/// +--------------------+ 0x200000000000 (kUnionTableAddr) +/// +--------------------+ 0x300000000000 (kUnionTableAddr) +/// | origin | +/// +--------------------+ 0x200000008000 (kOriginAddr) /// | shadow memory | /// +--------------------+ 0x000000010000 (kShadowAddr) /// | reserved by kernel | @@ -85,6 +87,7 
@@ #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -109,6 +112,8 @@ // This must be consistent with ShadowWidthBits. static const Align kShadowTLSAlignment = Align(2); +static const Align kMinOriginAlignment = Align(4); + // The size of TLS variables. These constants must be kept in sync with the ones // in dfsan.cpp. static const unsigned kArgTLSSize = 800; @@ -201,6 +206,17 @@ "to results."), cl::Hidden, cl::init(true)); +static cl::opt ClInstrumentationWithCallThreshold( + "dfsan-instrumentation-with-call-threshold", + cl::desc("If the function being instrumented requires more than " + "this number of origin stores, use callbacks instead of " + "inline checks (-1 means never use callbacks)."), + cl::Hidden, cl::init(3500)); + +static cl::opt ClTrackOrigins("dfsan-track-origins", + cl::desc("Track origins of labels"), + cl::Hidden, cl::init(0)); + static StringRef GetGlobalTypeString(const GlobalValue &G) { // Types of GlobalVariables are always pointer types. Type *GType = G.getValueType(); @@ -321,7 +337,12 @@ friend struct DFSanFunction; friend class DFSanVisitor; - enum { ShadowWidthBits = 16, ShadowWidthBytes = ShadowWidthBits / 8 }; + enum { + ShadowWidthBits = 16, + ShadowWidthBytes = ShadowWidthBits / 8, + OriginWidthBits = 32, + OriginWidthBytes = OriginWidthBits / 8 + }; /// Which ABI should be used for instrumented functions? enum InstrumentedABI { @@ -362,6 +383,10 @@ Module *Mod; LLVMContext *Ctx; Type *Int8Ptr; + IntegerType *OriginTy; + PointerType *OriginPtrTy; + ConstantInt *OriginBase; + ConstantInt *ZeroOrigin; /// The shadow type for all primitive types and vector types. 
IntegerType *PrimitiveShadowTy; PointerType *PrimitiveShadowPtrTy; @@ -370,10 +395,14 @@ ConstantInt *ShadowPtrMask; ConstantInt *ShadowPtrMul; Constant *ArgTLS; + ArrayType *ArgOriginTLSTy; + Constant *ArgOriginTLS; Constant *RetvalTLS; + Constant *RetvalOriginTLS; Constant *ExternalShadowMask; FunctionType *DFSanUnionFnTy; FunctionType *DFSanUnionLoadFnTy; + FunctionType *DFSanLoadLabelAndOriginFnTy; FunctionType *DFSanUnimplementedFnTy; FunctionType *DFSanSetLabelFnTy; FunctionType *DFSanNonzeroLabelFnTy; @@ -381,10 +410,14 @@ FunctionType *DFSanCmpCallbackFnTy; FunctionType *DFSanLoadStoreCallbackFnTy; FunctionType *DFSanMemTransferCallbackFnTy; + FunctionType *DFSanChainOriginFnTy; + FunctionType *DFSanMemOriginTransferFnTy; + FunctionType *DFSanMaybeStoreOriginFnTy; FunctionCallee DFSanUnionFn; FunctionCallee DFSanCheckedUnionFn; FunctionCallee DFSanUnionLoadFn; FunctionCallee DFSanUnionLoadFast16LabelsFn; + FunctionCallee DFSanLoadLabelAndOriginFn; FunctionCallee DFSanUnimplementedFn; FunctionCallee DFSanSetLabelFn; FunctionCallee DFSanNonzeroLabelFn; @@ -393,13 +426,21 @@ FunctionCallee DFSanStoreCallbackFn; FunctionCallee DFSanMemTransferCallbackFn; FunctionCallee DFSanCmpCallbackFn; + FunctionCallee DFSanChainOriginFn; + FunctionCallee DFSanMemOriginTransferFn; + FunctionCallee DFSanMaybeStoreOriginFn; + SmallPtrSet DFSanInternalFunctions; MDNode *ColdCallWeights; + MDNode *OriginStoreWeights; DFSanABIList ABIList; DenseMap UnwrappedFnMap; AttrBuilder ReadOnlyNoneAttrs; bool DFSanRuntimeShadowMask = false; + Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB); Value *getShadowAddress(Value *Addr, Instruction *Pos); + std::pair + getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos); bool isInstrumented(const Function *F); bool isInstrumented(const GlobalAlias *GA); FunctionType *getArgsFunctionType(FunctionType *T); @@ -417,6 +458,8 @@ bool init(Module &M); + bool shouldTrackOrigins(); + /// Returns whether the pass tracks labels 
for struct fields and array /// indices. Support only fast16 mode in TLS ABI mode. bool shouldTrackFieldsAndIndices(); @@ -448,6 +491,8 @@ /// Returns the shadow type of of V's type. Type *getShadowTy(Value *V); + uint64_t numOfElementsInArgOrgTLS(); + public: DataFlowSanitizer(const std::vector &ABIListFiles); @@ -461,9 +506,19 @@ DataFlowSanitizer::InstrumentedABI IA; bool IsNativeABI; AllocaInst *LabelReturnAlloca = nullptr; + AllocaInst *OriginReturnAlloca = nullptr; DenseMap ValShadowMap; + DenseMap ValOriginMap; DenseMap AllocaShadowMap; - std::vector> PHIFixups; + DenseMap AllocaOriginMap; + + struct PHIFixupElement { + PHINode *Phi; + PHINode *ShadowPhi; + PHINode *OriginPhi; + }; + std::vector PHIFixups; + DenseSet SkipInsts; std::vector NonZeroChecks; bool AvoidNewBlocks; @@ -497,6 +552,17 @@ /// Computes the shadow address for a retval. Value *getRetvalTLS(Type *T, IRBuilder<> &IRB); + Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB); + + Value *getRetvalOriginTLS(); + + Value *getOrigin(Value *V); + void setOrigin(Instruction *I, Value *Origin); + Value *combineOperandOrigins(Instruction *Inst); + Value *combineOrigins(const std::vector &Shadows, + const std::vector &Origins, Instruction *Pos, + ConstantInt *Zero = nullptr); + Value *getShadow(Value *V); void setShadow(Instruction *I, Value *Shadow); /// Generates IR to compute the union of the two given shadows, inserting it @@ -507,10 +573,12 @@ Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2, Instruction *Pos); Value *combineOperandShadows(Instruction *Inst); - Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align, - Instruction *Pos); - void storePrimitiveShadow(Value *Addr, uint64_t Size, Align Alignment, - Value *PrimitiveShadow, Instruction *Pos); + std::pair loadShadowOrigin(Value *ShadowAddr, uint64_t Size, + Align InstAlignment, + Instruction *Pos); + void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size, + Align InstAlignment, Value *PrimitiveShadow, + 
Value *Origin, Instruction *Pos); /// Applies PrimitiveShadow to all primitive subtypes of T, returning /// the expanded shadow value. /// @@ -528,6 +596,8 @@ /// CTP(other types, PS) = PS Value *collapseToPrimitiveShadow(Value *Shadow, Instruction *Pos); + void incNumOfOriginStores(); + private: /// Collapses the shadow with aggregate type into a single primitive shadow /// value. @@ -539,6 +609,27 @@ /// Returns the shadow value of an argument A. Value *getShadowForTLSArgument(Argument *A); + + Value *updateOrigin(Value *V, IRBuilder<> &IRB); + + Value *originToIntptr(IRBuilder<> &IRB, Value *Origin); + + void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginAddr, + uint64_t Size, Align Alignment); + + void storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, Value *Shadow, + Value *Origin, Value *OriginAddr, Align InstAlignment); + + Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = ""); + + Align getShadowAlign(Align InstAlignment); + Align getOriginAlign(Align InstAlignment); + + bool shouldInstrumentationWithCall(); + + bool useCallbackLoadLabelandOrigin(uint64_t Size, Align InstAlignment); + + int NumOfOriginStores = 0; }; class DFSanVisitor : public InstVisitor { @@ -553,7 +644,7 @@ // Combines shadow values for all of I's operands. Returns the combined shadow // value. 
- Value *visitOperandShadowInst(Instruction &I); + void visitInstOperands(Instruction &I); void visitUnaryOperator(UnaryOperator &UO); void visitBinaryOperator(BinaryOperator &BO); @@ -574,6 +665,9 @@ void visitSelectInst(SelectInst &I); void visitMemSetInst(MemSetInst &I); void visitMemTransferInst(MemTransferInst &I); + +private: + void visitInstOperandOrigins(Instruction &I); }; } // end anonymous namespace @@ -607,6 +701,13 @@ Type *RetType = T->getReturnType(); if (!RetType->isVoidTy()) ArgTypes.push_back(PrimitiveShadowPtrTy); + + if (shouldTrackOrigins()) { + ArgTypes.append(T->getNumParams(), OriginTy); + if (!RetType->isVoidTy()) + ArgTypes.push_back(OriginPtrTy); + } + return FunctionType::get(T->getReturnType(), ArgTypes, false); } @@ -638,6 +739,16 @@ Type *RetType = T->getReturnType(); if (!RetType->isVoidTy()) ArgTypes.push_back(PrimitiveShadowPtrTy); + + if (shouldTrackOrigins()) { + for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) + ArgTypes.push_back(OriginTy); + if (T->isVarArg()) + ArgTypes.push_back(OriginPtrTy); + if (!RetType->isVoidTy()) + ArgTypes.push_back(OriginPtrTy); + } + return TransformedFunction( T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()), ArgumentIndexMapping); @@ -657,6 +768,11 @@ return isa(V); } +bool DataFlowSanitizer::shouldTrackOrigins() { + return ClTrackOrigins && getInstrumentedABI() == DataFlowSanitizer::IA_TLS && + ClFast16Labels; +} + bool DataFlowSanitizer::shouldTrackFieldsAndIndices() { return getInstrumentedABI() == DataFlowSanitizer::IA_TLS && ClFast16Labels; } @@ -703,6 +819,13 @@ llvm_unreachable("Unexpected shadow type"); } +void DFSanFunction::incNumOfOriginStores() { ++NumOfOriginStores; } + +bool DFSanFunction::shouldInstrumentationWithCall() { + return ClInstrumentationWithCallThreshold >= 0 && + NumOfOriginStores >= ClInstrumentationWithCallThreshold; +} + Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow, Instruction *Pos) { Type *ShadowTy = 
DFS.getShadowTy(T); @@ -811,11 +934,15 @@ Mod = &M; Ctx = &M.getContext(); Int8Ptr = Type::getInt8PtrTy(*Ctx); + OriginTy = IntegerType::get(*Ctx, OriginWidthBits); + OriginPtrTy = PointerType::getUnqual(OriginTy); PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits); PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy); IntptrTy = DL.getIntPtrType(*Ctx); ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0); ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidthBytes); + OriginBase = ConstantInt::get(IntptrTy, 0x200000000000LL); + ZeroOrigin = ConstantInt::getSigned(OriginTy, 0); if (IsX86_64) ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL); else if (IsMIPS64) @@ -832,10 +959,14 @@ Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy}; DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/false); + Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy}; + DFSanLoadLabelAndOriginFnTy = + FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs, + /*isVarArg=*/false); DFSanUnimplementedFnTy = FunctionType::get( Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); - Type *DFSanSetLabelArgs[3] = {PrimitiveShadowTy, Type::getInt8PtrTy(*Ctx), - IntptrTy}; + Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy, + Type::getInt8PtrTy(*Ctx), IntptrTy}; DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), DFSanSetLabelArgs, /*isVarArg=*/false); DFSanNonzeroLabelFnTy = @@ -845,6 +976,18 @@ DFSanCmpCallbackFnTy = FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy, /*isVarArg=*/false); + DFSanChainOriginFnTy = + FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false); + + Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits), + Int8Ptr, IntptrTy, OriginTy}; + DFSanMaybeStoreOriginFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false); + + Type 
*DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy}; + DFSanMemOriginTransferFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false); + Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr}; DFSanLoadStoreCallbackFnTy = FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs, @@ -855,6 +998,7 @@ /*isVarArg=*/false); ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); + OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); return true; } @@ -949,7 +1093,8 @@ Args.push_back(&*AI); CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB); ReturnInst *RI; - if (FT->getReturnType()->isVoidTy()) + Type *RetType = FT->getReturnType(); + if (RetType->isVoidTy()) RI = ReturnInst::Create(*Ctx, BB); else RI = ReturnInst::Create(*Ctx, CI, BB); @@ -957,17 +1102,34 @@ // F is called by a wrapped custom function with primitive shadows. So // its arguments and return value need conversion. DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true); - Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI; + Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; + ++ValAI; for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) { Value *Shadow = DFSF.expandFromPrimitiveShadow(ValAI->getType(), &*ShadowAI, CI); DFSF.ValShadowMap[&*ValAI] = Shadow; } + Function::arg_iterator RetShadowAI = ShadowAI; + const bool ShouldTrackOrigins = shouldTrackOrigins(); + if (ShouldTrackOrigins) { + ValAI = F->arg_begin(); + ++ValAI; + Function::arg_iterator OriginAI = ShadowAI; + if (!RetType->isVoidTy()) + ++OriginAI; + for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++OriginAI, --N) { + DFSF.ValOriginMap[&*ValAI] = &*OriginAI; + } + } DFSanVisitor(DFSF).visitCallInst(*CI); - if (!FT->getReturnType()->isVoidTy()) { + if (!RetType->isVoidTy()) { Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow( DFSF.getShadow(RI->getReturnValue()), RI); - new 
StoreInst(PrimitiveShadow, &*std::prev(F->arg_end()), RI); + new StoreInst(PrimitiveShadow, &*RetShadowAI, RI); + if (ShouldTrackOrigins) { + Value *Origin = DFSF.getOrigin(RI->getReturnValue()); + new StoreInst(Origin, &*std::prev(F->arg_end()), RI); + } } } @@ -1024,11 +1186,23 @@ DFSanUnionLoadFast16LabelsFn = Mod->getOrInsertFunction( "__dfsan_union_load_fast16labels", DFSanUnionLoadFnTy, AL); } + { + AttributeList AL; + AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, + Attribute::NoUnwind); + AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, + Attribute::ReadOnly); + AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, + Attribute::ZExt); + DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction( + "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL); + } DFSanUnimplementedFn = Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy); { AttributeList AL; AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); + AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt); DFSanSetLabelFn = Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL); } @@ -1036,6 +1210,56 @@ Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper", DFSanVarargWrapperFnTy); + { + AttributeList AL; + AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); + AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, + Attribute::ZExt); + DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin", + DFSanChainOriginFnTy, AL); + } + DFSanMemOriginTransferFn = Mod->getOrInsertFunction( + "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy); + + { + AttributeList AL; + AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); + AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt); + DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction( + 
"__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL); + } + + DFSanInternalFunctions.insert(DFSanUnionFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanCheckedUnionFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanUnionLoadFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanUnionLoadFast16LabelsFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanUnimplementedFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanSetLabelFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanNonzeroLabelFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanVarargWrapperFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanLoadCallbackFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanStoreCallbackFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanCmpCallbackFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanChainOriginFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanMemOriginTransferFn.getCallee()->stripPointerCasts()); + DFSanInternalFunctions.insert( + DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts()); } // Initializes event callback functions and declare them in the module @@ -1050,6 +1274,20 @@ Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy); } +static Constant *getOrInsertGlobal(Module &M, bool &Changed, StringRef Name, + Type *Ty) { + Constant *C = M.getOrInsertGlobal(Name, Ty); + if (GlobalVariable *G = dyn_cast(C)) { + Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel; + 
G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); + } + return C; +} + +uint64_t DataFlowSanitizer::numOfElementsInArgOrgTLS() { + return kArgTLSSize / OriginWidthBytes; +} + bool DataFlowSanitizer::runImpl(Module &M) { init(M); @@ -1061,19 +1299,26 @@ bool Changed = false; - Type *ArgTLSTy = ArrayType::get(Type::getInt64Ty(*Ctx), kArgTLSSize / 8); - ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); - if (GlobalVariable *G = dyn_cast(ArgTLS)) { - Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel; - G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); - } - Type *RetvalTLSTy = - ArrayType::get(Type::getInt64Ty(*Ctx), kRetvalTLSSize / 8); - RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", RetvalTLSTy); - if (GlobalVariable *G = dyn_cast(RetvalTLS)) { - Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel; - G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); - } + // These globals must be kept in sync with the ones in dfsan.cpp. 
+ ArgTLS = getOrInsertGlobal( + *Mod, Changed, "__dfsan_arg_tls", + ArrayType::get(Type::getInt64Ty(*Ctx), kArgTLSSize / 8)); + RetvalTLS = getOrInsertGlobal( + *Mod, Changed, "__dfsan_retval_tls", + ArrayType::get(Type::getInt64Ty(*Ctx), kRetvalTLSSize / 8)); + ArgOriginTLSTy = ArrayType::get(OriginTy, numOfElementsInArgOrgTLS()); + ArgOriginTLS = getOrInsertGlobal(*Mod, Changed, "__dfsan_arg_origin_tls", + ArgOriginTLSTy); + RetvalOriginTLS = + getOrInsertGlobal(*Mod, Changed, "__dfsan_retval_origin_tls", OriginTy); + + (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] { + Changed = true; + return new GlobalVariable( + M, OriginTy, true, GlobalValue::WeakODRLinkage, + ConstantInt::getSigned(OriginTy, shouldTrackOrigins()), + "__dfsan_track_origins"); + }); ExternalShadowMask = Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy); @@ -1084,19 +1329,7 @@ std::vector FnsToInstrument; SmallPtrSet FnsWithNativeABI; for (Function &i : M) { - if (!i.isIntrinsic() && - &i != DFSanUnionFn.getCallee()->stripPointerCasts() && - &i != DFSanCheckedUnionFn.getCallee()->stripPointerCasts() && - &i != DFSanUnionLoadFn.getCallee()->stripPointerCasts() && - &i != DFSanUnionLoadFast16LabelsFn.getCallee()->stripPointerCasts() && - &i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() && - &i != DFSanSetLabelFn.getCallee()->stripPointerCasts() && - &i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() && - &i != DFSanVarargWrapperFn.getCallee()->stripPointerCasts() && - &i != DFSanLoadCallbackFn.getCallee()->stripPointerCasts() && - &i != DFSanStoreCallbackFn.getCallee()->stripPointerCasts() && - &i != DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts() && - &i != DFSanCmpCallbackFn.getCallee()->stripPointerCasts()) + if (!i.isIntrinsic() && !DFSanInternalFunctions.contains(&i)) FnsToInstrument.push_back(&i); } @@ -1193,7 +1426,9 @@ : GlobalValue::LinkOnceODRLinkage; Function *NewF = buildWrapperFunction( - &F, std::string("dfsw$") + 
std::string(F.getName()), + &F, + (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) + + std::string(F.getName()), wrapperLinkage, NewFT); if (getInstrumentedABI() == IA_TLS) NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs); @@ -1263,14 +1498,18 @@ // until we have visited every block. Therefore, the code that handles phi // nodes adds them to the PHIFixups list so that they can be properly // handled here. - for (std::vector>::iterator + for (std::vector::iterator i = DFSF.PHIFixups.begin(), e = DFSF.PHIFixups.end(); i != e; ++i) { - for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n; + for (unsigned val = 0, n = i->Phi->getNumIncomingValues(); val != n; ++val) { - i->second->setIncomingValue( - val, DFSF.getShadow(i->first->getIncomingValue(val))); + i->ShadowPhi->setIncomingValue( + val, DFSF.getShadow(i->Phi->getIncomingValue(val))); + if (i->OriginPhi) { + i->OriginPhi->setIncomingValue( + val, DFSF.getOrigin(i->Phi->getIncomingValue(val))); + } } } @@ -1316,6 +1555,55 @@ DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret"); } +Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; } + +Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) { + return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo, + "_dfsarg_o"); +} + +Value *DFSanFunction::getOrigin(Value *V) { + assert(DFS.shouldTrackOrigins()); + if (!isa(V) && !isa(V)) + return DFS.ZeroOrigin; + Value *&Origin = ValOriginMap[V]; + if (!Origin) { + if (Argument *A = dyn_cast(V)) { + if (IsNativeABI) + return DFS.ZeroOrigin; + switch (IA) { + case DataFlowSanitizer::IA_TLS: { + if (A->getArgNo() < DFS.numOfElementsInArgOrgTLS()) { + Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin(); + IRBuilder<> IRB(ArgOriginTLSPos); + Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB); + Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr); + } else { + // Overflow + Origin = 
DFS.ZeroOrigin; + } + break; + } + case DataFlowSanitizer::IA_Args: { + Origin = DFS.ZeroOrigin; + break; + } + } + } else { + Origin = DFS.ZeroOrigin; + } + } + return Origin; +} + +void DFSanFunction::setOrigin(Instruction *I, Value *Origin) { + if (!DFS.shouldTrackOrigins()) + return; + assert(!ValOriginMap.count(I)); + assert(Origin->getType() == DFS.OriginTy); + ValOriginMap[I] = Origin; +} + Value *DFSanFunction::getShadowForTLSArgument(Argument *A) { unsigned ArgOffset = 0; const DataLayout &DL = F->getParent()->getDataLayout(); @@ -1385,20 +1673,44 @@ ValShadowMap[I] = Shadow; } -Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) { +Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) { assert(Addr != RetvalTLS && "Reinstrumenting?"); - IRBuilder<> IRB(Pos); Value *ShadowPtrMaskValue; if (DFSanRuntimeShadowMask) ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask); else ShadowPtrMaskValue = ShadowPtrMask; - return IRB.CreateIntToPtr( - IRB.CreateMul( - IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), - IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)), - ShadowPtrMul), - PrimitiveShadowPtrTy); + return IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), + IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)); +} + +std::pair +DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment, + Instruction *Pos) { + IRBuilder<> IRB(Pos); + Value *ShadowOffset = getShadowOffset(Addr, IRB); + Value *ShadowPtr = IRB.CreateIntToPtr( + IRB.CreateMul(ShadowOffset, ShadowPtrMul), PrimitiveShadowPtrTy); + Value *OriginPtr = nullptr; + if (shouldTrackOrigins()) { + Value *OriginLong = IRB.CreateAdd(ShadowOffset, OriginBase); + const Align Alignment = llvm::assumeAligned(InstAlignment.value()); + // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB. + // So Mask is unnecessary. 
+ if (Alignment < kMinOriginAlignment) { + uint64_t Mask = kMinOriginAlignment.value() - 1; + OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask)); + } + OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy); + } + return {ShadowPtr, OriginPtr}; +} + +Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) { + IRBuilder<> IRB(Pos); + Value *ShadowOffset = getShadowOffset(Addr, IRB); + return IRB.CreateIntToPtr(IRB.CreateMul(ShadowOffset, ShadowPtrMul), + PrimitiveShadowPtrTy); } Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2, @@ -1509,26 +1821,112 @@ return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst); } -Value *DFSanVisitor::visitOperandShadowInst(Instruction &I) { +void DFSanVisitor::visitInstOperands(Instruction &I) { Value *CombinedShadow = DFSF.combineOperandShadows(&I); DFSF.setShadow(&I, CombinedShadow); - return CombinedShadow; + visitInstOperandOrigins(I); +} + +Value *DFSanFunction::combineOrigins(const std::vector &Shadows, + const std::vector &Origins, + Instruction *Pos, ConstantInt *Zero) { + assert(Shadows.size() == Origins.size()); + size_t Size = Origins.size(); + if (Size == 0) + return DFS.ZeroOrigin; + Value *Origin = nullptr; + if (!Zero) + Zero = DFS.ZeroPrimitiveShadow; + for (size_t i = 0; i != Size; ++i) { + Value *OpOrigin = Origins[i]; + Constant *ConstOpOrigin = dyn_cast(OpOrigin); + if (ConstOpOrigin && ConstOpOrigin->isNullValue()) + continue; + if (!Origin) { + Origin = OpOrigin; + continue; + } + Value *OpShadow = Shadows[i]; + Value *primitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos); + IRBuilder<> IRB(Pos); + Value *Cond = IRB.CreateICmpNE(primitiveShadow, Zero); + Origin = IRB.CreateSelect(Cond, OpOrigin, Origin); + } + return Origin ? 
Origin : DFS.ZeroOrigin; +} + +Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) { + size_t Size = Inst->getNumOperands(); + std::vector Shadows(Size); + std::vector Origins(Size); + for (unsigned I = 0; I != Size; ++I) { + Shadows[I] = getShadow(Inst->getOperand(I)); + Origins[I] = getOrigin(Inst->getOperand(I)); + } + return combineOrigins(Shadows, Origins, Inst); +} + +void DFSanVisitor::visitInstOperandOrigins(Instruction &I) { + if (!DFSF.DFS.shouldTrackOrigins()) + return; + Value *CombinedOrigin = DFSF.combineOperandOrigins(&I); + DFSF.setOrigin(&I, CombinedOrigin); +} + +Align DFSanFunction::getShadowAlign(Align InstAlignment) { + const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1); + return Align(Alignment.value() * DFS.ShadowWidthBytes); +} + +Align DFSanFunction::getOriginAlign(Align InstAlignment) { + const Align Alignment = llvm::assumeAligned(InstAlignment.value()); + return Align(std::max(kMinOriginAlignment, Alignment)); +} + +bool DFSanFunction::useCallbackLoadLabelandOrigin(uint64_t Size, + Align InstAlignment) { + assert(Size != 0); + // * if Size == 1, it is sufficient to load its origin aligned at 4. + // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to + // load its origin aligned at 4. If not, although origins may be lost, it + // should not happen very often. + // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When + // Size % 4 == 0, it is more efficient to load origins without callbacks. + // * Otherwise we use __dfsan_load_label_and_origin. + // This should ensure that common cases run efficiently. 
+ if (Size <= 2) + return false; + + const Align Alignment = llvm::assumeAligned(InstAlignment.value()); + if (Alignment >= kMinOriginAlignment && + Size % (64 / DFS.ShadowWidthBits) == 0) + return false; + + return true; } // Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where // Addr has alignment Align, and take the union of each of those shadows. The // returned shadow always has primitive type. -Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, - Instruction *Pos) { +std::pair DFSanFunction::loadShadowOrigin(Value *Addr, + uint64_t Size, + Align InstAlignment, + Instruction *Pos) { + const bool ShouldTrackOrigins = DFS.shouldTrackOrigins(); + if (AllocaInst *AI = dyn_cast(Addr)) { - const auto i = AllocaShadowMap.find(AI); - if (i != AllocaShadowMap.end()) { + const auto SI = AllocaShadowMap.find(AI); + if (SI != AllocaShadowMap.end()) { IRBuilder<> IRB(Pos); - return IRB.CreateLoad(DFS.PrimitiveShadowTy, i->second); + Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second); + const auto OI = AllocaOriginMap.find(AI); + assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end()); + return {ShadowLI, OI != AllocaOriginMap.end() + ? IRB.CreateLoad(DFS.OriginTy, OI->second) + : nullptr}; } } - const llvm::Align ShadowAlign(Align * DFS.ShadowWidthBytes); SmallVector Objs; getUnderlyingObjects(Addr, Objs); bool AllConstants = true; @@ -1542,25 +1940,54 @@ break; } if (AllConstants) - return DFS.ZeroPrimitiveShadow; + return {DFS.ZeroPrimitiveShadow, + ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr}; + + if (Size == 0) + return {DFS.ZeroPrimitiveShadow, + ShouldTrackOrigins ? 
DFS.ZeroOrigin : nullptr}; + + if (ShouldTrackOrigins && + useCallbackLoadLabelandOrigin(Size, InstAlignment)) { + IRBuilder<> IRB(Pos); + CallInst *Call = + IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn, + {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + ConstantInt::get(DFS.IntptrTy, Size)}); + Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); + return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits), + DFS.PrimitiveShadowTy), + IRB.CreateTrunc(Call, DFS.OriginTy)}; + } + + Value *ShadowAddr = nullptr, *OriginAddr = nullptr; + std::tie(ShadowAddr, OriginAddr) = + DFS.getShadowOriginAddress(Addr, InstAlignment, Pos); + + const Align ShadowAlign = getShadowAlign(InstAlignment); + const Align OriginAlignment = getOriginAlign(InstAlignment); + Value *Origin = nullptr; + if (ShouldTrackOrigins) { + IRBuilder<> IRB(Pos); + Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlignment); + } - Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos); switch (Size) { - case 0: - return DFS.ZeroPrimitiveShadow; case 1: { LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos); LI->setAlignment(ShadowAlign); - return LI; + return {LI, Origin}; } case 2: { IRBuilder<> IRB(Pos); Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1)); - return combineShadows( - IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign), - IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign), - Pos); + return {combineShadows(IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, + ShadowAddr, ShadowAlign), + IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, + ShadowAddr1, ShadowAlign), + Pos), + Origin}; } } @@ -1568,24 +1995,39 @@ // First OR all the WideShadows, then OR individual shadows within the // combined WideShadow. This is fewer instructions than ORing shadows // individually. 
+ std::vector Shadows; + std::vector Origins; IRBuilder<> IRB(Pos); Value *WideAddr = IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx)); Value *CombinedWideShadow = IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign); + if (ShouldTrackOrigins) { + Shadows.push_back(CombinedWideShadow); + Origins.push_back(Origin); + } for (uint64_t Ofs = 64 / DFS.ShadowWidthBits; Ofs != Size; Ofs += 64 / DFS.ShadowWidthBits) { - WideAddr = IRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr, + WideAddr = IRB.CreateGEP(IRB.getInt64Ty(), WideAddr, ConstantInt::get(DFS.IntptrTy, 1)); Value *NextWideShadow = IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign); CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow); + if (ShouldTrackOrigins) { + Shadows.push_back(NextWideShadow); + OriginAddr = IRB.CreateGEP(DFS.OriginTy, OriginAddr, + ConstantInt::get(DFS.IntptrTy, 1)); + Origins.push_back( + IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlignment)); + } } for (unsigned Width = 32; Width >= DFS.ShadowWidthBits; Width >>= 1) { Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width); CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow); } - return IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy); + return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy), + combineOrigins(Shadows, Origins, Pos, + ConstantInt::getSigned(IRB.getInt64Ty(), 0))}; } if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidthBits) == 0) { // Fast path for the common case where each byte has identical shadow: load @@ -1650,7 +2092,7 @@ PHINode::Create(DFS.PrimitiveShadowTy, 2, "", &Tail->front()); Shadow->addIncoming(FallbackCall, FallbackBB); Shadow->addIncoming(TruncShadow, LastBr->getParent()); - return Shadow; + return {Shadow, Origin}; } IRBuilder<> IRB(Pos); @@ -1659,7 +2101,7 @@ CallInst *FallbackCall = IRB.CreateCall( UnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)}); 
FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - return FallbackCall; + return {FallbackCall, Origin}; } void DFSanVisitor::visitLoadInst(LoadInst &LI) { @@ -1667,15 +2109,27 @@ uint64_t Size = DL.getTypeStoreSize(LI.getType()); if (Size == 0) { DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI)); + DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin); return; } - Align Alignment = ClPreserveAlignment ? LI.getAlign() : Align(1); - Value *PrimitiveShadow = - DFSF.loadShadow(LI.getPointerOperand(), Size, Alignment.value(), &LI); + Value *PrimitiveShadow = nullptr, *Origin = nullptr; + std::vector Shadows; + std::vector Origins; + std::tie(PrimitiveShadow, Origin) = + DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), &LI); + const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins(); + if (ShouldTrackOrigins) { + Shadows.push_back(PrimitiveShadow); + Origins.push_back(Origin); + } if (ClCombinePointerLabelsOnLoad) { Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand()); PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, &LI); + if (ShouldTrackOrigins) { + Shadows.push_back(PtrShadow); + Origins.push_back(DFSF.getOrigin(LI.getPointerOperand())); + } } if (!DFSF.DFS.isZeroShadow(PrimitiveShadow)) DFSF.NonZeroChecks.push_back(PrimitiveShadow); @@ -1683,6 +2137,11 @@ Value *Shadow = DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, &LI); DFSF.setShadow(&LI, Shadow); + + if (ShouldTrackOrigins) { + DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, &LI)); + } + if (ClEventCallbacks) { IRBuilder<> IRB(&LI); Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr); @@ -1690,35 +2149,151 @@ } } -void DFSanFunction::storePrimitiveShadow(Value *Addr, uint64_t Size, - Align Alignment, - Value *PrimitiveShadow, - Instruction *Pos) { +Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) { + if (!DFS.shouldTrackOrigins()) + return V; + return IRB.CreateCall(DFS.DFSanChainOriginFn, V); 
+} + +Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) { + const unsigned kOriginSize = DataFlowSanitizer::OriginWidthBytes; + const DataLayout &DL = F->getParent()->getDataLayout(); + unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy); + if (IntptrSize == kOriginSize) + return Origin; + assert(IntptrSize == kOriginSize * 2); + Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false); + return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8)); +} + +void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin, + Value *OriginAddr, uint64_t Size, + Align Alignment) { + const unsigned kOriginSize = DataFlowSanitizer::OriginWidthBytes; + const DataLayout &DL = F->getParent()->getDataLayout(); + const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy); + unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy); + assert(IntptrAlignment >= kMinOriginAlignment); + assert(IntptrSize >= kOriginSize); + + unsigned Ofs = 0; + Align CurrentAlignment = Alignment; + if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) { + Value *IntptrOrigin = originToIntptr(IRB, Origin); + Value *IntptrOriginPtr = + IRB.CreatePointerCast(OriginAddr, PointerType::get(DFS.IntptrTy, 0)); + for (unsigned i = 0; i < Size / IntptrSize; ++i) { + Value *Ptr = i ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrOriginPtr, i) + : IntptrOriginPtr; + IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment); + Ofs += IntptrSize / kOriginSize; + CurrentAlignment = IntptrAlignment; + } + } + + for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) { + Value *GEP = + i ? 
IRB.CreateConstGEP1_32(DFS.OriginTy, OriginAddr, i) : OriginAddr; + IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment); + CurrentAlignment = kMinOriginAlignment; + } +} + +// Convert a scalar value to an i1 by comparing with 0 +Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB, + const Twine &name) { + Type *VTy = V->getType(); + assert(VTy->isIntegerTy()); + if (VTy->getIntegerBitWidth() == 1) + // Just converting a bool to a bool, so do nothing. + return V; + return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), name); +} + +static unsigned TypeSizeToSizeIndex(unsigned TypeSize) { + if (TypeSize <= 8) + return 0; + return Log2_32_Ceil((TypeSize + 7) / 8); +} + +void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, + Value *Shadow, Value *Origin, Value *OriginAddr, + Align InstAlignment) { + // Do not write origins for 0 shadows because we do not trace origins for + // untainted sinks. + const Align OriginAlignment = getOriginAlign(InstAlignment); + Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos); + IRBuilder<> IRB(Pos); + if (auto *ConstantShadow = dyn_cast(CollapsedShadow)) { + if (!ConstantShadow->isZeroValue()) + paintOrigin(IRB, updateOrigin(Origin, IRB), OriginAddr, Size, + OriginAlignment); + return; + } + + if (shouldInstrumentationWithCall()) { + IRB.CreateCall(DFS.DFSanMaybeStoreOriginFn, + {CollapsedShadow, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + ConstantInt::get(DFS.IntptrTy, Size), Origin}); + } else { + Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp"); + Instruction *CheckTerm = SplitBlockAndInsertIfThen( + Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DT); + IRBuilder<> IRBNew(CheckTerm); + paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginAddr, Size, + OriginAlignment); + incNumOfOriginStores(); + } +} + +void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size, + Align InstAlignment, + Value *PrimitiveShadow, + Value *Origin, + 
Instruction *Pos) { + const bool ShouldTrackOrigins = DFS.shouldTrackOrigins(); + if (AllocaInst *AI = dyn_cast(Addr)) { - const auto i = AllocaShadowMap.find(AI); - if (i != AllocaShadowMap.end()) { + const auto SI = AllocaShadowMap.find(AI); + if (SI != AllocaShadowMap.end()) { IRBuilder<> IRB(Pos); - IRB.CreateStore(PrimitiveShadow, i->second); + IRB.CreateStore(PrimitiveShadow, SI->second); + + // Do not write origins for 0 shadows because we do not trace origins for + // untainted sinks. + if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) { + const auto OI = AllocaOriginMap.find(AI); + assert(OI != AllocaOriginMap.end() && Origin); + IRB.CreateStore(Origin, OI->second); + } return; } } - const Align ShadowAlign(Alignment.value() * DFS.ShadowWidthBytes); + const Align ShadowAlign = getShadowAlign(InstAlignment); IRBuilder<> IRB(Pos); - Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos); + Value *ShadowAddr = nullptr, *OriginAddr = nullptr; if (DFS.isZeroShadow(PrimitiveShadow)) { IntegerType *ShadowTy = IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits); Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0); + ShadowAddr = DFS.getShadowAddress(Addr, Pos); Value *ExtShadowAddr = IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy)); IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign); + // Do not write origins for 0 shadows because we do not trace origins for + // untainted sinks. 
return; } + std::tie(ShadowAddr, OriginAddr) = + DFS.getShadowOriginAddress(Addr, InstAlignment, Pos); + const unsigned ShadowVecSize = 128 / DFS.ShadowWidthBits; uint64_t Offset = 0; - if (Size >= ShadowVecSize) { + uint64_t LeftSize = Size; + if (LeftSize >= ShadowVecSize) { auto *ShadowVecTy = FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize); Value *ShadowVec = UndefValue::get(ShadowVecTy); @@ -1733,18 +2308,23 @@ Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset); IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign); - Size -= ShadowVecSize; + LeftSize -= ShadowVecSize; ++Offset; - } while (Size >= ShadowVecSize); + } while (LeftSize >= ShadowVecSize); Offset *= ShadowVecSize; } - while (Size > 0) { + while (LeftSize > 0) { Value *CurShadowAddr = IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset); IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign); - --Size; + --LeftSize; ++Offset; } + + if (ShouldTrackOrigins) { + storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr, + InstAlignment); + } } void DFSanVisitor::visitStoreInst(StoreInst &SI) { @@ -1753,18 +2333,34 @@ if (Size == 0) return; - const Align Alignment = ClPreserveAlignment ? 
SI.getAlign() : Align(1); + const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins(); + std::vector Shadows; + std::vector Origins; Value* Shadow = DFSF.getShadow(SI.getValueOperand()); + + if (ShouldTrackOrigins) { + Shadows.push_back(Shadow); + Origins.push_back(DFSF.getOrigin(SI.getValueOperand())); + } + Value *PrimitiveShadow; if (ClCombinePointerLabelsOnStore) { Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand()); + if (ShouldTrackOrigins) { + Shadows.push_back(PtrShadow); + Origins.push_back(DFSF.getOrigin(SI.getPointerOperand())); + } PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI); } else { PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI); } - DFSF.storePrimitiveShadow(SI.getPointerOperand(), Size, Alignment, - PrimitiveShadow, &SI); + Value *Origin = nullptr; + if (ShouldTrackOrigins) { + Origin = DFSF.combineOrigins(Shadows, Origins, &SI); + } + DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(), + PrimitiveShadow, Origin, &SI); if (ClEventCallbacks) { IRBuilder<> IRB(&SI); Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr); @@ -1773,42 +2369,43 @@ } void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) { - visitOperandShadowInst(UO); + visitInstOperands(UO); } void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) { - visitOperandShadowInst(BO); + visitInstOperands(BO); } -void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); } +void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); } void DFSanVisitor::visitCmpInst(CmpInst &CI) { - Value *CombinedShadow = visitOperandShadowInst(CI); + visitInstOperands(CI); if (ClEventCallbacks) { IRBuilder<> IRB(&CI); + Value *CombinedShadow = DFSF.getShadow(&CI); IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow); } } void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { - visitOperandShadowInst(GEPI); + visitInstOperands(GEPI); } void 
DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) { - visitOperandShadowInst(I); + visitInstOperands(I); } void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) { - visitOperandShadowInst(I); + visitInstOperands(I); } void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) { - visitOperandShadowInst(I); + visitInstOperands(I); } void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) { if (!DFSF.DFS.shouldTrackFieldsAndIndices()) { - visitOperandShadowInst(I); + visitInstOperands(I); return; } @@ -1817,11 +2414,12 @@ Value *AggShadow = DFSF.getShadow(Agg); Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices()); DFSF.setShadow(&I, ResShadow); + visitInstOperandOrigins(I); } void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) { if (!DFSF.DFS.shouldTrackFieldsAndIndices()) { - visitOperandShadowInst(I); + visitInstOperands(I); return; } @@ -1830,6 +2428,7 @@ Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand()); Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices()); DFSF.setShadow(&I, Res); + visitInstOperandOrigins(I); } void DFSanVisitor::visitAllocaInst(AllocaInst &I) { @@ -1849,8 +2448,13 @@ if (AllLoadsStores) { IRBuilder<> IRB(&I); DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy); + if (DFSF.DFS.shouldTrackOrigins()) { + DFSF.AllocaOriginMap[&I] = + IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa"); + } } DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow); + DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin); } void DFSanVisitor::visitSelectInst(SelectInst &I) { @@ -1858,35 +2462,81 @@ Value *TrueShadow = DFSF.getShadow(I.getTrueValue()); Value *FalseShadow = DFSF.getShadow(I.getFalseValue()); Value *ShadowSel = nullptr; + const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins(); + std::vector Shadows; + std::vector Origins; + Value *CondOrigin = + ShouldTrackOrigins ? DFSF.getOrigin(I.getCondition()) : nullptr; + Value *TrueOrigin = + ShouldTrackOrigins ? 
DFSF.getOrigin(I.getTrueValue()) : nullptr; + Value *FalseOrigin = + ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr; if (isa(I.getCondition()->getType())) { ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow, FalseShadow, &I); + if (ShouldTrackOrigins) { + Shadows.push_back(TrueShadow); + Shadows.push_back(FalseShadow); + Origins.push_back(TrueOrigin); + Origins.push_back(FalseOrigin); + } } else { if (TrueShadow == FalseShadow) { ShadowSel = TrueShadow; + if (ShouldTrackOrigins) { + Shadows.push_back(TrueShadow); + Origins.push_back(TrueOrigin); + } } else { ShadowSel = SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I); + if (ShouldTrackOrigins) { + Shadows.push_back(ShadowSel); + Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin, + FalseOrigin, "", &I)); + } } } DFSF.setShadow(&I, ClTrackSelectControlFlow ? DFSF.combineShadowsThenConvert( I.getType(), CondShadow, ShadowSel, &I) : ShadowSel); + if (ShouldTrackOrigins) { + if (ClTrackSelectControlFlow) { + Shadows.push_back(CondShadow); + Origins.push_back(CondOrigin); + } + DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, &I)); + } } void DFSanVisitor::visitMemSetInst(MemSetInst &I) { IRBuilder<> IRB(&I); Value *ValShadow = DFSF.getShadow(I.getValue()); - IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn, - {ValShadow, IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy( - *DFSF.DFS.Ctx)), - IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)}); + Value *ValOrigin = DFSF.DFS.shouldTrackOrigins() + ? DFSF.getOrigin(I.getValue()) + : DFSF.DFS.ZeroOrigin; + IRB.CreateCall( + DFSF.DFS.DFSanSetLabelFn, + {ValShadow, ValOrigin, + IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)), + IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)}); } void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { IRBuilder<> IRB(&I); + + // CopyOrMoveOrigin transfers origins by refering to their shadows. 
So we + // need to move origins before moving shadows. + if (DFSF.DFS.shouldTrackOrigins()) { + IRB.CreateCall( + DFSF.DFS.DFSanMemOriginTransferFn, + {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()), + IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)}); + } + Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I); Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I); Value *LenShadow = @@ -1926,6 +2576,10 @@ IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), kShadowTLSAlignment); } + if (DFSF.DFS.shouldTrackOrigins()) { + Value *O = DFSF.getOrigin(RI.getReturnValue()); + IRB.CreateStore(O, DFSF.getRetvalOriginTLS()); + } break; } case DataFlowSanitizer::IA_Args: { @@ -1945,7 +2599,7 @@ void DFSanVisitor::visitCallBase(CallBase &CB) { Function *F = CB.getCalledFunction(); if ((F && F->isIntrinsic()) || CB.isInlineAsm()) { - visitOperandShadowInst(CB); + visitInstOperands(CB); return; } @@ -1956,6 +2610,7 @@ IRBuilder<> IRB(&CB); + const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins(); DenseMap::iterator i = DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand()); if (i != DFSF.DFS.UnwrappedFnMap.end()) { @@ -1966,14 +2621,16 @@ IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn, IRB.CreateGlobalStringPtr(F->getName())); DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB)); + DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin); return; case DataFlowSanitizer::WK_Discard: CB.setCalledFunction(F); DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB)); + DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin); return; case DataFlowSanitizer::WK_Functional: CB.setCalledFunction(F); - visitOperandShadowInst(CB); + visitInstOperands(CB); return; case DataFlowSanitizer::WK_Custom: // Don't try to handle invokes of custom functions, it's too complicated. 
@@ -1982,7 +2639,7 @@ if (CallInst *CI = dyn_cast(&CB)) { FunctionType *FT = F->getFunctionType(); TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT); - std::string CustomFName = "__dfsw_"; + std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_"; CustomFName += F->getName(); FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction( CustomFName, CustomFn.TransformedType); @@ -1998,6 +2655,7 @@ std::vector Args; + // Add non-var args auto i = CB.arg_begin(); for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) { Type *T = (*i)->getType(); @@ -2018,12 +2676,14 @@ } } + // Add non-var arg shadows i = CB.arg_begin(); const unsigned ShadowArgStart = Args.size(); for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) Args.push_back( DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*i), &CB)); + // Add var arg shadows if (FT->isVarArg()) { auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy, CB.arg_size() - FT->getNumParams()); @@ -2041,6 +2701,7 @@ Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0)); } + // Add ret shadow if (!FT->getReturnType()->isVoidTy()) { if (!DFSF.LabelReturnAlloca) { DFSF.LabelReturnAlloca = @@ -2051,7 +2712,44 @@ Args.push_back(DFSF.LabelReturnAlloca); } - append_range(Args, drop_begin(CB.args(), FT->getNumParams())); + const unsigned OriginArgStart = Args.size(); + if (ShouldTrackOrigins) { + // Add non-var arg origins + i = CB.arg_begin(); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) + Args.push_back(DFSF.getOrigin(*i)); + + // Add var arg origins + if (FT->isVarArg()) { + auto *OriginVATy = ArrayType::get( + DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams()); + auto *OriginVAAlloca = + new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(), + "originva", &DFSF.F->getEntryBlock().front()); + + for (unsigned n = 0; i != CB.arg_end(); ++i, ++n) { + auto OriginVAPtr = + IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, n); + IRB.CreateStore(DFSF.getOrigin(*i), 
OriginVAPtr); + } + + Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0)); + } + + // Add ret origin + if (!FT->getReturnType()->isVoidTy()) { + if (!DFSF.OriginReturnAlloca) { + DFSF.OriginReturnAlloca = new AllocaInst( + DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(), + "originreturn", &DFSF.F->getEntryBlock().front()); + } + Args.push_back(DFSF.OriginReturnAlloca); + } + } + + // Add var args + for (i = CB.arg_begin() + FT->getNumParams(); i != CB.arg_end(); ++i) + Args.push_back(*i); CallInst *CustomCI = IRB.CreateCall(CustomF, Args); CustomCI->setCallingConv(CI->getCallingConv()); @@ -2066,13 +2764,25 @@ if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.PrimitiveShadowTy) CustomCI->addParamAttr(ArgNo, Attribute::ZExt); + if (ShouldTrackOrigins) { + const unsigned OriginArgNo = OriginArgStart + n; + if (CustomCI->getArgOperand(OriginArgNo)->getType() == + DFSF.DFS.OriginTy) + CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt); + } } + // Read return label and origin if (!FT->getReturnType()->isVoidTy()) { LoadInst *LabelLoad = IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca); DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow( FT->getReturnType(), LabelLoad, &CB)); + if (ShouldTrackOrigins) { + LoadInst *OriginLoad = + IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca); + DFSF.setOrigin(CustomCI, OriginLoad); + } } CI->replaceAllUsesWith(CustomCI); @@ -2088,6 +2798,15 @@ unsigned ArgOffset = 0; const DataLayout &DL = getDataLayout(); for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) { + if (ShouldTrackOrigins) { + // Ignore overflowed origins + Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I)); + if (I < DFSF.DFS.numOfElementsInArgOrgTLS() && + !DFSF.DFS.isZeroShadow(ArgShadow)) + IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)), + DFSF.getArgOriginTLS(I, IRB)); + } + unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I))); // Stop storing if arguments' size 
overflows. Inside a function, arguments @@ -2132,6 +2851,11 @@ DFSF.setShadow(&CB, LI); DFSF.NonZeroChecks.push_back(LI); } + + LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy, + DFSF.getRetvalOriginTLS(), "_dfsret_o"); + DFSF.SkipInsts.insert(LI); + DFSF.setOrigin(&CB, LI); } } @@ -2206,8 +2930,21 @@ ShadowPN->addIncoming(UndefShadow, *i); } - DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN)); DFSF.setShadow(&PN, ShadowPN); + + PHINode *OriginPN = nullptr; + if (DFSF.DFS.shouldTrackOrigins()) { + OriginPN = + PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "", &PN); + Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy); + for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); + i != e; ++i) { + OriginPN->addIncoming(UndefOrigin, *i); + } + DFSF.setOrigin(&PN, OriginPN); + } + + DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN}); } namespace { diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -dfsan -S | FileCheck %s --check-prefix=CHECK +; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | FileCheck %s --check-prefix=CHECK_ORIGIN +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [100 x i64] +; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global [100 x i64] +; CHECK: @__dfsan_arg_origin_tls = external thread_local(initialexec) global [200 x i32] +; CHECK: @__dfsan_retval_origin_tls = external thread_local(initialexec) global i32 +; CHECK: @__dfsan_track_origins = weak_odr constant i32 0 +; CHECK_ORIGIN: @__dfsan_track_origins = weak_odr constant i32 1 
+; CHECK: @__dfsan_shadow_ptr_mask = external global i64 +; CHECK: declare void @__dfsan_load_callback(i16, i8*) +; CHECK: declare void @__dfsan_store_callback(i16, i8*) +; CHECK: declare void @__dfsan_mem_transfer_callback(i16*, i64) +; CHECK: declare void @__dfsan_cmp_callback(i16) + +; CHECK: ; Function Attrs: nounwind readnone +; CHECK-NEXT: declare zeroext i16 @__dfsan_union(i16 zeroext, i16 zeroext) #0 + +; CHECK: ; Function Attrs: nounwind readnone +; CHECK-NEXT: declare zeroext i16 @dfsan_union(i16 zeroext, i16 zeroext) #0 + +; CHECK: ; Function Attrs: nounwind readonly +; CHECK-NEXT: declare zeroext i16 @__dfsan_union_load(i16*, i64) #1 + +; CHECK: ; Function Attrs: nounwind readonly +; CHECK-NEXT: declare zeroext i16 @__dfsan_union_load_fast16labels(i16*, i64) #1 + +; CHECK: ; Function Attrs: nounwind readonly +; CHECK-NEXT: declare zeroext i64 @__dfsan_load_label_and_origin(i8*, i64) #1 + +; CHECK: declare void @__dfsan_unimplemented(i8*) +; CHECK: declare void @__dfsan_set_label(i16 zeroext, i32 zeroext, i8*, i64) +; CHECK: declare void @__dfsan_nonzero_label() +; CHECK: declare void @__dfsan_vararg_wrapper(i8*) +; CHECK: declare zeroext i32 @__dfsan_chain_origin(i32 zeroext) +; CHECK: declare void @__dfsan_mem_origin_transfer(i8*, i8*, i64) +; CHECK: declare void @__dfsan_maybe_store_origin(i16 zeroext, i8*, i64, i32 zeroext) + +define void @foo() { + ret void +} diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/memset.ll b/llvm/test/Instrumentation/DataFlowSanitizer/memset.ll --- a/llvm/test/Instrumentation/DataFlowSanitizer/memset.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/memset.ll @@ -6,7 +6,7 @@ define void @ms(i8* %p, i8 %v) { ; CHECK-LABEL: @"dfs$ms"(i8* %0, i8 %1, i16 %2, i16 %3) - ; CHECK: call void @__dfsan_set_label(i16 %3, i8* %0, i64 1) + ; CHECK: call void @__dfsan_set_label(i16 %3, i32 0, i8* %0, i64 1) call void @llvm.memset.p0i8.i64(i8* %p, i8 %v, i64 1, i1 1) ret void } diff --git 
a/llvm/test/Instrumentation/DataFlowSanitizer/origin_abilist.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_abilist.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_abilist.ll @@ -0,0 +1,315 @@ +; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -dfsan-abilist=%S/Inputs/abilist.txt -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @discard(i32 %a, i32 %b) { + ret i32 0 +} + +define i32 @call_discard(i32 %a, i32 %b) { + ; CHECK: @"dfs$call_discard" + ; CHECK: %r = call i32 @discard(i32 %a, i32 %b) + ; CHECK: store i32 0, i32* @__dfsan_retval_origin_tls, align 4 + ; CHECK: ret i32 %r + + %r = call i32 @discard(i32 %a, i32 %b) + ret i32 %r +} + +; CHECK: i32 @functional(i32 %a, i32 %b) +define i32 @functional(i32 %a, i32 %b) { + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @call_functional(i32 %a, i32 %b) { + ; CHECK: @"dfs$call_functional" + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[RO:%.*]] = select i1 {{.*}}, i32 [[BO]], i32 [[AO]] + ; CHECK: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4 + + %r = call i32 @functional(i32 %a, i32 %b) + ret i32 %r +} + +define i32 @uninstrumented(i32 %a, i32 %b) { + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @call_uninstrumented(i32 %a, i32 %b) { + ; CHECK: @"dfs$call_uninstrumented" + ; CHECK: %r = call i32 @uninstrumented(i32 %a, i32 %b) + ; CHECK: store i32 0, i32* @__dfsan_retval_origin_tls, align 4 + ; CHECK: ret i32 %r + + %r = call i32 @uninstrumented(i32 %a, i32 %b) + ret i32 %r +} + +define i32 
@g(i32 %a, i32 %b) { + %c = add i32 %a, %b + ret i32 %c +} + +@discardg = alias i32 (i32, i32), i32 (i32, i32)* @g + +define i32 @call_discardg(i32 %a, i32 %b) { + ; CHECK: @"dfs$call_discardg" + ; CHECK: %r = call i32 @discardg(i32 %a, i32 %b) + ; CHECK: store i32 0, i32* @__dfsan_retval_origin_tls, align 4 + ; CHECK: ret i32 %r + + %r = call i32 @discardg(i32 %a, i32 %b) + ret i32 %r +} + +define void @custom_without_ret(i32 %a, i32 %b) { + ret void +} + +define i32 @custom_with_ret(i32 %a, i32 %b) { + %c = add i32 %a, %b + ret i32 %c +} + +define void @custom_varg_without_ret(i32 %a, i32 %b, ...) { + ret void +} + +define i32 @custom_varg_with_ret(i32 %a, i32 %b, ...) { + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @custom_cb_with_ret(i32 (i32, i32)* %cb, i32 %a, i32 %b) { + %r = call i32 %cb(i32 %a, i32 %b) + ret i32 %r +} + +define i32 @cb_with_ret(i32 %a, i32 %b) { + %c = add i32 %a, %b + ret i32 %c +} + +define void @custom_cb_without_ret(void (i32, i32)* %cb, i32 %a, i32 %b) { + call void %cb(i32 %a, i32 %b) + ret void +} + +define void @cb_without_ret(i32 %a, i32 %b) { + ret void +} + +define i32 (i32, i32)* @ret_custom() { + ; CHECK: @"dfs$ret_custom" + ; CHECK: store i32 0, i32* @__dfsan_retval_origin_tls, align 4 + + ret i32 (i32, i32)* @custom_with_ret +} + +define void @call_custom_without_ret(i32 %a, i32 %b) { + ; CHECK: @"dfs$call_custom_without_ret" + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; CHECK: call void @__dfso_custom_without_ret(i32 %a, i32 %b, i16 zeroext [[AS]], i16 zeroext 
[[BS]], i32 zeroext [[AO]], i32 zeroext [[BO]]) + ; CHECK-NEXT: ret void + + call void @custom_without_ret(i32 %a, i32 %b) + ret void +} + +define i32 @call_custom_with_ret(i32 %a, i32 %b) { + ; CHECK: @"dfs$call_custom_with_ret" + ; CHECK: %originreturn = alloca i32, align 4 + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: %labelreturn = alloca i16, align 2 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; CHECK: {{.*}} = call i32 @__dfso_custom_with_ret(i32 %a, i32 %b, i16 zeroext [[AS]], i16 zeroext [[BS]], i16* %labelreturn, i32 zeroext [[AO]], i32 zeroext [[BO]], i32* %originreturn) + ; CHECK: [[RS:%.*]] = load i16, i16* %labelreturn, align 2 + ; CHECK: [[RO:%.*]] = load i32, i32* %originreturn, align 4 + ; CHECK: store i16 [[RS]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; CHECK: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4 + + %r = call i32 @custom_with_ret(i32 %a, i32 %b) + ret i32 %r +} + +define void @call_custom_varg_without_ret(i32 %a, i32 %b) { + ; CHECK: @"dfs$call_custom_varg_without_ret" + ; CHECK: %originva = alloca [1 x i32], align 4 + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: %labelva = alloca [1 x i16], align 2 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: 
[[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; CHECK: [[VS0:%.*]] = getelementptr inbounds [1 x i16], [1 x i16]* %labelva, i32 0, i32 0 + ; CHECK: store i16 [[AS]], i16* [[VS0]], align 2 + ; CHECK: [[VS0:%.*]] = getelementptr inbounds [1 x i16], [1 x i16]* %labelva, i32 0, i32 0 + ; CHECK: [[VO0:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* %originva, i32 0, i32 0 + ; CHECK: store i32 [[AO]], i32* [[VO0]], align 4 + ; CHECK: [[VO0:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* %originva, i32 0, i32 0 + ; CHECK: call void (i32, i32, i16, i16, i16*, i32, i32, i32*, ...) @__dfso_custom_varg_without_ret(i32 %a, i32 %b, i16 zeroext [[AS]], i16 zeroext [[BS]], i16* [[VS0]], i32 zeroext [[AO]], i32 zeroext [[BO]], i32* [[VO0]], i32 %a) + ; CHECK-NEXT: ret void + + call void (i32, i32, ...) @custom_varg_without_ret(i32 %a, i32 %b, i32 %a) + ret void +} + +define i32 @call_custom_varg_with_ret(i32 %a, i32 %b) { + ; CHECK: @"dfs$call_custom_varg_with_ret" + ; CHECK: %originreturn = alloca i32, align 4 + ; CHECK: %originva = alloca [1 x i32], align 4 + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: %labelreturn = alloca i16, align 2 + ; CHECK: %labelva = alloca [1 x i16], align 2 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; CHECK: [[VS0:%.*]] = getelementptr inbounds [1 x i16], [1 x i16]* %labelva, i32 0, i32 0 + ; CHECK: store i16 [[BS]], i16* [[VS0]], align 2 + ; CHECK: [[VS0:%.*]] = getelementptr inbounds [1 x i16], [1 x i16]* %labelva, i32 0, i32 0 + ; CHECK: [[VO0:%.*]] = getelementptr inbounds [1 
x i32], [1 x i32]* %originva, i32 0, i32 0 + ; CHECK: store i32 [[BO]], i32* [[VO0]], align 4 + ; CHECK: [[VO0:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* %originva, i32 0, i32 0 + ; CHECK: {{.*}} = call i32 (i32, i32, i16, i16, i16*, i16*, i32, i32, i32*, i32*, ...) @__dfso_custom_varg_with_ret(i32 %a, i32 %b, i16 zeroext [[AS]], i16 zeroext [[BS]], i16* [[VS0]], i16* %labelreturn, i32 zeroext [[AO]], i32 zeroext [[BO]], i32* [[VO0]], i32* %originreturn, i32 %b) + ; CHECK: [[RS:%.*]] = load i16, i16* %labelreturn, align 2 + ; CHECK: [[RO:%.*]] = load i32, i32* %originreturn, align 4 + ; CHECK: store i16 [[RS]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; CHECK: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4 + + %r = call i32 (i32, i32, ...) @custom_varg_with_ret(i32 %a, i32 %b, i32 %b) + ret i32 %r +} + +define i32 @call_custom_cb_with_ret(i32 %a, i32 %b) { + ; CHECK: @"dfs$call_custom_cb_with_ret" + ; CHECK: %originreturn = alloca i32, align 4 + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: %labelreturn = alloca i16, align 2 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; CHECK: {{.*}} = call i32 @__dfso_custom_cb_with_ret(i32 (i32 (i32, i32)*, i32, i32, i16, i16, i16*, i32, i32, i32*)* @"dfst0$custom_cb_with_ret", i8* bitcast (i32 (i32, i32)* @"dfs$cb_with_ret" to i8*), i32 %a, i32 %b, i16 zeroext 0, i16 zeroext [[AS]], i16 zeroext [[BS]], i16* %labelreturn, i32 zeroext 0, i32 zeroext [[AO]], i32 zeroext [[BO]], i32* %originreturn) + ; CHECK: [[RS:%.*]] = load i16, i16* %labelreturn, align 2 + ; 
CHECK: [[RO:%.*]] = load i32, i32* %originreturn, align 4 + ; CHECK: store i16 [[RS]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; CHECK: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4 + + %r = call i32 @custom_cb_with_ret(i32 (i32, i32)* @cb_with_ret, i32 %a, i32 %b) + ret i32 %r +} + +define void @call_custom_cb_without_ret(i32 %a, i32 %b) { + ; CHECK: @"dfs$call_custom_cb_without_ret" + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; CHECK: call void @__dfso_custom_cb_without_ret(void (void (i32, i32)*, i32, i32, i16, i16, i32, i32)* @"dfst0$custom_cb_without_ret", i8* bitcast (void (i32, i32)* @"dfs$cb_without_ret" to i8*), i32 %a, i32 %b, i16 zeroext 0, i16 zeroext [[AS]], i16 zeroext [[BS]], i32 zeroext 0, i32 zeroext [[AO]], i32 zeroext [[BO]]) + ; CHECK-NEXT: ret void + + call void @custom_cb_without_ret(void (i32, i32)* @cb_without_ret, i32 %a, i32 %b) + ret void +} + +; CHECK: define i32 @discardg(i32 %0, i32 %1) +; CHECK: [[R:%.*]] = call i32 @"dfs$g" +; CHECK-NEXT: %_dfsret = load i16, i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 +; CHECK-NEXT: %_dfsret_o = load i32, i32* @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret i32 [[R]] + +; CHECK: define linkonce_odr void @"dfso$custom_without_ret"(i32 %0, i32 %1) +; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* 
@__dfsan_arg_origin_tls, i64 0, i64 0), align 4 +; CHECK-NEXT: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 +; CHECK-NEXT: [[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 +; CHECK-NEXT: call void @__dfso_custom_without_ret(i32 %0, i32 %1, i16 zeroext [[AS]], i16 zeroext [[BS]], i32 zeroext [[AO]], i32 zeroext [[BO]]) +; CHECK-NEXT: ret void + +; CHECK: define linkonce_odr i32 @"dfso$custom_with_ret"(i32 %0, i32 %1) +; CHECK: %originreturn = alloca i32, align 4 +; CHECK-NEXT: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 +; CHECK-NEXT: %labelreturn = alloca i16, align 2 +; CHECK-NEXT: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 +; CHECK-NEXT: [[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 +; CHECK-NEXT: [[R:%.*]] = call i32 @__dfso_custom_with_ret(i32 %0, i32 %1, i16 zeroext [[AS]], i16 zeroext [[BS]], i16* %labelreturn, i32 zeroext [[AO]], i32 zeroext [[BO]], i32* %originreturn) +; CHECK-NEXT: [[RS:%.*]] = load i16, i16* %labelreturn, align 2 +; CHECK-NEXT: [[RO:%.*]] = load i32, i32* %originreturn, align 4 +; CHECK-NEXT: store i16 [[RS]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 +; CHECK-NEXT: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret i32 [[R]] + +; CHECK: define linkonce_odr void @"dfso$custom_varg_without_ret"(i32 %0, i32 %1, ...) +; CHECK: call void @__dfsan_vararg_wrapper(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @0, i32 0, i32 0)) +; CHECK-NEXT: unreachable + +; CHECK: define linkonce_odr i32 @"dfso$custom_varg_with_ret"(i32 %0, i32 %1, ...) 
+; CHECK: call void @__dfsan_vararg_wrapper(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @1, i32 0, i32 0)) +; CHECK-NEXT: unreachable + +; CHECK: define linkonce_odr i32 @"dfso$custom_cb_with_ret"(i32 (i32, i32)* %0, i32 %1, i32 %2) +; CHECK: %originreturn = alloca i32, align 4 +; CHECK-NEXT: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 +; CHECK-NEXT: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[CO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 +; CHECK-NEXT: %labelreturn = alloca i16, align 2 +; CHECK-NEXT: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 4) to i16*), align 2 +; CHECK-NEXT: [[AS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 +; CHECK-NEXT: [[CS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 +; CHECK-NEXT: [[C:%.*]] = bitcast i32 (i32, i32)* %0 to i8* +; CHECK-NEXT: [[R:%.*]] = call i32 @__dfso_custom_cb_with_ret(i32 (i32 (i32, i32)*, i32, i32, i16, i16, i16*, i32, i32, i32*)* @"dfst0$custom_cb_with_ret", i8* [[C]], i32 %1, i32 %2, i16 zeroext [[CS]], i16 zeroext [[AS]], i16 zeroext [[BS]], i16* %labelreturn, i32 zeroext [[CO]], i32 zeroext [[AO]], i32 zeroext [[BO]], i32* %originreturn) +; CHECK-NEXT: [[RS:%.*]] = load i16, i16* %labelreturn, align 2 +; CHECK-NEXT: [[RO:%.*]] = load i32, i32* %originreturn, align 4 +; CHECK-NEXT: store i16 [[RS]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 +; CHECK-NEXT: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: ret i32 [[R]] + +; CHECK: define linkonce_odr void @"dfso$custom_cb_without_ret"(void (i32, i32)* %0, i32 %1, i32 %2) +; CHECK: [[BO:%.*]] 
= load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 +; CHECK-NEXT: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: [[CO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 +; CHECK-NEXT: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 4) to i16*), align 2 +; CHECK-NEXT: [[AS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 +; CHECK-NEXT: [[CS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 +; CHECK-NEXT: [[C:%.*]] = bitcast void (i32, i32)* %0 to i8* +; CHECK-NEXT: call void @__dfso_custom_cb_without_ret(void (void (i32, i32)*, i32, i32, i16, i16, i32, i32)* @"dfst0$custom_cb_without_ret", i8* [[C]], i32 %1, i32 %2, i16 zeroext [[CS]], i16 zeroext [[AS]], i16 zeroext [[BS]], i32 zeroext [[CO]], i32 zeroext [[AO]], i32 zeroext [[BO]]) +; CHECK-NEXT: ret void + +; CHECK: declare void @__dfso_custom_without_ret(i32, i32, i16, i16, i32, i32) + +; CHECK: declare i32 @__dfso_custom_with_ret(i32, i32, i16, i16, i16*, i32, i32, i32*) + +; CHECK: declare i32 @__dfso_custom_cb_with_ret(i32 (i32 (i32, i32)*, i32, i32, i16, i16, i16*, i32, i32, i32*)*, i8*, i32, i32, i16, i16, i16, i16*, i32, i32, i32, i32*) + +; CHECK: define linkonce_odr i32 @"dfst0$custom_cb_with_ret"(i32 (i32, i32)* %0, i32 %1, i32 %2, i16 %3, i16 %4, i16* %5, i32 %6, i32 %7, i32* %8) +; CHECK: store i32 %6, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 +; CHECK-NEXT: store i16 %3, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 +; CHECK-NEXT: store i32 %7, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 
+; CHECK-NEXT: store i16 %4, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 +; CHECK-NEXT: %9 = call i32 %0(i32 %1, i32 %2) +; CHECK-NEXT: %_dfsret = load i16, i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 +; CHECK-NEXT: %_dfsret_o = load i32, i32* @__dfsan_retval_origin_tls, align 4 +; CHECK-NEXT: store i16 %_dfsret, i16* %5, align 2 +; CHECK-NEXT: store i32 %_dfsret_o, i32* %8, align 4 +; CHECK-NEXT: ret i32 %9 + +; CHECK: declare void @__dfso_custom_cb_without_ret(void (void (i32, i32)*, i32, i32, i16, i16, i32, i32)*, i8*, i32, i32, i16, i16, i16, i32, i32, i32) + +; CHECK: define linkonce_odr void @"dfst0$custom_cb_without_ret"(void (i32, i32)* %0, i32 %1, i32 %2, i16 %3, i16 %4, i32 %5, i32 %6) +; CHECK: store i32 %5, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 +; CHECK-NEXT: store i16 %3, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 +; CHECK-NEXT: store i32 %6, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 +; CHECK-NEXT: store i16 %4, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 +; CHECK-NEXT: call void %0(i32 %1, i32 %2) +; CHECK-NEXT: ret void + +; CHECK: declare void @__dfso_custom_varg_without_ret(i32, i32, i16, i16, i16*, i32, i32, i32*, ...) + +; CHECK: declare i32 @__dfso_custom_varg_with_ret(i32, i32, i16, i16, i16*, i16*, i32, i32, i32*, i32*, ...) 
\ No newline at end of file diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll @@ -0,0 +1,84 @@ +; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | FileCheck %s --check-prefix=CHECK +; +; %15 and %17 have the same key in the shadow cache. They should not reuse the same +; shadow because their blocks do not dominate each other. Origin tracking +; splits blocks. This test ensures the dominator tree (DT) is updated correctly, and cached shadows +; are not misused. +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @cached_shadows(double %0) { + ; CHECK: @"dfs$cached_shadows" + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; CHECK: [[L1:[0-9]+]]: + ; CHECK: {{.*}} = phi i16 + ; CHECK: {{.*}} = phi i32 + ; CHECK: {{.*}} = phi double [ 3.000000e+00 + ; CHECK: [[S_L1:%.*]] = phi i16 [ 0, %[[L0:[0-9]+]] ], [ [[S_L7:%.*]], %[[L7:[0-9]+]] ] + ; CHECK: [[O_L1:%.*]] = phi i32 [ 0, %[[L0]] ], [ [[O_L7:%.*]], %[[L7]] ] + ; CHECK: [[V_L1:%.*]] = phi double [ 4.000000e+00, %[[L0]] ], [ [[V_L7:%.*]], %[[L7]] ] + ; CHECK: br i1 {{%[0-9]+}}, label %[[L2:[0-9]+]], label %[[L4:[0-9]+]] + ; CHECK: [[L2]]: + ; CHECK: br i1 {{%[0-9]+}}, label %[[L3:[0-9]+]], label %[[L7]] + ; CHECK: [[L3]]: + ; CHECK: [[S_L3:%.*]] = or i16 + ; CHECK: [[AS_NE_L3:%.*]] = icmp ne i16 [[AS]], 0 + ; CHECK: [[O_L3:%.*]] = select i1 [[AS_NE_L3]], i32 %2, i32 [[O_L1]] + ; CHECK: [[V_L3:%.*]] = fsub double [[V_L1]], %0 + ; CHECK: br label %[[L7]] + 
; CHECK: [[L4]]: + ; CHECK: br i1 %_dfscmp, label %[[L5:[0-9]+]], label %[[L6:[0-9]+]] + ; CHECK: [[L5]]: + ; CHECK: br label %[[L6]] + ; CHECK: [[L6]]: + ; CHECK: [[S_L6:%.*]] = or i16 + ; CHECK: [[AS_NE_L6:%.*]] = icmp ne i16 [[AS]], 0 + ; CHECK: [[O_L6:%.*]] = select i1 [[AS_NE_L6]], i32 [[AO]], i32 [[O_L1]] + ; CHECK: [[V_L6:%.*]] = fadd double %24, %0 + ; CHECK: br label %[[L7]] + ; CHECK: [[L7]]: + ; CHECK: [[S_L7]] = phi i16 [ [[S_L3]], %[[L3]] ], [ [[S_L1]], %[[L2]] ], [ [[S_L6]], %[[L6]] ] + ; CHECK: [[O_L7]] = phi i32 [ [[O_L3]], %[[L3]] ], [ [[O_L1]], %[[L2]] ], [ [[O_L6]], %[[L6]] ] + ; CHECK: [[V_L7]] = phi double [ [[V_L3]], %[[L3]] ], [ [[V_L1]], %[[L2]] ], [ [[V_L6]], %[[L6]] ] + ; CHECK: br i1 {{%[0-9]+}}, label %[[L1]], label %[[L8:[0-9]+]] + ; CHECK: [[L8]]: + + %2 = alloca double, align 8 + %3 = alloca double, align 8 + %4 = bitcast double* %2 to i8* + store volatile double 1.000000e+00, double* %2, align 8 + %5 = bitcast double* %3 to i8* + store volatile double 2.000000e+00, double* %3, align 8 + br label %6 + +6: ; preds = %18, %1 + %7 = phi double [ 3.000000e+00, %1 ], [ %19, %18 ] + %8 = phi double [ 4.000000e+00, %1 ], [ %20, %18 ] + %9 = load volatile double, double* %3, align 8 + %10 = fcmp une double %9, 0.000000e+00 + %11 = load volatile double, double* %3, align 8 + br i1 %10, label %12, label %16 + +12: ; preds = %6 + %13 = fcmp une double %11, 0.000000e+00 + br i1 %13, label %14, label %18 + +14: ; preds = %12 + %15 = fsub double %8, %0 + br label %18 + +16: ; preds = %6 + store volatile double %11, double* %2, align 8 + %17 = fadd double %8, %0 + br label %18 + +18: ; preds = %16, %14, %12 + %19 = phi double [ %8, %14 ], [ %7, %12 ], [ %8, %16 ] + %20 = phi double [ %15, %14 ], [ %8, %12 ], [ %17, %16 ] + %21 = fcmp olt double %19, 9.900000e+01 + br i1 %21, label %6, label %22 + +22: ; preds = %18 + ret void +} \ No newline at end of file diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_call.ll 
b/llvm/test/Instrumentation/DataFlowSanitizer/origin_call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_call.ll @@ -0,0 +1,80 @@ +; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | FileCheck %s --check-prefix=CHECK +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i1 @arg_overflow( +i1 %a0, i1 %a1, i1 %a2, i1 %a3, i1 %a4, i1 %a5, i1 %a6, i1 %a7, i1 %a8, i1 %a9, +i1 %a10, i1 %a11, i1 %a12, i1 %a13, i1 %a14, i1 %a15, i1 %a16, i1 %a17, i1 %a18, i1 %a19, +i1 %a20, i1 %a21, i1 %a22, i1 %a23, i1 %a24, i1 %a25, i1 %a26, i1 %a27, i1 %a28, i1 %a29, +i1 %a30, i1 %a31, i1 %a32, i1 %a33, i1 %a34, i1 %a35, i1 %a36, i1 %a37, i1 %a38, i1 %a39, +i1 %a40, i1 %a41, i1 %a42, i1 %a43, i1 %a44, i1 %a45, i1 %a46, i1 %a47, i1 %a48, i1 %a49, +i1 %a50, i1 %a51, i1 %a52, i1 %a53, i1 %a54, i1 %a55, i1 %a56, i1 %a57, i1 %a58, i1 %a59, +i1 %a60, i1 %a61, i1 %a62, i1 %a63, i1 %a64, i1 %a65, i1 %a66, i1 %a67, i1 %a68, i1 %a69, +i1 %a70, i1 %a71, i1 %a72, i1 %a73, i1 %a74, i1 %a75, i1 %a76, i1 %a77, i1 %a78, i1 %a79, +i1 %a80, i1 %a81, i1 %a82, i1 %a83, i1 %a84, i1 %a85, i1 %a86, i1 %a87, i1 %a88, i1 %a89, +i1 %a90, i1 %a91, i1 %a92, i1 %a93, i1 %a94, i1 %a95, i1 %a96, i1 %a97, i1 %a98, i1 %a99, +i1 %a100, i1 %a101, i1 %a102, i1 %a103, i1 %a104, i1 %a105, i1 %a106, i1 %a107, i1 %a108, i1 %a109, +i1 %a110, i1 %a111, i1 %a112, i1 %a113, i1 %a114, i1 %a115, i1 %a116, i1 %a117, i1 %a118, i1 %a119, +i1 %a120, i1 %a121, i1 %a122, i1 %a123, i1 %a124, i1 %a125, i1 %a126, i1 %a127, i1 %a128, i1 %a129, +i1 %a130, i1 %a131, i1 %a132, i1 %a133, i1 %a134, i1 %a135, i1 %a136, i1 %a137, i1 %a138, i1 %a139, +i1 %a140, i1 %a141, i1 %a142, i1 %a143, i1 %a144, i1 %a145, i1 %a146, i1 %a147, i1 %a148, i1 %a149, +i1 %a150, i1 %a151, i1 %a152, i1 %a153, i1 %a154, i1 %a155, 
i1 %a156, i1 %a157, i1 %a158, i1 %a159, +i1 %a160, i1 %a161, i1 %a162, i1 %a163, i1 %a164, i1 %a165, i1 %a166, i1 %a167, i1 %a168, i1 %a169, +i1 %a170, i1 %a171, i1 %a172, i1 %a173, i1 %a174, i1 %a175, i1 %a176, i1 %a177, i1 %a178, i1 %a179, +i1 %a180, i1 %a181, i1 %a182, i1 %a183, i1 %a184, i1 %a185, i1 %a186, i1 %a187, i1 %a188, i1 %a189, +i1 %a190, i1 %a191, i1 %a192, i1 %a193, i1 %a194, i1 %a195, i1 %a196, i1 %a197, i1 %a198, i1 %a199, +i1 %a200 +) { + ; CHECK: @"dfs$arg_overflow" + ; CHECK: [[A199:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 199), align 4 + ; CHECK: store i32 [[A199]], i32* @__dfsan_retval_origin_tls, align 4 + + %r = add i1 %a199, %a200 + ret i1 %r +} + +define i1 @param_overflow(i1 %a) { + ; CHECK: @"dfs$param_overflow" + ; CHECK: store i32 %1, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 199), align 4 + ; CHECK-NEXT: store i16 %2, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 398) to i16*), align 2 + ; CHECK-NEXT: store i16 %2, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 400) to i16*), align 2 + ; CHECK-NEXT: %r = call i1 @"dfs$arg_overflow" + ; CHECK: %_dfsret_o = load i32, i32* @__dfsan_retval_origin_tls, align 4 + ; CHECK: store i32 %_dfsret_o, i32* @__dfsan_retval_origin_tls, align 4 + + %r = call i1 @arg_overflow( +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, 
i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, i1 %a, +i1 %a +) + ret i1 %r +} + +declare void @foo(i1 %a) + +define void @param_with_zero_shadow() { + ; CHECK: @"dfs$param_with_zero_shadow" + ; CHECK-NEXT: store i16 0, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; CHECK-NEXT: call void @"dfs$foo"(i1 true) + + call void @foo(i1 1) + ret void +} diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_ldst.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_ldst.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_ldst.ll @@ -0,0 +1,414 @@ +; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | FileCheck %s --check-prefix=CHECK +; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -dfsan-combine-pointer-labels-on-load=false -S | FileCheck %s --check-prefix=NO_COMBINE_LOAD_PTR +; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -dfsan-combine-pointer-labels-on-store=true -S | FileCheck %s --check-prefix=COMBINE_STORE_PTR +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define {} @load0({}* %p) { + ; CHECK: @"dfs$load0" + ; CHECK-NEXT: %a = load {}, {}* %p, align 1 + ; CHECK-NEXT: store {} zeroinitializer, {}* bitcast ([100 x i64]* @__dfsan_retval_tls to {}*), align 2 + ; CHECK-NEXT: store i32 0, i32* @__dfsan_retval_origin_tls, align 4 + ; CHECK-NEXT: ret {} %a + + %a = load {}, {}* %p + ret {} %a +} + +define i16 @load_non_escaped_alloca() { + ; CHECK: @"dfs$load_non_escaped_alloca" + ; CHECK: [[S_ALLOCA:%.*]] = alloca i16, align 2 + ; CHECK: [[O_ALLOCA:%.*]] = alloca i32, align 4 + ; CHECK: [[SHADOW:%.*]] = load i16, i16* [[S_ALLOCA]], align 2 + ; CHECK: [[ORIGIN:%.*]] = load i32, i32* [[O_ALLOCA]], align 4 + ; CHECK: %a = load i16, i16* %p, align 2 + ; CHECK: store i16 [[SHADOW]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; CHECK: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4 + + %p = alloca i16 + %a = load i16, i16* %p + ret i16 %a +} + +define i16* @load_escaped_alloca() { + ; CHECK: @"dfs$load_escaped_alloca" + ; CHECK: [[INTP:%.*]] = ptrtoint i16* %p to i64 + ; CHECK: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913 + ; CHECK: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2 + ; CHECK: [[SHADOW_PTR0:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i16* + ; CHECK: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832 + ; CHECK: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4 + ; CHECK: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32* + ; CHECK: {{%.*}} = load i32, i32* [[ORIGIN_PTR]], align 4 + ; CHECK: [[SHADOW_PTR1:%.*]] = getelementptr i16, i16* [[SHADOW_PTR0]], i64 1 + ; CHECK: [[SHADOW0:%.*]] = load i16, i16* [[SHADOW_PTR0]], align 2 + ; CHECK: [[SHADOW1:%.*]] = load i16, i16* [[SHADOW_PTR1]], align 2 + ; CHECK: {{%.*}} = or i16 [[SHADOW0]], [[SHADOW1]] + ; CHECK: %a = load i16, 
i16* %p, align 2 + ; CHECK: store i16 0, i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; CHECK: store i32 0, i32* @__dfsan_retval_origin_tls, align 4 + + %p = alloca i16 + %a = load i16, i16* %p + ret i16* %p +} + +@X = constant i1 1 +define i1 @load_global() { + ; CHECK: @"dfs$load_global" + ; CHECK: %a = load i1, i1* @X, align 1 + ; CHECK: store i16 0, i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; CHECK: store i32 0, i32* @__dfsan_retval_origin_tls, align 4 + + %a = load i1, i1* @X + ret i1 %a +} + +define i1 @load1(i1* %p) { + ; CHECK: @"dfs$load1" + ; CHECK: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[PS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; CHECK: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64 + ; CHECK: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913 + ; CHECK: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2 + ; CHECK: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i16* + ; CHECK: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832 + ; CHECK: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4 + ; CHECK: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32* + ; CHECK: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4 + ; CHECK: [[AS:%.*]] = load i16, i16* [[SHADOW_PTR]], align 2 + ; CHECK: [[RS:%.*]] = or i16 [[AS]], [[PS]] + ; CHECK: [[PS_NZ:%.*]] = icmp ne i16 [[PS]], 0 + ; CHECK: [[RO:%.*]] = select i1 [[PS_NZ]], i32 [[PO]], i32 [[AO]] + ; CHECK: %a = load i1, i1* %p, align 1 + ; CHECK: store i16 [[RS]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; CHECK: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4 + + %a = load i1, i1* %p + ret i1 %a +} + +define i16 @load16(i1 %i, i16* %p) { + ; CHECK: @"dfs$load16" + ; CHECK: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 
1), align 4 + ; CHECK: [[PS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64 + ; CHECK: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913 + ; CHECK: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2 + ; CHECK: [[SHADOW_PTR0:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i16* + ; CHECK: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832 + ; CHECK: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4 + ; CHECK: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32* + ; CHECK: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4 + ; CHECK: [[SHADOW_PTR1:%.*]] = getelementptr i16, i16* [[SHADOW_PTR0]], i64 1 + ; CHECK: [[SHADOW0:%.*]] = load i16, i16* [[SHADOW_PTR0]], align 2 + ; CHECK: [[SHADOW1:%.*]] = load i16, i16* [[SHADOW_PTR1]], align 2 + ; CHECK: [[AS:%.*]] = or i16 [[SHADOW0]], [[SHADOW1]] + ; CHECK: [[RS:%.*]] = or i16 [[AS]], [[PS]] + ; CHECK: [[PS_NZ:%.*]] = icmp ne i16 [[PS]], 0 + ; CHECK: [[RO:%.*]] = select i1 [[PS_NZ]], i32 [[PO]], i32 [[AO]] + ; CHECK: %a = load i16, i16* %p, align 2 + ; CHECK: store i16 [[RS]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; CHECK: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4 + + %a = load i16, i16* %p + ret i16 %a +} + +define i32 @load32(i32* %p) { + ; CHECK: @"dfs$load32" + + ; NO_COMBINE_LOAD_PTR: @"dfs$load32" + ; NO_COMBINE_LOAD_PTR: [[INTP:%.*]] = ptrtoint i32* %p to i64 + ; NO_COMBINE_LOAD_PTR: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i16* + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832 + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32* + ; NO_COMBINE_LOAD_PTR: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4 + ; NO_COMBINE_LOAD_PTR: 
[[SHADOW_PTR64:%.*]] = bitcast i16* [[SHADOW_PTR]] to i64* + ; NO_COMBINE_LOAD_PTR: [[SHADOW64:%.*]] = load i64, i64* [[SHADOW_PTR64]], align 2 + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32 + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]] + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16 + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]] + ; NO_COMBINE_LOAD_PTR: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i16 + ; NO_COMBINE_LOAD_PTR: %a = load i32, i32* %p, align 4 + ; NO_COMBINE_LOAD_PTR: store i16 [[SHADOW]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; NO_COMBINE_LOAD_PTR: store i32 [[AO]], i32* @__dfsan_retval_origin_tls, align 4 + + %a = load i32, i32* %p + ret i32 %a +} + +define i64 @load64(i64* %p) { + ; CHECK: @"dfs$load64" + + ; NO_COMBINE_LOAD_PTR: @"dfs$load64" + ; NO_COMBINE_LOAD_PTR: [[INTP:%.*]] = ptrtoint i64* %p to i64 + ; NO_COMBINE_LOAD_PTR: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i16* + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832 + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_0:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32* + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_0:%.*]] = load i32, i32* [[ORIGIN_PTR_0]], align 8 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_0:%.*]] = bitcast i16* [[SHADOW_PTR]] to i64* + ; NO_COMBINE_LOAD_PTR: [[SHADOW_0:%.*]] = load i64, i64* [[SHADOW_PTR_0]], align 2 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_1:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_0]], i64 1 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_1:%.*]] = load i64, i64* [[SHADOW_PTR_1]], align 2 + ; NO_COMBINE_LOAD_PTR: [[SHADOW64:%.*]] = or i64 [[SHADOW_0]], [[SHADOW_1]] + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_1:%.*]] = 
getelementptr i32, i32* [[ORIGIN_PTR_0]], i64 1 + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_1:%.*]] = load i32, i32* [[ORIGIN_PTR_1]], align 8 + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32 + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]] + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16 + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]] + ; NO_COMBINE_LOAD_PTR: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i16 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_1_NZ:%.*]] = icmp ne i64 [[SHADOW_1]], 0 + ; NO_COMBINE_LOAD_PTR: [[ORIGIN:%.*]] = select i1 [[SHADOW_1_NZ]], i32 [[ORIGIN_1]], i32 [[ORIGIN_0]] + ; NO_COMBINE_LOAD_PTR: %a = load i64, i64* %p, align 8 + ; NO_COMBINE_LOAD_PTR: store i16 [[SHADOW]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; NO_COMBINE_LOAD_PTR: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4 + + %a = load i64, i64* %p + ret i64 %a +} + +define i64 @load64_align2(i64* %p) { + ; CHECK: @"dfs$load64_align2" + + ; NO_COMBINE_LOAD_PTR: @"dfs$load64_align2" + ; NO_COMBINE_LOAD_PTR-NEXT: [[INTP:%.*]] = bitcast i64* %p to i8* + ; NO_COMBINE_LOAD_PTR-NEXT: [[LABLE_ORIGIN:%.*]] = call zeroext i64 @__dfsan_load_label_and_origin(i8* [[INTP]], i64 8) + ; NO_COMBINE_LOAD_PTR-NEXT: [[LABLE_ORIGIN_H32:%.*]] = lshr i64 [[LABLE_ORIGIN]], 32 + ; NO_COMBINE_LOAD_PTR-NEXT: [[LABLE:%.*]] = trunc i64 [[LABLE_ORIGIN_H32]] to i16 + ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN:%.*]] = trunc i64 [[LABLE_ORIGIN]] to i32 + ; NO_COMBINE_LOAD_PTR-NEXT: %a = load i64, i64* %p, align 2 + ; NO_COMBINE_LOAD_PTR-NEXT: store i16 [[LABLE]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; NO_COMBINE_LOAD_PTR-NEXT: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4 + + %a = load i64, i64* %p, align 2 + ret i64 %a +} + +define i92 @load92(i92* %p) { + ; CHECK: @"dfs$load92" + + ; 
NO_COMBINE_LOAD_PTR: @"dfs$load92" + ; NO_COMBINE_LOAD_PTR: [[INTP:%.*]] = ptrtoint i92* %p to i64 + ; NO_COMBINE_LOAD_PTR: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i16* + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832 + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_0:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32* + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_0:%.*]] = load i32, i32* [[ORIGIN_PTR_0]], align 8 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_0:%.*]] = bitcast i16* [[SHADOW_PTR]] to i64* + ; NO_COMBINE_LOAD_PTR: [[SHADOW_0:%.*]] = load i64, i64* [[SHADOW_PTR_0]], align 2 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_1:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_0]], i64 1 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_1:%.*]] = load i64, i64* [[SHADOW_PTR_1]], align 2 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_01:%.*]] = or i64 [[SHADOW_0]], [[SHADOW_1]] + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_1:%.*]] = getelementptr i32, i32* [[ORIGIN_PTR_0]], i64 1 + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_1:%.*]] = load i32, i32* [[ORIGIN_PTR_1]], align 8 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_2:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_1]], i64 1 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_2:%.*]] = load i64, i64* [[SHADOW_PTR_2]], align 2 + ; NO_COMBINE_LOAD_PTR: [[SHADOW64:%.*]] = or i64 [[SHADOW_01]], [[SHADOW_2]] + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_2:%.*]] = getelementptr i32, i32* [[ORIGIN_PTR_1]], i64 1 + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_2:%.*]] = load i32, i32* [[ORIGIN_PTR_2]], align 8 + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32 + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]] + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16 + ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]] + ; 
NO_COMBINE_LOAD_PTR: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i16 + ; NO_COMBINE_LOAD_PTR: [[SHADOW_1_NZ:%.*]] = icmp ne i64 [[SHADOW_1]], 0 + ; NO_COMBINE_LOAD_PTR: [[ORIGIN_10:%.*]] = select i1 [[SHADOW_1_NZ]], i32 [[ORIGIN_1]], i32 [[ORIGIN_0]] + ; NO_COMBINE_LOAD_PTR: [[SHADOW_2_NZ:%.*]] = icmp ne i64 [[SHADOW_2]], 0 + ; NO_COMBINE_LOAD_PTR: [[ORIGIN:%.*]] = select i1 [[SHADOW_2_NZ]], i32 [[ORIGIN_2]], i32 [[ORIGIN_10]] + ; NO_COMBINE_LOAD_PTR: %a = load i92, i92* %p, align 8 + ; NO_COMBINE_LOAD_PTR: store i16 [[SHADOW]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; NO_COMBINE_LOAD_PTR: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4 + + %a = load i92, i92* %p + ret i92 %a +} + +define i17 @load17(i17* %p) { + ; CHECK: @"dfs$load17" + + ; NO_COMBINE_LOAD_PTR: @"dfs$load17" + ; NO_COMBINE_LOAD_PTR-NEXT: [[INTP:%.*]] = bitcast i17* %p to i8* + ; NO_COMBINE_LOAD_PTR-NEXT: [[LABLE_ORIGIN:%.*]] = call zeroext i64 @__dfsan_load_label_and_origin(i8* [[INTP]], i64 3) + ; NO_COMBINE_LOAD_PTR-NEXT: [[LABLE_ORIGIN_H32:%.*]] = lshr i64 [[LABLE_ORIGIN]], 32 + ; NO_COMBINE_LOAD_PTR-NEXT: [[LABLE:%.*]] = trunc i64 [[LABLE_ORIGIN_H32]] to i16 + ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN:%.*]] = trunc i64 [[LABLE_ORIGIN]] to i32 + ; NO_COMBINE_LOAD_PTR-NEXT: %a = load i17, i17* %p, align 4 + ; NO_COMBINE_LOAD_PTR-NEXT: store i16 [[LABLE]], i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + ; NO_COMBINE_LOAD_PTR-NEXT: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4 + + %a = load i17, i17* %p, align 4 + ret i17 %a +} + +define void @store_zero_to_non_escaped_alloca() { + ; CHECK: @"dfs$store_zero_to_non_escaped_alloca" + ; CHECK-NEXT: [[A:%.*]] = alloca i16, align 2 + ; CHECK-NEXT: %_dfsa = alloca i32, align 4 + ; CHECK-NEXT: %p = alloca i16, align 2 + ; CHECK-NEXT: store i16 0, i16* [[A]], align 2 + ; CHECK-NEXT: store i16 1, i16* %p, align 2 + ; CHECK-NEXT: ret void + + %p = alloca i16 + store i16 1, 
i16* %p + ret void +} + +define void @store_nonzero_to_non_escaped_alloca(i16 %a) { + ; CHECK: @"dfs$store_nonzero_to_non_escaped_alloca" + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: %_dfsa = alloca i32, align 4 + ; CHECK: store i32 [[AO]], i32* %_dfsa, align 4 + + %p = alloca i16 + store i16 %a, i16* %p + ret void +} + +define i16* @store_zero_to_escaped_alloca() { + ; CHECK: @"dfs$store_zero_to_escaped_alloca" + ; CHECK: [[SA:%.*]] = bitcast i16* {{.*}} to i32* + ; CHECK-NEXT: store i32 0, i32* [[SA]], align 2 + ; CHECK-NEXT: store i16 1, i16* %p, align 2 + ; CHECK-NEXT: store i16 0, i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + + ; COMBINE_STORE_PTR: @"dfs$store_zero_to_escaped_alloca" + ; COMBINE_STORE_PTR: [[SA:%.*]] = bitcast i16* {{.*}} to i32* + ; COMBINE_STORE_PTR-NEXT: store i32 0, i32* [[SA]], align 2 + ; COMBINE_STORE_PTR-NEXT: store i16 1, i16* %p, align 2 + ; COMBINE_STORE_PTR-NEXT: store i16 0, i16* bitcast ([100 x i64]* @__dfsan_retval_tls to i16*), align 2 + + %p = alloca i16 + store i16 1, i16* %p + ret i16* %p +} + +define i16* @store_nonzero_to_escaped_alloca(i16 %a) { + ; CHECK: @"dfs$store_nonzero_to_escaped_alloca" + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; CHECK: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64 + ; CHECK: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913 + ; CHECK: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832 + ; CHECK: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4 + ; CHECK: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32* + ; CHECK: %_dfscmp = icmp ne i16 [[AS]], 0 + ; CHECK: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]], + ; CHECK: [[L1]]: + ; CHECK: [[NO:%.*]] = call i32 
@__dfsan_chain_origin(i32 [[AO]]) + ; CHECK: store i32 [[NO]], i32* [[ORIGIN_PTR]], align 4 + ; CHECK: br label %[[L2]] + ; CHECK: [[L2]]: + ; CHECK: store i16 %a, i16* %p, align 2 + + ; COMBINE_STORE_PTR: @"dfs$store_nonzero_to_escaped_alloca" + ; COMBINE_STORE_PTR: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; COMBINE_STORE_PTR: [[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; COMBINE_STORE_PTR: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64 + ; COMBINE_STORE_PTR: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913 + ; COMBINE_STORE_PTR: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832 + ; COMBINE_STORE_PTR: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4 + ; COMBINE_STORE_PTR: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32* + ; COMBINE_STORE_PTR: %_dfscmp = icmp ne i16 [[AS]], 0 + ; COMBINE_STORE_PTR: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]], + ; COMBINE_STORE_PTR: [[L1]]: + ; COMBINE_STORE_PTR: [[NO:%.*]] = call i32 @__dfsan_chain_origin(i32 [[AO]]) + ; COMBINE_STORE_PTR: store i32 [[NO]], i32* [[ORIGIN_PTR]], align 4 + ; COMBINE_STORE_PTR: br label %[[L2]] + ; COMBINE_STORE_PTR: [[L2]]: + ; COMBINE_STORE_PTR: store i16 %a, i16* %p, align 2 + + %p = alloca i16 + store i16 %a, i16* %p + ret i16* %p +} + +define void @store64_align8(i64* %p, i64 %a) { + ; CHECK: @"dfs$store64_align8" + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: %_dfscmp = icmp ne i16 [[AS]], 0 + ; CHECK: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]], + ; CHECK: [[L1]]: + ; CHECK: [[NO:%.*]] = call i32 @__dfsan_chain_origin(i32 [[AO]]) + ; CHECK: [[NO_ZEXT:%.*]] = zext i32 [[NO]] to i64 + ; CHECK: 
[[NO_SHL:%.*]] = shl i64 [[NO_ZEXT]], 32 + ; CHECK: [[NO2:%.*]] = or i64 [[NO_ZEXT]], [[NO_SHL]] + ; CHECK: [[O_PTR:%.*]] = bitcast i32* {{.*}} to i64* + ; CHECK: store i64 [[NO2]], i64* [[O_PTR]], align 8 + ; CHECK: br label %[[L2]] + ; CHECK: [[L2]]: + ; CHECK: store i64 %a, i64* %p, align 8 + + ; COMBINE_STORE_PTR: @"dfs$store64_align8" + ; COMBINE_STORE_PTR: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; COMBINE_STORE_PTR: [[PS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; COMBINE_STORE_PTR: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; COMBINE_STORE_PTR: [[AS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; COMBINE_STORE_PTR: [[MS:%.*]] = or i16 [[AS]], [[PS]] + ; COMBINE_STORE_PTR: [[NE:%.*]] = icmp ne i16 [[PS]], 0 + ; COMBINE_STORE_PTR: [[MO:%.*]] = select i1 [[NE]], i32 [[PO]], i32 [[AO]] + ; COMBINE_STORE_PTR: %_dfscmp = icmp ne i16 [[MS]], 0 + ; COMBINE_STORE_PTR: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]], + ; COMBINE_STORE_PTR: [[L1]]: + ; COMBINE_STORE_PTR: [[NO:%.*]] = call i32 @__dfsan_chain_origin(i32 [[MO]]) + ; COMBINE_STORE_PTR: [[NO_ZEXT:%.*]] = zext i32 [[NO]] to i64 + ; COMBINE_STORE_PTR: [[NO_SHL:%.*]] = shl i64 [[NO_ZEXT]], 32 + ; COMBINE_STORE_PTR: [[NO2:%.*]] = or i64 [[NO_ZEXT]], [[NO_SHL]] + ; COMBINE_STORE_PTR: [[O_PTR:%.*]] = bitcast i32* {{.*}} to i64* + ; COMBINE_STORE_PTR: store i64 [[NO2]], i64* [[O_PTR]], align 8 + ; COMBINE_STORE_PTR: br label %[[L2]] + ; COMBINE_STORE_PTR: [[L2]]: + ; COMBINE_STORE_PTR: store i64 %a, i64* %p, align 8 + + store i64 %a, i64* %p + ret void +} + +define void @store64_align2(i64* %p, i64 %a) { + ; CHECK: @"dfs$store64_align2" + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* 
@__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: %_dfscmp = icmp ne i16 [[AS]], 0 + ; CHECK: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]], + ; CHECK: [[L1]]: + ; CHECK: [[NO:%.*]] = call i32 @__dfsan_chain_origin(i32 [[AO]]) + ; CHECK: store i32 [[NO]], i32* [[O_PTR0:%.*]], align 4 + ; CHECK: [[O_PTR1:%.*]] = getelementptr i32, i32* [[O_PTR0]], i32 1 + ; CHECK: store i32 [[NO]], i32* [[O_PTR1]], align 4 + ; CHECK: [[L2]]: + ; CHECK: store i64 %a, i64* %p, align 2 + + store i64 %a, i64* %p, align 2 + ret void +} + +define void @store96_align8(i96* %p, i96 %a) { + ; CHECK: @"dfs$store96_align8" + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: %_dfscmp = icmp ne i16 [[AS]], 0 + ; CHECK: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]], + ; CHECK: [[L1]]: + ; CHECK: [[NO:%.*]] = call i32 @__dfsan_chain_origin(i32 [[AO]]) + ; CHECK: [[NO_ZEXT:%.*]] = zext i32 [[NO]] to i64 + ; CHECK: [[NO_SHL:%.*]] = shl i64 [[NO_ZEXT]], 32 + ; CHECK: [[NO2:%.*]] = or i64 [[NO_ZEXT]], [[NO_SHL]] + ; CHECK: [[O_PTR64:%.*]] = bitcast i32* [[O_PTR0:%.*]] to i64* + ; CHECK: store i64 [[NO2]], i64* [[O_PTR64]], align 8 + ; CHECK: [[O_PTR1:%.*]] = getelementptr i32, i32* [[O_PTR0]], i32 2 + ; CHECK: store i32 [[NO]], i32* [[O_PTR1]], align 8 + ; CHECK: [[L2]]: + ; CHECK: store i96 %a, i96* %p, align 8 + + store i96 %a, i96* %p, align 8 + ret void +} diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_mem_intrinsic.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_mem_intrinsic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_mem_intrinsic.ll @@ 
-0,0 +1,38 @@ +; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | FileCheck %s --check-prefix=CHECK +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) + +define void @memcpy(i8* %d, i8* %s, i32 %l) { + ; CHECK: @"dfs$memcpy" + ; CHECK: [[L64:%.*]] = zext i32 %l to i64 + ; CHECK: call void @__dfsan_mem_origin_transfer(i8* %d, i8* %s, i64 [[L64]]) + ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 {{.*}}, i8* align 2 {{.*}}, i32 {{.*}}, i1 false) + ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 %l, i1 false) + + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 %l, i1 0) + ret void +} + +define void @memmove(i8* %d, i8* %s, i32 %l) { + ; CHECK: @"dfs$memmove" + ; CHECK: [[L64:%.*]] = zext i32 %l to i64 + ; CHECK: call void @__dfsan_mem_origin_transfer(i8* %d, i8* %s, i64 [[L64]]) + ; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 2 {{.*}}, i8* align 2 {{.*}}, i32 {{.*}}, i1 false) + ; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* %d, i8* %s, i32 %l, i1 false) + + call void @llvm.memmove.p0i8.p0i8.i32(i8* %d, i8* %s, i32 %l, i1 0) + ret void +} + +define void @memset(i8* %p, i8 %v) { + ; CHECK: @"dfs$memset" + ; CHECK: [[O:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[S:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: call void @__dfsan_set_label(i16 [[S]], i32 [[O]], i8* %p, i64 1) + call void @llvm.memset.p0i8.i64(i8* %p, i8 %v, i64 1, i1 1) + ret void +} \ No newline at end of file 
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_other_ops.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_other_ops.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_other_ops.ll @@ -0,0 +1,173 @@ +; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | FileCheck %s --check-prefix=CHECK +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define float @unop(float %f) { + ; CHECK: @"dfs$unop" + ; CHECK: [[FO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: store i32 [[FO]], i32* @__dfsan_retval_origin_tls, align 4 + + %r = fneg float %f + ret float %r +} + +define i1 @binop(i1 %a, i1 %b) { + ; CHECK: @"dfs$binop" + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[NE:%.*]] = icmp ne i16 [[BS]], 0 + ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]] + ; CHECK: store i32 [[MO]], i32* @__dfsan_retval_origin_tls, align 4 + + %r = add i1 %a, %b + ret i1 %r +} + +define i8 @castop(i32* %p) { + ; CHECK: @"dfs$castop" + ; CHECK: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: store i32 [[PO]], i32* @__dfsan_retval_origin_tls, align 4 + + %r = ptrtoint i32* %p to i8 + ret i8 %r +} + +define i1 @cmpop(i1 %a, i1 %b) { + ; CHECK: @"dfs$cmpop" + ; CHECK: [[BO:%.*]] = load i32, i32* 
getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[NE:%.*]] = icmp ne i16 [[BS]], 0 + ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]] + ; CHECK: store i32 [[MO]], i32* @__dfsan_retval_origin_tls, align 4 + + %r = icmp eq i1 %a, %b + ret i1 %r +} + +define i32* @gepop([10 x [20 x i32]]* %p, i32 %a, i32 %b, i32 %c) { + ; CHECK: @"dfs$gepop" + ; CHECK: [[CO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 3), align 4 + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[CS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 6) to i16*), align 2 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 4) to i16*), align 2 + ; CHECK: [[AS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[AS_NE:%.*]] = icmp ne i16 [[AS]], 0 + ; CHECK: [[APO:%.*]] = select i1 [[AS_NE]], i32 [[AO]], i32 [[PO]] + ; CHECK: [[BS_NE:%.*]] = icmp ne i16 [[BS]], 0 + ; CHECK: [[ABPO:%.*]] = select i1 [[BS_NE]], i32 [[BO]], i32 [[APO]] + ; CHECK: [[CS_NE:%.*]] = icmp ne i16 [[CS]], 0 + ; CHECK: [[ABCPO:%.*]] = select i1 [[CS_NE]], i32 [[CO]], i32 
[[ABPO]] + ; CHECK: store i32 [[ABCPO]], i32* @__dfsan_retval_origin_tls, align 4 + + %e = getelementptr [10 x [20 x i32]], [10 x [20 x i32]]* %p, i32 %a, i32 %b, i32 %c + ret i32* %e +} + +define i32 @eeop(<4 x i32> %a, i32 %b) { + ; CHECK: @"dfs$eeop" + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[NE:%.*]] = icmp ne i16 [[BS]], 0 + ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]] + ; CHECK: store i32 [[MO]], i32* @__dfsan_retval_origin_tls, align 4 + + %e = extractelement <4 x i32> %a, i32 %b + ret i32 %e +} + +define <4 x i32> @ieop(<4 x i32> %p, i32 %a, i32 %b) { + ; CHECK: @"dfs$ieop" + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 4) to i16*), align 2 + ; CHECK: [[AS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[AS_NE:%.*]] = icmp ne i16 [[AS]], 0 + ; CHECK: [[APO:%.*]] = select i1 [[AS_NE]], i32 [[AO]], i32 [[PO]] + ; CHECK: [[BS_NE:%.*]] = icmp ne i16 [[BS]], 0 + ; CHECK: [[ABPO:%.*]] = select i1 [[BS_NE]], i32 [[BO]], i32 [[APO]] + ; CHECK: store i32 [[ABPO]], i32* @__dfsan_retval_origin_tls, align 4 + + %e = 
insertelement <4 x i32> %p, i32 %a, i32 %b + ret <4 x i32> %e +} + +define <4 x i32> @svop(<4 x i32> %a, <4 x i32> %b) { + ; CHECK: @"dfs$svop" + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[NE:%.*]] = icmp ne i16 [[BS]], 0 + ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]] + ; CHECK: store i32 [[MO]], i32* @__dfsan_retval_origin_tls, align 4 + + %e = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %e +} + +define i32 @evop({i32, float} %a) { + ; CHECK: @"dfs$evop" + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: store i32 [[AO]], i32* @__dfsan_retval_origin_tls, align 4 + + %e = extractvalue {i32, float} %a, 0 + ret i32 %e +} + +define {i32, {float, float}} @ivop({i32, {float, float}} %a, {float, float} %b) { + ; CHECK: @"dfs$ivop" + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[BS:%.*]] = load { i16, i16 }, { i16, i16 }* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 6) to { i16, i16 }*), align 2 + ; CHECK: [[BS0:%.*]] = extractvalue { i16, i16 } [[BS]], 0 + ; CHECK: [[BS1:%.*]] = extractvalue { i16, i16 } [[BS]], 1 + ; CHECK: [[BS01:%.*]] = or i16 [[BS0]], [[BS1]] + ; CHECK: [[NE:%.*]] = icmp ne i16 [[BS01]], 0 + ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]] + ; CHECK: store i32 
[[MO]], i32* @__dfsan_retval_origin_tls, align 4 + + %e = insertvalue {i32, {float, float}} %a, {float, float} %b, 1 + ret {i32, {float, float}} %e +} + +define i32 @phiop(i32 %a, i32 %b, i1 %c) { + ; CHECK: @"dfs$phiop" + ; CHECK: entry: + ; CHECK: [[BO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK: [[BS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i16*), align 2 + ; CHECK: [[AS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; CHECK: br i1 %c, label %next, label %done + ; CHECK: next: + ; CHECK: br i1 %c, label %T, label %F + ; CHECK: T: + ; CHECK: [[BS_NE:%.*]] = icmp ne i16 [[BS]], 0 + ; CHECK: [[BAO_T:%.*]] = select i1 [[BS_NE]], i32 [[BO]], i32 [[AO]] + ; CHECK: br label %done + ; CHECK: F: + ; CHECK: [[AS_NE:%.*]] = icmp ne i16 [[AS]], 0 + ; CHECK: [[BAO_F:%.*]] = select i1 [[AS_NE]], i32 [[AO]], i32 [[BO]] + ; CHECK: br label %done + ; CHECK: done: + ; CHECK: [[PO:%.*]] = phi i32 [ [[BAO_T]], %T ], [ [[BAO_F]], %F ], [ [[AO]], %entry ] + ; CHECK: store i32 [[PO]], i32* @__dfsan_retval_origin_tls, align 4 + +entry: + br i1 %c, label %next, label %done +next: + br i1 %c, label %T, label %F +T: + %sum = add i32 %a, %b + br label %done +F: + %diff = sub i32 %b, %a + br label %done +done: + %r = phi i32 [%sum, %T], [%diff, %F], [%a, %entry] + ret i32 %r +} \ No newline at end of file diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_select.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_select.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_select.ll @@ -0,0 +1,67 @@ +; RUN: opt < %s -dfsan -dfsan-track-select-control-flow=1 -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | 
FileCheck %s --check-prefix=TRACK_CONTROL_FLOW +; RUN: opt < %s -dfsan -dfsan-track-select-control-flow=0 -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | FileCheck %s --check-prefix=NO_TRACK_CONTROL_FLOW +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i8 @select8(i1 %c, i8 %t, i8 %f) { + ; TRACK_CONTROL_FLOW: @"dfs$select8" + ; TRACK_CONTROL_FLOW: [[FO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 + ; TRACK_CONTROL_FLOW: [[TO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; TRACK_CONTROL_FLOW: [[CO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; TRACK_CONTROL_FLOW: [[CS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; TRACK_CONTROL_FLOW: [[TFO:%.*]] = select i1 %c, i32 [[TO]], i32 [[FO]] + ; TRACK_CONTROL_FLOW: [[CS_NE:%.*]] = icmp ne i16 [[CS]], 0 + ; TRACK_CONTROL_FLOW: [[CTFO:%.*]] = select i1 [[CS_NE]], i32 [[CO]], i32 [[TFO]] + ; TRACK_CONTROL_FLOW: store i32 [[CTFO]], i32* @__dfsan_retval_origin_tls, align 4 + + ; NO_TRACK_CONTROL_FLOW: @"dfs$select8" + ; NO_TRACK_CONTROL_FLOW: [[FO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 + ; NO_TRACK_CONTROL_FLOW: [[TO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; NO_TRACK_CONTROL_FLOW: [[TFO:%.*]] = select i1 %c, i32 [[TO]], i32 [[FO]] + ; NO_TRACK_CONTROL_FLOW: store i32 [[TFO]], i32* @__dfsan_retval_origin_tls, align 4 + + %a = select i1 %c, i8 %t, i8 %f + ret i8 %a +} + +define i8 @select8e(i1 %c, i8 %tf) { + ; 
TRACK_CONTROL_FLOW: @"dfs$select8e" + ; TRACK_CONTROL_FLOW: [[TFO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; TRACK_CONTROL_FLOW: [[CO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; TRACK_CONTROL_FLOW: [[CS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; TRACK_CONTROL_FLOW: [[CS_NE:%.*]] = icmp ne i16 [[CS]], 0 + ; TRACK_CONTROL_FLOW: [[CTFO:%.*]] = select i1 [[CS_NE]], i32 [[CO]], i32 [[TFO]] + ; TRACK_CONTROL_FLOW: store i32 [[CTFO]], i32* @__dfsan_retval_origin_tls, align 4 + + ; NO_TRACK_CONTROL_FLOW: @"dfs$select8e" + ; NO_TRACK_CONTROL_FLOW: [[TFO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; NO_TRACK_CONTROL_FLOW: store i32 [[TFO]], i32* @__dfsan_retval_origin_tls, align 4 + +%a = select i1 %c, i8 %tf, i8 %tf + ret i8 %a +} + +define <4 x i8> @select8v(<4 x i1> %c, <4 x i8> %t, <4 x i8> %f) { + ; TRACK_CONTROL_FLOW: @"dfs$select8v" + ; TRACK_CONTROL_FLOW: [[FO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 + ; TRACK_CONTROL_FLOW: [[TO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; TRACK_CONTROL_FLOW: [[CO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; TRACK_CONTROL_FLOW: [[FS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 4) to i16*), align 2 + ; TRACK_CONTROL_FLOW: [[CS:%.*]] = load i16, i16* bitcast ([100 x i64]* @__dfsan_arg_tls to i16*), align 2 + ; TRACK_CONTROL_FLOW: [[FS_NE:%.*]] = icmp ne i16 [[FS]], 0 + ; TRACK_CONTROL_FLOW: [[FTO:%.*]] = select i1 [[FS_NE]], i32 [[FO]], i32 [[TO]] + ; 
TRACK_CONTROL_FLOW: [[CS_NE:%.*]] = icmp ne i16 [[CS]], 0 + ; TRACK_CONTROL_FLOW: [[CFTO:%.*]] = select i1 [[CS_NE]], i32 [[CO]], i32 [[FTO]] + ; TRACK_CONTROL_FLOW: store i32 [[CFTO]], i32* @__dfsan_retval_origin_tls, align 4 + + ; NO_TRACK_CONTROL_FLOW: @"dfs$select8v" + ; NO_TRACK_CONTROL_FLOW: [[FO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 2), align 4 + ; NO_TRACK_CONTROL_FLOW: [[TO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; NO_TRACK_CONTROL_FLOW: [[FS:%.*]] = load i16, i16* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 4) to i16*), align 2 + ; NO_TRACK_CONTROL_FLOW: [[FS_NE:%.*]] = icmp ne i16 [[FS]], 0 + ; NO_TRACK_CONTROL_FLOW: [[FTO:%.*]] = select i1 [[FS_NE]], i32 [[FO]], i32 [[TO]] + ; NO_TRACK_CONTROL_FLOW: store i32 [[FTO]], i32* @__dfsan_retval_origin_tls, align 4 + + %a = select <4 x i1> %c, <4 x i8> %t, <4 x i8> %f + ret <4 x i8> %a +} \ No newline at end of file diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -dfsan-instrumentation-with-call-threshold=0 -S | FileCheck %s --check-prefix=CHECK +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @store_threshold([2 x i64]* %p, [2 x i64] %a) { + ; CHECK: @"dfs$store_threshold" + ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4 + ; CHECK: [[AS:%.*]] = load [2 x i16], 
[2 x i16]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to [2 x i16]*), align 2 + ; CHECK: [[AS0:%.*]] = extractvalue [2 x i16] [[AS]], 0 + ; CHECK: [[AS1:%.*]] = extractvalue [2 x i16] [[AS]], 1 + ; CHECK: [[AS01:%.*]] = or i16 [[AS0]], [[AS1]] + ; CHECK: [[ADDR:%.*]] = bitcast [2 x i64]* %p to i8* + ; CHECK: call void @__dfsan_maybe_store_origin(i16 [[AS01]], i8* [[ADDR]], i64 16, i32 [[AO]]) + ; CHECK: store [2 x i64] %a, [2 x i64]* %p, align 8 + + store [2 x i64] %a, [2 x i64]* %p + ret void +}