diff --git a/compiler-rt/lib/dfsan/CMakeLists.txt b/compiler-rt/lib/dfsan/CMakeLists.txt --- a/compiler-rt/lib/dfsan/CMakeLists.txt +++ b/compiler-rt/lib/dfsan/CMakeLists.txt @@ -3,6 +3,7 @@ # Runtime library sources and build flags. set(DFSAN_RTL_SOURCES dfsan.cpp + dfsan_chained_origin_depot.cpp dfsan_custom.cpp dfsan_interceptors.cpp dfsan_thread.cpp @@ -10,7 +11,9 @@ set(DFSAN_RTL_HEADERS dfsan.h + dfsan_chained_origin_depot.h dfsan_flags.inc + dfsan_flags.h dfsan_platform.h dfsan_thread.h ) diff --git a/compiler-rt/lib/dfsan/dfsan.h b/compiler-rt/lib/dfsan/dfsan.h --- a/compiler-rt/lib/dfsan/dfsan.h +++ b/compiler-rt/lib/dfsan/dfsan.h @@ -14,8 +14,9 @@ #ifndef DFSAN_H #define DFSAN_H -#include "sanitizer_common/sanitizer_internal_defs.h" +#include "dfsan_flags.h" #include "dfsan_platform.h" +#include "sanitizer_common/sanitizer_internal_defs.h" using __sanitizer::uptr; using __sanitizer::u16; @@ -58,19 +59,6 @@ return shadow_for(const_cast(ptr)); } -struct Flags { -#define DFSAN_FLAG(Type, Name, DefaultValue, Description) Type Name; -#include "dfsan_flags.inc" -#undef DFSAN_FLAG - - void SetDefaults(); -}; - -extern Flags flags_data; -inline Flags &flags() { - return flags_data; -} - } // namespace __dfsan #endif // DFSAN_H diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -20,6 +20,9 @@ #include "dfsan/dfsan.h" +#include "dfsan/dfsan_chained_origin_depot.h" +#include "dfsan/dfsan_flags.h" +#include "dfsan/dfsan_origin.h" #include "dfsan/dfsan_thread.h" #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" @@ -282,6 +285,46 @@ return label; } +// For platforms which support slow unwinder only, we need to restrict the store +// context size to 1, basically only storing the current pc, because the slow +// unwinder which is based on libunwind is not async signal safe and causes +// random freezes in forking applications as well as in signal handlers. +// DFSan supports only Linux. So we do not restrict the store context size. +#define GET_STORE_STACK_TRACE_PC_BP(pc, bp) \ + BufferedStackTrace stack; \ + stack.Unwind(pc, bp, nullptr, true, flags().store_context_size); + +#define PRINT_CALLER_STACK_TRACE \ + { \ + GET_CALLER_PC_BP_SP; \ + (void)sp; \ + GET_STORE_STACK_TRACE_PC_BP(pc, bp) \ + stack.Print(); \ + } + +static u32 ChainOrigin(u32 id, StackTrace *stack, bool from_init = false) { + // StackDepot is not async signal safe. Do not create new chains in a signal + // handler. + DFsanThread *t = GetCurrentThread(); + if (t && t->InSignalHandler()) + return id; + + // As an optimization the origin of an application byte is updated only when + // its shadow is non-zero. Because we are only interested in the origins of + // taint labels, it does not matter what origin a zero label has. This reduces + // memory write cost. MSan does similar optimization. The following invariant + // may not hold because of some bugs. We check the invariant to help debug. + if (!from_init && id == 0 && flags().check_origin_invariant) { + Printf(" DFSan found invalid origin invariant\n"); + PRINT_CALLER_STACK_TRACE + } + + Origin o = Origin::FromRawId(id); + stack->tag = StackTrace::TAG_UNKNOWN; + Origin chained = Origin::CreateChainedOrigin(o, stack); + return chained.raw_id(); +} + static void WriteShadowIfDifferent(dfsan_label label, uptr shadow_addr, uptr size) { dfsan_label *labelp = (dfsan_label *)shadow_addr; diff --git a/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h @@ -0,0 +1,26 @@ +//===-- dfsan_chained_origin_depot.h ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of DataFlowSanitizer. +// +// A storage for chained origins. +//===----------------------------------------------------------------------===// + +#ifndef DFSAN_CHAINED_ORIGIN_DEPOT_H +#define DFSAN_CHAINED_ORIGIN_DEPOT_H + +#include "sanitizer_common/sanitizer_chained_origin_depot.h" +#include "sanitizer_common/sanitizer_common.h" + +namespace __dfsan { + +ChainedOriginDepot* GetChainedOriginDepot(); + +} // namespace __dfsan + +#endif // DFSAN_CHAINED_ORIGIN_DEPOT_H diff --git a/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.cpp b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.cpp new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/dfsan/dfsan_chained_origin_depot.cpp @@ -0,0 +1,22 @@ +//===-- dfsan_chained_origin_depot.cpp ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of DataFlowSanitizer. +// +// A storage for chained origins. +//===----------------------------------------------------------------------===// + +#include "dfsan_chained_origin_depot.h" + +namespace __dfsan { + +static ChainedOriginDepot chainedOriginDepot; + +ChainedOriginDepot* GetChainedOriginDepot() { return &chainedOriginDepot; } + +} // namespace __dfsan diff --git a/compiler-rt/lib/dfsan/dfsan_flags.h b/compiler-rt/lib/dfsan/dfsan_flags.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/dfsan/dfsan_flags.h @@ -0,0 +1,32 @@ +//===-- dfsan_flags.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of DataFlowSanitizer. +// +// DFSan flags. +//===----------------------------------------------------------------------===// + +#ifndef DFSAN_FLAGS_H +#define DFSAN_FLAGS_H + +namespace __dfsan { + +struct Flags { +#define DFSAN_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "dfsan_flags.inc" +#undef DFSAN_FLAG + + void SetDefaults(); +}; + +extern Flags flags_data; +inline Flags &flags() { return flags_data; } + +} // namespace __dfsan + +#endif // DFSAN_FLAGS_H diff --git a/compiler-rt/lib/dfsan/dfsan_flags.inc b/compiler-rt/lib/dfsan/dfsan_flags.inc --- a/compiler-rt/lib/dfsan/dfsan_flags.inc +++ b/compiler-rt/lib/dfsan/dfsan_flags.inc @@ -29,3 +29,14 @@ DFSAN_FLAG(const char *, dump_labels_at_exit, "", "The path of the file where " "to dump the labels when the " "program terminates.") +DFSAN_FLAG( + int, origin_history_size, Origin::kMaxDepth, + "The limit of origin chain length. Non-positive values mean unlimited.") +DFSAN_FLAG( + int, origin_history_per_stack_limit, 20000, + "The limit of origin node's references count. " + "Non-positive values mean unlimited.") +DFSAN_FLAG(int, store_context_size, 20, + "The depth limit of origin tracking stack traces.") +DFSAN_FLAG(bool, check_origin_invariant, false, + "Whether to check if the origin invariant holds.") diff --git a/compiler-rt/lib/dfsan/dfsan_origin.h b/compiler-rt/lib/dfsan/dfsan_origin.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/dfsan/dfsan_origin.h @@ -0,0 +1,127 @@ +//===-- dfsan_origin.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of DataFlowSanitizer. +// +// Origin id utils. +//===----------------------------------------------------------------------===// + +#ifndef DFSAN_ORIGIN_H +#define DFSAN_ORIGIN_H + +#include "dfsan_chained_origin_depot.h" +#include "dfsan_flags.h" +#include "sanitizer_common/sanitizer_stackdepot.h" + +namespace __dfsan { + +// Origin handling. +// +// Origin is a 32-bit identifier that is attached to any taint value in the +// program and describes how this memory came to be tainted. +// +// Chained origin id is like: +// zzzz xxxx xxxx xxxx +// +// Chained origin id describes an event of storing a taint value to +// memory. The xxx part is a value of ChainedOriginDepot, which is a mapping of +// (stack_id, prev_id) -> id, where +// * stack_id describes the event. +// StackDepot keeps a mapping between those and corresponding stack traces. +// * prev_id is another origin id that describes the earlier part of the +// taint value history. 0 prev_id indicates the start of a chain. +// Following a chain of prev_id provides the full recorded history of a taint +// value. +// +// This, effectively, defines a forest where nodes are points in value history +// marked with origin ids, and edges are events that are marked with stack_id. +// +// The "zzzz" bits of chained origin id are used to store the length of the +// origin chain. + +class Origin { + public: + static bool isValidId(u32 id) { return id != 0; } + + u32 raw_id() const { return raw_id_; } + + bool isChainedOrigin() const { return Origin::isValidId(raw_id_); } + + u32 getChainedId() const { + CHECK(Origin::isValidId(raw_id_)); + return raw_id_ & kChainedIdMask; + } + + // Returns the next origin in the chain and the current stack trace. + // + // It scans a partition of StackDepot linearly, and is used only by origin + // tracking report. + Origin getNextChainedOrigin(StackTrace *stack) const { + CHECK(Origin::isValidId(raw_id_)); + u32 prev_id; + u32 stack_id = GetChainedOriginDepot()->Get(getChainedId(), &prev_id); + if (stack) + *stack = StackDepotGet(stack_id); + return Origin(prev_id); + } + + static Origin CreateChainedOrigin(Origin prev, StackTrace *stack) { + int depth = prev.isChainedOrigin() ? prev.depth() : -1; + // depth is the length of the chain minus 1. + // origin_history_size of 0 means unlimited depth. + if (flags().origin_history_size > 0) { + ++depth; + if (depth >= flags().origin_history_size || depth > kMaxDepth) + return prev; + } + + StackDepotHandle h = StackDepotPut_WithHandle(*stack); + if (!h.valid()) + return prev; + + if (flags().origin_history_per_stack_limit > 0) { + int use_count = h.use_count(); + if (use_count > flags().origin_history_per_stack_limit) + return prev; + } + + u32 chained_id; + bool inserted = + GetChainedOriginDepot()->Put(h.id(), prev.raw_id(), &chained_id); + CHECK((chained_id & kChainedIdMask) == chained_id); + + if (inserted && flags().origin_history_per_stack_limit > 0) + h.inc_use_count_unsafe(); + + return Origin((depth << kDepthShift) | chained_id); + } + + static Origin FromRawId(u32 id) { return Origin(id); } + + private: + static const int kDepthBits = 4; + static const int kDepthShift = 32 - kDepthBits; + + static const u32 kChainedIdMask = ((u32)-1) >> kDepthBits; + + u32 raw_id_; + + explicit Origin(u32 raw_id) : raw_id_(raw_id) {} + + int depth() const { + CHECK(isChainedOrigin()); + return (raw_id_ >> kDepthShift) & ((1 << kDepthBits) - 1); + } + + public: + static const int kMaxDepth = (1 << kDepthBits) - 1; +}; + +} // namespace __dfsan + +#endif // DFSAN_ORIGIN_H