Diff 324682

compiler-rt/lib/dfsan/dfsan.h

	Show All 37 Lines
	void dfsan_add_label(dfsan_label label, void *addr, uptr size);			void dfsan_add_label(dfsan_label label, void *addr, uptr size);
	void dfsan_set_label(dfsan_label label, void *addr, uptr size);			void dfsan_set_label(dfsan_label label, void *addr, uptr size);
	dfsan_label dfsan_read_label(const void *addr, uptr size);			dfsan_label dfsan_read_label(const void *addr, uptr size);
	dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);			dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);
	// Zero out [offset, offset+size) from __dfsan_arg_tls.			// Zero out [offset, offset+size) from __dfsan_arg_tls.
	void dfsan_clear_arg_tls(uptr offset, uptr size);			void dfsan_clear_arg_tls(uptr offset, uptr size);
	// Zero out the TLS storage.			// Zero out the TLS storage.
	void dfsan_clear_thread_local_state();			void dfsan_clear_thread_local_state();

				// Return the origin associated with the first taint byte in the size bytes
				// from the address addr.
				dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, uptr size);

				// Copy or move the origins of the len bytes from src to dst. Both ranges
				// are passed as untyped pointers, matching the definition in dfsan.cpp.
				void dfsan_mem_origin_transfer(const void *dst, const void *src, uptr len);
	} // extern "C"			} // extern "C"

	template <typename T>			template <typename T>
	void dfsan_set_label(dfsan_label label, T &data) { // NOLINT			void dfsan_set_label(dfsan_label label, T &data) { // NOLINT
	dfsan_set_label(label, (void *)&data, sizeof(T));			dfsan_set_label(label, (void *)&data, sizeof(T));
	}			}

	namespace __dfsan {			namespace __dfsan {
	Show All 37 Lines

compiler-rt/lib/dfsan/dfsan.cpp

Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines

static dfsan_label_info __dfsan_label_info[kNumLabels]; static dfsan_label_info __dfsan_label_info[kNumLabels];

Flags __dfsan::flags_data; Flags __dfsan::flags_data;

// The size of TLS variables. These constants must be kept in sync with the ones // The size of TLS variables. These constants must be kept in sync with the ones

// in DataFlowSanitizer.cpp. // in DataFlowSanitizer.cpp.

static const int kDFsanArgTlsSize = 800; static const int kDFsanArgTlsSize = 800;

static const int kDFsanRetvalTlsSize = 800; static const int kDFsanRetvalTlsSize = 800;

static const int kDFsanArgOriginTlsSize = 800;

SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64

__dfsan_retval_tls[kDFsanRetvalTlsSize / sizeof(u64)]; __dfsan_retval_tls[kDFsanRetvalTlsSize / sizeof(u64)];

SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 __dfsan_retval_origin_tls;

SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64 SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u64

__dfsan_arg_tls[kDFsanArgTlsSize / sizeof(u64)]; __dfsan_arg_tls[kDFsanArgTlsSize / sizeof(u64)];

SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32

__dfsan_arg_origin_tls[kDFsanArgOriginTlsSize / sizeof(u32)];

SANITIZER_INTERFACE_ATTRIBUTE uptr __dfsan_shadow_ptr_mask; SANITIZER_INTERFACE_ATTRIBUTE uptr __dfsan_shadow_ptr_mask;

// Instrumented code may set this value in terms of -dfsan-track-origins.

// * undefined or 0: do not track origins.

// * 1: track origins at memory store operations.

// * 2: TODO: track origins at memory store operations and callsites.

extern "C" SANITIZER_WEAK_ATTRIBUTE const int __dfsan_track_origins;

int __dfsan_get_track_origins() {
  // __dfsan_track_origins is a weak symbol: when no instrumented code defines
  // it, its address is null and origin tracking is reported as disabled (0).
  if (!&__dfsan_track_origins)
    return 0;
  // Otherwise report whatever mode the instrumented code selected.
  return __dfsan_track_origins;
}

// On Linux/x86_64, memory is laid out as follows: // On Linux/x86_64, memory is laid out as follows:

// //

// +--------------------+ 0x800000000000 (top of memory) // +--------------------+ 0x800000000000 (top of memory)

// | application memory | // | application memory |

// +--------------------+ 0x700000008000 (kAppAddr) // +--------------------+ 0x700000008000 (kAppAddr)

// | | // | |

// | unused | // | unused |

// | | // | |

▲ Show 20 Lines • Show All 170 Lines • ▼ Show 20 Lines

extern "C" SANITIZER_INTERFACE_ATTRIBUTE extern "C" SANITIZER_INTERFACE_ATTRIBUTE

dfsan_label __dfsan_union_load_fast16labels(const dfsan_label *ls, uptr n) { dfsan_label __dfsan_union_load_fast16labels(const dfsan_label *ls, uptr n) {

dfsan_label label = ls[0]; dfsan_label label = ls[0];

for (uptr i = 1; i != n; ++i) for (uptr i = 1; i != n; ++i)

label |= ls[i]; label |= ls[i];

return label; return label;

} }

// Return the union of all the n labels from addr at the high 32 bit, and the

// origin of the first taint byte at the low 32 bit.

morehouseUnsubmitted

Not Done

Why return them in the same u64? Would it be simpler to return the label and write the origin to a pointer passed in as an arg?

morehouse: Why return them in the same u64? Would it be simpler to return the label and write the origin…

stephan.yichao.zhaoAuthorUnsubmitted

Done

We will be using this in the instrumented code. I saw today's IR sometimes lowers returning a pair like {i32, i32*} into returning a 64bit int as an optimization.
And returning {i32, i32*} is also an optimization of returning one element as return value, and the other as an argument pointer.
So I did this manually.

But I do not have a strong opinion. Which one do you feel is better in this case?

stephan.yichao.zhao: We will be using this in the instrumented code. I saw today's IR sometimes lowers returning a…

extern "C" SANITIZER_INTERFACE_ATTRIBUTE u64

__dfsan_load_label_and_origin(const void *addr, uptr n) {

dfsan_label label = 0;

u64 ret = 0;

uptr p = (uptr)addr;

dfsan_label *s = shadow_for((void *)p);

for (uptr i = 0; i < n; ++i) {

dfsan_label l = s[i];

if (!l)

continue;

label |= l;

if (!ret)

ret = *(dfsan_origin *)origin_for((void *)(p + i));

}

return ret | (u64)label << 32;

morehouseUnsubmitted

Done

dfsan_label *s = shadow_for((void *)p);

- for (uptr i = 0; i < n; ++i, ++p, ++s) {

- dfsan_label l = *s;

+ for (uptr i = 0; i < n; ++i) {

+ dfsan_label l = s[i];

if (!l)

continue;

label |= l;

if (!ret) {

- ret = *(dfsan_origin *)origin_for((void *)p);

+ ret = *(dfsan_origin *)origin_for((void *)(p + i));

}

return ret | (u64)label << 32;

Can be simplified.

morehouse: Can be simplified.

}

extern "C" SANITIZER_INTERFACE_ATTRIBUTE extern "C" SANITIZER_INTERFACE_ATTRIBUTE

void __dfsan_unimplemented(char *fname) { void __dfsan_unimplemented(char *fname) {

if (flags().warn_unimplemented) if (flags().warn_unimplemented)

Report("WARNING: DataFlowSanitizer: call to uninstrumented function %s\n", Report("WARNING: DataFlowSanitizer: call to uninstrumented function %s\n",

fname); fname);

} }

// Use '-mllvm -dfsan-debug-nonzero-labels' and break on this function // Use '-mllvm -dfsan-debug-nonzero-labels' and break on this function

Show All 28 Lines dfsan_label label =

atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1; atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;

dfsan_check_label(label); dfsan_check_label(label);

__dfsan_label_info[label].l1 = __dfsan_label_info[label].l2 = 0; __dfsan_label_info[label].l1 = __dfsan_label_info[label].l2 = 0;

__dfsan_label_info[label].desc = desc; __dfsan_label_info[label].desc = desc;

__dfsan_label_info[label].userdata = userdata; __dfsan_label_info[label].userdata = userdata;

return label; return label;

} }

// Return the origin of the first taint byte in the size bytes from the address

// addr.

static dfsan_origin GetOriginIfTainted(uptr addr, uptr size) {

for (uptr i = 0; i < size; ++i, ++addr) {

dfsan_label *s = shadow_for((void *)addr);

if (!is_shadow_addr_valid((uptr)s)) {

morehouseUnsubmitted

Done

Why do we need to check the shadow addr? Shouldn't it always be valid?

morehouse: Why do we need to check the shadow addr? Shouldn't it always be valid?

stephan.yichao.zhaoAuthorUnsubmitted

Done

We know the current DFSan memory layout is not correct. For example, addresses (0, 0x10000) are mapped to themselves.
For an unknown reason this can happen at a memmove or memcpy used inside a signal call.
This caused a crash at runtime, and the gdb coredump shows this. But I am not sure of the full execution path that can lead to this.

Msan's is like this: it maps 0-0x10000 to 0x01....
We could try removing this check after fixing the memory layout issue after supporting 8bit.

Added comments.

stephan.yichao.zhao: We know the current DFSan memory layout is not correct. For example, addresses (0, 0x10000) are…

// The current DFSan memory layout is not always correct. For example,

// addresses (0, 0x10000) are mapped to (0, 0x10000). Before fixing the

// issue, we ignore such addresses.

continue;

}

if (*s)

return *(dfsan_origin *)origin_for((void *)addr);

}

return 0;

}

// For platforms which support slow unwinder only, we need to restrict the store // For platforms which support slow unwinder only, we need to restrict the store

// context size to 1, basically only storing the current pc, because the slow // context size to 1, basically only storing the current pc, because the slow

// unwinder which is based on libunwind is not async signal safe and causes // unwinder which is based on libunwind is not async signal safe and causes

// random freezes in forking applications as well as in signal handlers. // random freezes in forking applications as well as in signal handlers.

// DFSan supports only Linux. So we do not restrict the store context size. // DFSan supports only Linux. So we do not restrict the store context size.

#define GET_STORE_STACK_TRACE_PC_BP(pc, bp) \ #define GET_STORE_STACK_TRACE_PC_BP(pc, bp) \

BufferedStackTrace stack; \ BufferedStackTrace stack; \

stack.Unwind(pc, bp, nullptr, true, flags().store_context_size); stack.Unwind(pc, bp, nullptr, true, flags().store_context_size);

#define PRINT_CALLER_STACK_TRACE \ #define PRINT_CALLER_STACK_TRACE \

{ \ { \

GET_CALLER_PC_BP_SP; \ GET_CALLER_PC_BP_SP; \

(void)sp; \ (void)sp; \

GET_STORE_STACK_TRACE_PC_BP(pc, bp) \ GET_STORE_STACK_TRACE_PC_BP(pc, bp) \

stack.Print(); \ stack.Print(); \

} }

/* // Return a chain with the previous ID id and the current stack.

// from_init = true if this is the first chain of an origin tracking path.

static u32 ChainOrigin(u32 id, StackTrace *stack, bool from_init = false) { static u32 ChainOrigin(u32 id, StackTrace *stack, bool from_init = false) {

// StackDepot is not async signal safe. Do not create new chains in a signal // StackDepot is not async signal safe. Do not create new chains in a signal

// handler. // handler.

DFsanThread *t = GetCurrentThread(); DFsanThread *t = GetCurrentThread();

if (t && t->InSignalHandler()) if (t && t->InSignalHandler())

return id; return id;

// As an optimization the origin of an application byte is updated only when // As an optimization the origin of an application byte is updated only when

// its shadow is non-zero. Because we are only interested in the origins of // its shadow is non-zero. Because we are only interested in the origins of

// taint labels, it does not matter what origin a zero label has. This reduces // taint labels, it does not matter what origin a zero label has. This reduces

// memory write cost. MSan does similar optimization. The following invariant // memory write cost. MSan does similar optimization. The following invariant

// may not hold because of some bugs. We check the invariant to help debug. // may not hold because of some bugs. We check the invariant to help debug.

if (!from_init && id == 0 && flags().check_origin_invariant) { if (!from_init && id == 0 && flags().check_origin_invariant) {

Printf(" DFSan found invalid origin invariant\n"); Printf(" DFSan found invalid origin invariant\n");

PRINT_CALLER_STACK_TRACE PRINT_CALLER_STACK_TRACE

} }

Origin o = Origin::FromRawId(id); Origin o = Origin::FromRawId(id);

stack->tag = StackTrace::TAG_UNKNOWN; stack->tag = StackTrace::TAG_UNKNOWN;

Origin chained = Origin::CreateChainedOrigin(o, stack); Origin chained = Origin::CreateChainedOrigin(o, stack);

return chained.raw_id(); return chained.raw_id();

} }

static const uptr kOriginAlign = sizeof(dfsan_origin);

static const uptr kOriginAlignMask = ~(kOriginAlign - 1UL);

// Round u up to the next multiple of kOriginAlign (u is returned unchanged
// when it is already a multiple).
static uptr AlignUp(uptr u) {
  const uptr bumped = u + (kOriginAlign - 1);
  return bumped & kOriginAlignMask;
}

// Round u down to a multiple of kOriginAlign by clearing its low bits.
static uptr AlignDown(uptr u) {
  const uptr aligned = u & kOriginAlignMask;
  return aligned;
}

// Look up the origin of the first tainted byte in the size bytes starting at
// src. If that origin is non-zero, chain it with stack and store the chained
// origin into the origin slot of dst; otherwise leave dst's origin untouched.
static void ChainAndWriteOriginIfTainted(uptr src, uptr size, uptr dst,
                                         StackTrace *stack) {
  dfsan_origin o = GetOriginIfTainted(src, size);
  if (o) {
    o = ChainOrigin(o, stack);
    *(dfsan_origin *)origin_for((void *)dst) = o;
  }
}

// Copy the origins of the size bytes from src to dst. The source and target
// memory ranges cannot be overlapped. This is used by memcpy. stack records the
// stack trace of the memcpy. When dst and src are not 4-byte aligned properly,
// origins at the unaligned address boundaries may be overwritten because four
// contiguous bytes share the same origin.
static void CopyOrigin(const void *dst, const void *src, uptr size,
                       StackTrace *stack) {
  uptr d = (uptr)dst;
  uptr beg = AlignDown(d);

  // Copy left unaligned origin if that memory is tainted.
  if (beg < d) {
    ChainAndWriteOriginIfTainted((uptr)src, beg + kOriginAlign - d, beg, stack);
    beg += kOriginAlign;
  }

  uptr end = AlignDown(d + size);

  // If both ends fall into the same 4-byte slot, we are done.
  if (end < beg)
    return;

  // Copy right unaligned origin if that memory is tainted.
  if (end < d + size)
    ChainAndWriteOriginIfTainted((uptr)src + (end - d), (d + size) - end, end,
                                 stack);

  // No fully aligned origin slots remain between the two edges.
  if (beg >= end)
    return;

  // Align src up.
  uptr s = AlignUp((uptr)src);
  dfsan_origin *src_o = (dfsan_origin *)origin_for((void *)s);
  // The shadow is walked in u64 steps, one step per origin slot.
  // NOTE(review): presumably 8 shadow bytes cover one 4-byte origin slot;
  // confirm against the shadow layout.
  u64 *src_s = (u64 *)shadow_for((void *)s);
  dfsan_origin *src_end = (dfsan_origin *)origin_for((void *)(s + (end - beg)));
  dfsan_origin *dst_o = (dfsan_origin *)origin_for((void *)beg);

  // Cache the last source origin and its chained value so that a run of equal
  // source origins calls ChainOrigin only once.
  dfsan_origin last_src_o = 0;
  dfsan_origin last_dst_o = 0;
  for (; src_o < src_end; ++src_o, ++src_s, ++dst_o) {
    // Skip slots whose shadow is all zero: nothing is tainted there.
    if (!*src_s)
      continue;
    if (*src_o != last_src_o) {
      last_src_o = *src_o;
      last_dst_o = ChainOrigin(last_src_o, stack);
    }
    *dst_o = last_dst_o;
  }
}

// Copy the origins of the size bytes from src to dst. The source and target

// memory ranges may be overlapped. So the copy is done in a reverse order.

// This is used by memmove. stack records the stack trace of the memmove.

static void ReverseCopyOrigin(const void *dst, const void *src, uptr size,

StackTrace *stack) {

uptr d = (uptr)dst;

uptr end = AlignDown(d + size);

// Copy right unaligned origin if that memory is tainted.

if (end < d + size)

ChainAndWriteOriginIfTainted((uptr)src + (end - d), (d + size) - end, end,

stack);

uptr beg = AlignDown(d);

if (beg + kOriginAlign < end) {

// Align src up.

uptr s = AlignUp((uptr)src);

dfsan_origin *src =

(dfsan_origin *)origin_for((void *)(s + end - beg - kOriginAlign));

u64 *src_s = (u64 *)shadow_for((void *)(s + end - beg - kOriginAlign));

dfsan_origin *src_begin = (dfsan_origin *)origin_for((void *)s);

dfsan_origin *dst =

(dfsan_origin *)origin_for((void *)(end - kOriginAlign));

dfsan_origin src_o = 0;

dfsan_origin dst_o = 0;

for (; src >= src_begin; --src, --src_s, --dst) {

if (!*src_s)

continue;

if (*src != src_o) {

src_o = *src;

dst_o = ChainOrigin(src_o, stack);

}

*dst = dst_o;

}

// Copy left unaligned origin if that memory is tainted.

if (beg < d)

ChainAndWriteOriginIfTainted((uptr)src, beg + kOriginAlign - d, beg, stack);

}

// Copy or move the origins of the len bytes from src to dst. The source and
// target memory ranges may or may not be overlapped. This is used by memory
// transfer operations. stack records the stack trace of the memory transfer
// operation.
static void MoveOrigin(const void *dst, const void *src, uptr size,
                       StackTrace *stack) {
  // Do nothing unless both ends of both ranges have a valid shadow address.
  if (!has_valid_shadow_addr(dst) ||
      !has_valid_shadow_addr((void *)((uptr)dst + size)) ||
      !has_valid_shadow_addr(src) ||
      !has_valid_shadow_addr((void *)((uptr)src + size))) {
    return;
  }

  // If destination origin range overlaps with source origin range, move
  // origins by copying origins in a reverse order; otherwise, copy origins in
  // a normal order. The orders of origin transfer are consistent with the
  // orders of how memcpy and memmove transfer user data.
  // AlignDown replaces the hard-coded "& ~3UL" so the origin granularity is
  // defined in one place (kOriginAlign).
  uptr src_aligned_beg = AlignDown(reinterpret_cast<uptr>(src));
  uptr src_aligned_end = AlignDown(reinterpret_cast<uptr>(src) + size);
  uptr dst_aligned_beg = AlignDown(reinterpret_cast<uptr>(dst));
  if (dst_aligned_beg < src_aligned_end && dst_aligned_beg >= src_aligned_beg) {
    // dst starts inside the aligned source range: copy backwards.
    ReverseCopyOrigin(dst, src, size, stack);
    return;
  }
  CopyOrigin(dst, src, size, stack);
}

// Set the origins of the size bytes starting at the address dst to the value
// origin. Does nothing when size is 0.
static void SetOrigin(const void *dst, uptr size, u32 origin) {
  if (size == 0)
    return;

  // Origin mapping is 4 bytes per 4 bytes of application memory.
  // Here we extend the range such that its left and right bounds are both
  // 4 byte aligned.
  uptr x = unaligned_origin_for((uptr)dst);
  uptr beg = AlignDown(x);
  uptr end = AlignUp(x + size);  // align up.
  // The same 32-bit origin replicated into both halves of a 64-bit word.
  u64 origin64 = ((u64)origin << 32) | origin;

  // This is like memset, but the value is 32-bit. We unroll by 2 to write
  // 64 bits at once. May want to unroll further to get 128-bit stores.
  // Each store is skipped when the slot already holds the wanted value.
  // NOTE(review): presumably this avoids dirtying pages, as described for
  // WriteShadowIfDifferent; confirm.
  if (beg & 7ULL) {
    // beg is not 8-byte aligned: write one leading origin slot on its own.
    if (*(u32 *)beg != origin)
      *(u32 *)beg = origin;
    beg += kOriginAlign;
  }
  // Fill the 8-byte aligned middle two origin slots at a time.
  for (uptr addr = beg; addr < (end & ~7UL); addr += 8) {
    if (*(u64 *)addr == origin64)
      continue;
    *(u64 *)addr = origin64;
  }
  // end is not 8-byte aligned: write the final origin slot on its own.
  if (end & 7ULL)
    if (*(u32 *)(end - kOriginAlign) != origin)
      *(u32 *)(end - kOriginAlign) = origin;
}

static void WriteShadowIfDifferent(dfsan_label label, uptr shadow_addr, static void WriteShadowIfDifferent(dfsan_label label, uptr shadow_addr,

uptr size) { uptr size) {

dfsan_label *labelp = (dfsan_label *)shadow_addr; dfsan_label *labelp = (dfsan_label *)shadow_addr;

for (; size != 0; --size, ++labelp) { for (; size != 0; --size, ++labelp) {

// Don't write the label if it is already the value we need it to be. // Don't write the label if it is already the value we need it to be.

// In a program where most addresses are not labeled, it is common that // In a program where most addresses are not labeled, it is common that

// a page of shadow memory is entirely zeroed. The Linux copy-on-write // a page of shadow memory is entirely zeroed. The Linux copy-on-write

morehouseUnsubmitted

Done

Lets create constants kOriginAlign = sizeof(dfsan_origin) and kOriginAlignMask = ~(kOriginAlign - 1), and use those everywhere instead of hardcoding 3.

Also, maybe short functions for AlignUp and AlignDown would improve readability.

morehouse: Lets create constants `kOriginAlign = sizeof(dfsan_origin)` and `kOriginAlignMask = ~…

// implementation will share all of the zeroed pages, making a copy of a // implementation will share all of the zeroed pages, making a copy of a

// page when any value is written. The un-sharing will happen even if // page when any value is written. The un-sharing will happen even if

// the value written does not change the value in memory. Avoiding the // the value written does not change the value in memory. Avoiding the

// the amount of real memory used by large programs. // the amount of real memory used by large programs.

morehouseUnsubmitted

Done

Please fix this lint.

morehouse: Please fix this lint.

stephan.yichao.zhaoAuthorUnsubmitted

Done

https://github.com/llvm/llvm-project/commit/a7538fee3a0256a8891e746823f7b0f0ade84e62 commented out ChainOrigin for fixing compilation warnings. Uncommented.

stephan.yichao.zhao: https://github.com/llvm/llvm-project/commit/a7538fee3a0256a8891e746823f7b0f0ade84e62 commented…

if (label == *labelp) if (label == *labelp)

continue; continue;

*labelp = label; *labelp = label;

} }

// Return a new origin chain with the previous ID id and the current stack

morehouseUnsubmitted

Done

}

- // Return a new origin chian with the previous ID id and the current stack

+ // Return a new origin chain with the previous ID id and the current stack

// trace.

morehouse:

// trace.

// Exported wrapper around ChainOrigin: unwinds a stack trace at the call site
// and returns the id of the origin chained from id with that trace.
extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin

__dfsan_chain_origin(dfsan_origin id) {

// Provides pc and bp for the unwind below; sp is only referenced by the cast
// on the next line (presumably to silence an unused-variable warning).
GET_CALLER_PC_BP_SP;

(void)sp;

// Declares the local BufferedStackTrace named stack and unwinds into it.
GET_STORE_STACK_TRACE_PC_BP(pc, bp);

return ChainOrigin(id, &stack);

}

// Copy or move the origins of the len bytes from src to dst.

morehouseUnsubmitted

Done

Similar structure to the above; may be a good candidate for a helper function or lambda.

auto ChainAndWriteOriginIfTainted = [=](uptr src, uptr size, uptr dest) {
...
};

morehouse: Similar structure to the above; may be a good candidate for a helper function or lambda. ```…

stephan.yichao.zhaoAuthorUnsubmitted

Done

defined a static ChainAndWriteOriginIfTainted.

stephan.yichao.zhao: defined a static ChainAndWriteOriginIfTainted.

extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_mem_origin_transfer(

const void *dst, const void *src, uptr len) {

if (src == dst)

morehouseUnsubmitted

Done

Nit: Can reduce nesting if we invert the condition: if (beg == end) return;.

morehouse: Nit: Can reduce nesting if we invert the condition: `if (beg == end) return;`.

return;

GET_CALLER_PC_BP;

morehouseUnsubmitted

Done

This skips the origin for the unaligned start of src.

morehouse: This skips the origin for the unaligned start of `src`.

stephan.yichao.zhaoAuthorUnsubmitted

Done

The first "if (beg < d) {" block works for those bytes.

stephan.yichao.zhao: The first "if (beg < d) {" block works for those bytes.

morehouseUnsubmitted

Done

What if dst is aligned properly but src is not? Then we skip the beg < d case.

morehouse: What if `dst` is aligned properly but `src` is not? Then we skip the `beg < d` case.

stephan.yichao.zhaoAuthorUnsubmitted

Done

This is a good point. I also missed the 'behavior'. I feel this is a side effect of 4 bytes sharing the same origin.

Consider copying 5 bytes from 0x12 to 0x20.
B0 and B1 share an origin O0, B2-B4 share another origin O1.
After the copy, B0-B3 should share one origin. But they have either O0 or O1 at src, so we can only keep one.

Ignoring copying B0 B1 is like copying, then being overwritten.

This can also happen at the end: at the case the code indeed copies both, but the last one wins.

I added some comments. Hopefully unaligned copies are not common.

stephan.yichao.zhao: This is a good point. I also missed the 'behavior'. I feel this is an side effect of 4 bytes…

GET_STORE_STACK_TRACE_PC_BP(pc, bp);

MoveOrigin(dst, src, len, &stack);

}

// Exported entry point that forwards dst, src and len unchanged to
// __dfsan_mem_origin_transfer.
SANITIZER_INTERFACE_ATTRIBUTE void dfsan_mem_origin_transfer(const void *dst,

const void *src,

uptr len) {

__dfsan_mem_origin_transfer(dst, src, len);

}

// If the label s is tainted, set the size bytes from the address p to be a new

morehouseUnsubmitted

Done

__dfsan_mem_origin_transfer(dst, src, len);

}

- // If the label is taint, set the size bytes from the address p to be a new

+ // If the label s is tainted, set the size bytes from the address p to be a new

// origin chain with the previous ID o and the current stack trace. This is

morehouse:

morehouseUnsubmitted

Done

Looks like this comment wasn't actually addressed.

morehouse: Looks like this comment wasn't actually addressed.

// origin chain with the previous ID o and the current stack trace. This is

// used by instrumentation to reduce code size when too much code is inserted.

extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_maybe_store_origin(
    u16 s, void *p, uptr size, dfsan_origin o) {
  // Only a non-zero (tainted) label s triggers any work; UNLIKELY marks that
  // branch as the uncommon one.
  if (UNLIKELY(s)) {
    GET_CALLER_PC_BP_SP;
    (void)sp;
    // Declares the local BufferedStackTrace named stack and unwinds into it.
    GET_STORE_STACK_TRACE_PC_BP(pc, bp);
    // Chain o with the captured stack and store the result as the origin of
    // the size bytes starting at p.
    SetOrigin(p, size, ChainOrigin(o, &stack));
  }
}

extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_set_label( extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_set_label(

morehouseUnsubmitted

Done

Can we use this also for memcpy? It looks like we're checking byte-by-byte either way, so why not share the implementation?

morehouse: Can we use this also for `memcpy`? It looks like we're checking byte-by-byte either way, so…

stephan.yichao.zhaoAuthorUnsubmitted

Done

I feel this could be a small optimization if our CPUs do not work the same when prefetching data in different ways. Based on the link's experiments, the latency also depends on code pattern, like the order of the original data initialization. So if the origin is copied or moved in the same order as memcpy/memmove, the result performance would be consistent to those user memcpy/memmove.

https://stackoverflow.com/questions/13339582/why-is-linux-memmove-implemented-the-way-it-is shows that most memcpy does a forward copy and most memmove does a backward copy. This matches CopyOrigin and ReverseCopyOrigin.

Actually I am not sure in practice how much this can help performance, but since we have already had both ReverseCopyOrigin and CopyOrigin (following Msan), it would be fine to keep both.

stephan.yichao.zhao: I feel this could be a small optimization if our CPUs do not work [[ https://stackoverflow.

dfsan_label label, void *addr, uptr size) { dfsan_label label, void *addr, uptr size) {

const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr); const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr);

if (0 != label) { if (0 != label) {

WriteShadowIfDifferent(label, beg_shadow_addr, size); WriteShadowIfDifferent(label, beg_shadow_addr, size);

return; return;

} }

Show All 38 Lines

extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label

__dfsw_dfsan_get_label(long data, dfsan_label data_label, __dfsw_dfsan_get_label(long data, dfsan_label data_label,

dfsan_label *ret_label) { dfsan_label *ret_label) {

*ret_label = 0; *ret_label = 0;

return data_label; return data_label;

} }

SANITIZER_INTERFACE_ATTRIBUTE dfsan_label SANITIZER_INTERFACE_ATTRIBUTE dfsan_label

dfsan_read_label(const void *addr, uptr size) { dfsan_read_label(const void *addr, uptr size) {

morehouseUnsubmitted

Not Done

Under what circumstances would we not have valid shadow addresses for the range?

morehouse: Under what circumstances would we not have valid shadow addresses for the range?

stephan.yichao.zhaoAuthorUnsubmitted

Done

This is similar to the check in GetOriginIfTainted. Will be revisiting this after memory layout is fixed. Added comments.

stephan.yichao.zhao: This is similar to the check in GetOriginIfTainted. Will be revisiting this after memory layout…

if (size == 0) if (size == 0)

return 0; return 0;

return __dfsan_union_load(shadow_for(addr), size); return __dfsan_union_load(shadow_for(addr), size);

} }

morehouseUnsubmitted

Done

// If destination origin range overlaps with source origin range, move

- // origins by coping origins in a reverse order; otherwise, copy origins in

+ // origins by copying origins in a reverse order; otherwise, copy origins in

// a normal order.

morehouse:

morehouseUnsubmitted

Done

Any downside to always copying in reverse order for simplicity?

morehouse: Any downside to always copying in reverse order for simplicity?

stephan.yichao.zhaoAuthorUnsubmitted

Done

Added comments.

stephan.yichao.zhao: Added comments.

// Exported accessor: returns what GetOriginIfTainted reports for the size
// bytes starting at addr, i.e. the origin of the first tainted byte, or 0
// when no byte in the range is tainted.
SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin
dfsan_read_origin_of_first_taint(const void *addr, uptr size) {
  const uptr start = (uptr)addr;
  return GetOriginIfTainted(start, size);
}

extern "C" SANITIZER_INTERFACE_ATTRIBUTE extern "C" SANITIZER_INTERFACE_ATTRIBUTE

const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) { const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) {

return &__dfsan_label_info[label]; return &__dfsan_label_info[label];

} }

extern "C" SANITIZER_INTERFACE_ATTRIBUTE int extern "C" SANITIZER_INTERFACE_ATTRIBUTE int

dfsan_has_label(dfsan_label label, dfsan_label elem) { dfsan_has_label(dfsan_label label, dfsan_label elem) {

if (label == elem) if (label == elem)

▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines

void dfsan_clear_arg_tls(uptr offset, uptr size) { void dfsan_clear_arg_tls(uptr offset, uptr size) {

internal_memset((void *)((uptr)__dfsan_arg_tls + offset), 0, size); internal_memset((void *)((uptr)__dfsan_arg_tls + offset), 0, size);

} }

SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE

void dfsan_clear_thread_local_state() { void dfsan_clear_thread_local_state() {

internal_memset(__dfsan_arg_tls, 0, sizeof(__dfsan_arg_tls)); internal_memset(__dfsan_arg_tls, 0, sizeof(__dfsan_arg_tls));

internal_memset(__dfsan_retval_tls, 0, sizeof(__dfsan_retval_tls)); internal_memset(__dfsan_retval_tls, 0, sizeof(__dfsan_retval_tls));

if (__dfsan_get_track_origins()) {

internal_memset(__dfsan_arg_origin_tls, 0, sizeof(__dfsan_arg_origin_tls));

internal_memset(&__dfsan_retval_origin_tls, 0,

sizeof(__dfsan_retval_origin_tls));

}

} }

static void InitializePlatformEarly() { static void InitializePlatformEarly() {

AvoidCVE_2016_2143(); AvoidCVE_2016_2143();

#ifdef DFSAN_RUNTIME_VMA #ifdef DFSAN_RUNTIME_VMA

__dfsan::vmaSize = __dfsan::vmaSize =

(MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1); (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);

if (__dfsan::vmaSize == 39 || __dfsan::vmaSize == 42 || if (__dfsan::vmaSize == 39 || __dfsan::vmaSize == 42 ||

▲ Show 20 Lines • Show All 69 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[dfsan] Add origin tls/move/read APIs
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 324682

compiler-rt/lib/dfsan/dfsan.h

compiler-rt/lib/dfsan/dfsan.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[dfsan] Add origin tls/move/read APIsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 324682

compiler-rt/lib/dfsan/dfsan.h

compiler-rt/lib/dfsan/dfsan.cpp

[dfsan] Add origin tls/move/read APIs
ClosedPublic