diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h --- a/compiler-rt/lib/msan/msan.h +++ b/compiler-rt/lib/msan/msan.h @@ -297,18 +297,19 @@ void MsanAllocatorInit(); void MsanAllocatorThreadFinish(); -void MsanDeallocate(StackTrace *stack, void *ptr); - -void *msan_malloc(uptr size, StackTrace *stack); -void *msan_calloc(uptr nmemb, uptr size, StackTrace *stack); -void *msan_realloc(void *ptr, uptr size, StackTrace *stack); -void *msan_reallocarray(void *ptr, uptr nmemb, uptr size, StackTrace *stack); -void *msan_valloc(uptr size, StackTrace *stack); -void *msan_pvalloc(uptr size, StackTrace *stack); -void *msan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack); -void *msan_memalign(uptr alignment, uptr size, StackTrace *stack); +void MsanDeallocate(StackUnwindCtx *stack, void *ptr); + +void *msan_malloc(uptr size, StackUnwindCtx *stack); +void *msan_calloc(uptr nmemb, uptr size, StackUnwindCtx *stack); +void *msan_realloc(void *ptr, uptr size, StackUnwindCtx *stack); +void *msan_reallocarray(void *ptr, uptr nmemb, uptr size, + StackUnwindCtx *stack); +void *msan_valloc(uptr size, StackUnwindCtx *stack); +void *msan_pvalloc(uptr size, StackUnwindCtx *stack); +void *msan_aligned_alloc(uptr alignment, uptr size, StackUnwindCtx *stack); +void *msan_memalign(uptr alignment, uptr size, StackUnwindCtx *stack); int msan_posix_memalign(void **memptr, uptr alignment, uptr size, - StackTrace *stack); + StackUnwindCtx *stack); void InstallTrapHandler(); void InstallAtExitHandler(); @@ -333,28 +334,33 @@ // Returns a "chained" origin id, pointing to the given stack trace followed by // the previous origin id. -u32 ChainOrigin(u32 id, StackTrace *stack); +u32 ChainOrigin(u32 id, StackUnwindCtx *stack); + +TraceHash GetTraceHash(uptr pc); const int STACK_TRACE_TAG_POISON = StackTrace::TAG_CUSTOM + 1; -#define GET_MALLOC_STACK_TRACE \ - BufferedStackTrace stack; \ - if (__msan_get_track_origins() && msan_inited) \ - stack.Unwind(StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(), \ - nullptr, common_flags()->fast_unwind_on_malloc, \ - common_flags()->malloc_context_size) +#define GET_MALLOC_STACK_TRACE \ + StackUnwindCtx stack(StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(), \ + nullptr); \ + if (__msan_get_track_origins() && msan_inited) \ + stack.RequestFast(common_flags()->fast_unwind_on_malloc) \ + .WithMaxDepth(common_flags()->malloc_context_size) \ + .WithTraceHash(GetTraceHash(GET_CALLER_PC())); // For platforms which support slow unwinder only, we restrict the store context // size to 1, basically only storing the current pc. We do this because the slow // unwinder which is based on libunwind is not async signal safe and causes // random freezes in forking applications as well as in signal handlers. -#define GET_STORE_STACK_TRACE_PC_BP(pc, bp) \ - BufferedStackTrace stack; \ - if (__msan_get_track_origins() > 1 && msan_inited) { \ - int size = flags()->store_context_size; \ - if (!SANITIZER_CAN_FAST_UNWIND) \ - size = Min(size, 1); \ - stack.Unwind(pc, bp, nullptr, common_flags()->fast_unwind_on_malloc, size);\ +#define GET_STORE_STACK_TRACE_PC_BP(pc, bp) \ + StackUnwindCtx stack(pc, bp, nullptr); \ + if (__msan_get_track_origins() > 1 && msan_inited) { \ + int size = flags()->store_context_size; \ + if (!SANITIZER_CAN_FAST_UNWIND) \ + size = Min(size, 1); \ + stack.RequestFast(common_flags()->fast_unwind_on_malloc) \ + .WithMaxDepth(size) \ + .WithTraceHash(GetTraceHash(GET_CALLER_PC())); \ } #define GET_STORE_STACK_TRACE \ diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -67,6 +67,9 @@ SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 __msan_origin_tls; +SANITIZER_INTERFACE_ATTRIBUTE +THREADLOCAL TraceHash __msan_ctrace_tls; + static THREADLOCAL int is_in_symbolizer; extern "C" SANITIZER_WEAK_ATTRIBUTE const int __msan_track_origins; @@ -290,17 +293,24 @@ return StackOriginDescr[id]; } -u32 ChainOrigin(u32 id, StackTrace *stack) { +u32 ChainOrigin(u32 id, StackUnwindCtx *stack) { MsanThread *t = GetCurrentThread(); if (t && t->InSignalHandler()) return id; Origin o = Origin::FromRawId(id); - stack->tag = StackTrace::TAG_UNKNOWN; + stack->WithTag(StackTrace::TAG_UNKNOWN); Origin chained = Origin::CreateChainedOrigin(o, stack); return chained.raw_id(); } +TraceHash GetTraceHash(uptr pc) { + const static auto rotl5 = [](u32 x) { return (x << 5) | (x >> 27); }; + if (__msan_ctrace_tls) + return rotl5(__msan_ctrace_tls) ^ pc; + return 0; +} + } // namespace __msan void __sanitizer::BufferedStackTrace::UnwindImpl( diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp --- a/compiler-rt/lib/msan/msan_allocator.cpp +++ b/compiler-rt/lib/msan/msan_allocator.cpp @@ -151,14 +151,15 @@ allocator.SwallowCache(GetAllocatorCache(this)); } -static void *MsanAllocate(StackTrace *stack, uptr size, uptr alignment, +static void *MsanAllocate(StackUnwindCtx *stack, uptr size, uptr alignment, bool zeroise) { if (size > max_malloc_size) { if (AllocatorMayReturnNull()) { Report("WARNING: MemorySanitizer failed to allocate 0x%zx bytes\n", size); return nullptr; } - ReportAllocationSizeTooBig(size, max_malloc_size, stack); + ReportAllocationSizeTooBig(size, max_malloc_size, + &BufferedStackTrace::TLSUnwind(*stack)); } MsanThread *t = GetCurrentThread(); void *allocated; @@ -174,7 +175,7 @@ SetAllocatorOutOfMemory(); if (AllocatorMayReturnNull()) return nullptr; - ReportOutOfMemory(size, stack); + ReportOutOfMemory(size, &BufferedStackTrace::TLSUnwind(*stack)); } Metadata *meta = reinterpret_cast(allocator.GetMetaData(allocated)); @@ -184,7 +185,7 @@ } else if (flags()->poison_in_malloc) { __msan_poison(allocated, size); if (__msan_get_track_origins()) { - stack->tag = StackTrace::TAG_ALLOC; + stack->WithTag(StackTrace::TAG_ALLOC); Origin o = Origin::CreateHeapOrigin(stack); __msan_set_origin(allocated, size, o.raw_id()); } @@ -193,7 +194,7 @@ return allocated; } -void MsanDeallocate(StackTrace *stack, void *p) { +void MsanDeallocate(StackUnwindCtx *stack, void *p) { CHECK(p); MSAN_FREE_HOOK(p); Metadata *meta = reinterpret_cast(allocator.GetMetaData(p)); @@ -204,7 +205,7 @@ if (flags()->poison_in_free) { __msan_poison(p, size); if (__msan_get_track_origins()) { - stack->tag = StackTrace::TAG_DEALLOC; + stack->WithTag(StackTrace::TAG_DEALLOC); Origin o = Origin::CreateHeapOrigin(stack); __msan_set_origin(p, size, o.raw_id()); } @@ -220,7 +221,7 @@ } } -void *MsanReallocate(StackTrace *stack, void *old_p, uptr new_size, +void *MsanReallocate(StackUnwindCtx *stack, void *old_p, uptr new_size, uptr alignment) { Metadata *meta = reinterpret_cast(allocator.GetMetaData(old_p)); uptr old_size = meta->requested_size; @@ -230,7 +231,7 @@ meta->requested_size = new_size; if (new_size > old_size) { if (flags()->poison_in_malloc) { - stack->tag = StackTrace::TAG_ALLOC; + stack->WithTag(StackTrace::TAG_ALLOC); PoisonMemory((char *)old_p + old_size, new_size - old_size, stack); } } @@ -245,11 +246,11 @@ return new_p; } -void *MsanCalloc(StackTrace *stack, uptr nmemb, uptr size) { +void *MsanCalloc(StackUnwindCtx *stack, uptr nmemb, uptr size) { if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { if (AllocatorMayReturnNull()) return nullptr; - ReportCallocOverflow(nmemb, size, stack); + ReportCallocOverflow(nmemb, size, &BufferedStackTrace::TLSUnwind(*stack)); } return MsanAllocate(stack, nmemb * size, sizeof(u64), true); } @@ -262,15 +263,15 @@ return b->requested_size; } -void *msan_malloc(uptr size, StackTrace *stack) { +void *msan_malloc(uptr size, StackUnwindCtx *stack) { return SetErrnoOnNull(MsanAllocate(stack, size, sizeof(u64), false)); } -void *msan_calloc(uptr nmemb, uptr size, StackTrace *stack) { +void *msan_calloc(uptr nmemb, uptr size, StackUnwindCtx *stack) { return SetErrnoOnNull(MsanCalloc(stack, nmemb, size)); } -void *msan_realloc(void *ptr, uptr size, StackTrace *stack) { +void *msan_realloc(void *ptr, uptr size, StackUnwindCtx *stack) { if (!ptr) return SetErrnoOnNull(MsanAllocate(stack, size, sizeof(u64), false)); if (size == 0) { @@ -280,59 +281,64 @@ return SetErrnoOnNull(MsanReallocate(stack, ptr, size, sizeof(u64))); } -void *msan_reallocarray(void *ptr, uptr nmemb, uptr size, StackTrace *stack) { +void *msan_reallocarray(void *ptr, uptr nmemb, uptr size, + StackUnwindCtx *stack) { if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) { errno = errno_ENOMEM; if (AllocatorMayReturnNull()) return nullptr; - ReportReallocArrayOverflow(nmemb, size, stack); + ReportReallocArrayOverflow(nmemb, size, + &BufferedStackTrace::TLSUnwind(*stack)); } return msan_realloc(ptr, nmemb * size, stack); } -void *msan_valloc(uptr size, StackTrace *stack) { +void *msan_valloc(uptr size, StackUnwindCtx *stack) { return SetErrnoOnNull(MsanAllocate(stack, size, GetPageSizeCached(), false)); } -void *msan_pvalloc(uptr size, StackTrace *stack) { +void *msan_pvalloc(uptr size, StackUnwindCtx *stack) { uptr PageSize = GetPageSizeCached(); if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) { errno = errno_ENOMEM; if (AllocatorMayReturnNull()) return nullptr; - ReportPvallocOverflow(size, stack); + ReportPvallocOverflow(size, &BufferedStackTrace::TLSUnwind(*stack)); } // pvalloc(0) should allocate one page. size = size ? RoundUpTo(size, PageSize) : PageSize; return SetErrnoOnNull(MsanAllocate(stack, size, PageSize, false)); } -void *msan_aligned_alloc(uptr alignment, uptr size, StackTrace *stack) { +void *msan_aligned_alloc(uptr alignment, uptr size, StackUnwindCtx *stack) { if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) { errno = errno_EINVAL; if (AllocatorMayReturnNull()) return nullptr; - ReportInvalidAlignedAllocAlignment(size, alignment, stack); + ReportInvalidAlignedAllocAlignment(size, alignment, + &BufferedStackTrace::TLSUnwind(*stack)); } return SetErrnoOnNull(MsanAllocate(stack, size, alignment, false)); } -void *msan_memalign(uptr alignment, uptr size, StackTrace *stack) { +void *msan_memalign(uptr alignment, uptr size, StackUnwindCtx *stack) { if (UNLIKELY(!IsPowerOfTwo(alignment))) { errno = errno_EINVAL; if (AllocatorMayReturnNull()) return nullptr; - ReportInvalidAllocationAlignment(alignment, stack); + ReportInvalidAllocationAlignment(alignment, + &BufferedStackTrace::TLSUnwind(*stack)); } return SetErrnoOnNull(MsanAllocate(stack, size, alignment, false)); } int msan_posix_memalign(void **memptr, uptr alignment, uptr size, - StackTrace *stack) { + StackUnwindCtx *stack) { if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) { if (AllocatorMayReturnNull()) return errno_EINVAL; - ReportInvalidPosixMemalignAlignment(alignment, stack); + ReportInvalidPosixMemalignAlignment(alignment, + &BufferedStackTrace::TLSUnwind(*stack)); } void *ptr = MsanAllocate(stack, size, alignment, false); if (UNLIKELY(!ptr)) diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp --- a/compiler-rt/lib/msan/msan_interceptors.cpp +++ b/compiler-rt/lib/msan/msan_interceptors.cpp @@ -908,7 +908,7 @@ void __msan_allocated_memory(const void *data, uptr size) { GET_MALLOC_STACK_TRACE; if (flags()->poison_in_malloc) { - stack.tag = STACK_TRACE_TAG_POISON; + stack.WithTag(STACK_TRACE_TAG_POISON); PoisonMemory(data, size, &stack); } } @@ -921,7 +921,7 @@ void __sanitizer_dtor_callback(const void *data, uptr size) { GET_MALLOC_STACK_TRACE; if (flags()->poison_in_dtor) { - stack.tag = STACK_TRACE_TAG_POISON; + stack.WithTag(STACK_TRACE_TAG_POISON); PoisonMemory(data, size, &stack); } } diff --git a/compiler-rt/lib/msan/msan_new_delete.cpp b/compiler-rt/lib/msan/msan_new_delete.cpp --- a/compiler-rt/lib/msan/msan_new_delete.cpp +++ b/compiler-rt/lib/msan/msan_new_delete.cpp @@ -33,12 +33,14 @@ #define OPERATOR_NEW_BODY(nothrow) \ GET_MALLOC_STACK_TRACE; \ void *res = msan_malloc(size, &stack);\ - if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\ + if (!nothrow && UNLIKELY(!res)) \ + ReportOutOfMemory(size, &BufferedStackTrace::TLSUnwind(stack)); \ return res #define OPERATOR_NEW_BODY_ALIGN(nothrow) \ GET_MALLOC_STACK_TRACE;\ void *res = msan_memalign((uptr)align, size, &stack);\ - if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\ + if (!nothrow && UNLIKELY(!res)) \ + ReportOutOfMemory(size, &BufferedStackTrace::TLSUnwind(stack)); \ return res; INTERCEPTOR_ATTRIBUTE diff --git a/compiler-rt/lib/msan/msan_origin.h b/compiler-rt/lib/msan/msan_origin.h --- a/compiler-rt/lib/msan/msan_origin.h +++ b/compiler-rt/lib/msan/msan_origin.h @@ -11,8 +11,11 @@ #ifndef MSAN_ORIGIN_H #define MSAN_ORIGIN_H -#include "sanitizer_common/sanitizer_stackdepot.h" #include "msan_chained_origin_depot.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_hash.h" +#include "sanitizer_common/sanitizer_hashcache.h" +#include "sanitizer_common/sanitizer_stackdepot.h" namespace __msan { @@ -99,14 +102,14 @@ return Origin((1 << kHeapShift) | id); } - static Origin CreateHeapOrigin(StackTrace *stack) { - u32 stack_id = StackDepotPut(*stack); + static Origin CreateHeapOrigin(StackUnwindCtx *stack) { + u32 stack_id = StackDepotPutUnwound(*stack); CHECK(stack_id); CHECK((stack_id & kHeapIdMask) == stack_id); return Origin(stack_id); } - static Origin CreateChainedOrigin(Origin prev, StackTrace *stack) { + static Origin CreateChainedOrigin(Origin prev, StackUnwindCtx *stack) { int depth = prev.isChainedOrigin() ? prev.depth() : 0; // depth is the length of the chain minus 1. // origin_history_size of 0 means unlimited depth. @@ -119,20 +122,37 @@ } } - StackDepotHandle h = StackDepotPut_WithHandle(*stack); - if (!h.valid()) return prev; + u32 id = StackDepotPutUnwound(*stack); + if (!id) + return prev; if (flags()->origin_history_per_stack_limit > 0) { - int use_count = h.use_count(); - if (use_count > flags()->origin_history_per_stack_limit) return prev; + // int use_count = h.use_count(); + // if (use_count > flags()->origin_history_per_stack_limit) return prev; } + static thread_local HashCache<128> chain_cache; + static ConcurrentHashCache<1024> chain_cache_l2; + u32 chained_id; - bool inserted = ChainedOriginDepotPut(h.id(), prev.raw_id(), &chained_id); - CHECK((chained_id & kChainedIdMask) == chained_id); + u32 chain_hash = id * 4132367431 + prev.raw_id() * 3462451541; + + if (!chain_cache.get(chain_hash, &chained_id)) { + // L1 cache miss + if (chain_cache_l2.get(chain_hash, &chained_id)) { + // L2 cache hit; update L1 + chain_cache.insert(chain_hash, chained_id); + } else { + // L1+L2 cache miss; update both caches + ChainedOriginDepotPut(id, prev.raw_id(), &chained_id); + CHECK((chained_id & kChainedIdMask) == chained_id); + chain_cache.insert(chain_hash, chained_id); + chain_cache_l2.insert(chain_hash, chained_id); + } + } - if (inserted && flags()->origin_history_per_stack_limit > 0) - h.inc_use_count_unsafe(); + // if (inserted && flags()->origin_history_per_stack_limit > 0) + // h.inc_use_count_unsafe(); return Origin((1 << kHeapShift) | (depth << kDepthShift) | chained_id); } diff --git a/compiler-rt/lib/msan/msan_poisoning.h b/compiler-rt/lib/msan/msan_poisoning.h --- a/compiler-rt/lib/msan/msan_poisoning.h +++ b/compiler-rt/lib/msan/msan_poisoning.h @@ -28,21 +28,22 @@ // Copy origin from src (app address) to dst (app address), creating chained // origin ids as necessary, without overriding origin for fully initialized // quads. -void CopyOrigin(const void *dst, const void *src, uptr size, StackTrace *stack); +void CopyOrigin(const void *dst, const void *src, uptr size, + StackUnwindCtx *stack); // memmove() shadow and origin. Dst and src are application addresses. // See CopyOrigin() for the origin copying logic. void MoveShadowAndOrigin(const void *dst, const void *src, uptr size, - StackTrace *stack); + StackUnwindCtx *stack); // memcpy() shadow and origin. Dst and src are application addresses. // See CopyOrigin() for the origin copying logic. void CopyShadowAndOrigin(const void *dst, const void *src, uptr size, - StackTrace *stack); + StackUnwindCtx *stack); // memcpy() app memory, and do "the right thing" to the corresponding shadow and // origin regions. -void CopyMemory(void *dst, const void *src, uptr size, StackTrace *stack); +void CopyMemory(void *dst, const void *src, uptr size, StackUnwindCtx *stack); // Fill shadow will value. Ptr is an application address. void SetShadow(const void *ptr, uptr size, u8 value); @@ -51,7 +52,7 @@ void SetOrigin(const void *dst, uptr size, u32 origin); // Mark memory region uninitialized, with origins. -void PoisonMemory(const void *dst, uptr size, StackTrace *stack); +void PoisonMemory(const void *dst, uptr size, StackUnwindCtx *stack); } // namespace __msan diff --git a/compiler-rt/lib/msan/msan_poisoning.cpp b/compiler-rt/lib/msan/msan_poisoning.cpp --- a/compiler-rt/lib/msan/msan_poisoning.cpp +++ b/compiler-rt/lib/msan/msan_poisoning.cpp @@ -40,7 +40,7 @@ } void CopyOrigin(const void *dst, const void *src, uptr size, - StackTrace *stack) { + StackUnwindCtx *stack) { if (!MEM_IS_APP(dst) || !MEM_IS_APP(src)) return; uptr d = (uptr)dst; @@ -95,7 +95,7 @@ } void MoveShadowAndOrigin(const void *dst, const void *src, uptr size, - StackTrace *stack) { + StackUnwindCtx *stack) { if (!MEM_IS_APP(dst)) return; if (!MEM_IS_APP(src)) return; if (src == dst) return; @@ -105,7 +105,7 @@ } void CopyShadowAndOrigin(const void *dst, const void *src, uptr size, - StackTrace *stack) { + StackUnwindCtx *stack) { if (!MEM_IS_APP(dst)) return; if (!MEM_IS_APP(src)) return; REAL(memcpy)((void *)MEM_TO_SHADOW((uptr)dst), @@ -113,7 +113,7 @@ if (__msan_get_track_origins()) CopyOrigin(dst, src, size, stack); } -void CopyMemory(void *dst, const void *src, uptr size, StackTrace *stack) { +void CopyMemory(void *dst, const void *src, uptr size, StackUnwindCtx *stack) { REAL(memcpy)(dst, src, size); CopyShadowAndOrigin(dst, src, size, stack); } @@ -162,7 +162,7 @@ if (end & 7ULL) *(u32 *)(end - 4) = origin; } -void PoisonMemory(const void *dst, uptr size, StackTrace *stack) { +void PoisonMemory(const void *dst, uptr size, StackUnwindCtx *stack) { SetShadow(dst, size, (u8)-1); if (__msan_get_track_origins()) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_hashcache.h b/compiler-rt/lib/sanitizer_common/sanitizer_hashcache.h new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/sanitizer_common/sanitizer_hashcache.h @@ -0,0 +1,59 @@ +#pragma once + +#include + +#include "sanitizer_allocator_internal.h" +#include "sanitizer_common.h" +#include "sanitizer_internal_defs.h" +#include "sanitizer_platform.h" + +namespace __sanitizer { + +template +struct HashCache { + u32 keys[capacity] = {}; + u32 vals[capacity] = {}; + + constexpr HashCache() {} + + bool get(u32 key, u32 *val) { + uptr idx = key & (capacity - 1); + if (keys[idx] != key) + return false; + if (val) + *val = vals[idx]; + return true; + } + + bool insert(u32 key, u32 val) { + uptr idx = key & (capacity - 1); + if (keys[idx] == key) + return false; + + u32 *keyslot = &keys[idx]; + u32 *valslot = &vals[idx]; + + *keyslot = key; + *valslot = val; + return true; + } +}; + +template +struct ConcurrentHashCache { + atomic_uint64_t data[N]; + + bool get(u32 key, u32 *val) { + u64 data = atomic_load(&this->data[key % N], memory_order_relaxed); + u32 found_key = data; + u32 found_val = data >> 32; + *val = found_val; + return key == found_key; + } + + void insert(u32 key, u32 val) { + u64 data = (u64)val << 32 | key; + atomic_store(&this->data[key % N], data, memory_order_relaxed); + } +}; +} // namespace __sanitizer diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h --- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h @@ -35,7 +35,8 @@ StackDepotStats *StackDepotGetStats(); u32 StackDepotPut(StackTrace stack); -StackDepotHandle StackDepotPut_WithHandle(StackTrace stack); +u32 StackDepotPutUnwound(const StackUnwindCtx &ctx); + // Retrieves a stored stack trace by the id. StackTrace StackDepotGet(u32 id); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp --- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp @@ -14,6 +14,7 @@ #include "sanitizer_common.h" #include "sanitizer_hash.h" +#include "sanitizer_hashcache.h" #include "sanitizer_stackdepotbase.h" namespace __sanitizer { @@ -40,10 +41,10 @@ atomic_load(&hash_and_use_count, memory_order_relaxed) & kHashMask; if ((hash & kHashMask) != hash_bits || args.size != size || args.tag != tag) return false; - uptr i = 0; - for (; i < size; i++) { - if (stack[i] != args.trace[i]) return false; - } + // uptr i = 0; + // for (; i < size; i++) { + // if (stack[i] != args.trace[i]) return false; + // } return true; } static uptr storage_size(const args_type &args) { @@ -99,8 +100,38 @@ return h.valid() ? h.id() : 0; } -StackDepotHandle StackDepotPut_WithHandle(StackTrace stack) { - return theDepot.Put(stack); +u32 StackDepotPutUnwound(const StackUnwindCtx &ctx) { + static thread_local HashCache<1024> trace_hash_map; + static ConcurrentHashCache<16384> trace_map_l2; + + const static auto PutTrace = [](const StackUnwindCtx &ctx) { + // Keep the temp trace in TLS because it's too big to be comfortable + // in the stack. + static thread_local BufferedStackTrace tmp_trace; + tmp_trace.Reset(); + tmp_trace.Unwind(ctx); + return StackDepotPut(tmp_trace); + }; + + if (!ctx.trace_hash) { + return PutTrace(ctx); + } + u32 used_hash = ctx.trace_hash ^ ctx.tag; + + u32 depot_id; + if (trace_hash_map.get(used_hash, &depot_id)) + // L1 cache hit + return depot_id; + if (trace_map_l2.get(used_hash, &depot_id)) { + // L2 cache hit, so update L1 + trace_hash_map.insert(used_hash, depot_id); + return depot_id; + } + // L1+L2 cache misses; update both caches + depot_id = PutTrace(ctx); + trace_hash_map.insert(used_hash, depot_id); + trace_map_l2.insert(used_hash, depot_id); + return depot_id; } StackTrace StackDepotGet(u32 id) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h @@ -90,6 +90,44 @@ #endif } +typedef u32 TraceHash; + +struct StackUnwindCtx { + uptr pc; + uptr bp; + void *context; + uptr stack_top, stack_bottom; + TraceHash trace_hash = 0; + u32 max_depth = kStackTraceMax; + u32 tag = 0; + bool use_stack_bounds = false; + bool request_fast = false; + + StackUnwindCtx(uptr pc, uptr bp, void *ctx) : pc(pc), bp(bp), context(ctx) {} + StackUnwindCtx &WithTraceHash(TraceHash hash) { + this->trace_hash = hash; + return *this; + } + StackUnwindCtx &WithStackBounds(uptr stack_top, uptr stack_bottom) { + this->stack_top = stack_top; + this->stack_bottom = stack_bottom; + this->use_stack_bounds = true; + return *this; + }; + StackUnwindCtx &RequestFast(bool fast) { + this->request_fast = fast; + return *this; + } + StackUnwindCtx &WithMaxDepth(u32 max) { + this->max_depth = max; + return *this; + } + StackUnwindCtx &WithTag(u32 tag) { + this->tag = tag; + return *this; + } +}; + // StackTrace that owns the buffer used to store the addresses. struct BufferedStackTrace : public StackTrace { uptr trace_buffer[kStackTraceMax]; @@ -118,6 +156,23 @@ void Unwind(u32 max_depth, uptr pc, uptr bp, void *context, uptr stack_top, uptr stack_bottom, bool request_fast_unwind); + void Unwind(const StackUnwindCtx &ctx) { + if (ctx.use_stack_bounds) { + Unwind(ctx.max_depth, ctx.pc, ctx.bp, ctx.context, ctx.stack_top, + ctx.stack_bottom, ctx.request_fast); + } else { + Unwind(ctx.pc, ctx.bp, ctx.context, ctx.request_fast, ctx.max_depth); + } + tag = ctx.tag; + } + + static const BufferedStackTrace &TLSUnwind(const StackUnwindCtx &ctx) { + thread_local BufferedStackTrace trace; + trace.Reset(); + trace.Unwind(ctx); + return trace; + } + void Reset() { *static_cast(this) = StackTrace(trace_buffer, 0); top_frame_bp = 0; @@ -125,8 +180,8 @@ private: // Every runtime defines its own implementation of this method - void UnwindImpl(uptr pc, uptr bp, void *context, bool request_fast, - u32 max_depth); + __attribute__((weak)) void UnwindImpl(uptr pc, uptr bp, void *context, + bool request_fast, u32 max_depth); // UnwindFast/Slow have platform-specific implementations void UnwindFast(uptr pc, uptr bp, uptr stack_top, uptr stack_bottom, diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -290,6 +290,11 @@ cl::desc("check arguments and return values at function call boundaries"), cl::Hidden, cl::init(false)); +static cl::opt + ClCachedUnwinding("msan-cached-unwinding", + cl::desc("hash active stack traces for fast unwinding"), + cl::Hidden, cl::init(false)); + static cl::opt ClDumpStrictInstructions("msan-dump-strict-instructions", cl::desc("print out instructions with default strict semantics"), cl::Hidden, cl::init(false)); @@ -552,6 +557,9 @@ /// (x86_64-specific). Value *VAArgOverflowSizeTLS; + /// Thread-local shadow storage for current trace hash + Value *CurrTraceTLS; + /// Are the instrumentation callbacks set up? bool CallbacksInitialized = false; @@ -720,6 +728,7 @@ VAArgTLS = nullptr; VAArgOriginTLS = nullptr; VAArgOverflowSizeTLS = nullptr; + CurrTraceTLS = nullptr; WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(), IRB.getInt32Ty()); @@ -810,6 +819,8 @@ VAArgOverflowSizeTLS = getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty()); + CurrTraceTLS = getOrInsertGlobal(M, "__msan_ctrace_tls", IRB.getInt32Ty()); + for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; AccessSizeIndex++) { unsigned AccessSize = 1 << AccessSizeIndex; @@ -1097,6 +1108,9 @@ LLVM_DEBUG(if (!InsertChecks) dbgs() << "MemorySanitizer is not inserting checks into '" << F.getName() << "'\n"); + + if (ClCachedUnwinding) + ActualFnStart = insertTracePrologue(ActualFnStart); } Value *updateOrigin(Value *V, IRBuilder<> &IRB) { @@ -1286,6 +1300,25 @@ return ret; } + BasicBlock *insertTracePrologue(BasicBlock *B) { + BasicBlock *ret = SplitBlock(B, B->getFirstNonPHI()); + IRBuilder<> IRB(B->getFirstNonPHI()); + + // CurrTrace allows us to approximately hash the entire call trace leading + // to this function. We XOR in the prologue to mark the beginning of this + // frame, and then XOR in the epilogue (ret instructions) to mark the end. + Value *CurrTraceHash = IRB.CreateLoad(IRB.getInt32Ty(), MS.CurrTraceTLS); + Value *ReturnAddress = IRB.CreatePtrToInt( + IRB.CreateIntrinsic(Intrinsic::returnaddress, {}, {IRB.getInt32(0)}), + IRB.getInt32Ty()); + CurrTraceHash = + IRB.CreateIntrinsic(Intrinsic::fshl, {IRB.getInt32Ty()}, + {CurrTraceHash, CurrTraceHash, IRB.getInt32(5)}); + CurrTraceHash = IRB.CreateXor(CurrTraceHash, ReturnAddress); + IRB.CreateStore(CurrTraceHash, MS.CurrTraceTLS); + return ret; + } + /// Add MemorySanitizer instrumentation to a function. bool runOnFunction() { // In the presence of unreachable blocks, we may see Phi nodes with @@ -3738,6 +3771,20 @@ void visitReturnInst(ReturnInst &I) { IRBuilder<> IRB(&I); + if (ClCachedUnwinding) { + // XOR out the current stack frame's hash to indicate returning up a + // frame. + Value *CurrTraceHash = IRB.CreateLoad(IRB.getInt32Ty(), MS.CurrTraceTLS); + Value *ReturnAddress = IRB.CreatePtrToInt( + IRB.CreateIntrinsic(Intrinsic::returnaddress, {}, {IRB.getInt32(0)}), + IRB.getInt32Ty()); + CurrTraceHash = IRB.CreateXor(CurrTraceHash, ReturnAddress); + CurrTraceHash = + IRB.CreateIntrinsic(Intrinsic::fshr, {IRB.getInt32Ty()}, + {CurrTraceHash, CurrTraceHash, IRB.getInt32(5)}); + IRB.CreateStore(CurrTraceHash, MS.CurrTraceTLS); + } + Value *RetVal = I.getReturnValue(); if (!RetVal) return; // Don't emit the epilogue for musttail call returns.