Index: compiler-rt/lib/hwasan/CMakeLists.txt =================================================================== --- compiler-rt/lib/hwasan/CMakeLists.txt +++ compiler-rt/lib/hwasan/CMakeLists.txt @@ -10,6 +10,7 @@ hwasan_poisoning.cc hwasan_report.cc hwasan_thread.cc + hwasan_thread_list.cc ) set(HWASAN_RTL_CXX_SOURCES @@ -25,8 +26,9 @@ hwasan_mapping.h hwasan_poisoning.h hwasan_report.h - hwasan_thread.h) - + hwasan_thread.h + hwasan_thread_list.h + ) set(HWASAN_DEFINITIONS) append_list_if(COMPILER_RT_HWASAN_WITH_INTERCEPTORS HWASAN_WITH_INTERCEPTORS=1 HWASAN_DEFINITIONS) Index: compiler-rt/lib/hwasan/hwasan.h =================================================================== --- compiler-rt/lib/hwasan/hwasan.h +++ compiler-rt/lib/hwasan/hwasan.h @@ -41,6 +41,10 @@ const unsigned kAddressTagShift = 56; const uptr kAddressTagMask = 0xFFUL << kAddressTagShift; +// Minimal alignment of the shadow base address. Determines the space available +// for threads and stack histories. This is an ABI constant. 
+const unsigned kShadowBaseAlignment = 30; + static inline tag_t GetTagFromPointer(uptr p) { return p >> kAddressTagShift; } @@ -66,6 +70,7 @@ bool ProtectRange(uptr beg, uptr end); bool InitShadow(); +void InitThreads(); void MadviseShadow(); char *GetProcSelfMaps(); void InitializeInterceptors(); @@ -142,6 +147,7 @@ }; void HwasanTSDInit(); +void HwasanTSDThreadInit(); void HwasanOnDeadlySignal(int signo, void *info, void *context); Index: compiler-rt/lib/hwasan/hwasan.cc =================================================================== --- compiler-rt/lib/hwasan/hwasan.cc +++ compiler-rt/lib/hwasan/hwasan.cc @@ -17,6 +17,7 @@ #include "hwasan_poisoning.h" #include "hwasan_report.h" #include "hwasan_thread.h" +#include "hwasan_thread_list.h" #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_flags.h" @@ -174,7 +175,8 @@ static constexpr uptr kMemoryUsageBufferSize = 4096; static void HwasanFormatMemoryUsage(InternalScopedString &s) { - auto thread_stats = Thread::GetThreadStats(); + HwasanThreadList &thread_list = hwasanThreadList(); + auto thread_stats = thread_list.GetThreadStats(); auto *sds = StackDepotGetStats(); AllocatorStatCounters asc; GetAllocatorStats(asc); @@ -184,7 +186,7 @@ " heap: %zd", internal_getpid(), GetRSS(), thread_stats.n_live_threads, thread_stats.total_stack_size, - thread_stats.n_live_threads * Thread::MemoryUsedPerThread(), + thread_stats.n_live_threads * thread_list.MemoryUsedPerThread(), sds->allocated, sds->n_uniq_ids, asc[AllocatorStatMapped]); } @@ -233,6 +235,8 @@ DumpProcessMap(); Die(); } + InitThreads(); + hwasanThreadList().CreateCurrentThread(); hwasan_shadow_inited = 1; } @@ -268,11 +272,10 @@ InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir); HwasanTSDInit(); + HwasanTSDThreadInit(); HwasanAllocatorInit(); - Thread::Create(); - #if HWASAN_CONTAINS_UBSAN __ubsan::InitAsPlugin(); #endif Index: 
compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cc =================================================================== --- compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cc +++ compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cc @@ -13,6 +13,7 @@ /// //===----------------------------------------------------------------------===// +#include "hwasan.h" #include "hwasan_dynamic_shadow.h" #include "hwasan_mapping.h" #include "sanitizer_common/sanitizer_common.h" @@ -35,12 +36,16 @@ } } -// Returns an address aligned to 8 pages, such that one page on the left and -// shadow_size_bytes bytes on the right of it are mapped r/o. +// Returns an address aligned to kShadowBaseAlignment, such that +// 2**kShadowBaseAlignment on the left and shadow_size_bytes bytes on the right +// of it are mapped no access. static uptr MapDynamicShadow(uptr shadow_size_bytes) { const uptr granularity = GetMmapGranularity(); - const uptr alignment = granularity << kShadowScale; - const uptr left_padding = granularity; + const uptr min_alignment = granularity << kShadowScale; + const uptr alignment = 1ULL << kShadowBaseAlignment; + CHECK_GE(alignment, min_alignment); + + const uptr left_padding = 1ULL << kShadowBaseAlignment; const uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity); const uptr map_size = shadow_size + left_padding + alignment; Index: compiler-rt/lib/hwasan/hwasan_flags.inc =================================================================== --- compiler-rt/lib/hwasan/hwasan_flags.inc +++ compiler-rt/lib/hwasan/hwasan_flags.inc @@ -51,3 +51,7 @@ "to find bugs.") HWASAN_FLAG(bool, export_memory_stats, true, "Export up-to-date memory stats through /proc") +HWASAN_FLAG(int, stack_history_size, 512, + "The number of stack frames remembered per thread. 
" + "Affects the quality of stack-related reports, but not the ability " + "to find bugs.") Index: compiler-rt/lib/hwasan/hwasan_interceptors.cc =================================================================== --- compiler-rt/lib/hwasan/hwasan_interceptors.cc +++ compiler-rt/lib/hwasan/hwasan_interceptors.cc @@ -226,7 +226,8 @@ if (UNLIKELY(!hwasan_inited)) // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym. return AllocateFromLocalPool(size); - return hwasan_malloc(size, &stack); + void *res = hwasan_malloc(size, &stack); + return res; } #if HWASAN_WITH_INTERCEPTORS Index: compiler-rt/lib/hwasan/hwasan_linux.cc =================================================================== --- compiler-rt/lib/hwasan/hwasan_linux.cc +++ compiler-rt/lib/hwasan/hwasan_linux.cc @@ -22,6 +22,7 @@ #include "hwasan_mapping.h" #include "hwasan_report.h" #include "hwasan_thread.h" +#include "hwasan_thread_list.h" #include #include @@ -37,6 +38,10 @@ #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_procmaps.h" +#if HWASAN_WITH_INTERCEPTORS && !SANITIZER_ANDROID +THREADLOCAL uptr __hwasan_tls; +#endif + namespace __hwasan { static void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) { @@ -179,6 +184,20 @@ return true; } +void InitThreads() { + CHECK(__hwasan_shadow_memory_dynamic_address); + uptr guard_page_size = GetMmapGranularity(); + uptr thread_space_start = + __hwasan_shadow_memory_dynamic_address - (1ULL << kShadowBaseAlignment); + uptr thread_space_end = + __hwasan_shadow_memory_dynamic_address - guard_page_size; + ReserveShadowMemoryRange(thread_space_start, thread_space_end - 1, + "hwasan threads"); + ProtectGap(thread_space_end, + __hwasan_shadow_memory_dynamic_address - thread_space_end); + InitThreadList(thread_space_start, thread_space_end - thread_space_start); +} + static void MadviseShadowRegion(uptr beg, uptr end) { uptr size = end - beg + 1; if (common_flags()->no_huge_pages_for_shadow) @@ 
-214,7 +233,7 @@ // ---------------------- TSD ---------------- {{{1 extern "C" void __hwasan_thread_enter() { - Thread::Create(); + hwasanThreadList().CreateCurrentThread(); } extern "C" void __hwasan_thread_exit() { @@ -222,21 +241,25 @@ // Make sure that signal handler can not see a stale current thread pointer. atomic_signal_fence(memory_order_seq_cst); if (t) - t->Destroy(); + hwasanThreadList().ReleaseThread(t); } #if HWASAN_WITH_INTERCEPTORS static pthread_key_t tsd_key; static bool tsd_key_inited = false; +void HwasanTSDThreadInit() { + if (tsd_key_inited) + CHECK_EQ(0, pthread_setspecific(tsd_key, + (void *)GetPthreadDestructorIterations())); +} + void HwasanTSDDtor(void *tsd) { - Thread *t = (Thread*)tsd; - if (t->destructor_iterations_ > 1) { - t->destructor_iterations_--; - CHECK_EQ(0, pthread_setspecific(tsd_key, tsd)); + uptr iterations = (uptr)tsd; + if (iterations > 1) { + CHECK_EQ(0, pthread_setspecific(tsd_key, (void *)(iterations - 1))); return; } - t->Destroy(); __hwasan_thread_exit(); } @@ -245,31 +268,26 @@ tsd_key_inited = true; CHECK_EQ(0, pthread_key_create(&tsd_key, HwasanTSDDtor)); } - -Thread *GetCurrentThread() { - return (Thread *)pthread_getspecific(tsd_key); -} - -void SetCurrentThread(Thread *t) { - // Make sure that HwasanTSDDtor gets called at the end. - CHECK(tsd_key_inited); - // Make sure we do not reset the current Thread. 
- CHECK_EQ(0, pthread_getspecific(tsd_key)); - pthread_setspecific(tsd_key, (void *)t); -} -#elif SANITIZER_ANDROID +#else void HwasanTSDInit() {} -Thread *GetCurrentThread() { - return (Thread*)*get_android_tls_ptr(); -} +void HwasanTSDThreadInit() {} +#endif -void SetCurrentThread(Thread *t) { - *get_android_tls_ptr() = (uptr)t; +#if SANITIZER_ANDROID +uptr *GetCurrentThreadLongPtr() { + return (uptr *)get_android_tls_ptr(); } #else -#error unsupported configuration !HWASAN_WITH_INTERCEPTORS && !SANITIZER_ANDROID +uptr *GetCurrentThreadLongPtr() { + return &__hwasan_tls; +} #endif +Thread *GetCurrentThread() { + auto *R = (StackAllocationsRingBuffer*)GetCurrentThreadLongPtr(); + return hwasanThreadList().GetThreadByBufferAddress((uptr)(R->Next())); +} + struct AccessInfo { uptr addr; uptr size; Index: compiler-rt/lib/hwasan/hwasan_report.cc =================================================================== --- compiler-rt/lib/hwasan/hwasan_report.cc +++ compiler-rt/lib/hwasan/hwasan_report.cc @@ -16,6 +16,7 @@ #include "hwasan_allocator.h" #include "hwasan_mapping.h" #include "hwasan_thread.h" +#include "hwasan_thread_list.h" #include "sanitizer_common/sanitizer_allocator_internal.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_flags.h" @@ -109,7 +110,7 @@ } } - Thread::VisitAllLiveThreads([&](Thread *t) { + hwasanThreadList().VisitAllLiveThreads([&](Thread *t) { // Scan all threads' ring buffers to find if it's a heap-use-after-free. HeapAllocationRecord har; if (uptr D = FindHeapAllocation(t->heap_allocations(), tagged_addr, &har)) { @@ -145,6 +146,23 @@ Printf("%s", d.Default()); t->Announce(); + // Temporary report section, needs to be improved. 
+ Printf("Previosly allocated frames:\n"); + auto *sa = t->stack_allocations(); + uptr frames = Min((uptr)flags()->stack_history_size, sa->size()); + for (uptr i = 0; i < frames; i++) { + uptr record = (*sa)[i]; + if (!record) + break; + uptr sp = (record >> 48) << 4; + uptr pc_mask = (1ULL << 48) - 1; + uptr pc = record & pc_mask; + uptr fixed_pc = StackTrace::GetNextInstructionPc(pc); + StackTrace stack(&fixed_pc, 1); + Printf("record: %p pc: %p sp: %p", record, pc, sp); + stack.Print(); + } + num_descriptions_printed++; } }); Index: compiler-rt/lib/hwasan/hwasan_thread.h =================================================================== --- compiler-rt/lib/hwasan/hwasan_thread.h +++ compiler-rt/lib/hwasan/hwasan_thread.h @@ -16,12 +16,15 @@ #include "hwasan_allocator.h" #include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_ring_buffer.h" namespace __hwasan { +typedef __sanitizer::CompactRingBuffer StackAllocationsRingBuffer; + class Thread { public: - static void Create(); // Must be called from the thread itself. + void Init(uptr stack_buffer_start, uptr stack_buffer_size); // Must be called from the thread itself. 
void Destroy(); uptr stack_top() { return stack_top_; } @@ -48,27 +51,15 @@ void LeaveInterceptorScope() { in_interceptor_scope_--; } AllocatorCache *allocator_cache() { return &allocator_cache_; } - HeapAllocationsRingBuffer *heap_allocations() { - return heap_allocations_; - } + HeapAllocationsRingBuffer *heap_allocations() { return heap_allocations_; } + StackAllocationsRingBuffer *stack_allocations() { return stack_allocations_; } tag_t GenerateRandomTag(); - int destructor_iterations_; void DisableTagging() { tagging_disabled_++; } void EnableTagging() { tagging_disabled_--; } bool TaggingIsDisabled() const { return tagging_disabled_; } - template - static void VisitAllLiveThreads(CB cb) { - SpinMutexLock l(&thread_list_mutex); - Thread *t = thread_list_head; - while (t) { - cb(t); - t = t->next_; - } - } - u64 unique_id() const { return unique_id_; } void Announce() { if (announced_) return; @@ -76,22 +67,9 @@ Print("Thread: "); } - struct ThreadStats { - uptr n_live_threads; - uptr total_stack_size; - }; - - static ThreadStats GetThreadStats() { - SpinMutexLock l(&thread_list_mutex); - return thread_stats; - } - - static uptr MemoryUsedPerThread(); - private: // NOTE: There is no Thread constructor. It is allocated // via mmap() and *must* be valid in zero-initialized state. - void Init(); void ClearShadowForThreadStackAndTLS(); void Print(const char *prefix); uptr stack_top_; @@ -108,23 +86,23 @@ AllocatorCache allocator_cache_; HeapAllocationsRingBuffer *heap_allocations_; + StackAllocationsRingBuffer *stack_allocations_; static void InsertIntoThreadList(Thread *t); static void RemoveFromThreadList(Thread *t); Thread *next_; // All live threads form a linked list. - static SpinMutex thread_list_mutex; - static Thread *thread_list_head; - static ThreadStats thread_stats; u64 unique_id_; // counting from zero. u32 tagging_disabled_; // if non-zero, malloc uses zero tag in this thread. 
bool announced_; + + friend struct ThreadListHead; }; Thread *GetCurrentThread(); -void SetCurrentThread(Thread *t); +uptr *GetCurrentThreadLongPtr(); struct ScopedTaggingDisabler { ScopedTaggingDisabler() { GetCurrentThread()->DisableTagging(); } Index: compiler-rt/lib/hwasan/hwasan_thread.cc =================================================================== --- compiler-rt/lib/hwasan/hwasan_thread.cc +++ compiler-rt/lib/hwasan/hwasan_thread.cc @@ -9,6 +9,7 @@ #include "sanitizer_common/sanitizer_placement_new.h" #include "sanitizer_common/sanitizer_tls_get_addr.h" + namespace __hwasan { static u32 RandomSeed() { @@ -24,102 +25,52 @@ return seed; } -Thread *Thread::thread_list_head; -SpinMutex Thread::thread_list_mutex; -Thread::ThreadStats Thread::thread_stats; - -void Thread::InsertIntoThreadList(Thread *t) { - CHECK(!t->next_); - SpinMutexLock l(&thread_list_mutex); - thread_stats.n_live_threads++; - thread_stats.total_stack_size += t->stack_size(); - if (!thread_list_head) { - thread_list_head = t; - return; - } - Thread *last = thread_list_head; - while (last->next_) - last = last->next_; - last->next_ = t; -} - -void Thread::RemoveFromThreadList(Thread *t) { - SpinMutexLock l(&thread_list_mutex); - thread_stats.n_live_threads--; - thread_stats.total_stack_size -= t->stack_size(); - if (t == thread_list_head) { - thread_list_head = t->next_; - t->next_ = nullptr; - return; - } - Thread *prev = thread_list_head; - Thread *cur = prev->next_; - CHECK(cur); - while (cur) { - if (cur == t) { - prev->next_ = cur->next_; - return; - } - prev = cur; - cur = cur->next_; - } - CHECK(0 && "RemoveFromThreadList: thread not found"); -} - -void Thread::Create() { +void Thread::Init(uptr stack_buffer_start, uptr stack_buffer_size) { static u64 unique_id; - uptr PageSize = GetPageSizeCached(); - uptr size = RoundUpTo(sizeof(Thread), PageSize); - Thread *thread = (Thread*)MmapOrDie(size, __func__); - thread->destructor_iterations_ = GetPthreadDestructorIterations(); - 
thread->unique_id_ = unique_id++; - thread->random_state_ = - flags()->random_tags ? RandomSeed() : thread->unique_id_; + unique_id_ = unique_id++; + random_state_ = flags()->random_tags ? RandomSeed() : unique_id_; if (auto sz = flags()->heap_history_size) - thread->heap_allocations_ = HeapAllocationsRingBuffer::New(sz); - SetCurrentThread(thread); - thread->Init(); - InsertIntoThreadList(thread); -} + heap_allocations_ = HeapAllocationsRingBuffer::New(sz); -uptr Thread::MemoryUsedPerThread() { - uptr res = sizeof(Thread); - if (auto sz = flags()->heap_history_size) - res += HeapAllocationsRingBuffer::SizeInBytes(sz); - return res; -} + HwasanTSDThreadInit(); // Only needed with interceptors. + uptr *ThreadLong = GetCurrentThreadLongPtr(); + // The following implicitly sets (this) as the current thread. + stack_allocations_ = new (ThreadLong) + StackAllocationsRingBuffer((void *)stack_buffer_start, stack_buffer_size); + // Check that it worked. + CHECK_EQ(GetCurrentThread(), this); -void Thread::Init() { - // GetPthreadDestructorIterations may call malloc, so disable the tagging. + // ScopedTaggingDisable needs GetCurrentThread to be set up. ScopedTaggingDisabler disabler; // If this process is "init" (pid 1), /proc may not be mounted yet. 
if (IsMainThread() && !FileExists("/proc/self/maps")) { stack_top_ = stack_bottom_ = 0; tls_begin_ = tls_end_ = 0; - return; - } - - uptr tls_size; - uptr stack_size; - GetThreadStackAndTls(IsMainThread(), &stack_bottom_, &stack_size, &tls_begin_, - &tls_size); - stack_top_ = stack_bottom_ + stack_size; - tls_end_ = tls_begin_ + tls_size; - - int local; - CHECK(AddrIsInStack((uptr)&local)); - CHECK(MemIsApp(stack_bottom_)); - CHECK(MemIsApp(stack_top_ - 1)); - - if (stack_bottom_) { + } else { + uptr tls_size; + uptr stack_size; + GetThreadStackAndTls(IsMainThread(), &stack_bottom_, &stack_size, + &tls_begin_, &tls_size); + stack_top_ = stack_bottom_ + stack_size; + tls_end_ = tls_begin_ + tls_size; + + int local; + CHECK(AddrIsInStack((uptr)&local)); CHECK(MemIsApp(stack_bottom_)); CHECK(MemIsApp(stack_top_ - 1)); + + if (stack_bottom_) { + CHECK(MemIsApp(stack_bottom_)); + CHECK(MemIsApp(stack_top_ - 1)); + } } + if (flags()->verbose_threads) { if (IsMainThread()) { - Printf("sizeof(Thread): %zd sizeof(RB): %zd\n", sizeof(Thread), - heap_allocations_->SizeInBytes()); + Printf("sizeof(Thread): %zd sizeof(HeapRB): %zd sizeof(StackRB): %zd\n", + sizeof(Thread), heap_allocations_->SizeInBytes(), + stack_allocations_->size() * sizeof(uptr)); } Print("Creating : "); } @@ -137,11 +88,8 @@ Print("Destroying: "); AllocatorSwallowThreadLocalCache(allocator_cache()); ClearShadowForThreadStackAndTLS(); - RemoveFromThreadList(this); - uptr size = RoundUpTo(sizeof(Thread), GetPageSizeCached()); if (heap_allocations_) heap_allocations_->Delete(); - UnmapOrDie(this, size); DTLS_Destroy(); } Index: compiler-rt/lib/sanitizer_common/sanitizer_ring_buffer.h =================================================================== --- compiler-rt/lib/sanitizer_common/sanitizer_ring_buffer.h +++ compiler-rt/lib/sanitizer_common/sanitizer_ring_buffer.h @@ -72,12 +72,73 @@ // L: last_, always points to the last data element. 
// N: next_, initially equals to last_, is decremented on every push, // wraps around if it's less or equal than its own address. - T *last_; T *next_; T data_[1]; // flexible array. }; +#if SANITIZER_WORDSIZE == 64 +template +class CompactRingBuffer { + static constexpr int kPageSizeBits = 12; + static constexpr int kSizeShift = 56; + static constexpr uptr kNextMask = (1ULL << kSizeShift) - 1; + + uptr GetStorageSize() const { return (long_ >> kSizeShift) << kPageSizeBits; } + + void SetNext(const T *next) { + long_ = (long_ & ~kNextMask) | (uptr)next; + } + + public: + CompactRingBuffer(void *storage, uptr size) { + CHECK_EQ(sizeof(CompactRingBuffer), sizeof(void *)); + CHECK(IsPowerOfTwo(size)); + CHECK_GE(size, 1 << kPageSizeBits); + CHECK_LE(size, 128 << kPageSizeBits); + CHECK_EQ(size % 4096, 0); + CHECK_EQ(size % sizeof(T), 0); + CHECK_EQ((uptr)storage % (size * 2), 0); + long_ = (uptr)storage | ((size >> kPageSizeBits) << kSizeShift); + } + + T *Next() const { return (T *)(long_ & kNextMask); } + + void *StartOfStorage() const { + return (void *)((uptr)Next() & ~(GetStorageSize() - 1)); + } + + void *EndOfStorage() const { + return (void *)((uptr)StartOfStorage() + GetStorageSize()); + } + + uptr size() const { return GetStorageSize() / sizeof(T); } + + void push(T t) { + T *next = Next(); + *next = t; + next++; + next = (T *)((uptr)next & ~GetStorageSize()); + SetNext(next); + } + + T operator[](uptr Idx) const { + CHECK_LT(Idx, size()); + const T *Begin = (const T *)StartOfStorage(); + sptr StorageIdx = Next() - Begin; + StorageIdx -= (sptr)(Idx + 1); + if (StorageIdx < 0) + StorageIdx += size(); + return Begin[StorageIdx]; + } + + public: + ~CompactRingBuffer() {} + CompactRingBuffer(const CompactRingBuffer &) = delete; + + uptr long_; +}; +#endif } // namespace __sanitizer #endif // SANITIZER_RING_BUFFER_H Index: compiler-rt/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cc =================================================================== --- 
compiler-rt/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cc +++ compiler-rt/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cc @@ -66,6 +66,7 @@ #undef EXPECT_RING_BUFFER } +#if SANITIZER_WORDSIZE == 64 TEST(RingBuffer, int64) { TestRB(); } @@ -74,4 +75,25 @@ TestRB(); } +template +WeirdRingBuffer *AllocWeirdRingBuffer(size_t count) { + size_t sz = sizeof(T) * count; + EXPECT_EQ(0ULL, sz % 4096); + void *p = MmapAlignedOrDieOnFatalError(sz, sz * 2, "WeirdRingBuffer"); + return new WeirdRingBuffer(p, sz); +} + +TEST(WeirdRingBuffer, int64) { + const size_t page_sizes[] = {1, 2, 4, 128}; + + for (size_t pages : page_sizes) { + size_t count = 4096 * pages / sizeof(int64_t); + auto R = AllocWeirdRingBuffer(count); + int64_t top = count * 3 + 13; + for (int64_t i = 0; i < top; ++i) R->push(i); + for (int64_t i = 0; i < (int64_t)count; ++i) + EXPECT_EQ(top - i - 1, (*R)[i]); + } +} +#endif } // namespace __sanitizer Index: compiler-rt/test/hwasan/TestCases/deep-recursion.c =================================================================== --- /dev/null +++ compiler-rt/test/hwasan/TestCases/deep-recursion.c @@ -0,0 +1,73 @@ +// RUN: %clang_hwasan -O1 %s -o %t +// RUN: %env_hwasan_opts=stack_history_size=1 not %run %t 2>&1 | FileCheck %s --check-prefix=D1 +// RUN: %env_hwasan_opts=stack_history_size=2 not %run %t 2>&1 | FileCheck %s --check-prefix=D2 +// RUN: %env_hwasan_opts=stack_history_size=3 not %run %t 2>&1 | FileCheck %s --check-prefix=D3 +// RUN: %env_hwasan_opts=stack_history_size=5 not %run %t 2>&1 | FileCheck %s --check-prefix=D5 +// RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=DEFAULT + +// REQUIRES: stable-runtime + +#include +// At least -O1 is needed for this function to not have a stack frame on +// AArch64. 
+void USE(void *x) { // pretend_to_do_something(void *x) + __asm__ __volatile__("" : : "r" (x) : "memory"); +} + +volatile int four = 4; + +__attribute__((noinline)) void OOB() { int x[4]; x[four] = 0; USE(&x[0]); } +__attribute__((noinline)) void FUNC1() { int x; USE(&x); OOB(); } +__attribute__((noinline)) void FUNC2() { int x; USE(&x); FUNC1(); } +__attribute__((noinline)) void FUNC3() { int x; USE(&x); FUNC2(); } +__attribute__((noinline)) void FUNC4() { int x; USE(&x); FUNC3(); } +__attribute__((noinline)) void FUNC5() { int x; USE(&x); FUNC4(); } +__attribute__((noinline)) void FUNC6() { int x; USE(&x); FUNC5(); } +__attribute__((noinline)) void FUNC7() { int x; USE(&x); FUNC6(); } +__attribute__((noinline)) void FUNC8() { int x; USE(&x); FUNC7(); } +__attribute__((noinline)) void FUNC9() { int x; USE(&x); FUNC8(); } +__attribute__((noinline)) void FUNC10() { int x; USE(&x); FUNC9(); } + +int main() { FUNC10(); } + +// D1: Previosly allocated frames +// D1: in OOB +// D1-NOT: in FUNC +// D1: Memory tags around the buggy address + +// D2: Previosly allocated frames +// D2: in OOB +// D2: in FUNC1 +// D2-NOT: in FUNC +// D2: Memory tags around the buggy address + +// D3: Previosly allocated frames +// D3: in OOB +// D3: in FUNC1 +// D3: in FUNC2 +// D3-NOT: in FUNC +// D3: Memory tags around the buggy address + +// D5: Previosly allocated frames +// D5: in OOB +// D5: in FUNC1 +// D5: in FUNC2 +// D5: in FUNC3 +// D5: in FUNC4 +// D5-NOT: in FUNC +// D5: Memory tags around the buggy address + +// DEFAULT: Previosly allocated frames +// DEFAULT: in OOB +// DEFAULT: in FUNC1 +// DEFAULT: in FUNC2 +// DEFAULT: in FUNC3 +// DEFAULT: in FUNC4 +// DEFAULT: in FUNC5 +// DEFAULT: in FUNC6 +// DEFAULT: in FUNC7 +// DEFAULT: in FUNC8 +// DEFAULT: in FUNC9 +// DEFAULT: in FUNC10 +// DEFAULT-NOT: in FUNC +// DEFAULT: Memory tags around the buggy address Index: compiler-rt/test/hwasan/TestCases/rich-stack.c =================================================================== 
--- /dev/null +++ compiler-rt/test/hwasan/TestCases/rich-stack.c @@ -0,0 +1,66 @@ +// Test how stack frames are reported (not fully implemented yet). +// RUN: %clang_hwasan %s -o %t +// RUN: not %run %t 3 2 -1 2>&1 | FileCheck %s --check-prefix=R321 +// REQUIRES: stable-runtime +#include +#include +void USE(void *x) { // pretend_to_do_something(void *x) + __asm__ __volatile__("" : : "r" (x) : "memory"); +} +void USE2(void *a, void *b) { USE(a); USE(b); } +void USE4(void *a, void *b, void *c, void *d) { USE2(a, b); USE2(c, d); } + +void BAR(int depth, int err_depth, int offset); + +uint64_t *leaked_ptr; + +void FOO(int depth, int err_depth, int offset) { + uint8_t v1; + uint16_t v2; + uint32_t v4; + uint64_t v8; + uint64_t v16[2]; + uint64_t v32[4]; + uint64_t v48[3]; + USE4(&v1, &v2, &v4, &v8); USE4(&v16, &v32, &v48, 0); + leaked_ptr = &v16[0]; + if (depth) + BAR(depth - 1, err_depth, offset); + + if (err_depth == depth) + v16[offset] = 0; // maybe OOB. + if (err_depth == -depth) + leaked_ptr[offset] = 0; // maybe UAR. 
+ USE(&v16); +} + +void BAR(int depth, int err_depth, int offset) { + uint64_t x16[2]; + uint64_t x32[4]; + USE2(&x16, &x32); + leaked_ptr = &x16[0]; + if (depth) + FOO(depth - 1, err_depth, offset); + if (err_depth == depth) + x16[offset] = 0; // maybe OOB + if (err_depth == -depth) + leaked_ptr[offset] = 0; // maybe UAR + USE(&x16); +} + + +int main(int argc, char **argv) { + if (argc != 4) return -1; + int depth = atoi(argv[1]); + int err_depth = atoi(argv[2]); + int offset = atoi(argv[3]); + FOO(depth, err_depth, offset); + return 0; +} + +// R321: HWAddressSanitizer: tag-mismatch +// R321-NEXT: WRITE of size 8 +// R321-NEXT: in BAR +// R321-NEXT: in FOO +// R321-NEXT: in main +// R321: is located in stack of thread T0 Index: llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp =================================================================== --- llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -172,13 +172,17 @@ Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag); Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong); bool instrumentStack(SmallVectorImpl &Allocas, - SmallVectorImpl &RetVec); + SmallVectorImpl &RetVec, Value *StackTag); Value *getNextTagWithCall(IRBuilder<> &IRB); Value *getStackBaseTag(IRBuilder<> &IRB); Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI, unsigned AllocaNo); Value *getUARTag(IRBuilder<> &IRB, Value *StackTag); + Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty); + Value *emitPrologue(IRBuilder<> &IRB, Value *BaseTag, bool WithFrameRecord); + // void writeFrameRecord(IRBuilder<> &IRB, Value *BaseTag); + private: LLVMContext *C; Triple TargetTriple; @@ -216,6 +220,7 @@ Constant *ShadowGlobal; Value *LocalDynamicShadow = nullptr; + GlobalValue *ThreadPtrGlobal = nullptr; }; } // end anonymous namespace @@ -263,6 +268,12 @@ /*InitArgs=*/{}); appendToGlobalCtors(M, HwasanCtorFunction, 0); } + if 
(TargetTriple.getArch() == Triple::x86_64 && TargetTriple.isOSLinux()) + appendToCompilerUsed( + M, ThreadPtrGlobal = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalLinkage, nullptr, + "__hwasan_tls", nullptr, GlobalVariable::InitialExecTLSModel)); + return true; } @@ -563,7 +574,7 @@ Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) { if (ClGenerateTagsWithCalls) - return nullptr; + return getNextTagWithCall(IRB); // FIXME: use addressofreturnaddress (but implement it in aarch64 backend // first). Module *M = IRB.GetInsertBlock()->getParent()->getParent(); @@ -631,15 +642,85 @@ return UntaggedPtrLong; } -bool HWAddressSanitizer::instrumentStack( - SmallVectorImpl &Allocas, - SmallVectorImpl &RetVec) { - Function *F = Allocas[0]->getParent()->getParent(); - Instruction *InsertPt = &*F->getEntryBlock().begin(); - IRBuilder<> IRB(InsertPt); +Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) { + Module *M = IRB.GetInsertBlock()->getParent()->getParent(); + if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) { + Function *ThreadPointerFunc = + Intrinsic::getDeclaration(M, Intrinsic::thread_pointer); + Value *SlotPtr = IRB.CreatePointerCast( + IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), 0x40), + Ty->getPointerTo(0)); + return SlotPtr; + } + if (ThreadPtrGlobal) + return ThreadPtrGlobal; - Value *StackTag = getStackBaseTag(IRB); + return nullptr; +} + +static constexpr unsigned kShadowBaseAlignment = 30; + +Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, Value *BaseTag, + bool WithFrameRecord) { + assert(BaseTag); + Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy); + assert(SlotPtr); + + Value *ThreadLong = IRB.CreateLoad(SlotPtr); + // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI. + Value *ThreadLongMaybeUntagged = + TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong); + + if (WithFrameRecord) { + // Prepare ring buffer data. 
+ Function *F = IRB.GetInsertBlock()->getParent(); + auto PC = IRB.CreatePtrToInt(F, IntptrTy); + auto GetStackPointerFn = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress); + Value *SP = IRB.CreatePtrToInt( + IRB.CreateCall(GetStackPointerFn, + {Constant::getNullValue(IRB.getInt32Ty())}), + IntptrTy); + // Mix SP and PC. TODO: also add the tag to the mix. + // Assumptions: + // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero) + // SP is 0xsssssssssssSSSS0 (4 lower bits are zero) + // We only really need ~20 lower non-zero bits (SSSS), so we mix like this: + // 0xSSSSPPPPPPPPPPPP + SP = IRB.CreateShl(SP, 44); + + // Store data to ring buffer. + Value *RecordPtr = + IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0)); + IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr); + + // Update the ring buffer. Top byte of ThreadLong defines the size of the + // buffer in pages, it must be a power of two, and the start of the buffer + // must be aligned by twice that much. Therefore wrap around of the ring + // buffer is simply Addr &= ~((ThreadLong >> 56) << 10). + Value *WrapMask = IRB.CreateXor( + IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 10, "", true, true), + ConstantInt::get(IntptrTy, (uint64_t)-1)); + Value *ThreadLongNew = IRB.CreateAnd( + IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask); + IRB.CreateStore(ThreadLongNew, SlotPtr); + } + + // Get shadow base address by aligning RecordPtr up. + // Note: this is not correct if the pointer is already aligned. + // Runtime library will make sure this never happens. 
+ Value *ShadowBase = IRB.CreateAdd( + IRB.CreateOr( + ThreadLongMaybeUntagged, + ConstantInt::get(IntptrTy, (1UL << kShadowBaseAlignment) - 1)), + ConstantInt::get(IntptrTy, 1), "hwasan.shadow"); + return ShadowBase; +} + +bool HWAddressSanitizer::instrumentStack( + SmallVectorImpl &Allocas, + SmallVectorImpl &RetVec, Value *StackTag) { // Ideally, we want to calculate tagged stack base pointer, and rewrite all // alloca addresses using that. Unfortunately, offsets are not known yet // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a @@ -647,7 +728,7 @@ // This generates one extra instruction per alloca use. for (unsigned N = 0; N < Allocas.size(); ++N) { auto *AI = Allocas[N]; - IRB.SetInsertPoint(AI->getNextNode()); + IRBuilder<> IRB(AI->getNextNode()); // Replace uses of the alloca with tagged address. Value *Tag = getAllocaTag(IRB, StackTag, AI, N); @@ -702,12 +783,6 @@ LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n"); - initializeCallbacks(*F.getParent()); - - assert(!LocalDynamicShadow); - maybeInsertDynamicShadowAtFunctionEntry(F); - - bool Changed = false; SmallVector ToInstrument; SmallVector AllocasToInstrument; SmallVector RetVec; @@ -740,8 +815,23 @@ } } + if (AllocasToInstrument.empty() && ToInstrument.empty()) + return false; + + initializeCallbacks(*F.getParent()); + + assert(!LocalDynamicShadow); + // maybeInsertDynamicShadowAtFunctionEntry(F); + + Instruction *InsertPt = &*F.getEntryBlock().begin(); + IRBuilder<> EntryIRB(InsertPt); + Value *StackTag = getStackBaseTag(EntryIRB); + LocalDynamicShadow = emitPrologue( + EntryIRB, StackTag, /*WithFrameRecord*/ !AllocasToInstrument.empty()); + + bool Changed = false; if (!AllocasToInstrument.empty()) - Changed |= instrumentStack(AllocasToInstrument, RetVec); + Changed |= instrumentStack(AllocasToInstrument, RetVec, StackTag); for (auto Inst : ToInstrument) Changed |= instrumentMemAccess(Inst);