Index: compiler-rt/trunk/lib/hwasan/CMakeLists.txt =================================================================== --- compiler-rt/trunk/lib/hwasan/CMakeLists.txt +++ compiler-rt/trunk/lib/hwasan/CMakeLists.txt @@ -10,6 +10,7 @@ hwasan_poisoning.cc hwasan_report.cc hwasan_thread.cc + hwasan_thread_list.cc ) set(HWASAN_RTL_CXX_SOURCES @@ -25,8 +26,9 @@ hwasan_mapping.h hwasan_poisoning.h hwasan_report.h - hwasan_thread.h) - + hwasan_thread.h + hwasan_thread_list.h + ) set(HWASAN_DEFINITIONS) append_list_if(COMPILER_RT_HWASAN_WITH_INTERCEPTORS HWASAN_WITH_INTERCEPTORS=1 HWASAN_DEFINITIONS) Index: compiler-rt/trunk/lib/hwasan/hwasan.h =================================================================== --- compiler-rt/trunk/lib/hwasan/hwasan.h +++ compiler-rt/trunk/lib/hwasan/hwasan.h @@ -41,6 +41,10 @@ const unsigned kAddressTagShift = 56; const uptr kAddressTagMask = 0xFFUL << kAddressTagShift; +// Minimal alignment of the shadow base address. Determines the space available +// for threads and stack histories. This is an ABI constant. +const unsigned kShadowBaseAlignment = 32; + static inline tag_t GetTagFromPointer(uptr p) { return p >> kAddressTagShift; } @@ -66,6 +70,7 @@ bool ProtectRange(uptr beg, uptr end); bool InitShadow(); +void InitThreads(); void MadviseShadow(); char *GetProcSelfMaps(); void InitializeInterceptors(); @@ -142,6 +147,7 @@ }; void HwasanTSDInit(); +void HwasanTSDThreadInit(); void HwasanOnDeadlySignal(int signo, void *info, void *context); Index: compiler-rt/trunk/lib/hwasan/hwasan.cc =================================================================== --- compiler-rt/trunk/lib/hwasan/hwasan.cc +++ compiler-rt/trunk/lib/hwasan/hwasan.cc @@ -17,6 +17,7 @@ #include "hwasan_poisoning.h" #include "hwasan_report.h" #include "hwasan_thread.h" +#include "hwasan_thread_list.h" #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_flags.h" @@ -174,7 +175,8 @@ static constexpr uptr kMemoryUsageBufferSize = 4096; static void HwasanFormatMemoryUsage(InternalScopedString &s) { - auto thread_stats = Thread::GetThreadStats(); + HwasanThreadList &thread_list = hwasanThreadList(); + auto thread_stats = thread_list.GetThreadStats(); auto *sds = StackDepotGetStats(); AllocatorStatCounters asc; GetAllocatorStats(asc); @@ -184,7 +186,7 @@ " heap: %zd", internal_getpid(), GetRSS(), thread_stats.n_live_threads, thread_stats.total_stack_size, - thread_stats.n_live_threads * Thread::MemoryUsedPerThread(), + thread_stats.n_live_threads * thread_list.MemoryUsedPerThread(), sds->allocated, sds->n_uniq_ids, asc[AllocatorStatMapped]); } @@ -253,7 +255,12 @@ __sanitizer_set_report_path(common_flags()->log_path); DisableCoreDumperIfNecessary(); + __hwasan_shadow_init(); + + InitThreads(); + hwasanThreadList().CreateCurrentThread(); + MadviseShadow(); // This may call libc -> needs initialized shadow. 
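+  // Note: the ordering above matters. InitThreads() carves the thread and ring buffer region out of the space reserved below the shadow base, so it can only run after __hwasan_shadow_init(), and CreateCurrentThread() needs that region to allocate the main thread's ring buffer.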
@@ -268,11 +275,10 @@ InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir); HwasanTSDInit(); + HwasanTSDThreadInit(); HwasanAllocatorInit(); - Thread::Create(); - #if HWASAN_CONTAINS_UBSAN __ubsan::InitAsPlugin(); #endif Index: compiler-rt/trunk/lib/hwasan/hwasan_dynamic_shadow.cc =================================================================== --- compiler-rt/trunk/lib/hwasan/hwasan_dynamic_shadow.cc +++ compiler-rt/trunk/lib/hwasan/hwasan_dynamic_shadow.cc @@ -13,6 +13,7 @@ /// //===----------------------------------------------------------------------===// +#include "hwasan.h" #include "hwasan_dynamic_shadow.h" #include "hwasan_mapping.h" #include "sanitizer_common/sanitizer_common.h" @@ -35,12 +36,16 @@ } } -// Returns an address aligned to 8 pages, such that one page on the left and -// shadow_size_bytes bytes on the right of it are mapped r/o. +// Returns an address aligned to kShadowBaseAlignment, such that +// 2**kShadowBaseAlignment bytes on the left and shadow_size_bytes bytes on the +// right of it are mapped with no access. static uptr MapDynamicShadow(uptr shadow_size_bytes) { const uptr granularity = GetMmapGranularity(); - const uptr alignment = granularity << kShadowScale; - const uptr left_padding = granularity; + const uptr min_alignment = granularity << kShadowScale; + const uptr alignment = 1ULL << kShadowBaseAlignment; + CHECK_GE(alignment, min_alignment); + + const uptr left_padding = 1ULL << kShadowBaseAlignment; const uptr shadow_size = RoundUpTo(shadow_size_bytes, granularity); const uptr map_size = shadow_size + left_padding + alignment; Index: compiler-rt/trunk/lib/hwasan/hwasan_flags.inc =================================================================== --- compiler-rt/trunk/lib/hwasan/hwasan_flags.inc +++ compiler-rt/trunk/lib/hwasan/hwasan_flags.inc @@ -51,3 +51,7 @@ "to find bugs.") HWASAN_FLAG(bool, export_memory_stats, true, "Export up-to-date memory stats through /proc") +HWASAN_FLAG(int, stack_history_size, 1024, + "The number of stack frames remembered per thread. 
" + "Affects the quality of stack-related reports, but not the ability " + "to find bugs.") Index: compiler-rt/trunk/lib/hwasan/hwasan_linux.cc =================================================================== --- compiler-rt/trunk/lib/hwasan/hwasan_linux.cc +++ compiler-rt/trunk/lib/hwasan/hwasan_linux.cc @@ -22,6 +22,7 @@ #include "hwasan_mapping.h" #include "hwasan_report.h" #include "hwasan_thread.h" +#include "hwasan_thread_list.h" #include #include @@ -37,6 +38,10 @@ #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_procmaps.h" +#if HWASAN_WITH_INTERCEPTORS && !SANITIZER_ANDROID +THREADLOCAL uptr __hwasan_tls; +#endif + namespace __hwasan { static void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) { @@ -179,6 +184,20 @@ return true; } +void InitThreads() { + CHECK(__hwasan_shadow_memory_dynamic_address); + uptr guard_page_size = GetMmapGranularity(); + uptr thread_space_start = + __hwasan_shadow_memory_dynamic_address - (1ULL << kShadowBaseAlignment); + uptr thread_space_end = + __hwasan_shadow_memory_dynamic_address - guard_page_size; + ReserveShadowMemoryRange(thread_space_start, thread_space_end - 1, + "hwasan threads"); + ProtectGap(thread_space_end, + __hwasan_shadow_memory_dynamic_address - thread_space_end); + InitThreadList(thread_space_start, thread_space_end - thread_space_start); +} + static void MadviseShadowRegion(uptr beg, uptr end) { uptr size = end - beg + 1; if (common_flags()->no_huge_pages_for_shadow) @@ -214,7 +233,7 @@ // ---------------------- TSD ---------------- {{{1 extern "C" void __hwasan_thread_enter() { - Thread::Create(); + hwasanThreadList().CreateCurrentThread(); } extern "C" void __hwasan_thread_exit() { @@ -222,21 +241,25 @@ // Make sure that signal handler can not see a stale current thread pointer. atomic_signal_fence(memory_order_seq_cst); if (t) - t->Destroy(); + hwasanThreadList().ReleaseThread(t); } #if HWASAN_WITH_INTERCEPTORS static pthread_key_t tsd_key; static bool tsd_key_inited = false; +void HwasanTSDThreadInit() { + if (tsd_key_inited) + CHECK_EQ(0, pthread_setspecific(tsd_key, + (void *)GetPthreadDestructorIterations())); +} + void HwasanTSDDtor(void *tsd) { - Thread *t = (Thread*)tsd; - if (t->destructor_iterations_ > 1) { - t->destructor_iterations_--; - CHECK_EQ(0, pthread_setspecific(tsd_key, tsd)); + uptr iterations = (uptr)tsd; + if (iterations > 1) { + CHECK_EQ(0, pthread_setspecific(tsd_key, (void *)(iterations - 1))); return; } - t->Destroy(); __hwasan_thread_exit(); } @@ -245,31 +268,26 @@ tsd_key_inited = true; CHECK_EQ(0, pthread_key_create(&tsd_key, HwasanTSDDtor)); } - -Thread *GetCurrentThread() { - return (Thread *)pthread_getspecific(tsd_key); -} - -void SetCurrentThread(Thread *t) { - // Make sure that HwasanTSDDtor gets called at the end. - CHECK(tsd_key_inited); - // Make sure we do not reset the current Thread. 
- CHECK_EQ(0, pthread_getspecific(tsd_key)); - pthread_setspecific(tsd_key, (void *)t); -} -#elif SANITIZER_ANDROID +#else void HwasanTSDInit() {} -Thread *GetCurrentThread() { - return (Thread*)*get_android_tls_ptr(); -} +void HwasanTSDThreadInit() {} +#endif -void SetCurrentThread(Thread *t) { - *get_android_tls_ptr() = (uptr)t; +#if SANITIZER_ANDROID +uptr *GetCurrentThreadLongPtr() { + return (uptr *)get_android_tls_ptr(); } #else -#error unsupported configuration !HWASAN_WITH_INTERCEPTORS && !SANITIZER_ANDROID +uptr *GetCurrentThreadLongPtr() { + return &__hwasan_tls; +} #endif +Thread *GetCurrentThread() { + auto *R = (StackAllocationsRingBuffer*)GetCurrentThreadLongPtr(); + return hwasanThreadList().GetThreadByBufferAddress((uptr)(R->Next())); +} + struct AccessInfo { uptr addr; uptr size; Index: compiler-rt/trunk/lib/hwasan/hwasan_report.cc =================================================================== --- compiler-rt/trunk/lib/hwasan/hwasan_report.cc +++ compiler-rt/trunk/lib/hwasan/hwasan_report.cc @@ -16,6 +16,7 @@ #include "hwasan_allocator.h" #include "hwasan_mapping.h" #include "hwasan_thread.h" +#include "hwasan_thread_list.h" #include "sanitizer_common/sanitizer_allocator_internal.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_flags.h" @@ -35,6 +36,31 @@ return res; } +// A RAII object that holds a copy of the current thread stack ring buffer. +// The actual stack buffer may change while we are iterating over it (for +// example, Printf may call syslog() which can itself be built with hwasan). +class SavedStackAllocations { + public: + SavedStackAllocations(StackAllocationsRingBuffer *rb) { + uptr size = rb->size() * sizeof(uptr); + void *storage = + MmapAlignedOrDieOnFatalError(size, size * 2, "saved stack allocations"); + new (&rb_) StackAllocationsRingBuffer(*rb, storage); + } + + ~SavedStackAllocations() { + StackAllocationsRingBuffer *rb = get(); + UnmapOrDie(rb->StartOfStorage(), rb->size() * sizeof(uptr)); + } + + StackAllocationsRingBuffer *get() { + return (StackAllocationsRingBuffer *)&rb_; + } + + private: + uptr rb_; +}; + class Decorator: public __sanitizer::SanitizerCommonDecorator { public: Decorator() : SanitizerCommonDecorator() { } @@ -63,7 +89,9 @@ return 0; } -void PrintAddressDescription(uptr tagged_addr, uptr access_size) { +void PrintAddressDescription( + uptr tagged_addr, uptr access_size, + StackAllocationsRingBuffer *current_stack_allocations) { Decorator d; int num_descriptions_printed = 0; uptr untagged_addr = UntagAddr(tagged_addr); @@ -109,7 +137,7 @@ } } - Thread::VisitAllLiveThreads([&](Thread *t) { + hwasanThreadList().VisitAllLiveThreads([&](Thread *t) { // Scan all threads' ring buffers to find if it's a heap-use-after-free. HeapAllocationRecord har; if (uptr D = FindHeapAllocation(t->heap_allocations(), tagged_addr, &har)) { @@ -145,6 +173,25 @@ Printf("%s", d.Default()); t->Announce(); + // Temporary report section, needs to be improved. + Printf("Previously allocated frames:\n"); + auto *sa = (t == GetCurrentThread() && current_stack_allocations) + ? 
current_stack_allocations : t->stack_allocations(); + uptr frames = Min((uptr)flags()->stack_history_size, sa->size()); + for (uptr i = 0; i < frames; i++) { + uptr record = (*sa)[i]; + if (!record) + break; + uptr sp = (record >> 48) << 4; + uptr pc_mask = (1ULL << 48) - 1; + uptr pc = record & pc_mask; + uptr fixed_pc = StackTrace::GetNextInstructionPc(pc); + StackTrace stack(&fixed_pc, 1); + Printf("record: %p pc: %p sp: %p\n", record, pc, sp); + stack.Print(); + } + + num_descriptions_printed++; } }); @@ -170,13 +217,16 @@ void ReportInvalidAccessInsideAddressRange(const char *what, const void *start, uptr size, uptr offset) { ScopedErrorReportLock l; + SavedStackAllocations current_stack_allocations( + GetCurrentThread()->stack_allocations()); Decorator d; Printf("%s", d.Warning()); Printf("%sTag mismatch in %s%s%s at offset %zu inside [%p, %zu)%s\n", d.Warning(), d.Name(), what, d.Warning(), offset, start, size, d.Default()); - PrintAddressDescription((uptr)start + offset, 1); + PrintAddressDescription((uptr)start + offset, 1, + current_stack_allocations.get()); // if (__sanitizer::Verbosity()) // DescribeMemoryRange(start, size); } @@ -224,7 +274,7 @@ stack->Print(); - PrintAddressDescription(tagged_addr, 0); + PrintAddressDescription(tagged_addr, 0, nullptr); PrintTagsAroundAddr(tag_ptr); @@ -235,6 +285,8 @@ void ReportTagMismatch(StackTrace *stack, uptr tagged_addr, uptr access_size, bool is_store) { ScopedErrorReportLock l; + SavedStackAllocations current_stack_allocations( + GetCurrentThread()->stack_allocations()); Decorator d; Printf("%s", d.Error()); @@ -258,7 +310,8 @@ stack->Print(); - PrintAddressDescription(tagged_addr, access_size); + PrintAddressDescription(tagged_addr, access_size, + current_stack_allocations.get()); t->Announce(); PrintTagsAroundAddr(tag_ptr); Index: compiler-rt/trunk/lib/hwasan/hwasan_thread.h =================================================================== --- compiler-rt/trunk/lib/hwasan/hwasan_thread.h +++ compiler-rt/trunk/lib/hwasan/hwasan_thread.h @@ -16,12 +16,15 @@ #include "hwasan_allocator.h" #include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_ring_buffer.h" namespace __hwasan { +typedef __sanitizer::CompactRingBuffer<uptr> StackAllocationsRingBuffer; + class Thread { public: - static void Create(); // Must be called from the thread itself. + void Init(uptr stack_buffer_start, uptr stack_buffer_size); // Must be called from the thread itself. 
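+  // The stack ring buffer storage passed to Init() is carved out of the dedicated region below the shadow base by HwasanThreadList::CreateCurrentThread() (see hwasan_thread_list.h).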
void Destroy(); uptr stack_top() { return stack_top_; } @@ -48,27 +51,15 @@ void LeaveInterceptorScope() { in_interceptor_scope_--; } AllocatorCache *allocator_cache() { return &allocator_cache_; } - HeapAllocationsRingBuffer *heap_allocations() { - return heap_allocations_; - } + HeapAllocationsRingBuffer *heap_allocations() { return heap_allocations_; } + StackAllocationsRingBuffer *stack_allocations() { return stack_allocations_; } tag_t GenerateRandomTag(); - int destructor_iterations_; void DisableTagging() { tagging_disabled_++; } void EnableTagging() { tagging_disabled_--; } bool TaggingIsDisabled() const { return tagging_disabled_; } - template <class CB> - static void VisitAllLiveThreads(CB cb) { - SpinMutexLock l(&thread_list_mutex); - Thread *t = thread_list_head; - while (t) { - cb(t); - t = t->next_; - } - } - u64 unique_id() const { return unique_id_; } void Announce() { if (announced_) return; @@ -76,22 +67,9 @@ Print("Thread: "); } - struct ThreadStats { - uptr n_live_threads; - uptr total_stack_size; - }; - - static ThreadStats GetThreadStats() { - SpinMutexLock l(&thread_list_mutex); - return thread_stats; - } - - static uptr MemoryUsedPerThread(); - private: // NOTE: There is no Thread constructor. It is allocated // via mmap() and *must* be valid in zero-initialized state. - void Init(); void ClearShadowForThreadStackAndTLS(); void Print(const char *prefix); uptr stack_top_; @@ -108,23 +86,23 @@ AllocatorCache allocator_cache_; HeapAllocationsRingBuffer *heap_allocations_; + StackAllocationsRingBuffer *stack_allocations_; static void InsertIntoThreadList(Thread *t); static void RemoveFromThreadList(Thread *t); Thread *next_; // All live threads form a linked list. - static SpinMutex thread_list_mutex; - static Thread *thread_list_head; - static ThreadStats thread_stats; u64 unique_id_; // counting from zero. u32 tagging_disabled_; // if non-zero, malloc uses zero tag in this thread. 
bool announced_; + + friend struct ThreadListHead; }; Thread *GetCurrentThread(); -void SetCurrentThread(Thread *t); +uptr *GetCurrentThreadLongPtr(); struct ScopedTaggingDisabler { ScopedTaggingDisabler() { GetCurrentThread()->DisableTagging(); } Index: compiler-rt/trunk/lib/hwasan/hwasan_thread.cc =================================================================== --- compiler-rt/trunk/lib/hwasan/hwasan_thread.cc +++ compiler-rt/trunk/lib/hwasan/hwasan_thread.cc @@ -9,6 +9,7 @@ #include "sanitizer_common/sanitizer_placement_new.h" #include "sanitizer_common/sanitizer_tls_get_addr.h" + namespace __hwasan { static u32 RandomSeed() { @@ -24,102 +25,52 @@ return seed; } -Thread *Thread::thread_list_head; -SpinMutex Thread::thread_list_mutex; -Thread::ThreadStats Thread::thread_stats; - -void Thread::InsertIntoThreadList(Thread *t) { - CHECK(!t->next_); - SpinMutexLock l(&thread_list_mutex); - thread_stats.n_live_threads++; - thread_stats.total_stack_size += t->stack_size(); - if (!thread_list_head) { - thread_list_head = t; - return; - } - Thread *last = thread_list_head; - while (last->next_) - last = last->next_; - last->next_ = t; -} - -void Thread::RemoveFromThreadList(Thread *t) { - SpinMutexLock l(&thread_list_mutex); - thread_stats.n_live_threads--; - thread_stats.total_stack_size -= t->stack_size(); - if (t == thread_list_head) { - thread_list_head = t->next_; - t->next_ = nullptr; - return; - } - Thread *prev = thread_list_head; - Thread *cur = prev->next_; - CHECK(cur); - while (cur) { - if (cur == t) { - prev->next_ = cur->next_; - return; - } - prev = cur; - cur = cur->next_; - } - CHECK(0 && "RemoveFromThreadList: thread not found"); -} - -void Thread::Create() { +void Thread::Init(uptr stack_buffer_start, uptr stack_buffer_size) { static u64 unique_id; - uptr PageSize = GetPageSizeCached(); - uptr size = RoundUpTo(sizeof(Thread), PageSize); - Thread *thread = (Thread*)MmapOrDie(size, __func__); - thread->destructor_iterations_ = GetPthreadDestructorIterations(); - thread->unique_id_ = unique_id++; - thread->random_state_ = - flags()->random_tags ? RandomSeed() : thread->unique_id_; + unique_id_ = unique_id++; + random_state_ = flags()->random_tags ? RandomSeed() : unique_id_; if (auto sz = flags()->heap_history_size) - thread->heap_allocations_ = HeapAllocationsRingBuffer::New(sz); - SetCurrentThread(thread); - thread->Init(); - InsertIntoThreadList(thread); -} + heap_allocations_ = HeapAllocationsRingBuffer::New(sz); -uptr Thread::MemoryUsedPerThread() { - uptr res = sizeof(Thread); - if (auto sz = flags()->heap_history_size) - res += HeapAllocationsRingBuffer::SizeInBytes(sz); - return res; -} + HwasanTSDThreadInit(); // Only needed with interceptors. + uptr *ThreadLong = GetCurrentThreadLongPtr(); + // The following implicitly sets (this) as the current thread. + stack_allocations_ = new (ThreadLong) + StackAllocationsRingBuffer((void *)stack_buffer_start, stack_buffer_size); + // Check that it worked. + CHECK_EQ(GetCurrentThread(), this); -void Thread::Init() { - // GetPthreadDestructorIterations may call malloc, so disable the tagging. + // ScopedTaggingDisabler needs GetCurrentThread to be set up. ScopedTaggingDisabler disabler; // If this process is "init" (pid 1), /proc may not be mounted yet. 
if (IsMainThread() && !FileExists("/proc/self/maps")) { stack_top_ = stack_bottom_ = 0; tls_begin_ = tls_end_ = 0; - return; - } + } else { + uptr tls_size; + uptr stack_size; + GetThreadStackAndTls(IsMainThread(), &stack_bottom_, &stack_size, + &tls_begin_, &tls_size); + stack_top_ = stack_bottom_ + stack_size; + tls_end_ = tls_begin_ + tls_size; - uptr tls_size; - uptr stack_size; - GetThreadStackAndTls(IsMainThread(), &stack_bottom_, &stack_size, &tls_begin_, - &tls_size); - stack_top_ = stack_bottom_ + stack_size; - tls_end_ = tls_begin_ + tls_size; - - int local; - CHECK(AddrIsInStack((uptr)&local)); - CHECK(MemIsApp(stack_bottom_)); - CHECK(MemIsApp(stack_top_ - 1)); - - if (stack_bottom_) { + int local; + CHECK(AddrIsInStack((uptr)&local)); CHECK(MemIsApp(stack_bottom_)); CHECK(MemIsApp(stack_top_ - 1)); + + if (stack_bottom_) { + CHECK(MemIsApp(stack_bottom_)); + CHECK(MemIsApp(stack_top_ - 1)); + } } + if (flags()->verbose_threads) { if (IsMainThread()) { - Printf("sizeof(Thread): %zd sizeof(RB): %zd\n", sizeof(Thread), - heap_allocations_->SizeInBytes()); + Printf("sizeof(Thread): %zd sizeof(HeapRB): %zd sizeof(StackRB): %zd\n", + sizeof(Thread), heap_allocations_->SizeInBytes(), + stack_allocations_->size() * sizeof(uptr)); } Print("Creating : "); } @@ -137,11 +88,8 @@ Print("Destroying: "); AllocatorSwallowThreadLocalCache(allocator_cache()); ClearShadowForThreadStackAndTLS(); - RemoveFromThreadList(this); - uptr size = RoundUpTo(sizeof(Thread), GetPageSizeCached()); if (heap_allocations_) heap_allocations_->Delete(); - UnmapOrDie(this, size); DTLS_Destroy(); } Index: compiler-rt/trunk/lib/hwasan/hwasan_thread_list.h =================================================================== --- compiler-rt/trunk/lib/hwasan/hwasan_thread_list.h +++ compiler-rt/trunk/lib/hwasan/hwasan_thread_list.h @@ -0,0 +1,200 @@ +//===-- hwasan_thread_list.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of HWAddressSanitizer. +// +//===----------------------------------------------------------------------===// + +// HwasanThreadList is a registry for live threads, as well as an allocator for +// Thread objects and their stack history ring buffers. There are +// constraints on memory layout of the shadow region and CompactRingBuffer that +// are part of the ABI contract between compiler-rt and llvm. +// +// * Start of the shadow memory region is aligned to 2**kShadowBaseAlignment. +// * All stack ring buffers are located within (2**kShadowBaseAlignment) +// sized region below and adjacent to the shadow region. +// * Each ring buffer has a size of (2**N)*4096 where N is in [0, 8), and is +// aligned to twice its size. The value of N can be different for each buffer. +// +// These constraints guarantee that, given an address A of any element of the +// ring buffer, +// A_next = (A + sizeof(uptr)) & ~((2**N) * 4096) +// is the address of the next element of that ring buffer (with wrap-around). +// And, with K = kShadowBaseAlignment, +// S = (A | ((1 << K) - 1)) + 1 +// (align up to kShadowBaseAlignment) is the start of the shadow region. 
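+// +// A worked example (the concrete numbers are illustrative only): let K = 32 and take a ring buffer with N = 1, i.e. 2*4096 = 0x2000 bytes, aligned to 0x4000, placed at 0x4000004000 just below a shadow base of 0x4100000000. For its last element A = 0x4000005ff8, +// A_next = (0x4000005ff8 + 8) & ~0x2000 = 0x4000004000 (wrap to the start), +// S = (0x4000005ff8 | 0xffffffff) + 1 = 0x4100000000 (the shadow base).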
+// +// These calculations are used in compiler instrumentation to update the ring +// buffer and obtain the shadow base address using only two inputs: address +// of the current element of the ring buffer, and N (i.e. size of the ring +// buffer). Since the value of N is very limited, we pack both inputs into a +// single thread-local word as +// (1 << (N + 56)) | A +// See the implementation of class CompactRingBuffer, whose state is stored in +// that thread-local word. +// +// Note the unusual way of aligning up the address of the shadow: +// (A | ((1 << K) - 1)) + 1 +// It is only correct if A is not already aligned to 2**K (guaranteed here, +// since ring buffers never start at the very bottom of the thread region), +// but it saves 2 instructions on AArch64. + +#include "hwasan.h" +#include "hwasan_allocator.h" +#include "hwasan_flags.h" +#include "hwasan_thread.h" + +#include "sanitizer_common/sanitizer_placement_new.h" + +namespace __hwasan { + +static uptr RingBufferSize() { + uptr desired_bytes = flags()->stack_history_size * sizeof(uptr); + // FIXME: increase the limit to 8 once this bug is fixed: + // https://bugs.llvm.org/show_bug.cgi?id=39030 + for (int shift = 1; shift < 7; ++shift) { + uptr size = 4096 * (1ULL << shift); + if (size >= desired_bytes) + return size; + } + Printf("stack history size too large: %d\n", flags()->stack_history_size); + CHECK(0); + return 0; +} + +struct ThreadListHead { + Thread *list_; + + ThreadListHead() : list_(nullptr) {} + + void Push(Thread *t) { + t->next_ = list_; + list_ = t; + } + + Thread *Pop() { + Thread *t = list_; + if (t) + list_ = t->next_; + return t; + } + + void Remove(Thread *t) { + Thread **cur = &list_; + while (*cur != t) cur = &(*cur)->next_; + CHECK(*cur && "thread not found"); + *cur = (*cur)->next_; + } + + template <class CB> + void ForEach(CB cb) { + Thread *t = list_; + while (t) { + cb(t); + t = t->next_; + } + } +}; + +struct ThreadStats { + uptr n_live_threads; + uptr total_stack_size; +}; + +class HwasanThreadList { + public: + HwasanThreadList(uptr storage, uptr size) + : free_space_(storage), + free_space_end_(storage + size), + ring_buffer_size_(RingBufferSize()) {} + + Thread *CreateCurrentThread() { + Thread *t; + { + SpinMutexLock l(&list_mutex_); + t = free_list_.Pop(); + if (t) + internal_memset((void *)t, 0, sizeof(Thread) + ring_buffer_size_); + else + t = AllocThread(); + live_list_.Push(t); + } + t->Init((uptr)(t + 1), ring_buffer_size_); + AddThreadStats(t); + return t; + } + + void ReleaseThread(Thread *t) { + // FIXME: madvise away the ring buffer? 
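+    // Note that the Thread object and its ring buffer are not unmapped here; the slot is pushed onto free_list_ below and re-zeroed for reuse by the next CreateCurrentThread().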
+ RemoveThreadStats(t); + t->Destroy(); + SpinMutexLock l(&list_mutex_); + live_list_.Remove(t); + free_list_.Push(t); + } + + Thread *GetThreadByBufferAddress(uptr p) { + uptr align = ring_buffer_size_ * 2; + return (Thread *)(RoundDownTo(p, align) - sizeof(Thread)); + } + + uptr MemoryUsedPerThread() { + uptr res = sizeof(Thread) + ring_buffer_size_; + if (auto sz = flags()->heap_history_size) + res += HeapAllocationsRingBuffer::SizeInBytes(sz); + return res; + } + + template <class CB> + void VisitAllLiveThreads(CB cb) { + SpinMutexLock l(&list_mutex_); + live_list_.ForEach(cb); + } + + void AddThreadStats(Thread *t) { + SpinMutexLock l(&stats_mutex_); + stats_.n_live_threads++; + stats_.total_stack_size += t->stack_size(); + } + + void RemoveThreadStats(Thread *t) { + SpinMutexLock l(&stats_mutex_); + stats_.n_live_threads--; + stats_.total_stack_size -= t->stack_size(); + } + + ThreadStats GetThreadStats() { + SpinMutexLock l(&stats_mutex_); + return stats_; + } + + private: + Thread *AllocThread() { + uptr align = ring_buffer_size_ * 2; + uptr ring_buffer_start = RoundUpTo(free_space_ + sizeof(Thread), align); + free_space_ = ring_buffer_start + ring_buffer_size_; + CHECK(free_space_ <= free_space_end_ && "out of thread memory"); + return (Thread *)(ring_buffer_start - sizeof(Thread)); + } + + uptr free_space_; + uptr free_space_end_; + uptr ring_buffer_size_; + + ThreadListHead free_list_; + ThreadListHead live_list_; + SpinMutex list_mutex_; + + ThreadStats stats_; + SpinMutex stats_mutex_; +}; + +void InitThreadList(uptr storage, uptr size); +HwasanThreadList &hwasanThreadList(); + +} // namespace __hwasan Index: compiler-rt/trunk/lib/hwasan/hwasan_thread_list.cc =================================================================== --- compiler-rt/trunk/lib/hwasan/hwasan_thread_list.cc +++ compiler-rt/trunk/lib/hwasan/hwasan_thread_list.cc @@ -0,0 +1,15 @@ +#include "hwasan_thread_list.h" + +namespace __hwasan { +static ALIGNED(16) char thread_list_placeholder[sizeof(HwasanThreadList)]; +static HwasanThreadList *hwasan_thread_list; + +HwasanThreadList &hwasanThreadList() { return *hwasan_thread_list; } + +void InitThreadList(uptr storage, uptr size) { + CHECK(hwasan_thread_list == nullptr); + hwasan_thread_list = + new (thread_list_placeholder) HwasanThreadList(storage, size); +} + +} // namespace __hwasan Index: compiler-rt/trunk/lib/sanitizer_common/sanitizer_ring_buffer.h =================================================================== --- compiler-rt/trunk/lib/sanitizer_common/sanitizer_ring_buffer.h +++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_ring_buffer.h @@ -72,12 +72,91 @@ // L: last_, always points to the last data element. // N: next_, initially equals to last_, is decremented on every push, // wraps around if it's less or equal than its own address. - T *last_; T *next_; T data_[1]; // flexible array. }; +// A ring buffer with externally provided storage that encodes its state in 8 +// bytes. Has significant constraints on size and alignment of storage. +// See a comment in hwasan/hwasan_thread_list.h for the motivation behind this. +#if SANITIZER_WORDSIZE == 64 +template <class T> +class CompactRingBuffer { + // Top byte of long_ stores the buffer size in pages. + // Lower bytes store the address of the next buffer element. 
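+  // For example (illustrative values): a 2-page (8192-byte) buffer whose next-element pointer is 0x4000004010 is encoded as long_ = (2 << 56) | 0x4000004010.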
+ static constexpr int kPageSizeBits = 12; + static constexpr int kSizeShift = 56; + static constexpr uptr kNextMask = (1ULL << kSizeShift) - 1; + + uptr GetStorageSize() const { return (long_ >> kSizeShift) << kPageSizeBits; } + + void Init(void *storage, uptr size) { + CHECK_EQ(sizeof(CompactRingBuffer), sizeof(void *)); + CHECK(IsPowerOfTwo(size)); + CHECK_GE(size, 1 << kPageSizeBits); + CHECK_LE(size, 128 << kPageSizeBits); + CHECK_EQ(size % 4096, 0); + CHECK_EQ(size % sizeof(T), 0); + CHECK_EQ((uptr)storage % (size * 2), 0); + long_ = (uptr)storage | ((size >> kPageSizeBits) << kSizeShift); + } + + void SetNext(const T *next) { + long_ = (long_ & ~kNextMask) | (uptr)next; + } + + public: + CompactRingBuffer(void *storage, uptr size) { + Init(storage, size); + } + + // A copy constructor of sorts. + CompactRingBuffer(const CompactRingBuffer &other, void *storage) { + uptr size = other.GetStorageSize(); + internal_memcpy(storage, other.StartOfStorage(), size); + Init(storage, size); + uptr Idx = other.Next() - (const T *)other.StartOfStorage(); + SetNext((const T *)storage + Idx); + } + + T *Next() const { return (T *)(long_ & kNextMask); } + + void *StartOfStorage() const { + return (void *)((uptr)Next() & ~(GetStorageSize() - 1)); + } + + void *EndOfStorage() const { + return (void *)((uptr)StartOfStorage() + GetStorageSize()); + } + + uptr size() const { return GetStorageSize() / sizeof(T); } + + void push(T t) { + T *next = Next(); + *next = t; + next++; + next = (T *)((uptr)next & ~GetStorageSize()); + SetNext(next); + } + + T operator[](uptr Idx) const { + CHECK_LT(Idx, size()); + const T *Begin = (const T *)StartOfStorage(); + sptr StorageIdx = Next() - Begin; + StorageIdx -= (sptr)(Idx + 1); + if (StorageIdx < 0) + StorageIdx += size(); + return Begin[StorageIdx]; + } + + public: + ~CompactRingBuffer() {} + CompactRingBuffer(const CompactRingBuffer &) = delete; + + uptr long_; +}; +#endif } // namespace __sanitizer #endif // SANITIZER_RING_BUFFER_H Index: compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cc =================================================================== --- compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cc +++ compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cc @@ -66,6 +66,7 @@ #undef EXPECT_RING_BUFFER } +#if SANITIZER_WORDSIZE == 64 TEST(RingBuffer, int64) { TestRB<int64_t>(); } @@ -74,4 +75,25 @@ TestRB(); } +template <class T> +CompactRingBuffer<T> *AllocCompactRingBuffer(size_t count) { + size_t sz = sizeof(T) * count; + EXPECT_EQ(0ULL, sz % 4096); + void *p = MmapAlignedOrDieOnFatalError(sz, sz * 2, "CompactRingBuffer"); + return new CompactRingBuffer<T>(p, sz); +} + +TEST(CompactRingBuffer, int64) { + const size_t page_sizes[] = {1, 2, 4, 128}; + + for (size_t pages : page_sizes) { + size_t count = 4096 * pages / sizeof(int64_t); + auto R = AllocCompactRingBuffer<int64_t>(count); + int64_t top = count * 3 + 13; + for (int64_t i = 0; i < top; ++i) R->push(i); + for (int64_t i = 0; i < (int64_t)count; ++i) + EXPECT_EQ(top - i - 1, (*R)[i]); + } +} +#endif } // namespace __sanitizer Index: compiler-rt/trunk/test/hwasan/TestCases/deep-recursion.c =================================================================== --- compiler-rt/trunk/test/hwasan/TestCases/deep-recursion.c +++ compiler-rt/trunk/test/hwasan/TestCases/deep-recursion.c @@ -0,0 +1,73 @@ +// RUN: %clang_hwasan -O1 %s -o %t +// RUN: %env_hwasan_opts=stack_history_size=1 not %run %t 2>&1 | FileCheck %s --check-prefix=D1 +// RUN: 
%env_hwasan_opts=stack_history_size=2 not %run %t 2>&1 | FileCheck %s --check-prefix=D2 +// RUN: %env_hwasan_opts=stack_history_size=3 not %run %t 2>&1 | FileCheck %s --check-prefix=D3 +// RUN: %env_hwasan_opts=stack_history_size=5 not %run %t 2>&1 | FileCheck %s --check-prefix=D5 +// RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=DEFAULT + +// REQUIRES: stable-runtime + +#include <stdlib.h> +// At least -O1 is needed for this function to not have a stack frame on +// AArch64. +void USE(void *x) { // pretend_to_do_something(void *x) + __asm__ __volatile__("" : : "r" (x) : "memory"); +} + +volatile int four = 4; + +__attribute__((noinline)) void OOB() { int x[4]; x[four] = 0; USE(&x[0]); } +__attribute__((noinline)) void FUNC1() { int x; USE(&x); OOB(); } +__attribute__((noinline)) void FUNC2() { int x; USE(&x); FUNC1(); } +__attribute__((noinline)) void FUNC3() { int x; USE(&x); FUNC2(); } +__attribute__((noinline)) void FUNC4() { int x; USE(&x); FUNC3(); } +__attribute__((noinline)) void FUNC5() { int x; USE(&x); FUNC4(); } +__attribute__((noinline)) void FUNC6() { int x; USE(&x); FUNC5(); } +__attribute__((noinline)) void FUNC7() { int x; USE(&x); FUNC6(); } +__attribute__((noinline)) void FUNC8() { int x; USE(&x); FUNC7(); } +__attribute__((noinline)) void FUNC9() { int x; USE(&x); FUNC8(); } +__attribute__((noinline)) void FUNC10() { int x; USE(&x); FUNC9(); } + +int main() { FUNC10(); } + +// D1: Previously allocated frames +// D1: in OOB +// D1-NOT: in FUNC +// D1: Memory tags around the buggy address + +// D2: Previously allocated frames +// D2: in OOB +// D2: in FUNC1 +// D2-NOT: in FUNC +// D2: Memory tags around the buggy address + +// D3: Previously allocated frames +// D3: in OOB +// D3: in FUNC1 +// D3: in FUNC2 +// D3-NOT: in FUNC +// D3: Memory tags around the buggy address + +// D5: Previously allocated frames +// D5: in OOB +// D5: in FUNC1 +// D5: in FUNC2 +// D5: in FUNC3 +// D5: in FUNC4 +// D5-NOT: in FUNC +// D5: Memory tags around the buggy address + +// DEFAULT: Previously allocated frames +// DEFAULT: in OOB +// DEFAULT: in FUNC1 +// DEFAULT: in FUNC2 +// DEFAULT: in FUNC3 +// DEFAULT: in FUNC4 +// DEFAULT: in FUNC5 +// DEFAULT: in FUNC6 +// DEFAULT: in FUNC7 +// DEFAULT: in FUNC8 +// DEFAULT: in FUNC9 +// DEFAULT: in FUNC10 +// DEFAULT-NOT: in FUNC +// DEFAULT: Memory tags around the buggy address Index: compiler-rt/trunk/test/hwasan/TestCases/rich-stack.c =================================================================== --- compiler-rt/trunk/test/hwasan/TestCases/rich-stack.c +++ compiler-rt/trunk/test/hwasan/TestCases/rich-stack.c @@ -0,0 +1,66 @@ +// Test how stack frames are reported (not fully implemented yet). +// RUN: %clang_hwasan %s -o %t +// RUN: not %run %t 3 2 -1 2>&1 | FileCheck %s --check-prefix=R321 +// REQUIRES: stable-runtime +#include <stdint.h> +#include <stdlib.h> +void USE(void *x) { // pretend_to_do_something(void *x) + __asm__ __volatile__("" : : "r" (x) : "memory"); +} +void USE2(void *a, void *b) { USE(a); USE(b); } +void USE4(void *a, void *b, void *c, void *d) { USE2(a, b); USE2(c, d); } + +void BAR(int depth, int err_depth, int offset); + +uint64_t *leaked_ptr; + +void FOO(int depth, int err_depth, int offset) { + uint8_t v1; + uint16_t v2; + uint32_t v4; + uint64_t v8; + uint64_t v16[2]; + uint64_t v32[4]; + uint64_t v48[3]; + USE4(&v1, &v2, &v4, &v8); USE4(&v16, &v32, &v48, 0); + leaked_ptr = &v16[0]; + if (depth) + BAR(depth - 1, err_depth, offset); + + if (err_depth == depth) + v16[offset] = 0; // maybe OOB. 
+ if (err_depth == -depth) + leaked_ptr[offset] = 0; // maybe UAR. + USE(&v16); +} + +void BAR(int depth, int err_depth, int offset) { + uint64_t x16[2]; + uint64_t x32[4]; + USE2(&x16, &x32); + leaked_ptr = &x16[0]; + if (depth) + FOO(depth - 1, err_depth, offset); + if (err_depth == depth) + x16[offset] = 0; // maybe OOB + if (err_depth == -depth) + leaked_ptr[offset] = 0; // maybe UAR + USE(&x16); +} + + +int main(int argc, char **argv) { + if (argc != 4) return -1; + int depth = atoi(argv[1]); + int err_depth = atoi(argv[2]); + int offset = atoi(argv[3]); + FOO(depth, err_depth, offset); + return 0; +} + +// R321: HWAddressSanitizer: tag-mismatch +// R321-NEXT: WRITE of size 8 +// R321-NEXT: in BAR +// R321-NEXT: in FOO +// R321-NEXT: in main +// R321: is located in stack of thread T0 Index: compiler-rt/trunk/test/hwasan/TestCases/stack-history-length.c =================================================================== --- compiler-rt/trunk/test/hwasan/TestCases/stack-history-length.c +++ compiler-rt/trunk/test/hwasan/TestCases/stack-history-length.c @@ -0,0 +1,36 @@ +// RUN: %clang_hwasan -O1 -DX=2046 %s -o %t.2046 +// RUN: %clang_hwasan -O1 -DX=2047 %s -o %t.2047 +// RUN: %env_hwasan_opts=stack_history_size=2048 not %run %t.2046 2>&1 | FileCheck %s --check-prefix=YES +// RUN: %env_hwasan_opts=stack_history_size=2048 not %run %t.2047 2>&1 | FileCheck %s --check-prefix=NO + +// REQUIRES: stable-runtime + +#include <stdlib.h> + +void USE(void *x) { // pretend_to_do_something(void *x) + __asm__ __volatile__("" : : "r" (x) : "memory"); +} + +volatile int four = 4; +__attribute__((noinline)) void FUNC0() { int x[4]; USE(&x[0]); } +__attribute__((noinline)) void FUNC() { int x[4]; USE(&x[0]); } +__attribute__((noinline)) void OOB() { int x[4]; x[four] = 0; USE(&x[0]); } + +int main() { + // FUNC0 is the (X+2)-th record in the ring buffer. + // If the runtime ring buffer is shorter than that, the FUNC0 record is lost. 
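+  // E.g. with -DX=2046 the trace is FUNC0 + 2046*FUNC + OOB = 2048 records, which just fits into stack_history_size=2048 (the YES case); with -DX=2047 it is 2049 records, so the oldest one, FUNC0, is overwritten (the NO case).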
+ FUNC0(); + for (int i = 0; i < X; ++i) + FUNC(); + OOB(); +} + +// YES: Previously allocated frames +// YES: OOB +// YES: FUNC +// YES: FUNC0 + +// NO: Previously allocated frames +// NO: OOB +// NO: FUNC +// NO-NOT: FUNC0 Index: llvm/trunk/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ llvm/trunk/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -63,6 +63,8 @@ std::numeric_limits<uint64_t>::max(); static const unsigned kPointerTagShift = 56; +static const unsigned kShadowBaseAlignment = 32; + static cl::opt<std::string> ClMemoryAccessCallbackPrefix( "hwasan-memory-access-callback-prefix", cl::desc("Prefix for memory access callbacks"), cl::Hidden, @@ -132,6 +134,18 @@ cl::desc("Access dynamic shadow through an ifunc global on " "platforms that support this"), cl::Hidden, cl::init(false)); + +static cl::opt<bool> ClWithTls( + "hwasan-with-tls", + cl::desc("Access dynamic shadow through a thread-local pointer on " + "platforms that support this"), + cl::Hidden, cl::init(true)); + +static cl::opt<bool> + ClRecordStackHistory("hwasan-record-stack-history", + cl::desc("Record stack frames with tagged allocations " + "in a thread-local ring buffer"), + cl::Hidden, cl::init(true)); namespace { /// An instrumentation pass implementing detection of addressability bugs @@ -155,7 +169,7 @@ void initializeCallbacks(Module &M); - void maybeInsertDynamicShadowAtFunctionEntry(Function &F); + Value *getDynamicShadowNonTls(IRBuilder<> &IRB); void untagPointerOperand(Instruction *I, Value *Addr); Value *memToShadow(Value *Shadow, Type *Ty, IRBuilder<> &IRB); @@ -172,13 +186,16 @@ Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag); Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong); bool instrumentStack(SmallVectorImpl<AllocaInst *> &Allocas, - SmallVectorImpl<Instruction *> &RetVec); + SmallVectorImpl<Instruction *> &RetVec, Value *StackTag); Value *getNextTagWithCall(IRBuilder<> &IRB); Value *getStackBaseTag(IRBuilder<> &IRB); Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI, unsigned AllocaNo); Value *getUARTag(IRBuilder<> &IRB, Value *StackTag); + Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty); + Value *emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord); + private: LLVMContext *C; Triple TargetTriple; @@ -188,10 +205,14 @@ /// If InGlobal is true, then /// extern char __hwasan_shadow[]; /// shadow = (mem >> Scale) + &__hwasan_shadow + /// If InTls is true, then + /// extern char *__hwasan_tls; + /// shadow = (mem >> Scale) + align_up(__hwasan_tls, kShadowBaseAlignment) struct ShadowMapping { int Scale; uint64_t Offset; bool InGlobal; + bool InTls; void init(Triple &TargetTriple); unsigned getAllocaAlignment() const { return 1U << Scale; } @@ -216,6 +237,7 @@ Constant *ShadowGlobal; Value *LocalDynamicShadow = nullptr; + GlobalValue *ThreadPtrGlobal = nullptr; }; } // end anonymous namespace @@ -263,6 +285,12 @@ /*InitArgs=*/{}); appendToGlobalCtors(M, HwasanCtorFunction, 0); } + if (!TargetTriple.isAndroid()) + appendToCompilerUsed( + M, ThreadPtrGlobal = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalLinkage, nullptr, + "__hwasan_tls", nullptr, GlobalVariable::InitialExecTLSModel)); + return true; } @@ -297,12 +325,11 @@ ArrayType::get(IRB.getInt8Ty(), 0)); } -void HWAddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) { +Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) { // Generate code only when dynamic 
addressing is needed. if (Mapping.Offset != kDynamicShadowSentinel) - return; + return nullptr; - IRBuilder<> IRB(&F.front().front()); if (Mapping.InGlobal) { // An empty inline asm with input reg == output reg. // An opaque pointer-to-int cast, basically. @@ -310,11 +337,12 @@ FunctionType::get(IntptrTy, {ShadowGlobal->getType()}, false), StringRef(""), StringRef("=r,0"), /*hasSideEffects=*/false); - LocalDynamicShadow = IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow"); + return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow"); } else { - Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal( - kHwasanShadowMemoryDynamicAddress, IntptrTy); - LocalDynamicShadow = IRB.CreateLoad(GlobalDynamicAddress); + Value *GlobalDynamicAddress = + IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal( + kHwasanShadowMemoryDynamicAddress, IntptrTy); + return IRB.CreateLoad(GlobalDynamicAddress); } } @@ -563,7 +591,7 @@ Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) { if (ClGenerateTagsWithCalls) - return nullptr; + return getNextTagWithCall(IRB); // FIXME: use addressofreturnaddress (but implement it in aarch64 backend // first). Module *M = IRB.GetInsertBlock()->getParent()->getParent(); @@ -631,15 +659,88 @@ return UntaggedPtrLong; } +Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) { + Module *M = IRB.GetInsertBlock()->getParent()->getParent(); + if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) { + Function *ThreadPointerFunc = + Intrinsic::getDeclaration(M, Intrinsic::thread_pointer); + Value *SlotPtr = IRB.CreatePointerCast( + IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), 0x40), + Ty->getPointerTo(0)); + return SlotPtr; + } + if (ThreadPtrGlobal) + return ThreadPtrGlobal; + + + return nullptr; +} + +Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, + bool WithFrameRecord) { + if (!Mapping.InTls) + return getDynamicShadowNonTls(IRB); + + Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy); + assert(SlotPtr); + + Value *ThreadLong = IRB.CreateLoad(SlotPtr); + // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI. + Value *ThreadLongMaybeUntagged = + TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong); + + if (WithFrameRecord) { + // Prepare ring buffer data. + Function *F = IRB.GetInsertBlock()->getParent(); + auto PC = IRB.CreatePtrToInt(F, IntptrTy); + auto GetStackPointerFn = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress); + Value *SP = IRB.CreatePtrToInt( + IRB.CreateCall(GetStackPointerFn, + {Constant::getNullValue(IRB.getInt32Ty())}), + IntptrTy); + // Mix SP and PC. TODO: also add the tag to the mix. + // Assumptions: + // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero) + // SP is 0xsssssssssssSSSS0 (4 lower bits are zero) + // We only really need ~20 lower non-zero bits (SSSS), so we mix like this: + // 0xSSSSPPPPPPPPPPPP + SP = IRB.CreateShl(SP, 44); + + // Store data to ring buffer. + Value *RecordPtr = + IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0)); + IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr); + + // Update the ring buffer. Top byte of ThreadLong defines the size of the + // buffer in pages, it must be a power of two, and the start of the buffer + // must be aligned by twice that much. Therefore wrap around of the ring + // buffer is simply Addr &= ~((ThreadLong >> 56) << 12). 
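+    // Worked example (illustrative numbers): for a 2-page buffer the top byte of ThreadLong is 2, so the mask is ~(2 << 12) = ~0x2000; once Addr + 8 reaches the buffer start plus 0x2000, the AND wraps it back to the start.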
+ // The use of AShr instead of LShr is due to + // https://bugs.llvm.org/show_bug.cgi?id=39030 + // Runtime library makes sure not to use the highest bit. + Value *WrapMask = IRB.CreateXor( + IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true), + ConstantInt::get(IntptrTy, (uint64_t)-1)); + Value *ThreadLongNew = IRB.CreateAnd( + IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask); + IRB.CreateStore(ThreadLongNew, SlotPtr); + } + + // Get shadow base address by aligning RecordPtr up. + // Note: this is not correct if the pointer is already aligned. + // Runtime library will make sure this never happens. + Value *ShadowBase = IRB.CreateAdd( + IRB.CreateOr( + ThreadLongMaybeUntagged, + ConstantInt::get(IntptrTy, (1UL << kShadowBaseAlignment) - 1)), + ConstantInt::get(IntptrTy, 1), "hwasan.shadow"); + return ShadowBase; +} + bool HWAddressSanitizer::instrumentStack( SmallVectorImpl<AllocaInst *> &Allocas, - SmallVectorImpl<Instruction *> &RetVec) { - Function *F = Allocas[0]->getParent()->getParent(); - Instruction *InsertPt = &*F->getEntryBlock().begin(); - IRBuilder<> IRB(InsertPt); - - Value *StackTag = getStackBaseTag(IRB); - + SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) { // Ideally, we want to calculate tagged stack base pointer, and rewrite all // alloca addresses using that. Unfortunately, offsets are not known yet // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a @@ -647,7 +748,7 @@ // This generates one extra instruction per alloca use. for (unsigned N = 0; N < Allocas.size(); ++N) { auto *AI = Allocas[N]; - IRB.SetInsertPoint(AI->getNextNode()); + IRBuilder<> IRB(AI->getNextNode()); // Replace uses of the alloca with tagged address. Value *Tag = getAllocaTag(IRB, StackTag, AI, N); @@ -702,12 +803,6 @@ LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n"); - initializeCallbacks(*F.getParent()); - - assert(!LocalDynamicShadow); - maybeInsertDynamicShadowAtFunctionEntry(F); - - bool Changed = false; SmallVector<Instruction *, 16> ToInstrument; SmallVector<AllocaInst *, 8> AllocasToInstrument; SmallVector<Instruction *, 8> RetVec; @@ -740,8 +835,25 @@ } } - if (!AllocasToInstrument.empty()) - Changed |= instrumentStack(AllocasToInstrument, RetVec); + if (AllocasToInstrument.empty() && ToInstrument.empty()) + return false; + + initializeCallbacks(*F.getParent()); + + assert(!LocalDynamicShadow); + + Instruction *InsertPt = &*F.getEntryBlock().begin(); + IRBuilder<> EntryIRB(InsertPt); + LocalDynamicShadow = emitPrologue(EntryIRB, + /*WithFrameRecord*/ ClRecordStackHistory && + !AllocasToInstrument.empty()); + + bool Changed = false; + if (!AllocasToInstrument.empty()) { + Value *StackTag = + ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB); + Changed |= instrumentStack(AllocasToInstrument, RetVec, StackTag); + } for (auto Inst : ToInstrument) Changed |= instrumentMemAccess(Inst); @@ -752,26 +864,26 @@ } void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) { - const bool IsAndroid = TargetTriple.isAndroid(); - const bool IsAndroidWithIfuncSupport = - IsAndroid && !TargetTriple.isAndroidVersionLT(21); - Scale = kDefaultShadowScale; - const bool WithIfunc = ClWithIfunc.getNumOccurrences() > 0 - ? 
ClWithIfunc - : IsAndroidWithIfuncSupport; - if (ClMappingOffset.getNumOccurrences() > 0) { InGlobal = false; + InTls = false; Offset = ClMappingOffset; } else if (ClEnableKhwasan || ClInstrumentWithCalls) { InGlobal = false; + InTls = false; Offset = 0; - } else if (WithIfunc) { + } else if (ClWithIfunc) { InGlobal = true; + InTls = false; + Offset = kDynamicShadowSentinel; + } else if (ClWithTls) { + InGlobal = false; + InTls = true; Offset = kDynamicShadowSentinel; } else { InGlobal = false; + InTls = false; Offset = kDynamicShadowSentinel; } } Index: llvm/trunk/test/Instrumentation/HWAddressSanitizer/alloca.ll =================================================================== --- llvm/trunk/test/Instrumentation/HWAddressSanitizer/alloca.ll +++ llvm/trunk/test/Instrumentation/HWAddressSanitizer/alloca.ll @@ -1,8 +1,8 @@ ; Test alloca instrumentation. ; -; RUN: opt < %s -hwasan -S | FileCheck %s --check-prefixes=CHECK,DYNAMIC-SHADOW,NO-UAR-TAGS +; RUN: opt < %s -hwasan -hwasan-with-ifunc=1 -S | FileCheck %s --check-prefixes=CHECK,DYNAMIC-SHADOW,NO-UAR-TAGS ; RUN: opt < %s -hwasan -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=CHECK,ZERO-BASED-SHADOW,NO-UAR-TAGS -; RUN: opt < %s -hwasan -hwasan-uar-retag-to-zero=0 -S | FileCheck %s --check-prefixes=CHECK,DYNAMIC-SHADOW,UAR-TAGS +; RUN: opt < %s -hwasan -hwasan-with-ifunc=1 -hwasan-uar-retag-to-zero=0 -S | FileCheck %s --check-prefixes=CHECK,DYNAMIC-SHADOW,UAR-TAGS target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android" Index: llvm/trunk/test/Instrumentation/HWAddressSanitizer/basic.ll =================================================================== --- llvm/trunk/test/Instrumentation/HWAddressSanitizer/basic.ll +++ llvm/trunk/test/Instrumentation/HWAddressSanitizer/basic.ll @@ -1,7 +1,7 @@ ; Test basic address sanitizer instrumentation. ; -; RUN: opt < %s -hwasan -hwasan-recover=0 -S | FileCheck %s --check-prefixes=CHECK,ABORT,DYNAMIC-SHADOW -; RUN: opt < %s -hwasan -hwasan-recover=1 -S | FileCheck %s --check-prefixes=CHECK,RECOVER,DYNAMIC-SHADOW +; RUN: opt < %s -hwasan -hwasan-recover=0 -hwasan-with-ifunc=1 -hwasan-with-tls=0 -S | FileCheck %s --check-prefixes=CHECK,ABORT,DYNAMIC-SHADOW +; RUN: opt < %s -hwasan -hwasan-recover=1 -hwasan-with-ifunc=1 -hwasan-with-tls=0 -S | FileCheck %s --check-prefixes=CHECK,RECOVER,DYNAMIC-SHADOW ; RUN: opt < %s -hwasan -hwasan-recover=0 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=CHECK,ABORT,ZERO-BASED-SHADOW ; RUN: opt < %s -hwasan -hwasan-recover=1 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=CHECK,RECOVER,ZERO-BASED-SHADOW @@ -342,7 +342,6 @@ define i8 @test_load_addrspace(i8 addrspace(256)* %a) sanitize_hwaddress { ; CHECK-LABEL: @test_load_addrspace( ; CHECK-NEXT: entry: -; DYNAMIC-SHADOW: %.hwasan.shadow = call i64 asm "", "=r,0"([0 x i8]* @__hwasan_shadow) ; CHECK-NEXT: %[[B:[^ ]*]] = load i8, i8 addrspace(256)* %a ; CHECK-NEXT: ret i8 %[[B]] Index: llvm/trunk/test/Instrumentation/HWAddressSanitizer/prologue.ll =================================================================== --- llvm/trunk/test/Instrumentation/HWAddressSanitizer/prologue.ll +++ llvm/trunk/test/Instrumentation/HWAddressSanitizer/prologue.ll @@ -0,0 +1,88 @@ +; Test -hwasan-with-ifunc flag. 
+; +; RUN: opt -hwasan -S < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-TLS,CHECK-HISTORY +; RUN: opt -hwasan -S -hwasan-with-ifunc=0 -hwasan-with-tls=1 -hwasan-record-stack-history=1 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-TLS,CHECK-HISTORY +; RUN: opt -hwasan -S -hwasan-with-ifunc=0 -hwasan-with-tls=1 -hwasan-record-stack-history=0 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-TLS,CHECK-NOHISTORY +; RUN: opt -hwasan -S -hwasan-with-ifunc=0 -hwasan-with-tls=0 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-GLOBAL,CHECK-NOHISTORY +; RUN: opt -hwasan -S -hwasan-with-ifunc=1 -hwasan-with-tls=0 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC,CHECK-NOHISTORY + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android22" + +; CHECK-IFUNC: @__hwasan_shadow = external global [0 x i8] +; CHECK-GLOBAL: @__hwasan_shadow_memory_dynamic_address = external global i64 + +define i32 @test_load(i32* %a) sanitize_hwaddress { +; First instrumentation in the function must be to load the dynamic shadow +; address into a local variable. +; CHECK-LABEL: @test_load +; CHECK: entry: + +; CHECK-IFUNC: %[[A:[^ ]*]] = call i64 asm "", "=r,0"([0 x i8]* @__hwasan_shadow) +; CHECK-IFUNC: add i64 %{{.*}}, %[[A]] + +; CHECK-GLOBAL: load i64, i64* @__hwasan_shadow_memory_dynamic_address + +; CHECK-TLS: %[[A:[^ ]*]] = call i8* @llvm.thread.pointer() +; CHECK-TLS: %[[B:[^ ]*]] = getelementptr i8, i8* %[[A]], i32 64 +; CHECK-TLS: %[[C:[^ ]*]] = bitcast i8* %[[B]] to i64* +; CHECK-TLS: %[[D:[^ ]*]] = load i64, i64* %[[C]] +; CHECK-TLS: %[[E:[^ ]*]] = or i64 %[[D]], 4294967295 +; CHECK-TLS: = add i64 %[[E]], 1 + +; "store i64" is only used to update stack history (this input IR intentionally does not use any i64) +; Without any allocas, the history is not updated, even if it is enabled explicitly with -hwasan-record-stack-history=1 +; CHECK-NOT: store i64 + +; CHECK: ret i32 + +entry: + %x = load i32, i32* %a, align 4 + ret i32 %x +} + +declare void @use(i32* %p) + +define void @test_alloca() sanitize_hwaddress { +; First instrumentation in the function must be to load the dynamic shadow +; address into a local variable. 
+; CHECK-LABEL: @test_alloca +; CHECK: entry: + +; CHECK-IFUNC: %[[A:[^ ]*]] = call i64 asm "", "=r,0"([0 x i8]* @__hwasan_shadow) +; CHECK-IFUNC: add i64 %{{.*}}, %[[A]] + +; CHECK-GLOBAL: load i64, i64* @__hwasan_shadow_memory_dynamic_address + +; CHECK-TLS: %[[A:[^ ]*]] = call i8* @llvm.thread.pointer() +; CHECK-TLS: %[[B:[^ ]*]] = getelementptr i8, i8* %[[A]], i32 64 +; CHECK-TLS: %[[C:[^ ]*]] = bitcast i8* %[[B]] to i64* +; CHECK-TLS: %[[D:[^ ]*]] = load i64, i64* %[[C]] + +; CHECK-NOHISTORY-NOT: store i64 + +; CHECK-HISTORY: %[[PTR:[^ ]*]] = inttoptr i64 %[[D]] to i64* +; CHECK-HISTORY: store i64 %{{.*}}, i64* %[[PTR]] +; CHECK-HISTORY: %[[D1:[^ ]*]] = ashr i64 %[[D]], 56 +; CHECK-HISTORY: %[[D2:[^ ]*]] = shl nuw nsw i64 %[[D1]], 12 +; CHECK-HISTORY: %[[D3:[^ ]*]] = xor i64 %[[D2]], -1 +; CHECK-HISTORY: %[[D4:[^ ]*]] = add i64 %[[D]], 8 +; CHECK-HISTORY: %[[D5:[^ ]*]] = and i64 %[[D4]], %[[D3]] +; CHECK-HISTORY: store i64 %[[D5]], i64* %[[C]] + +; CHECK-TLS: %[[E:[^ ]*]] = or i64 %[[D]], 4294967295 +; CHECK-TLS: = add i64 %[[E]], 1 + +; CHECK-NOHISTORY-NOT: store i64 + + +entry: + %x = alloca i32, align 4 + call void @use(i32* %x) + ret void +} Index: llvm/trunk/test/Instrumentation/HWAddressSanitizer/with-ifunc.ll =================================================================== --- llvm/trunk/test/Instrumentation/HWAddressSanitizer/with-ifunc.ll +++ llvm/trunk/test/Instrumentation/HWAddressSanitizer/with-ifunc.ll @@ -1,30 +0,0 @@ -; Test -hwasan-with-ifunc flag. -; -; RUN: opt -hwasan -S < %s | \ -; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC -; RUN: opt -hwasan -S -hwasan-with-ifunc=0 < %s | \ -; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC -; RUN: opt -hwasan -S -hwasan-with-ifunc=1 < %s | \ -; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC - -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-android22" - -; CHECK-IFUNC: @__hwasan_shadow = external global [0 x i8] -; CHECK-NOIFUNC: @__hwasan_shadow_memory_dynamic_address = external global i64 - -define i32 @test_load(i32* %a) sanitize_hwaddress { -; First instrumentation in the function must be to load the dynamic shadow -; address into a local variable. -; CHECK-LABEL: @test_load -; CHECK: entry: - -; CHECK-IFUNC: %[[A:[^ ]*]] = call i64 asm "", "=r,0"([0 x i8]* @__hwasan_shadow) -; CHECK-IFUNC: add i64 %{{.*}}, %[[A]] - -; CHECK-NOIFUNC: load i64, i64* @__hwasan_shadow_memory_dynamic_address - -entry: - %x = load i32, i32* %a, align 4 - ret i32 %x -}