diff --git a/compiler-rt/lib/asan/asan_rtl.cpp b/compiler-rt/lib/asan/asan_rtl.cpp --- a/compiler-rt/lib/asan/asan_rtl.cpp +++ b/compiler-rt/lib/asan/asan_rtl.cpp @@ -490,9 +490,6 @@ if (flags()->start_deactivated) AsanDeactivate(); - // interceptors - InitTlsSize(); - // Create main thread. AsanThread *main_thread = CreateMainThread(); CHECK_EQ(0, main_thread->tid()); @@ -568,7 +565,7 @@ type, top, bottom, top - bottom, top - bottom); return; } - PoisonShadow(bottom, top - bottom, 0); + PoisonShadow(bottom, RoundUpTo(top - bottom, SHADOW_GRANULARITY), 0); } static void UnpoisonDefaultStack() { diff --git a/compiler-rt/lib/asan/asan_thread.cpp b/compiler-rt/lib/asan/asan_thread.cpp --- a/compiler-rt/lib/asan/asan_thread.cpp +++ b/compiler-rt/lib/asan/asan_thread.cpp @@ -307,7 +307,7 @@ uptr stack_size = 0; GetThreadStackAndTls(tid() == 0, &stack_bottom_, &stack_size, &tls_begin_, &tls_size); - stack_top_ = stack_bottom_ + stack_size; + stack_top_ = RoundDownTo(stack_bottom_ + stack_size, SHADOW_GRANULARITY); tls_end_ = tls_begin_ + tls_size; dtls_ = DTLS_Get(); diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp --- a/compiler-rt/lib/hwasan/hwasan.cpp +++ b/compiler-rt/lib/hwasan/hwasan.cpp @@ -265,8 +265,6 @@ hwasan_init_is_running = 1; SanitizerToolName = "HWAddressSanitizer"; - InitTlsSize(); - CacheBinaryName(); InitializeFlags(); diff --git a/compiler-rt/lib/lsan/lsan.cpp b/compiler-rt/lib/lsan/lsan.cpp --- a/compiler-rt/lib/lsan/lsan.cpp +++ b/compiler-rt/lib/lsan/lsan.cpp @@ -98,7 +98,6 @@ InitCommonLsan(); InitializeAllocator(); ReplaceSystemMalloc(); - InitTlsSize(); InitializeInterceptors(); InitializeThreadRegistry(); InstallDeadlySignalHandlers(LsanOnDeadlySignal); diff --git a/compiler-rt/lib/memprof/memprof_rtl.cpp b/compiler-rt/lib/memprof/memprof_rtl.cpp --- a/compiler-rt/lib/memprof/memprof_rtl.cpp +++ b/compiler-rt/lib/memprof/memprof_rtl.cpp @@ -214,9 +214,6 @@ InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir); - // interceptors - InitTlsSize(); - // Create main thread. MemprofThread *main_thread = CreateMainThread(); CHECK_EQ(0, main_thread->tid()); diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -436,7 +436,6 @@ InitializeInterceptors(); CheckASLR(); - InitTlsSize(); InstallDeadlySignalHandlers(MsanOnDeadlySignal); InstallAtExitHandler(); // Needs __cxa_atexit interceptor. diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -284,7 +284,6 @@ void InitializeCoverage(bool enabled, const char *coverage_dir); -void InitTlsSize(); uptr GetTlsSize(); // Other diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp --- a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp @@ -103,7 +103,6 @@ void InstallDeadlySignalHandlers(SignalHandlerType handler) {} void SetAlternateSignalStack() {} void UnsetAlternateSignalStack() {} -void InitTlsSize() {} bool SignalContext::IsStackOverflow() const { return false; } void SignalContext::DumpAllRegisters(void *context) { UNIMPLEMENTED(); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h @@ -98,7 +98,6 @@ // Exposed for testing. uptr ThreadDescriptorSize(); uptr ThreadSelf(); -uptr ThreadSelfOffset(); // Matches a library's file name against a base name (stripping path and version // information). diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -188,80 +188,8 @@ #endif } -#if SANITIZER_GLIBC && !SANITIZER_GO -static uptr g_tls_size; - -#ifdef __i386__ -#define CHECK_GET_TLS_STATIC_INFO_VERSION (!__GLIBC_PREREQ(2, 27)) -#else -#define CHECK_GET_TLS_STATIC_INFO_VERSION 0 -#endif - -#if CHECK_GET_TLS_STATIC_INFO_VERSION -#define DL_INTERNAL_FUNCTION __attribute__((regparm(3), stdcall)) -#else -#define DL_INTERNAL_FUNCTION -#endif - -namespace { -struct GetTlsStaticInfoCall { - typedef void (*get_tls_func)(size_t*, size_t*); -}; -struct GetTlsStaticInfoRegparmCall { - typedef void (*get_tls_func)(size_t*, size_t*) DL_INTERNAL_FUNCTION; -}; - -template -void CallGetTls(void* ptr, size_t* size, size_t* align) { - typename T::get_tls_func get_tls; - CHECK_EQ(sizeof(get_tls), sizeof(ptr)); - internal_memcpy(&get_tls, &ptr, sizeof(ptr)); - CHECK_NE(get_tls, 0); - get_tls(size, align); -} - -bool CmpLibcVersion(int major, int minor, int patch) { - int ma; - int mi; - int pa; - if (!GetLibcVersion(&ma, &mi, &pa)) - return false; - if (ma > major) - return true; - if (ma < major) - return false; - if (mi > minor) - return true; - if (mi < minor) - return false; - return pa >= patch; -} - -} // namespace - -void InitTlsSize() { - // all current supported platforms have 16 bytes stack alignment - const size_t kStackAlign = 16; - void *get_tls_static_info_ptr = dlsym(RTLD_NEXT, "_dl_get_tls_static_info"); - size_t tls_size = 0; - size_t tls_align = 0; - // On i?86, _dl_get_tls_static_info used to be internal_function, i.e. - // __attribute__((regparm(3), stdcall)) before glibc 2.27 and is normal - // function in 2.27 and later. - if (CHECK_GET_TLS_STATIC_INFO_VERSION && !CmpLibcVersion(2, 27, 0)) - CallGetTls(get_tls_static_info_ptr, - &tls_size, &tls_align); - else - CallGetTls(get_tls_static_info_ptr, - &tls_size, &tls_align); - if (tls_align < kStackAlign) - tls_align = kStackAlign; - g_tls_size = RoundUpTo(tls_size, tls_align); -} -#else -void InitTlsSize() { } -#endif // SANITIZER_GLIBC && !SANITIZER_GO - +// ThreadDescriptorSize() is only used by lsan to get the pointer to +// thread-specific data keys in the thread control block. #if (defined(__x86_64__) || defined(__i386__) || defined(__mips__) || \ defined(__aarch64__) || defined(__powerpc64__) || defined(__s390__) || \ defined(__arm__) || SANITIZER_RISCV64) && \ @@ -334,13 +262,6 @@ return val; } -// The offset at which pointer to self is located in the thread descriptor. -const uptr kThreadSelfOffset = FIRST_32_SECOND_64(8, 16); - -uptr ThreadSelfOffset() { - return kThreadSelfOffset; -} - #if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 // TlsPreTcbSize includes size of struct pthread_descr and size of tcb // head structure. It lies before the static tls blocks. @@ -359,48 +280,73 @@ } #endif -uptr ThreadSelf() { - uptr descr_addr; -#if defined(__i386__) - asm("mov %%gs:%c1,%0" : "=r"(descr_addr) : "i"(kThreadSelfOffset)); -#elif defined(__x86_64__) - asm("mov %%fs:%c1,%0" : "=r"(descr_addr) : "i"(kThreadSelfOffset)); -#elif defined(__mips__) - // MIPS uses TLS variant I. The thread pointer (in hardware register $29) - // points to the end of the TCB + 0x7000. The pthread_descr structure is - // immediately in front of the TCB. TlsPreTcbSize() includes the size of the - // TCB and the size of pthread_descr. - const uptr kTlsTcbOffset = 0x7000; - uptr thread_pointer; - asm volatile(".set push;\ - .set mips64r2;\ - rdhwr %0,$29;\ - .set pop" : "=r" (thread_pointer)); - descr_addr = thread_pointer - kTlsTcbOffset - TlsPreTcbSize(); -#elif defined(__aarch64__) || defined(__arm__) - descr_addr = reinterpret_cast(__builtin_thread_pointer()) - - ThreadDescriptorSize(); -#elif SANITIZER_RISCV64 - // https://github.com/riscv/riscv-elf-psabi-doc/issues/53 - uptr thread_pointer = reinterpret_cast(__builtin_thread_pointer()); - descr_addr = thread_pointer - TlsPreTcbSize(); -#elif defined(__s390__) - descr_addr = reinterpret_cast(__builtin_thread_pointer()); -#elif defined(__powerpc64__) - // PPC64LE uses TLS variant I. The thread pointer (in GPR 13) - // points to the end of the TCB + 0x7000. The pthread_descr structure is - // immediately in front of the TCB. TlsPreTcbSize() includes the size of the - // TCB and the size of pthread_descr. - const uptr kTlsTcbOffset = 0x7000; - uptr thread_pointer; - asm("addi %0,13,%1" : "=r"(thread_pointer) : "I"(-kTlsTcbOffset)); - descr_addr = thread_pointer - TlsPreTcbSize(); -#else -#error "unsupported CPU arch" -#endif - return descr_addr; +#if !SANITIZER_GO +namespace { +struct TlsBlock { + uptr begin, end, align; + size_t tls_modid; + bool operator<(const TlsBlock &rhs) const { return begin < rhs.begin; } +}; +} // namespace + +extern "C" void *__tls_get_addr(size_t *); + +static int TouchTlsBlock(struct dl_phdr_info *info, size_t size, + void *data) { + size_t mod_and_off[2] = {info->dlpi_tls_modid, 0}; + if (mod_and_off[0] != 0) + __tls_get_addr(mod_and_off); + return 0; } -#endif // (x86_64 || i386 || MIPS) && SANITIZER_LINUX + +static int CollectStaticTlsBlocks(struct dl_phdr_info *info, size_t size, + void *data) { + if (!info->dlpi_tls_data) + return 0; + const uptr begin = (uptr)info->dlpi_tls_data; + for (unsigned i = 0; i != info->dlpi_phnum; ++i) + if (info->dlpi_phdr[i].p_type == PT_TLS) { + static_cast *>(data)->push_back( + TlsBlock{begin, begin + info->dlpi_phdr[i].p_memsz, + info->dlpi_phdr[i].p_align, info->dlpi_tls_modid}); + break; + } + return 0; +} + +static void GetStaticTlsBoundary(uptr *addr, uptr *size, uptr *align) { + InternalMmapVector ranges; + dl_iterate_phdr(CollectStaticTlsBlocks, &ranges); + uptr len = ranges.size(); + Sort(ranges.begin(), len); + // Find the range with tls_modid=1. For glibc, because libc.so uses PT_TLS, + // this module is guaranteed to exist and is one of the initially loaded + // modules. + uptr one = 0; + while (one != len && ranges[one].tls_modid != 1) ++one; + if (one == len) { + // This may happen with musl if no module uses PT_TLS. + *addr = 0; + *size = 0; + *align = 1; + return; + } + // Find the maximum consecutive ranges. We consider two modules consecutive if + // the gap is smaller than the alignment. The dynamic loader places static TLS + // blocks this way not to waste space. + uptr l = one; + *align = ranges[l].align; + while (l != 0 && ranges[l].begin < ranges[l - 1].end + ranges[l - 1].align) + *align = Max(*align, ranges[--l].align); + uptr r = one + 1; + while (r != len && ranges[r].begin < ranges[r - 1].end + ranges[r - 1].align) + *align = Max(*align, ranges[r++].align); + *addr = ranges[l].begin; + *size = ranges[r - 1].end - ranges[l].begin; +} +#endif // !SANITIZER_GO +#endif // (x86_64 || i386 || mips || ...) && SANITIZER_LINUX && + // !SANITIZER_ANDROID #if SANITIZER_FREEBSD static void **ThreadSelfSegbase() { @@ -472,18 +418,54 @@ *size = 0; } #elif SANITIZER_LINUX + // glibc before 2.25 does not eager allocate static TLS blocks so + // dlpi_tls_data is NULL. Call __tls_get_addr to force allocation. For safety, + // skip grte/v4-2.19 for now. + int major, minor, patch; + if (GetLibcVersion(&major, &minor, &patch) && major == 2 && minor < 25 && + minor != 19) + dl_iterate_phdr(TouchTlsBlock, nullptr); + + uptr align; + GetStaticTlsBoundary(addr, size, &align); #if defined(__x86_64__) || defined(__i386__) || defined(__s390__) - *addr = ThreadSelf(); - *size = GetTlsSize(); - *addr -= *size; - *addr += ThreadDescriptorSize(); -#elif defined(__mips__) || defined(__aarch64__) || defined(__powerpc64__) || \ - defined(__arm__) || SANITIZER_RISCV64 - *addr = ThreadSelf(); - *size = GetTlsSize(); + if (SANITIZER_GLIBC) { +#if defined(__s390__) + align = Max(align, 16); #else - *addr = 0; - *size = 0; + align = Max(align, 64); +#endif + } + const uptr tp = RoundUpTo(*addr + *size, align); + + // lsan requires the range to additionally cover the static TLS surplus + // (elf/dl-tls.c defines 1664). Otherwise there may be false positives for + // allocations only referenced by tls in dynamically loaded modules. + if (SANITIZER_GLIBC) + *size += 1644; + + // Extend the range to include the thread control block. On glibc, lsan needs + // the range to include pthread::{specific_1stblock,specific} so that + // allocations only referenced by pthread_setspecific can be scanned. This may + // underestimate by at most TLS_TCB_ALIGN-1 bytes but it should be fine + // because the number of bytes after pthread::specific is larger. + *addr = tp - RoundUpTo(*size, align); + *size = tp - *addr + ThreadDescriptorSize(); +#else + if (SANITIZER_GLIBC) + *size += 1664; +#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 + const uptr pre_tcb_size = TlsPreTcbSize(); + *addr -= pre_tcb_size; + *size += pre_tcb_size; +#else + // arm and aarch64 reserve two words at TP, so this underestimates the range. + // However, this is sufficient for the purpose of finding the pointers to + // thread-specific data keys. + const uptr tcb_size = ThreadDescriptorSize(); + *addr -= tcb_size; + *size += tcb_size; +#endif #endif #elif SANITIZER_FREEBSD void** segbase = ThreadSelfSegbase(); @@ -524,17 +506,11 @@ #if !SANITIZER_GO uptr GetTlsSize() { -#if SANITIZER_FREEBSD || SANITIZER_ANDROID || SANITIZER_NETBSD || \ +#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \ SANITIZER_SOLARIS uptr addr, size; GetTls(&addr, &size); return size; -#elif SANITIZER_GLIBC -#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 - return RoundUpTo(g_tls_size + TlsPreTcbSize(), 16); -#else - return g_tls_size; -#endif #else return 0; #endif @@ -557,10 +533,9 @@ if (!main) { // If stack and tls intersect, make them non-intersecting. if (*tls_addr > *stk_addr && *tls_addr < *stk_addr + *stk_size) { - CHECK_GT(*tls_addr + *tls_size, *stk_addr); - CHECK_LE(*tls_addr + *tls_size, *stk_addr + *stk_size); - *stk_size -= *tls_size; - *tls_addr = *stk_addr + *stk_size; + if (*stk_addr + *stk_size < *tls_addr + *tls_size) + *tls_size = *stk_addr + *stk_size - *tls_addr; + *stk_size = *tls_addr - *stk_addr; } } #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -548,9 +548,6 @@ return 0; } -void InitTlsSize() { -} - uptr TlsBaseAddr() { uptr segbase = 0; #if defined(__x86_64__) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_rtems.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_rtems.cpp --- a/compiler-rt/lib/sanitizer_common/sanitizer_rtems.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_rtems.cpp @@ -106,7 +106,6 @@ void InstallDeadlySignalHandlers(SignalHandlerType handler) {} void SetAlternateSignalStack() {} void UnsetAlternateSignalStack() {} -void InitTlsSize() {} void SignalContext::DumpAllRegisters(void *context) {} const char *DescribeSignalOrException(int signo) { UNIMPLEMENTED(); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp --- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp @@ -846,9 +846,6 @@ return 0; } -void InitTlsSize() { -} - void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size, uptr *tls_addr, uptr *tls_size) { #if SANITIZER_GO diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_common_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_common_test.cpp --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_common_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_common_test.cpp @@ -210,12 +210,10 @@ } TEST(SanitizerCommon, ThreadStackTlsMain) { - InitTlsSize(); TestThreadInfo(true); } TEST(SanitizerCommon, ThreadStackTlsWorker) { - InitTlsSize(); pthread_t t; PTHREAD_CREATE(&t, 0, WorkerThread, 0); PTHREAD_JOIN(t, 0); diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp @@ -188,24 +188,9 @@ } #if (defined(__x86_64__) || defined(__i386__)) && !SANITIZER_ANDROID -void *thread_self_offset_test_func(void *arg) { - bool result = - *(uptr *)((char *)ThreadSelf() + ThreadSelfOffset()) == ThreadSelf(); - return (void *)result; -} - -TEST(SanitizerLinux, ThreadSelfOffset) { - EXPECT_TRUE((bool)thread_self_offset_test_func(0)); - pthread_t tid; - void *result; - ASSERT_EQ(0, pthread_create(&tid, 0, thread_self_offset_test_func, 0)); - ASSERT_EQ(0, pthread_join(tid, &result)); - EXPECT_TRUE((bool)result); -} - // libpthread puts the thread descriptor at the end of stack space. void *thread_descriptor_size_test_func(void *arg) { - uptr descr_addr = ThreadSelf(); + uptr descr_addr = (uptr)pthread_self(); pthread_attr_t attr; pthread_getattr_np(pthread_self(), &attr); void *stackaddr; diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -655,8 +655,11 @@ return p; } +// In glibc<2.25, dynamic TLS blocks are allocated by __libc_memalign. Intercept +// __libc_memalign so that (1) we can detect races (2) free will not be called +// on libc internally allocated blocks. TSAN_INTERCEPTOR(void*, __libc_memalign, uptr align, uptr sz) { - SCOPED_TSAN_INTERCEPTOR(__libc_memalign, align, sz); + SCOPED_INTERCEPTOR_RAW(__libc_memalign, align, sz); return user_memalign(thr, pc, align, sz); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp @@ -318,7 +318,6 @@ } CheckAndProtect(); - InitTlsSize(); #endif // !SANITIZER_GO } diff --git a/compiler-rt/test/asan/TestCases/Linux/static_tls.cpp b/compiler-rt/test/asan/TestCases/Linux/static_tls.cpp --- a/compiler-rt/test/asan/TestCases/Linux/static_tls.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/static_tls.cpp @@ -13,6 +13,10 @@ // binutils 2.26 has a change that causes this test to fail on powerpc64. // UNSUPPORTED: powerpc64 +/// We call __tls_get_addr early in GetTls to work around an issue for glibc<2.25, +/// so we don't get a log for f(). +// REQUIRES: glibc-2.27 + #ifndef SHARED #include