diff --git a/libc/config/linux/app.h b/libc/config/linux/app.h --- a/libc/config/linux/app.h +++ b/libc/config/linux/app.h @@ -15,8 +15,8 @@ namespace __llvm_libc { -// Data structure to capture properties of the linux/ELF TLS. -struct TLS { +// Data structure to capture properties of the linux/ELF TLS image. +struct TLSImage { // The load address of the TLS. uintptr_t address; @@ -68,8 +68,8 @@ Args *args; - // The properties of an application's TLS. - TLS tls; + // The properties of an application's TLS image. + TLSImage tls; // Environment data. uint64_t *envPtr; @@ -77,9 +77,29 @@ extern AppProperties app; -// Creates and initializes the TLS area for the current thread. Should not +// The descriptor of a thread's TLS area. +struct TLSDescriptor { + // The size of the TLS area. + uintptr_t size = 0; + + // The address of the TLS area. This address can be passed to cleanup + // functions like munmap. + uintptr_t addr = 0; + + // The value the thread pointer register should be initialized to. + // Note that, dependending the target architecture ABI, it can be the + // same as |addr| or something else. + uintptr_t tp = 0; + + constexpr TLSDescriptor() = default; +}; + +// Create and initialize the TLS area for the current thread. Should not // be called before app.tls has been initialized. -void initTLS(); +void init_tls(TLSDescriptor &tls); + +// Cleanup the TLS area as described in |tls_descriptor|. +void cleanup_tls(uintptr_t tls_addr, uintptr_t tls_size); } // namespace __llvm_libc diff --git a/libc/loader/linux/aarch64/start.cpp b/libc/loader/linux/aarch64/start.cpp --- a/libc/loader/linux/aarch64/start.cpp +++ b/libc/loader/linux/aarch64/start.cpp @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -36,9 +35,12 @@ AppProperties app; -void initTLS() { - if (app.tls.size == 0) +void init_tls(TLSDescriptor &tls_descriptor) { + if (app.tls.size == 0) { + tls_descriptor.size = 0; + tls_descriptor.tp = 0; return; + } // aarch64 follows the variant 1 TLS layout: // @@ -49,14 +51,14 @@ // // The thread pointer points to the first entry. - const size_t size_of_pointers = 2 * sizeof(uintptr_t); - size_t padding = 0; - const size_t ALIGNMENT_MASK = app.tls.align - 1; - size_t diff = size_of_pointers & ALIGNMENT_MASK; + const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t); + uintptr_t padding = 0; + const uintptr_t ALIGNMENT_MASK = app.tls.align - 1; + uintptr_t diff = size_of_pointers & ALIGNMENT_MASK; if (diff != 0) padding += (ALIGNMENT_MASK - diff) + 1; - size_t alloc_size = size_of_pointers + padding + app.tls.size; + uintptr_t alloc_size = size_of_pointers + padding + app.tls.size; // We cannot call the mmap function here as the functions set errno on // failure. Since errno is implemented via a thread local variable, we cannot @@ -73,9 +75,19 @@ __llvm_libc::inline_memcpy(reinterpret_cast(tls_addr), reinterpret_cast(app.tls.address), app.tls.init_size); - __arm_wsr64("tpidr_el0", thread_ptr); + tls_descriptor.size = alloc_size; + tls_descriptor.addr = thread_ptr; + tls_descriptor.tp = thread_ptr; } +void cleanup_tls(uintptr_t addr, uintptr_t size) { + if (size == 0) + return; + __llvm_libc::syscall(SYS_munmap, addr, size); +} + +static void set_thread_ptr(uintptr_t val) { __arm_wsr64("tpidr_el0", val); } + } // namespace __llvm_libc using __llvm_libc::app; @@ -134,9 +146,13 @@ app.tls.align = phdr->p_align; } - __llvm_libc::initTLS(); + __llvm_libc::TLSDescriptor tls; + __llvm_libc::init_tls(tls); + if (tls.size != 0) + __llvm_libc::set_thread_ptr(tls.tp); - __llvm_libc::syscall(SYS_exit, main(app.args->argc, - reinterpret_cast(app.args->argv), - reinterpret_cast(env_ptr))); + int retval = main(app.args->argc, reinterpret_cast(app.args->argv), + reinterpret_cast(env_ptr)); + __llvm_libc::cleanup_tls(tls.addr, tls.size); + __llvm_libc::syscall(SYS_exit, retval); } diff --git a/libc/loader/linux/x86_64/CMakeLists.txt b/libc/loader/linux/x86_64/CMakeLists.txt --- a/libc/loader/linux/x86_64/CMakeLists.txt +++ b/libc/loader/linux/x86_64/CMakeLists.txt @@ -11,4 +11,5 @@ COMPILE_OPTIONS -fno-omit-frame-pointer -ffreestanding # To avoid compiler warnings about calling the main function. + -fno-builtin ) diff --git a/libc/loader/linux/x86_64/start.cpp b/libc/loader/linux/x86_64/start.cpp --- a/libc/loader/linux/x86_64/start.cpp +++ b/libc/loader/linux/x86_64/start.cpp @@ -33,9 +33,12 @@ // TODO: The function is x86_64 specific. Move it to config/linux/app.h // and generalize it. Also, dynamic loading is not handled currently. -void initTLS() { - if (app.tls.size == 0) +void init_tls(TLSDescriptor &tls_descriptor) { + if (app.tls.size == 0) { + tls_descriptor.size = 0; + tls_descriptor.tp = 0; return; + } // We will assume the alignment is always a power of two. uintptr_t tlsSize = app.tls.size & -app.tls.align; @@ -45,7 +48,7 @@ // Per the x86_64 TLS ABI, the entry pointed to by the thread pointer is the // address of the TLS block. So, we add more size to accomodate this address // entry. - size_t tlsSizeWithAddr = tlsSize + sizeof(uintptr_t); + uintptr_t tlsSizeWithAddr = tlsSize + sizeof(uintptr_t); // We cannot call the mmap function here as the functions set errno on // failure. Since errno is implemented via a thread local variable, we cannot @@ -67,8 +70,21 @@ __llvm_libc::inline_memcpy(reinterpret_cast(tlsAddr), reinterpret_cast(app.tls.address), app.tls.init_size); - if (__llvm_libc::syscall(SYS_arch_prctl, ARCH_SET_FS, endPtr) == -1) - __llvm_libc::syscall(SYS_exit, 1); + + tls_descriptor = {tlsSizeWithAddr, uintptr_t(tlsAddr), endPtr}; + return; +} + +void cleanup_tls(uintptr_t addr, uintptr_t size) { + if (size == 0) + return; + __llvm_libc::syscall(SYS_munmap, addr, size); +} + +// Sets the thread pointer to |val|. Returns true on success, false on failure. +static bool set_thread_ptr(uintptr_t val) { + return __llvm_libc::syscall(SYS_arch_prctl, ARCH_SET_FS, val) == -1 ? false + : true; } } // namespace __llvm_libc @@ -144,9 +160,13 @@ app.tls.align = phdr->p_align; } - __llvm_libc::initTLS(); + __llvm_libc::TLSDescriptor tls; + __llvm_libc::init_tls(tls); + if (tls.size != 0 && !__llvm_libc::set_thread_ptr(tls.tp)) + __llvm_libc::syscall(SYS_exit, 1); - __llvm_libc::syscall(SYS_exit, main(app.args->argc, - reinterpret_cast(app.args->argv), - reinterpret_cast(env_ptr))); + int retval = main(app.args->argc, reinterpret_cast(app.args->argv), + reinterpret_cast(env_ptr)); + __llvm_libc::cleanup_tls(tls.addr, tls.size); + __llvm_libc::syscall(SYS_exit, retval); } diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt --- a/libc/src/__support/threads/linux/CMakeLists.txt +++ b/libc/src/__support/threads/linux/CMakeLists.txt @@ -26,6 +26,7 @@ thread.cpp DEPENDS .futex_word_type + libc.config.linux.app_h libc.include.sys_syscall libc.src.__support.CPP.atomic libc.src.__support.CPP.error diff --git a/libc/src/__support/threads/linux/thread.cpp b/libc/src/__support/threads/linux/thread.cpp --- a/libc/src/__support/threads/linux/thread.cpp +++ b/libc/src/__support/threads/linux/thread.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/__support/threads/thread.h" +#include "config/linux/app.h" #include "src/__support/CPP/atomic.h" #include "src/__support/CPP/error.h" #include "src/__support/OSUtil/syscall.h" // For syscall functions. @@ -42,11 +43,10 @@ | CLONE_THREAD // Same thread group as the parent. | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment // values - | CLONE_PARENT_SETTID // Set child thread ID in |ptid| of the parent. - | CLONE_CHILD_CLEARTID; // Let the kernel clear the tid address - // wake the joining thread. -// TODO: Add the CLONE_SETTLS flag and setup the TLS area correctly -// when making the clone syscall. + | CLONE_PARENT_SETTID // Set child thread ID in |ptid| of the parent. + | CLONE_CHILD_CLEARTID // Let the kernel clear the tid address + // wake the joining thread. + | CLONE_SETTLS; // Setup the thread pointer of the new thread. static inline cpp::ErrorOr alloc_stack(size_t size) { long mmap_result = @@ -80,6 +80,14 @@ void *arg; }; +static void cleanup_thread_resources(ThreadAttributes *attrib) { + // Cleanup the TLS before the stack as the TLS information is stored on + // the stack. + cleanup_tls(attrib->tls, attrib->tls_size); + if (attrib->owned_stack) + free_stack(attrib->stack, attrib->stack_size); +} + __attribute__((always_inline)) inline uintptr_t get_start_args_addr() { // NOTE: For __builtin_frame_address to work reliably across compilers, // architectures and various optimization levels, the TU including this file @@ -118,8 +126,7 @@ if (!attrib->detach_state.compare_exchange_strong( joinable_state, uint32_t(DetachState::EXITING))) { // Thread is detached so cleanup the resources. - if (attrib->owned_stack) - free_stack(attrib->stack, attrib->stack_size); + cleanup_thread_resources(attrib); // Set the CLEAR_TID address to nullptr to prevent the kernel // from signalling at a non-existent futex location. @@ -143,6 +150,9 @@ owned_stack = true; } + TLSDescriptor tls; + init_tls(tls); + // When the new thread is spawned by the kernel, the new thread gets the // stack we pass to the clone syscall. However, this stack is empty and does // not have any local vars present in this function. Hence, one cannot @@ -167,6 +177,8 @@ attrib->stack = stack; attrib->stack_size = size; attrib->owned_stack = owned_stack; + attrib->tls = tls.addr; + attrib->tls_size = tls.size; start_args->thread_attrib = attrib; start_args->runner = runner; @@ -187,14 +199,14 @@ SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, &attrib->tid, // The address where the child tid is written &clear_tid->val, // The futex where the child thread status is signalled - 0 // Set TLS to null for now. + tls.tp // The thread pointer value for the new thread. ); #elif defined(LLVM_LIBC_ARCH_AARCH64) long register clone_result asm("x0"); clone_result = __llvm_libc::syscall( SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, &attrib->tid, // The address where the child tid is written - 0, // Set TLS to null for now. + tls.tp, // The thread pointer value for the new thread. &clear_tid->val // The futex where the child thread status is signalled ); #else @@ -209,8 +221,7 @@ #endif start_thread(); } else if (clone_result < 0) { - if (attrib->owned_stack) - free_stack(attrib->stack, attrib->stack_size); + cleanup_thread_resources(attrib); return -clone_result; } @@ -225,8 +236,7 @@ else retval.stdc_retval = attrib->retval.stdc_retval; - if (attrib->owned_stack) - free_stack(attrib->stack, attrib->stack_size); + cleanup_thread_resources(attrib); return 0; } @@ -243,8 +253,8 @@ // and free up resources. wait(); - if (attrib->owned_stack) - free_stack(attrib->stack, attrib->stack_size); + cleanup_thread_resources(attrib); + return int(DetachType::CLEANUP); } diff --git a/libc/src/__support/threads/thread.h b/libc/src/__support/threads/thread.h --- a/libc/src/__support/threads/thread.h +++ b/libc/src/__support/threads/thread.h @@ -81,9 +81,10 @@ // exits. It will clean up the thread resources once the thread // exits. cpp::Atomic detach_state; - void *stack; // Pointer to the thread stack - void *tls; + void *stack; // Pointer to the thread stack unsigned long long stack_size; // Size of the stack + uintptr_t tls; // Address to the thread TLS memory + uintptr_t tls_size; // The size of area pointed to by |tls|. unsigned char owned_stack; // Indicates if the thread owns this stack memory int tid; ThreadStyle style; @@ -92,10 +93,8 @@ constexpr ThreadAttributes() : detach_state(uint32_t(DetachState::DETACHED)), stack(nullptr), - tls(nullptr), stack_size(0), owned_stack(false), tid(-1), - style(ThreadStyle::POSIX), retval(), - platform_data(nullptr) { - } + stack_size(0), tls(0), tls_size(0), owned_stack(false), tid(-1), + style(ThreadStyle::POSIX), retval(), platform_data(nullptr) {} }; struct Thread { diff --git a/libc/test/integration/src/__support/threads/CMakeLists.txt b/libc/test/integration/src/__support/threads/CMakeLists.txt --- a/libc/test/integration/src/__support/threads/CMakeLists.txt +++ b/libc/test/integration/src/__support/threads/CMakeLists.txt @@ -17,3 +17,15 @@ libc.src.__support.threads.mutex libc.src.__support.threads.thread ) + +add_integration_test( + thread_tls_test + SUITE + libc-support-threads-integration-tests + SRCS + thread_tls_test.cpp + LOADER + libc.loader.linux.crt1 + DEPENDS + libc.src.__support.threads.thread +) diff --git a/libc/test/integration/src/__support/threads/thread_tls_test.cpp b/libc/test/integration/src/__support/threads/thread_tls_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/integration/src/__support/threads/thread_tls_test.cpp @@ -0,0 +1,46 @@ +//===-- Test handling of thread local data --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/threads/thread.h" +#include "utils/IntegrationTest/test.h" + +static constexpr int INIT_VAL = 100; +static constexpr int UPDATE_VAL = 123; + +static thread_local int tlval = INIT_VAL; + +// This function updates the tlval and returns its old value. +int func(void *) { + int old_tlval = tlval; + tlval = UPDATE_VAL; + return old_tlval; +} + +void thread_local_test() { + int retval; + + __llvm_libc::Thread th1; + th1.run(func, nullptr, nullptr, 0); + th1.join(&retval); + ASSERT_EQ(retval, INIT_VAL); + + __llvm_libc::Thread th2; + th2.run(func, nullptr, nullptr, 0); + th2.join(&retval); + ASSERT_EQ(retval, INIT_VAL); +} + +int main() { + // From the main thread, we will update the main thread's tlval. + // This should not affect the child thread's tlval; + ASSERT_EQ(tlval, INIT_VAL); + tlval = UPDATE_VAL; + ASSERT_EQ(tlval, UPDATE_VAL); + thread_local_test(); + return 0; +}