diff --git a/libc/include/llvm-libc-types/thrd_t.h b/libc/include/llvm-libc-types/thrd_t.h --- a/libc/include/llvm-libc-types/thrd_t.h +++ b/libc/include/llvm-libc-types/thrd_t.h @@ -12,14 +12,8 @@ #include typedef struct { - struct { - void *__stack; - unsigned long long __stack_size; - unsigned char __managed_stack; - int __retval; - int __tid; - } __attrib; - __futex_word __clear_tid; + void *__attribs; + void *__platform_attribs; } thrd_t; #endif // __LLVM_LIBC_TYPES_THRD_T_H__ diff --git a/libc/src/__support/threads/linux/thread.h b/libc/src/__support/threads/linux/thread.h --- a/libc/src/__support/threads/linux/thread.h +++ b/libc/src/__support/threads/linux/thread.h @@ -97,8 +97,8 @@ template struct Thread { private: - ThreadAttributes attrib; - cpp::Atomic clear_tid; + ThreadAttributes *attrib; + cpp::Atomic *clear_tid; public: Thread() = default; @@ -106,7 +106,9 @@ static void start_thread() __attribute__((noinline)); // Return 0 on success or an error value on failure. - int run(ThreadRunner *f, void *arg, void *stack, size_t size) { + int run(ThreadRunner *f, void *arg, void *stack, size_t size, + bool detached = false) { + bool owned_stack = false; if (stack == nullptr) { if (size == 0) size = DEFAULT_STACK_SIZE; @@ -115,13 +117,8 @@ return alloc.error_code(); else stack = alloc.value(); - attrib.owned_stack = true; - } else { - attrib.owned_stack = false; + owned_stack = true; } - attrib.stack = stack; - attrib.stack_size = size; - clear_tid.val = CLEAR_TID_VALUE; // When the new thread is spawned by the kernel, the new thread gets the // stack we pass to the clone syscall. However, this stack is empty and does @@ -129,15 +126,32 @@ // pass arguments to the thread start function, or use any local vars from // here. So, we pack them into the new stack from where the thread can sniff // them out. - uintptr_t adjusted_stack = reinterpret_cast(attrib.stack) + - attrib.stack_size - - sizeof(StartArgs); + // + // Likewise, the actual thread state information is also stored on the + // stack memory. + uintptr_t adjusted_stack = reinterpret_cast(stack) + size - + sizeof(StartArgs) - + sizeof(ThreadAttributes) - + sizeof(cpp::Atomic); + auto *start_args = reinterpret_cast *>(adjusted_stack); start_args->thread = this; start_args->func = f; start_args->arg = arg; + attrib = reinterpret_cast *>( + adjusted_stack + sizeof(StartArgs)); + attrib->detached = detached; + attrib->stack = stack; + attrib->stack_size = size; + attrib->owned_stack = owned_stack; + + clear_tid = reinterpret_cast *>( + adjusted_stack + sizeof(StartArgs) + + sizeof(ThreadAttributes)); + clear_tid->val = CLEAR_TID_VALUE; + // The clone syscall takes arguments in an architecture specific order. // Also, we want the result of the syscall to be in a register as the child // thread gets a completely different stack after it is created. The stack @@ -146,17 +160,17 @@ long register clone_result asm("rax"); clone_result = __llvm_libc::syscall( SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, - &attrib.tid, // The address where the child tid is written - &clear_tid.val, // The futex where the child thread status is signalled - 0 // Set TLS to null for now. + &attrib->tid, // The address where the child tid is written + &clear_tid->val, // The futex where the child thread status is signalled + 0 // Set TLS to null for now. ); #elif defined(LLVM_LIBC_ARCH_AARCH64) long register clone_result asm("x0"); clone_result = __llvm_libc::syscall( SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack, - &attrib.tid, // The address where the child tid is written - 0, // Set TLS to null for now. - &clear_tid.val // The futex where the child thread status is signalled + &attrib->tid, // The address where the child tid is written + 0, // Set TLS to null for now. + &clear_tid->val // The futex where the child thread status is signalled ); #else #error "Unsupported architecture for the clone syscall." @@ -165,29 +179,31 @@ if (clone_result == 0) { start_thread(); } else if (clone_result < 0) { - if (attrib.owned_stack) - free_stack(attrib.stack, attrib.stack_size); + if (attrib->owned_stack) + free_stack(attrib->stack, attrib->stack_size); return -clone_result; } return 0; } - int join() { + int join(ReturnType *retval) { // The kernel should set the value at the clear tid address to zero. // If not, it is a spurious wake and we should continue to wait on // the futex. - while (clear_tid.load() != 0) { + while (clear_tid->load() != 0) { // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE. - __llvm_libc::syscall(SYS_futex, &clear_tid.val, FUTEX_WAIT, + __llvm_libc::syscall(SYS_futex, &clear_tid->val, FUTEX_WAIT, CLEAR_TID_VALUE, nullptr); } + + *retval = attrib->retval; + if (!attrib->detached) + free_stack(attrib->stack, attrib->stack_size); + return 0; } - - // Meaningful only after the thread finishes. - ReturnType return_value() { return attrib.retval; } }; template @@ -195,10 +211,12 @@ auto *start_args = reinterpret_cast *>(get_start_args_addr()); auto *thread = start_args->thread; - thread->attrib.retval = start_args->func(start_args->arg); - if (thread->attrib.owned_stack) - free_stack(thread->attrib.stack, thread->attrib.stack_size); - __llvm_libc::syscall(SYS_exit, thread->attrib.retval); + thread->attrib->retval = start_args->func(start_args->arg); + + if (thread->attrib->detached && thread->attrib->owned_stack) + free_stack(thread->attrib->stack, thread->attrib->stack_size); + + __llvm_libc::syscall(SYS_exit, thread->attrib->retval); } } // namespace __llvm_libc diff --git a/libc/src/__support/threads/thread_attrib.h b/libc/src/__support/threads/thread_attrib.h --- a/libc/src/__support/threads/thread_attrib.h +++ b/libc/src/__support/threads/thread_attrib.h @@ -9,14 +9,25 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_THREADS_THREAD_ATTRIB_H #define LLVM_LIBC_SRC_SUPPORT_THREADS_THREAD_ATTRIB_H +#include "src/__support/architectures.h" + namespace __llvm_libc { +#if (defined(LLVM_LIBC_ARCH_AARCH64) || defined(LLVM_LIBC_ARCH_X86_64)) +constexpr unsigned int STACK_ALIGNMENT = 16; +#endif +// TODO: Provide stack alignment requirements for other architectures. + // A data type to hold common thread attributes which have to be stored as -// thread state. A platform thread implementation should store the attrib object -// in its Thread data structure. Note that this is different from public -// attribute types like pthread_attr which contain information which need not -// be saved as part of a thread's state. For example, the stack guard size. -template struct ThreadAttributes { +// thread state. Note that this is different from public attribute types like +// pthread_attr_t which might contain information which need not be saved as +// part of a thread's state. For example, the stack guard size. +// +// Thread attributes are typically stored on the stack. So, we align as required +// for the target architecture. +template +struct alignas(STACK_ALIGNMENT) ThreadAttributes { + bool detached; void *stack; // Pointer to the thread stack unsigned long long stack_size; // Size of the stack unsigned char owned_stack; // Indicates if the thread owns this stack memory diff --git a/libc/src/threads/thrd_join.cpp b/libc/src/threads/thrd_join.cpp --- a/libc/src/threads/thrd_join.cpp +++ b/libc/src/threads/thrd_join.cpp @@ -19,12 +19,8 @@ LLVM_LIBC_FUNCTION(int, thrd_join, (thrd_t * th, int *retval)) { auto *thread = reinterpret_cast *>(th); - int result = thread->join(); - if (result == 0) { - *retval = thread->return_value(); - return thrd_success; - } - return thrd_error; + int result = thread->join(retval); + return result == 0 ? thrd_success : thrd_error; } } // namespace __llvm_libc