diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake --- a/libc/cmake/modules/LLVMLibCTestRules.cmake +++ b/libc/cmake/modules/LLVMLibCTestRules.cmake @@ -422,8 +422,15 @@ get_fq_deps_list(fq_deps_list ${INTEGRATION_TEST_DEPENDS}) # All integration tests setup TLS area and the main thread's self object. - # So, we need to link in the threads implementation. - list(APPEND fq_deps_list libc.src.__support.threads.thread) + # So, we need to link in the threads implementation. Likewise, the startup + # code also has to run init_array callbacks which potentially register + # their own atexit callbacks. So, link in exit and atexit also with all + # integration tests. + list( + APPEND fq_deps_list + libc.src.__support.threads.thread + libc.src.stdlib.atexit + libc.src.stdlib.exit) list(REMOVE_DUPLICATES fq_deps_list) # TODO: Instead of gathering internal object files from entrypoints, # collect the object files with public names of entrypoints. diff --git a/libc/loader/linux/aarch64/CMakeLists.txt b/libc/loader/linux/aarch64/CMakeLists.txt --- a/libc/loader/linux/aarch64/CMakeLists.txt +++ b/libc/loader/linux/aarch64/CMakeLists.txt @@ -7,6 +7,8 @@ libc.include.sys_mman libc.include.sys_syscall libc.src.__support.OSUtil.osutil + libc.src.stdlib.exit + libc.src.stdlib.atexit libc.src.string.memory_utils.memcpy_implementation COMPILE_OPTIONS -fno-omit-frame-pointer diff --git a/libc/loader/linux/aarch64/start.cpp b/libc/loader/linux/aarch64/start.cpp --- a/libc/loader/linux/aarch64/start.cpp +++ b/libc/loader/linux/aarch64/start.cpp @@ -9,6 +9,8 @@ #include "config/linux/app.h" #include "src/__support/OSUtil/syscall.h" #include "src/__support/threads/thread.h" +#include "src/stdlib/atexit.h" +#include "src/stdlib/exit.h" #include "src/string/memory_utils/memcpy_implementations.h" #include @@ -116,8 +118,8 @@ static void call_fini_array_callbacks() { size_t fini_array_size = __fini_array_end - __fini_array_start; - for (size_t i = 0; i < fini_array_size; ++i) - reinterpret_cast(__fini_array_start[i])(); + for (size_t i = fini_array_size; i > 0; --i) + reinterpret_cast(__fini_array_start[i - 1])(); } } // namespace __llvm_libc @@ -185,6 +187,12 @@ __llvm_libc::self.attrib = &__llvm_libc::main_thread_attrib; + // We want the fini array callbacks to be run after other atexit + // callbacks are run. So, we register them before running the init + // array callbacks as they can potentially register their own atexit + // callbacks. + __llvm_libc::atexit(&__llvm_libc::call_fini_array_callbacks); + __llvm_libc::call_init_array_callbacks( app.args->argc, reinterpret_cast(app.args->argv), reinterpret_cast(env_ptr)); @@ -192,10 +200,11 @@ int retval = main(app.args->argc, reinterpret_cast(app.args->argv), reinterpret_cast(env_ptr)); - __llvm_libc::call_fini_array_callbacks(); - + // TODO: TLS cleanup should be done after all other atexit callbacks + // are run. So, register a cleanup callback for it with atexit before + // everything else. __llvm_libc::cleanup_tls(tls.addr, tls.size); - __llvm_libc::syscall(SYS_exit, retval); + __llvm_libc::exit(retval); } extern "C" void _start() { diff --git a/libc/loader/linux/x86_64/CMakeLists.txt b/libc/loader/linux/x86_64/CMakeLists.txt --- a/libc/loader/linux/x86_64/CMakeLists.txt +++ b/libc/loader/linux/x86_64/CMakeLists.txt @@ -8,6 +8,8 @@ libc.include.sys_syscall libc.src.__support.threads.thread libc.src.__support.OSUtil.osutil + libc.src.stdlib.exit + libc.src.stdlib.atexit libc.src.string.memory_utils.memcpy_implementation COMPILE_OPTIONS -fno-omit-frame-pointer diff --git a/libc/loader/linux/x86_64/start.cpp b/libc/loader/linux/x86_64/start.cpp --- a/libc/loader/linux/x86_64/start.cpp +++ b/libc/loader/linux/x86_64/start.cpp @@ -9,6 +9,8 @@ #include "config/linux/app.h" #include "src/__support/OSUtil/syscall.h" #include "src/__support/threads/thread.h" +#include "src/stdlib/atexit.h" +#include "src/stdlib/exit.h" #include "src/string/memory_utils/memcpy_implementations.h" #include @@ -115,8 +117,8 @@ static void call_fini_array_callbacks() { size_t fini_array_size = __fini_array_end - __fini_array_start; - for (size_t i = 0; i < fini_array_size; ++i) - reinterpret_cast(__fini_array_start[i])(); + for (size_t i = fini_array_size; i > 0; --i) + reinterpret_cast(__fini_array_start[i - 1])(); } } // namespace __llvm_libc @@ -204,6 +206,12 @@ __llvm_libc::self.attrib = &__llvm_libc::main_thread_attrib; + // We want the fini array callbacks to be run after other atexit + // callbacks are run. So, we register them before running the init + // array callbacks as they can potentially register their own atexit + // callbacks. + __llvm_libc::atexit(&__llvm_libc::call_fini_array_callbacks); + __llvm_libc::call_init_array_callbacks( app.args->argc, reinterpret_cast(app.args->argv), reinterpret_cast(env_ptr)); @@ -211,8 +219,9 @@ int retval = main(app.args->argc, reinterpret_cast(app.args->argv), reinterpret_cast(env_ptr)); - __llvm_libc::call_fini_array_callbacks(); - + // TODO: TLS cleanup should be done after all other atexit callbacks + // are run. So, register a cleanup callback for it with atexit before + // everything else. __llvm_libc::cleanup_tls(tls.addr, tls.size); - __llvm_libc::syscall(SYS_exit, retval); + __llvm_libc::exit(retval); } diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -286,6 +286,7 @@ CXX_STANDARD 20 # For constinit of the atexit callback list. DEPENDS + libc.src.__support.fixedvector libc.src.__support.CPP.blockstore libc.src.__support.threads.mutex ) diff --git a/libc/src/stdlib/atexit.h b/libc/src/stdlib/atexit.h --- a/libc/src/stdlib/atexit.h +++ b/libc/src/stdlib/atexit.h @@ -9,8 +9,12 @@ #ifndef LLVM_LIBC_SRC_STDLIB_ATEXIT_H #define LLVM_LIBC_SRC_STDLIB_ATEXIT_H +#include // For size_t + namespace __llvm_libc { +constexpr size_t CALLBACK_LIST_SIZE_FOR_TESTS = 1024; + int atexit(void (*function)()); } // namespace __llvm_libc diff --git a/libc/src/stdlib/atexit.cpp b/libc/src/stdlib/atexit.cpp --- a/libc/src/stdlib/atexit.cpp +++ b/libc/src/stdlib/atexit.cpp @@ -9,6 +9,7 @@ #include "src/stdlib/atexit.h" #include "src/__support/CPP/blockstore.h" #include "src/__support/common.h" +#include "src/__support/fixedvector.h" #include "src/__support/threads/mutex.h" namespace __llvm_libc { @@ -17,10 +18,36 @@ Mutex handler_list_mtx(false, false, false); -using AtExitCallback = void(void); -using ExitCallbackList = cpp::ReverseOrderBlockStore; +using AtExitCallback = void(void *); +using StdCAtExitCallback = void(void); + +struct AtExitUnit { + AtExitCallback *callback = nullptr; + void *payload = nullptr; + constexpr AtExitUnit() = default; + constexpr AtExitUnit(AtExitCallback *c, void *p) : callback(c), payload(p) {} +}; + +#ifdef LLVM_LIBC_PUBLIC_PACKAGING +using ExitCallbackList = cpp::ReverseOrderBlockStore; +#else +// BlockStore uses dynamic memory allocation. To avoid dynamic memory +// allocation in tests, we use a fixed size callback list when built for +// tests. +// If we use BlockStore, then we will have to pull in malloc etc into +// the tests. While this is not bad, the problem we have currently is +// that LLVM libc' allocator is SCUDO. So, we will end up pulling SCUDO's +// deps also (some of which are not yet available in LLVM libc) into the +// integration tests. +using ExitCallbackList = FixedVector; +#endif // LLVM_LIBC_PUBLIC_PACKAGING + constinit ExitCallbackList exit_callbacks; +void stdc_at_exit_func(void *payload) { + reinterpret_cast(payload)(); +} + } // namespace namespace internal { @@ -28,10 +55,10 @@ void call_exit_callbacks() { handler_list_mtx.lock(); while (!exit_callbacks.empty()) { - auto *callback = exit_callbacks.back(); + auto unit = exit_callbacks.back(); exit_callbacks.pop_back(); handler_list_mtx.unlock(); - callback(); + unit.callback(unit.payload); handler_list_mtx.lock(); } ExitCallbackList::destroy(&exit_callbacks); @@ -39,11 +66,25 @@ } // namespace internal -LLVM_LIBC_FUNCTION(int, atexit, (AtExitCallback * callback)) { +static int add_atexit_unit(const AtExitUnit &unit) { + // TODO: Use the return value of push_back and bubble it to the public + // function as error return value. Note that a BlockStore push_back can + // fail because of allocation failure. Likewise, a FixedVector push_back + // can fail when it is full. handler_list_mtx.lock(); - exit_callbacks.push_back(callback); + exit_callbacks.push_back(unit); handler_list_mtx.unlock(); return 0; } +// TODO: Handle the last dso handle argument. +extern "C" int __cxa_atexit(AtExitCallback *callback, void *payload, void *) { + return add_atexit_unit({callback, payload}); +} + +LLVM_LIBC_FUNCTION(int, atexit, (StdCAtExitCallback * callback)) { + return add_atexit_unit( + {&stdc_at_exit_func, reinterpret_cast(callback)}); +} + } // namespace __llvm_libc diff --git a/libc/test/integration/loader/linux/init_fini_array_test.cpp b/libc/test/integration/loader/linux/init_fini_array_test.cpp --- a/libc/test/integration/loader/linux/init_fini_array_test.cpp +++ b/libc/test/integration/loader/linux/init_fini_array_test.cpp @@ -8,6 +8,10 @@ #include "utils/IntegrationTest/test.h" +#include + +int global_destroyed = false; + class A { private: int val[1024]; @@ -19,10 +23,7 @@ val[i] = a; } - // TODO: When we have implementation for __cxa_atexit, an explicit definition - // of the destructor should be provided to test that path of registering the - // destructor callback for a global. - ~A() = default; + ~A() { global_destroyed = true; } int get(int i) const { return val[i]; } }; @@ -33,14 +34,23 @@ A global(GLOBAL_INDEX, INITVAL_INITIALIZER); int initval = 0; +int preinitval = 0; + __attribute__((constructor)) void set_initval() { initval = INITVAL_INITIALIZER; } -__attribute__((destructor)) void reset_initval() { initval = 0; } +__attribute__((destructor(1))) void reset_initval() { + ASSERT_TRUE(global_destroyed); + ASSERT_EQ(preinitval, 0); + initval = 0; +} -int preinitval = 0; void set_preinitval() { preinitval = INITVAL_INITIALIZER; } -__attribute__((destructor)) void reset_preinitval() { preinitval = 0; } +__attribute__((destructor(2))) void reset_preinitval() { + ASSERT_TRUE(global_destroyed); + ASSERT_EQ(initval, INITVAL_INITIALIZER); + preinitval = 0; +} using PreInitFunc = void(); __attribute__((section(".preinit_array"))) PreInitFunc *preinit_func_ptr =