diff --git a/libcxxabi/src/cxa_guard_impl.h b/libcxxabi/src/cxa_guard_impl.h --- a/libcxxabi/src/cxa_guard_impl.h +++ b/libcxxabi/src/cxa_guard_impl.h @@ -154,92 +154,82 @@ constexpr uint32_t (*PlatformThreadID)() = nullptr; #endif -constexpr bool PlatformSupportsThreadID() { return +PlatformThreadID != nullptr; } - //===----------------------------------------------------------------------===// -// GuardBase +// GuardByte //===----------------------------------------------------------------------===// -enum class AcquireResult { - INIT_IS_DONE, - INIT_IS_PENDING, -}; -constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE; -constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING; - static constexpr uint8_t UNSET = 0; static constexpr uint8_t COMPLETE_BIT = (1 << 0); static constexpr uint8_t PENDING_BIT = (1 << 1); static constexpr uint8_t WAITING_BIT = (1 << 2); -template -struct GuardObject { - GuardObject() = delete; - GuardObject(GuardObject const&) = delete; - GuardObject& operator=(GuardObject const&) = delete; +/// Manages reads and writes to the guard byte. +struct GuardByte { + GuardByte() = delete; + GuardByte(GuardByte const&) = delete; + GuardByte& operator=(GuardByte const&) = delete; - explicit GuardObject(uint32_t* g) - : base_address(g), guard_byte_address(reinterpret_cast(g)), - init_byte_address(reinterpret_cast(g) + 1), thread_id_address(nullptr) {} - - explicit GuardObject(uint64_t* g) - : base_address(g), guard_byte_address(reinterpret_cast(g)), - init_byte_address(reinterpret_cast(g) + 1), thread_id_address(reinterpret_cast(g) + 1) {} + explicit GuardByte(uint8_t* const guard_byte_address) : guard_byte(guard_byte_address) {} public: - /// Implements __cxa_guard_acquire - AcquireResult cxa_guard_acquire() { - AtomicInt guard_byte(guard_byte_address); - if (guard_byte.load(std::_AO_Acquire) != UNSET) - return INIT_IS_DONE; - return derived()->acquire_init_byte(); + /// The guard_byte portion of cxa_guard_acquire. Returns true if + /// initialization has already been completed. + /// Note: On completion, we haven't 'acquired' ownership of anything mutex + /// related. The name is simply referring to __cxa_guard_acquire. + bool acquire_guard_byte() { + // if guard_byte is non-zero, we have already completed initialization + // (i.e. release_guard_byte has been called) + return guard_byte.load(std::_AO_Acquire) != UNSET; } - /// Implements __cxa_guard_release - void cxa_guard_release() { - AtomicInt guard_byte(guard_byte_address); - // Store complete first, so that when release wakes other folks, they see - // it as having been completed. - guard_byte.store(COMPLETE_BIT, std::_AO_Release); - derived()->release_init_byte(); - } + /// The guard_byte portion of cxa_guard_release. + /// Note: On completion, we haven't 'released' ownership of anything mutex + /// related. The name is simply referring to __cxa_guard_release. + void release_guard_byte() { guard_byte.store(COMPLETE_BIT, std::_AO_Release); } - /// Implements __cxa_guard_abort - void cxa_guard_abort() { derived()->abort_init_byte(); } - -public: - /// base_address - the address of the original guard object. - void* const base_address; - /// The address of the guard byte at offset 0. - uint8_t* const guard_byte_address; - /// The address of the byte used by the implementation during initialization. - uint8_t* const init_byte_address; - /// An optional address storing an identifier for the thread performing initialization. - /// It's used to detect recursive initialization. - uint32_t* const thread_id_address; + /// The guard_byte portion of cxa_guard_abort. + void abort_guard_byte() {} // Nothing to do private: - Derived* derived() { return static_cast(this); } + AtomicInt guard_byte; }; //===----------------------------------------------------------------------===// // Single Threaded Implementation //===----------------------------------------------------------------------===// -struct InitByteNoThreads : GuardObject { - using GuardObject::GuardObject; +/// InitByteNoThreads - Doesn't use any inter-thread synchronization when +/// managing reads and writes to the init byte. +struct InitByteNoThreads { + InitByteNoThreads() = delete; + InitByteNoThreads(InitByteNoThreads const&) = delete; + InitByteNoThreads& operator=(InitByteNoThreads const&) = delete; + + explicit InitByteNoThreads(uint8_t* _init_byte_address, uint32_t*) : init_byte_address(_init_byte_address) {} - AcquireResult acquire_init_byte() { + /// The init_byte portion of cxa_guard_acquire. Returns true if + /// initialization has already been completed. + /// Note: On completion, we haven't 'acquired' ownership of anything mutex + /// related. The name is simply referring to __cxa_guard_acquire. + bool acquire_init_byte() { if (*init_byte_address == COMPLETE_BIT) - return INIT_IS_DONE; + return true; if (*init_byte_address & PENDING_BIT) ABORT_WITH_MESSAGE("__cxa_guard_acquire detected recursive initialization"); *init_byte_address = PENDING_BIT; - return INIT_IS_PENDING; + return false; } + /// The init_byte portion of cxa_guard_release. + /// Note: On completion, we haven't 'released' ownership of anything mutex + /// related. The name is simply referring to __cxa_guard_release. void release_init_byte() { *init_byte_address = COMPLETE_BIT; } + /// The init_byte portion of cxa_guard_abort. void abort_init_byte() { *init_byte_address = UNSET; } + +private: + /// The address of the byte used during initialization. + uint8_t* const init_byte_address; }; //===----------------------------------------------------------------------===// @@ -279,18 +269,22 @@ struct LibcppCondVar {}; #endif // !defined(_LIBCXXABI_HAS_NO_THREADS) +/// InitByteGlobalMutex - Uses a global mutex and condition variable (common to +/// all static local variables) to manage reads and writes to the init byte. template -struct InitByteGlobalMutex : GuardObject> { +struct InitByteGlobalMutex { - using BaseT = typename InitByteGlobalMutex::GuardObject; - using BaseT::BaseT; - - explicit InitByteGlobalMutex(uint32_t* g) : BaseT(g), has_thread_id_support(false) {} - explicit InitByteGlobalMutex(uint64_t* g) : BaseT(g), has_thread_id_support(PlatformSupportsThreadID()) {} + explicit InitByteGlobalMutex(uint8_t* _init_byte_address, uint32_t* _thread_id_address) + : init_byte_address(_init_byte_address), thread_id_address(_thread_id_address), + has_thread_id_support(_thread_id_address != nullptr && GetThreadID != nullptr) {} public: - AcquireResult acquire_init_byte() { + /// The init_byte portion of cxa_guard_acquire. Returns true if + /// initialization has already been completed. + /// Note: On completion, we haven't 'acquired' ownership of anything mutex + /// related. The name is simply referring to __cxa_guard_acquire. + bool acquire_init_byte() { LockGuard g("__cxa_guard_acquire"); // Check for possible recursive initialization. if (has_thread_id_support && (*init_byte_address & PENDING_BIT)) { @@ -305,15 +299,18 @@ } if (*init_byte_address == COMPLETE_BIT) - return INIT_IS_DONE; + return true; if (has_thread_id_support) *thread_id_address = current_thread_id.get(); *init_byte_address = PENDING_BIT; - return INIT_IS_PENDING; + return false; } + /// The init_byte portion of cxa_guard_release. + /// Note: On completion, we haven't 'released' ownership of anything mutex + /// related. The name is simply referring to __cxa_guard_release. void release_init_byte() { bool has_waiting; { @@ -328,6 +325,7 @@ } } + /// The init_byte portion of cxa_guard_abort. void abort_init_byte() { bool has_waiting; { @@ -345,8 +343,12 @@ } private: - using BaseT::init_byte_address; - using BaseT::thread_id_address; + /// The address of the byte used during initialization. + uint8_t* const init_byte_address; + /// An optional address storing an identifier for the thread performing initialization. + /// It's used to detect recursive initialization. + uint32_t* const thread_id_address; + const bool has_thread_id_support; LazyValue current_thread_id; @@ -393,36 +395,34 @@ constexpr bool PlatformSupportsFutex() { return +PlatformFutexWait != nullptr; } -/// InitByteFutex - Manages initialization using atomics and the futex syscall -/// for waiting and waking. +/// InitByteFutex - Uses a futex to manage reads and writes to the init byte. template -struct InitByteFutex : GuardObject> { - using BaseT = typename InitByteFutex::GuardObject; - - /// ARM Constructor - explicit InitByteFutex(uint32_t* g) - : BaseT(g), init_byte(this->init_byte_address), has_thread_id_support(this->thread_id_address && GetThreadIDArg), - thread_id(this->thread_id_address) {} +struct InitByteFutex { - /// Itanium Constructor - explicit InitByteFutex(uint64_t* g) - : BaseT(g), init_byte(this->init_byte_address), has_thread_id_support(this->thread_id_address && GetThreadIDArg), - thread_id(this->thread_id_address) {} + explicit InitByteFutex(uint8_t* _init_byte_address, uint32_t* _thread_id_address) + : init_byte(_init_byte_address), + has_thread_id_support(_thread_id_address != nullptr && GetThreadIDArg != nullptr), + thread_id(_thread_id_address), base_address(reinterpret_cast( + /*_init_byte_address & ~0x3*/ _init_byte_address - 1)) {} public: - AcquireResult acquire_init_byte() { + /// The init_byte portion of cxa_guard_acquire. Returns true if + /// initialization has already been completed. + /// Note: On completion, we haven't 'acquired' ownership of anything mutex + /// related. The name is simply referring to __cxa_guard_acquire. + bool acquire_init_byte() { while (true) { uint8_t last_val = UNSET; if (init_byte.compare_exchange(&last_val, PENDING_BIT, std::_AO_Acq_Rel, std::_AO_Acquire)) { if (has_thread_id_support) { thread_id.store(current_thread_id.get(), std::_AO_Relaxed); } - return INIT_IS_PENDING; + return false; } if (last_val == COMPLETE_BIT) - return INIT_IS_DONE; + return true; if (last_val & PENDING_BIT) { @@ -439,7 +439,7 @@ if (!init_byte.compare_exchange(&last_val, PENDING_BIT | WAITING_BIT, std::_AO_Acq_Rel, std::_AO_Release)) { // (1) success, via someone else's work! if (last_val == COMPLETE_BIT) - return INIT_IS_DONE; + return true; // (3) someone else, bailed on doing the work, retry from the start! if (last_val == UNSET) @@ -453,12 +453,16 @@ } } + /// The init_byte portion of cxa_guard_release. + /// Note: On completion, we haven't 'released' ownership of anything mutex + /// related. The name is simply referring to __cxa_guard_release. void release_init_byte() { uint8_t old = init_byte.exchange(COMPLETE_BIT, std::_AO_Acq_Rel); if (old & WAITING_BIT) wake_all(); } + /// The init_byte portion of cxa_guard_abort. void abort_init_byte() { if (has_thread_id_support) thread_id.store(0, std::_AO_Relaxed); @@ -470,12 +474,11 @@ private: /// Use the futex to wait on the current guard variable. Futex expects a - /// 32-bit 4-byte aligned address as the first argument, so we have to use use - /// the base address of the guard variable (not the init byte). - void wait_on_initialization() { - Wait(static_cast(this->base_address), expected_value_for_futex(PENDING_BIT | WAITING_BIT)); - } - void wake_all() { Wake(static_cast(this->base_address)); } + /// 32-bit 4-byte aligned address as the first argument, so we use the 4-byte + /// aligned address that encompasses the init byte (i.e. the address of the + /// raw guard object that was passed to __cxa_guard_acquire/release/abort). + void wait_on_initialization() { Wait(base_address, expected_value_for_futex(PENDING_BIT | WAITING_BIT)); } + void wake_all() { Wake(base_address); } private: AtomicInt init_byte; @@ -485,6 +488,10 @@ AtomicInt thread_id; LazyValue current_thread_id; + /// the 4-byte-aligned address that encompasses the init byte (i.e. the + /// address of the raw guard object). + int* const base_address; + /// Create the expected integer value for futex `wait(int* addr, int expected)`. /// We pass the base address as the first argument, So this function creates /// an zero-initialized integer with `b` copied at the correct offset. @@ -497,6 +504,86 @@ static_assert(Wait != nullptr && Wake != nullptr, ""); }; +//===----------------------------------------------------------------------===// +// Guards +//===----------------------------------------------------------------------===// + +enum class AcquireResult { + INIT_IS_DONE, + INIT_IS_PENDING, +}; +constexpr AcquireResult INIT_IS_DONE = AcquireResult::INIT_IS_DONE; +constexpr AcquireResult INIT_IS_PENDING = AcquireResult::INIT_IS_PENDING; + +/// Co-ordinates between GuardByte and InitByte. +template +struct GuardBase : GuardByte, InitByteT { + GuardBase() = delete; + GuardBase(GuardBase const&) = delete; + GuardBase& operator=(GuardBase const&) = delete; + + /// ARM Constructor + explicit GuardBase(uint32_t* raw_guard_object) + : GuardByte(reinterpret_cast(raw_guard_object)), + InitByteT(reinterpret_cast(raw_guard_object) + 1, nullptr) {} + + /// Itanium Constructor + explicit GuardBase(uint64_t* raw_guard_object) + : GuardByte(reinterpret_cast(raw_guard_object)), + InitByteT(reinterpret_cast(raw_guard_object) + 1, reinterpret_cast(raw_guard_object) + 1) { + } + + /// Implements __cxa_guard_acquire. + AcquireResult cxa_guard_acquire() { + // use short-circuit evalutation to avoid calling acquire_init_byte when + // acquire_guard_byte returns true. (i.e. don't call it when we know from + // the guard byte that initialization has already been completed) + if (acquire_guard_byte() || InitByteT::acquire_init_byte()) + return INIT_IS_DONE; + return INIT_IS_PENDING; + } + + /// Implements __cxa_guard_release. + void cxa_guard_release() { + // Update guard byte first, so if somebody is woken up by release_init_byte + // and comes all the way back around to __cxa_guard_acquire again, they see + // it as having completed initialization. + release_guard_byte(); + InitByteT::release_init_byte(); + } + + /// Implements __cxa_guard_abort. + void cxa_guard_abort() { + abort_guard_byte(); + InitByteT::abort_init_byte(); + } +}; + +/// NoThreadsGuard - Manages initialization without performing any inter-thread +/// synchronization. +struct NoThreadsGuard : GuardBase { + using BaseT = typename NoThreadsGuard::GuardBase; + using BaseT::BaseT; +}; + +/// GlobalMutexGuard - Manages initialization using a global mutex and +/// condition variable. +template +struct GlobalMutexGuard : GuardBase> { + using BaseT = typename GlobalMutexGuard::GuardBase; + using BaseT::BaseT; +}; + +/// FutexGuard - Manages initialization using atomics and the futex syscall for +/// waiting and waking. +template +struct FutexGuard : GuardBase> { + using BaseT = typename FutexGuard::GuardBase; + using BaseT::BaseT; +}; + //===----------------------------------------------------------------------===// // //===----------------------------------------------------------------------===// @@ -508,25 +595,25 @@ template _LIBCPP_SAFE_STATIC T GlobalStatic::instance = {}; -enum class Implementation { NoThreads, GlobalLock, Futex }; +enum class Implementation { NoThreads, GlobalMutex, Futex }; template struct SelectImplementation; template <> struct SelectImplementation { - using type = InitByteNoThreads; + using type = NoThreadsGuard; }; template <> -struct SelectImplementation { - using type = InitByteGlobalMutex::instance, - GlobalStatic::instance, PlatformThreadID>; +struct SelectImplementation { + using type = GlobalMutexGuard::instance, + GlobalStatic::instance, PlatformThreadID>; }; template <> struct SelectImplementation { - using type = InitByteFutex; + using type = FutexGuard; }; // TODO(EricWF): We should prefer the futex implementation when available. But @@ -537,7 +624,7 @@ #elif defined(_LIBCXXABI_USE_FUTEX) Implementation::Futex; #else - Implementation::GlobalLock; + Implementation::GlobalMutex; #endif static_assert(CurrentImplementation != Implementation::Futex || PlatformSupportsFutex(), diff --git a/libcxxabi/test/guard_test_basic.pass.cpp b/libcxxabi/test/guard_test_basic.pass.cpp --- a/libcxxabi/test/guard_test_basic.pass.cpp +++ b/libcxxabi/test/guard_test_basic.pass.cpp @@ -12,8 +12,9 @@ #include "../src/cxa_guard_impl.h" #include -// Disable GCC warning about tautological comparison of a function's address -#if defined(__GNUC__) && !defined(__clang__) +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wtautological-pointer-compare" +#elif defined(__GNUC__) # pragma GCC diagnostic ignored "-Waddress" #endif @@ -118,42 +119,35 @@ { #if defined(_LIBCXXABI_HAS_NO_THREADS) static_assert(CurrentImplementation == Implementation::NoThreads, ""); - static_assert( - std::is_same::value, ""); + static_assert(std::is_same::value, ""); #else - static_assert(CurrentImplementation == Implementation::GlobalLock, ""); - static_assert( - std::is_same< - SelectedImplementation, - InitByteGlobalMutex::instance, - GlobalStatic::instance>>::value, - ""); + static_assert(CurrentImplementation == Implementation::GlobalMutex, ""); + static_assert(std::is_same::instance, + GlobalStatic::instance>>::value, + ""); #endif } { #if (defined(__APPLE__) || defined(__linux__)) && !defined(_LIBCXXABI_HAS_NO_THREADS) assert(PlatformThreadID); #endif - if (PlatformSupportsThreadID()) { + if (PlatformThreadID != nullptr) { assert(PlatformThreadID() != 0); assert(PlatformThreadID() == PlatformThreadID()); } } { - Tests::test(); - Tests::test(); + Tests::test(); + Tests::test(); } { - using MutexImpl = - InitByteGlobalMutex; + using MutexImpl = GlobalMutexGuard; Tests::test(); Tests::test(); } { - using FutexImpl = - InitByteFutex<&NopFutexWait, &NopFutexWake, &MockGetThreadID>; + using FutexImpl = FutexGuard<&NopFutexWait, &NopFutexWake, &MockGetThreadID>; Tests::test(); Tests::test(); } diff --git a/libcxxabi/test/guard_threaded_test.pass.cpp b/libcxxabi/test/guard_threaded_test.pass.cpp --- a/libcxxabi/test/guard_threaded_test.pass.cpp +++ b/libcxxabi/test/guard_threaded_test.pass.cpp @@ -326,7 +326,7 @@ } void test_all_impls() { - using MutexImpl = SelectImplementation::type; + using MutexImpl = SelectImplementation::type; // Attempt to test the Futex based implementation if it's supported on the // target platform.