Index: benchmarks/shared_ptr_create_destroy.cpp
===================================================================
--- /dev/null
+++ benchmarks/shared_ptr_create_destroy.cpp
@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <memory>
+#include <iostream>
+#include <chrono>
+#include <atomic>
+// Empty asm with a "memory" clobber: a compiler-level barrier around the loop.
+void clobber()
+{
+  asm volatile("" : : : "memory");
+}
+
+std::atomic<int> g_int;   // value copied into every shared_ptr that is created
+std::atomic<int> g_other; // written on every iteration with the pointee value
+// Times 1e9 make_shared<int> create/destroy cycles; prints elapsed seconds.
+int main() {
+  auto a = std::chrono::steady_clock::now(); // monotonic clock for intervals
+  {
+    clobber();
+    for (int i = 0; i < 1000000000; ++i)
+    {
+      auto sp = std::make_shared<int>(g_int.load(std::memory_order_relaxed));
+      g_other.store(*sp, std::memory_order_relaxed);
+    }
+    clobber();
+  }
+  auto b = std::chrono::steady_clock::now();
+  std::cout << std::chrono::duration<double>(b - a).count() << " seconds\n";
+  return 0;
+}
Index: benchmarks/shared_ptr_inc_dec_ref.cpp
===================================================================
--- /dev/null
+++ benchmarks/shared_ptr_inc_dec_ref.cpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include <memory> +#include <iostream> +#include <chrono> +#include <atomic> + +void clobber() +{ + asm volatile("" : : : "memory"); +} + +std::atomic<int> g_int; +std::atomic<int> g_other; + +int main() { + auto a = std::chrono::high_resolution_clock::now(); + auto sp = std::make_shared<int>(g_int.load(std::memory_order_relaxed)); + { + clobber(); + for (int i = 0; i < 1000000000; ++i) + { + std::shared_ptr<int> sp2(sp); + g_other.store(*sp2, std::memory_order_relaxed); + } + clobber(); + } + auto b = std::chrono::high_resolution_clock::now(); + std::cout<<std::chrono::duration_cast<std::chrono::nanoseconds>(b - a).count()/1000000000.0<<" seconds"<<std::endl; + return 0; +} Index: benchmarks/weak_ptr_inc_dec_ref.cpp =================================================================== --- /dev/null +++ benchmarks/weak_ptr_inc_dec_ref.cpp @@ -0,0 +1,37 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include <memory> +#include <iostream> +#include <chrono> +#include <atomic> + +void clobber() +{ + asm volatile("" : : : "memory"); +} + +std::atomic<int> g_int; +std::atomic<int> g_other; + +int main() { + auto a = std::chrono::high_resolution_clock::now(); + auto sp = std::make_shared<int>(g_int.load(std::memory_order_relaxed)); + { + clobber(); + for (int i = 0; i < 1000000000; ++i) + { + std::weak_ptr<int> wp(sp); + } + clobber(); + } + auto b = std::chrono::high_resolution_clock::now(); + std::cout<<std::chrono::duration_cast<std::chrono::nanoseconds>(b - a).count()/1000000000.0<<" seconds"<<std::endl; + return 0; +} Index: src/memory.cpp =================================================================== --- src/memory.cpp +++ src/memory.cpp @@ -96,7 +96,35 @@ void __shared_weak_count::__release_weak() _NOEXCEPT { - if (decrement(__shared_weak_owners_) == -1) + // NOTE: The acquire load here is an optimization of the very + // common case where a shared pointer is being destructed while + // having no other contended references. + // + // BENEFIT: We avoid expensive atomic stores like XADD and STREX + // in a common case. Those instructions are slow and do nasty + // things to caches. + // + // IS THIS SAFE? Yes. During weak destruction, if we see that we + // are the last reference, we know that no-one else is accessing + // us. If someone were accessing us, then they would be doing so + // while the last shared / weak_ptr was being destructed, and + // that's undefined anyway. + // + // If we see anything other than a 0, then we have possible + // contention, and need to use an atomicrmw primitive. + // The same arguments don't apply for increment, where it is legal + // (though inadvisable) to share shared_ptr references between + // threads, and have them all get copied at once. 
The argument + // also doesn't apply for __release_shared, because an outstanding + // weak_ptr::lock() could read / modify the shared count. + if (__libcpp_atomic_load(&__shared_weak_owners_, _AO_Aquire) == 0) + { + // no need to do this store, because we are about + // to destroy everything. + //__libcpp_atomic_store(&__shared_weak_owners_, -1, _AO_Release); + __on_zero_shared_weak(); + } + else if (decrement(__shared_weak_owners_) == -1) __on_zero_shared_weak(); }