Index: benchmarks/algorithms.bench.cpp =================================================================== --- benchmarks/algorithms.bench.cpp +++ benchmarks/algorithms.bench.cpp @@ -14,14 +14,27 @@ namespace { -enum class ValueType { Uint32, String }; -struct AllValueTypes : EnumValuesAsTuple { - static constexpr const char* Names[] = {"uint32", "string"}; +struct TriviallyRelocatableType { + std::string datamember; + explicit TriviallyRelocatableType(std::string m) : datamember(m) {} + bool operator<(const TriviallyRelocatableType& rhs) const { + return this->datamember < rhs.datamember; + } + bool operator>(const TriviallyRelocatableType& rhs) const { + return this->datamember > rhs.datamember; + } +}; + +enum class ValueType { Uint32, String, Trivre }; +struct AllValueTypes : EnumValuesAsTuple { + static constexpr const char* Names[] = {"uint32", "string", "trivre"}; }; template using Value = - std::conditional_t; + std::conditional_t >; enum class Order { Random, @@ -56,6 +69,15 @@ } } +void fillValues(std::vector& V, size_t N, Order O) { + if (O == Order::SingleElement) { + V.resize(N, TriviallyRelocatableType(getRandomString(1024))); + } else { + while (V.size() < N) + V.emplace_back(getRandomString(1024)); + } +} + template void sortValues(T& V, Order O) { assert(std::is_sorted(V.begin(), V.end())); Index: benchmarks/swap.bench.cpp =================================================================== --- /dev/null +++ benchmarks/swap.bench.cpp @@ -0,0 +1,94 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" +#include "GenerateInput.h" + +struct R { + std::vector m; + void initializeWith(const std::string& s) { + m.assign(s.begin(), s.end()); + } + struct Less { + bool operator()(const R& a, const R& b) const noexcept { + return a.m < b.m; + } + }; +}; + +struct NR { + std::list m; + void initializeWith(const std::string& s) { + m.assign(s.begin(), s.end()); + } + struct Less { + bool operator()(const NR& a, const NR& b) const noexcept { + return a.m < b.m; + } + }; +}; + +static void BM_TriviallyRelocatableSwapRanges(benchmark::State& st) { + std::vector pv(1000); + std::vector qv(1000); + while (st.KeepRunning()) { + std::swap_ranges(pv.begin(), pv.end(), qv.begin()); + benchmark::DoNotOptimize(pv); + benchmark::DoNotOptimize(qv); + } +} +BENCHMARK(BM_TriviallyRelocatableSwapRanges); + +static void BM_NonTriviallyRelocatableSwapRanges(benchmark::State& st) { + std::vector pv(1000); + std::vector qv(1000); + while (st.KeepRunning()) { + std::swap_ranges(pv.begin(), pv.end(), qv.begin()); + benchmark::DoNotOptimize(pv); + benchmark::DoNotOptimize(qv); + } +} +BENCHMARK(BM_NonTriviallyRelocatableSwapRanges); + +static void BM_TriviallyRelocatableMakeHeap(benchmark::State& st) { + std::vector original(1000); + for (R& elt : original) + elt.initializeWith(getRandomString(256)); + std::vector v(1000); + while (st.KeepRunning()) { + st.PauseTiming(); + v = original; + st.ResumeTiming(); + std::make_heap(v.begin(), v.end(), R::Less{}); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_TriviallyRelocatableMakeHeap); + +static void BM_NonTriviallyRelocatableMakeHeap(benchmark::State& st) { + std::vector original(1000); + for (NR& elt : original) + elt.initializeWith(getRandomString(256)); + std::vector v(1000); + while (st.KeepRunning()) { + st.PauseTiming(); + v = original; + st.ResumeTiming(); + std::make_heap(v.begin(), v.end(), NR::Less{}); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_NonTriviallyRelocatableMakeHeap); + +BENCHMARK_MAIN(); Index: include/memory =================================================================== --- include/memory +++ include/memory @@ -1609,28 +1609,24 @@ _LIBCPP_INLINE_VISIBILITY static void - __construct_forward(allocator_type& __a, _Ptr __begin1, _Ptr __end1, _Ptr& __begin2) + __construct_forward(allocator_type& __a, _Ptr __begin1, _Ptr __end1, _Ptr& __begin2, false_type) { for (; __begin1 != __end1; ++__begin1, (void) ++__begin2) construct(__a, _VSTD::__to_raw_pointer(__begin2), _VSTD::move_if_noexcept(*__begin1)); } - template + template _LIBCPP_INLINE_VISIBILITY static - typename enable_if - < - (__is_default_allocator::value - || !__has_construct::value) && - is_trivially_move_constructible<_Tp>::value, - void - >::type - __construct_forward(allocator_type&, _Tp* __begin1, _Tp* __end1, _Tp*& __begin2) + void + __construct_forward(allocator_type&, _Ptr __begin1, _Ptr __end1, _Ptr& __begin2, true_type) { + typedef typename iterator_traits<_Ptr>::value_type _Tp; + typedef typename remove_const<_Tp>::type _Vp; ptrdiff_t _Np = __end1 - __begin1; if (_Np > 0) { - _VSTD::memcpy(__begin2, __begin1, _Np * sizeof(_Tp)); + _VSTD::memcpy(const_cast<_Vp*>(_VSTD::__to_raw_pointer(__begin2)), _VSTD::__to_raw_pointer(__begin1), _Np * sizeof(_Tp)); __begin2 += _Np; } } @@ -1672,7 +1668,7 @@ _LIBCPP_INLINE_VISIBILITY static void - __construct_backward(allocator_type& __a, _Ptr __begin1, _Ptr __end1, _Ptr& __end2) + __construct_backward(allocator_type& __a, _Ptr __begin1, _Ptr __end1, _Ptr& __end2, false_type) { while (__end1 != __begin1) { @@ -1681,22 +1677,20 @@ } } - template + template _LIBCPP_INLINE_VISIBILITY static - typename enable_if - < - (__is_default_allocator::value - || !__has_construct::value) && - is_trivially_move_constructible<_Tp>::value, - void - >::type - __construct_backward(allocator_type&, _Tp* __begin1, _Tp* __end1, _Tp*& __end2) + void + __construct_backward(allocator_type&, _Ptr __begin1, _Ptr __end1, _Ptr& __end2, true_type) { + typedef typename iterator_traits<_Ptr>::value_type _Tp; + typedef typename remove_const<_Tp>::type _Vp; ptrdiff_t _Np = __end1 - __begin1; __end2 -= _Np; if (_Np > 0) - _VSTD::memcpy(__end2, __begin1, _Np * sizeof(_Tp)); + { + _VSTD::memcpy(const_cast<_Vp*>(_VSTD::__to_raw_pointer(__end2)), _VSTD::__to_raw_pointer(__begin1), _Np * sizeof(_Tp)); + } } private: @@ -5757,6 +5751,23 @@ } }; +// __has_trivial_construct, __has_trivial_destroy + +template +struct __is_exactly_std_allocator : false_type {}; + +template +struct __is_exactly_std_allocator > : true_type {}; + +template +struct __has_trivial_construct : integral_constant::value || !__has_construct<_Alloc, _Tp*, _Args...>::value) +> {}; + +template +struct __has_trivial_destroy : integral_constant::value || !__has_destroy<_Alloc, _Tp*>::value) +> {}; _LIBCPP_END_NAMESPACE_STD Index: include/type_traits =================================================================== --- include/type_traits +++ include/type_traits @@ -418,6 +418,7 @@ */ #include <__config> #include +#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -431,6 +432,21 @@ template struct _LIBCPP_TEMPLATE_VIS hash; +#ifndef _LIBCPP_HAS_NO_BUILTIN_IS_CONSTANT_EVALUATED +#if _LIBCPP_STD_VER > 17 +_LIBCPP_INLINE_VISIBILITY +inline constexpr bool is_constant_evaluated() noexcept { + return __builtin_is_constant_evaluated(); +} +#endif + +inline _LIBCPP_CONSTEXPR +bool __libcpp_is_constant_evaluated() _NOEXCEPT { return __builtin_is_constant_evaluated(); } +#else +inline _LIBCPP_CONSTEXPR +bool __libcpp_is_constant_evaluated() _NOEXCEPT { return false; } +#endif + template struct _LIBCPP_TEMPLATE_VIS integral_constant { @@ -3696,9 +3712,24 @@ swap(_Tp& __x, _Tp& __y) _NOEXCEPT_(is_nothrow_move_constructible<_Tp>::value && is_nothrow_move_assignable<_Tp>::value) { - _Tp __t(_VSTD::move(__x)); - __x = _VSTD::move(__y); - __y = _VSTD::move(__t); + if (__libcpp_is_trivially_relocatable<_Tp>::value && !is_volatile<_Tp>::value && !__libcpp_is_constant_evaluated()) { + if (!is_empty<_Tp>::value) { + // TODO: if either __x or __y is a "possibly overlapping subobject" + // in Itanium ABI terms, this will memcpy too much data. But the + // same caveat has applied to std::copy and friends, since forever. + char __t[sizeof (_Tp)]; + // We cannot use __libcpp_relocate_at here, because the stack array + // is deliberately underaligned for an actual object of type _Tp. + // We are deliberately shuffling bytes here, not objects. + _VSTD::memcpy(__t, _VSTD::addressof(__x), sizeof(_Tp)); + _VSTD::memcpy(_VSTD::addressof(__x), _VSTD::addressof(__y), sizeof(_Tp)); + _VSTD::memcpy(_VSTD::addressof(__y), __t, sizeof(_Tp)); + } + } else { + _Tp __t(_VSTD::move(__x)); + __x = _VSTD::move(__y); + __y = _VSTD::move(__t); + } } template @@ -4000,21 +4031,6 @@ #endif -#ifndef _LIBCPP_HAS_NO_BUILTIN_IS_CONSTANT_EVALUATED -#if _LIBCPP_STD_VER > 17 -_LIBCPP_INLINE_VISIBILITY -inline constexpr bool is_constant_evaluated() noexcept { - return __builtin_is_constant_evaluated(); -} -#endif - -inline _LIBCPP_CONSTEXPR -bool __libcpp_is_constant_evaluated() _NOEXCEPT { return __builtin_is_constant_evaluated(); } -#else -inline _LIBCPP_CONSTEXPR -bool __libcpp_is_constant_evaluated() _NOEXCEPT { return false; } -#endif - template using _IsCharLikeType = _And, is_trivial<_CharT> >; Index: include/vector =================================================================== --- include/vector +++ include/vector @@ -472,6 +472,21 @@ } } +template +struct __vector_move_via_memcpy : integral_constant::value && + __has_trivial_construct<_Allocator, _Tp, _Tp&&>::value && + !is_volatile<_Tp>::value +> {}; + +template +struct __vector_relocate_via_memcpy : integral_constant::value && + __has_trivial_construct<_Allocator, _Tp, _Tp&&>::value && + __has_trivial_destroy<_Allocator, _Tp>::value && + !is_volatile<_Tp>::value +> {}; + template */> class _LIBCPP_TEMPLATE_VIS _LIBCPP_TRIVIALLY_RELOCATABLE_IF((__vector_base<_Tp, _Allocator>::__allow_trivial_relocation::value)) @@ -830,6 +845,8 @@ void __swap_out_circular_buffer(__split_buffer& __v); pointer __swap_out_circular_buffer(__split_buffer& __v, pointer __p); void __move_range(pointer __from_s, pointer __from_e, pointer __to); + template + void __relocate_upward_and_insert(pointer __from_s, pointer __from_e, _Up&& __elt); void __move_assign(vector& __c, true_type) _NOEXCEPT_(is_nothrow_move_assignable::value); void __move_assign(vector& __c, false_type) @@ -956,12 +973,21 @@ void vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer& __v) { + typedef integral_constant::value || + __vector_relocate_via_memcpy<_Tp, _Allocator>::value + > __move_via_memcpy; + typedef integral_constant::value + > __destroy_via_noop; + __annotate_delete(); - __alloc_traits::__construct_backward(this->__alloc(), this->__begin_, this->__end_, __v.__begin_); + __alloc_traits::__construct_backward(this->__alloc(), this->__begin_, this->__end_, __v.__begin_, __move_via_memcpy()); _VSTD::swap(this->__begin_, __v.__begin_); _VSTD::swap(this->__end_, __v.__end_); _VSTD::swap(this->__end_cap(), __v.__end_cap()); __v.__first_ = __v.__begin_; + __v.__destruct_at_end(__v.__begin_, __destroy_via_noop()); __annotate_new(size()); __invalidate_all_iterators(); } @@ -970,14 +996,23 @@ typename vector<_Tp, _Allocator>::pointer vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer& __v, pointer __p) { + typedef integral_constant::value || + __vector_relocate_via_memcpy<_Tp, _Allocator>::value + > __move_via_memcpy; + typedef integral_constant::value + > __destroy_via_noop; + __annotate_delete(); pointer __r = __v.__begin_; - __alloc_traits::__construct_backward(this->__alloc(), this->__begin_, __p, __v.__begin_); - __alloc_traits::__construct_forward(this->__alloc(), __p, this->__end_, __v.__end_); + __alloc_traits::__construct_backward(this->__alloc(), this->__begin_, __p, __v.__begin_, __move_via_memcpy()); + __alloc_traits::__construct_forward(this->__alloc(), __p, this->__end_, __v.__end_, __move_via_memcpy()); _VSTD::swap(this->__begin_, __v.__begin_); _VSTD::swap(this->__end_, __v.__end_); _VSTD::swap(this->__end_cap(), __v.__end_cap()); __v.__first_ = __v.__begin_; + __v.__destruct_at_end(__v.__begin_, __destroy_via_noop()); __annotate_new(size()); __invalidate_all_iterators(); return __r; @@ -1719,7 +1754,16 @@ "vector::erase(iterator) called with a non-dereferenceable iterator"); difference_type __ps = __position - cbegin(); pointer __p = this->__begin_ + __ps; - this->__destruct_at_end(_VSTD::move(__p + 1, this->__end_, __p)); + _Tp *__rawp = _VSTD::__to_raw_pointer(__p); + if (__vector_relocate_via_memcpy<_Tp, _Allocator>::value) { + __alloc_traits::destroy(this->__alloc(), __rawp); + --this->__end_; + if (__p != this->__end_) { + _VSTD::memmove(__rawp, __rawp + 1, this->__end_ - __p); + } + } else { + this->__destruct_at_end(_VSTD::move(__p + 1, this->__end_, __p)); + } this->__invalidate_iterators_past(__p-1); iterator __r = __make_iter(__p); return __r; @@ -1747,6 +1791,23 @@ return __r; } +template +template +void +vector<_Tp, _Allocator>::__relocate_upward_and_insert(pointer __from_s, pointer __from_e, _Up&& __elt) +{ + if (__vector_relocate_via_memcpy<_Tp, _Allocator>::value) { + _VSTD::memmove(_VSTD::__to_raw_pointer(__from_s + 1), _VSTD::__to_raw_pointer(__from_s), (__from_e - __from_s) * sizeof(_Tp)); + ++this->__end_; + __alloc_traits::construct(this->__alloc(), + _VSTD::__to_raw_pointer(__from_s), + static_cast<_Up&&>(__elt)); + } else { + __move_range(__from_s, __from_e, __from_s + 1); + *__from_s = static_cast<_Up&&>(__elt); + } +} + template void vector<_Tp, _Allocator>::__move_range(pointer __from_s, pointer __from_e, pointer __to) @@ -1783,11 +1844,10 @@ } else { - __move_range(__p, this->__end_, __p + 1); const_pointer __xr = pointer_traits::pointer_to(__x); if (__p <= __xr && __xr < this->__end_) ++__xr; - *__p = *__xr; + __relocate_upward_and_insert(__p, this->__end_, *__xr); } } else @@ -1820,8 +1880,7 @@ } else { - __move_range(__p, this->__end_, __p + 1); - *__p = _VSTD::move(__x); + __relocate_upward_and_insert(__p, this->__end_, _VSTD::move(__x)); } } else @@ -1854,8 +1913,7 @@ else { __temp_value __tmp(this->__alloc(), _VSTD::forward<_Args>(__args)...); - __move_range(__p, this->__end_, __p + 1); - *__p = _VSTD::move(__tmp.get()); + __relocate_upward_and_insert(__p, this->__end_, _VSTD::move(__tmp.get())); } } else