Index: libcxx/include/__algorithm/nth_element.h =================================================================== --- libcxx/include/__algorithm/nth_element.h +++ libcxx/include/__algorithm/nth_element.h @@ -9,10 +9,11 @@ #ifndef _LIBCPP___ALGORITHM_NTH_ELEMENT_H #define _LIBCPP___ALGORITHM_NTH_ELEMENT_H -#include <__config> -#include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> +#include <__algorithm/comp.h> +#include <__algorithm/min_element.h> #include <__algorithm/sort.h> +#include <__config> #include <__iterator/iterator_traits.h> #include <__utility/swap.h> @@ -22,6 +23,18 @@ _LIBCPP_BEGIN_NAMESPACE_STD +// Assumes size > 0 +template +_LIBCPP_CONSTEXPR_AFTER_CXX11 void __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, + _Compare __comp) { + _BidirectionalIterator __lm1 = __last; + for (--__lm1; __first != __lm1; ++__first) { + _BidirectionalIterator __i = _VSTD::min_element<_BidirectionalIterator, _Compare&>(__first, __last, __comp); + if (__i != __first) + swap(*__first, *__i); + } +} + template _LIBCPP_CONSTEXPR_AFTER_CXX11 bool __nth_element_find_guard(_RandomAccessIterator& __i, _RandomAccessIterator& __j, @@ -45,6 +58,7 @@ // _Compare is known to be a reference type typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; const difference_type __limit = 7; + __sorting_network::__conditional_swap<_RandomAccessIterator, _Compare> __cond_swap(__comp); while (true) { if (__nth == __last) @@ -62,7 +76,7 @@ case 3: { _RandomAccessIterator __m = __first; - _VSTD::__sort3<_Compare>(__first, ++__m, --__last, __comp); + __sorting_network::__sort3(__first, ++__m, --__last, __cond_swap); return; } } @@ -74,7 +88,7 @@ // __len > __limit >= 3 _RandomAccessIterator __m = __first + __len/2; _RandomAccessIterator __lm1 = __last; - unsigned __n_swaps = _VSTD::__sort3<_Compare>(__first, __m, --__lm1, __comp); + unsigned __n_swaps = __sorting_network::__sort3_with_number_of_swaps<_Compare>(__first, __m, --__lm1, __comp); // *__m is median // partition [__first, __m) < *__m and *__m <= [__m, __last) // (this inhibits tossing elements equivalent to __m around unnecessarily) Index: libcxx/include/__algorithm/sort.h =================================================================== --- libcxx/include/__algorithm/sort.h +++ libcxx/include/__algorithm/sort.h @@ -9,14 +9,21 @@ #ifndef _LIBCPP___ALGORITHM_SORT_H #define _LIBCPP___ALGORITHM_SORT_H -#include <__config> -#include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> -#include <__algorithm/min_element.h> +#include <__algorithm/comp.h> +#include <__algorithm/iter_swap.h> +#include <__algorithm/make_heap.h> #include <__algorithm/partial_sort.h> +#include <__algorithm/sort_heap.h> #include <__algorithm/unwrap_iter.h> +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__iterator/move_iterator.h> +#include <__utility/move.h> +#include <__utility/pair.h> #include <__utility/swap.h> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -24,501 +31,661 @@ _LIBCPP_BEGIN_NAMESPACE_STD -// stable, 2-3 compares, 0-2 swaps +namespace __sorting_network { -template -_LIBCPP_CONSTEXPR_AFTER_CXX11 unsigned -__sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) -{ - unsigned __r = 0; - if (!__c(*__y, *__x)) // if x <= y - { - if (!__c(*__z, *__y)) // if y <= z - return __r; // x <= y && y <= z - // x <= y && y > z - swap(*__y, *__z); // x <= z && y < z - __r = 1; - if (__c(*__y, *__x)) // if x > y - { - swap(*__x, *__y); // x < y && y <= z - __r = 2; - } - return __r; // x <= y && y < z - } - if (__c(*__z, *__y)) // x > y, if y > z - { - swap(*__x, *__z); // x < y && y < z - __r = 1; - return __r; - } - swap(*__x, *__y); // x > y && y <= z - __r = 1; // x < y && x <= z - if (__c(*__z, *__y)) // if y > z - { - swap(*__y, *__z); // x <= y && y < z - __r = 2; +template +class __conditional_swap { + _Compare comp_; + +public: + _LIBCPP_CONSTEXPR_AFTER_CXX11 _Compare get() const { return comp_; } + _LIBCPP_CONSTEXPR_AFTER_CXX11 __conditional_swap(_Compare __comp) : comp_(__comp) {} + _LIBCPP_CONSTEXPR_AFTER_CXX11 inline void operator()(_RandomAccessIterator __x, _RandomAccessIterator __y) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + bool __result = comp_(*__x, *__y); + // Expect a compiler would short-circuit the following if-block. + // 4 * sizeof(size_t) is a magic number. Expect a compiler to use SIMD + // instruction on them. + if (_VSTD::is_trivially_copy_constructible::value && + _VSTD::is_trivially_copy_assignable::value && sizeof(value_type) <= 4 * sizeof(size_t)) { + value_type __min = __result ? _VSTD::move(*__x) : _VSTD::move(*__y); + *__y = __result ? _VSTD::move(*__y) : _VSTD::move(*__x); + *__x = _VSTD::move(__min); + } else { + if (!__result) { + _VSTD::iter_swap(__x, __y); + } } - return __r; -} // x <= y && y <= z + } +}; -// stable, 3-6 compares, 0-5 swaps +template +class __reverse_conditional_swap { + _Compare comp_; -template -unsigned -__sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, - _ForwardIterator __x4, _Compare __c) -{ - unsigned __r = _VSTD::__sort3<_Compare>(__x1, __x2, __x3, __c); - if (__c(*__x4, *__x3)) - { - swap(*__x3, *__x4); - ++__r; - if (__c(*__x3, *__x2)) - { - swap(*__x2, *__x3); - ++__r; - if (__c(*__x2, *__x1)) - { - swap(*__x1, *__x2); - ++__r; - } - } +public: + _Compare get() const { return comp_; } + __reverse_conditional_swap(_Compare __comp) : comp_(__comp) {} + inline void operator()(_RandomAccessIterator __x, _RandomAccessIterator __y) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + bool __result = !comp_(*__x, *__y); + // Expect a compiler would short-circuit the following if-block. + if (_VSTD::is_trivially_copy_constructible::value && + _VSTD::is_trivially_copy_assignable::value && sizeof(value_type) <= 4 * sizeof(size_t)) { + value_type __min = __result ? _VSTD::move(*__x) : _VSTD::move(*__y); + *__y = __result ? _VSTD::move(*__y) : _VSTD::move(*__x); + *__x = _VSTD::move(__min); + } else { + if (!__result) { + _VSTD::iter_swap(__x, __y); + } } - return __r; + } +}; + +template +_LIBCPP_HIDE_FROM_ABI void __sort2(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 0, __a + 1); +} + +template +_LIBCPP_HIDE_FROM_ABI void __sort3(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 1, __a + 2); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 0, __a + 1); } -// stable, 4-10 compares, 0-9 swaps +template +_LIBCPP_HIDE_FROM_ABI void __sort4(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 2, __a + 3); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 1, __a + 3); + __cond_swap(__a + 1, __a + 2); +} + +template +_LIBCPP_HIDE_FROM_ABI void __sort5(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 3, __a + 4); + __cond_swap(__a + 2, __a + 4); + __cond_swap(__a + 2, __a + 3); + __cond_swap(__a + 0, __a + 3); + __cond_swap(__a + 1, __a + 4); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 1, __a + 3); + __cond_swap(__a + 1, __a + 2); +} + +template +_LIBCPP_HIDE_FROM_ABI void __sort6(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 1, __a + 2); + __cond_swap(__a + 4, __a + 5); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 3, __a + 5); + __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 3, __a + 4); + __cond_swap(__a + 0, __a + 3); + __cond_swap(__a + 1, __a + 4); + __cond_swap(__a + 2, __a + 5); + __cond_swap(__a + 2, __a + 4); + __cond_swap(__a + 1, __a + 3); + __cond_swap(__a + 2, __a + 3); +} +template +_LIBCPP_HIDE_FROM_ABI void __sort7(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 1, __a + 2); + __cond_swap(__a + 3, __a + 4); + __cond_swap(__a + 5, __a + 6); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 3, __a + 5); + __cond_swap(__a + 4, __a + 6); + __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 4, __a + 5); + __cond_swap(__a + 0, __a + 4); + __cond_swap(__a + 1, __a + 5); + __cond_swap(__a + 2, __a + 6); + __cond_swap(__a + 0, __a + 3); + __cond_swap(__a + 2, __a + 5); + __cond_swap(__a + 1, __a + 3); + __cond_swap(__a + 2, __a + 4); + __cond_swap(__a + 2, __a + 3); +} + +template +_LIBCPP_HIDE_FROM_ABI void __sort8(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 2, __a + 3); + __cond_swap(__a + 4, __a + 5); + __cond_swap(__a + 6, __a + 7); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 1, __a + 3); + __cond_swap(__a + 4, __a + 6); + __cond_swap(__a + 5, __a + 7); + __cond_swap(__a + 1, __a + 2); + __cond_swap(__a + 5, __a + 6); + __cond_swap(__a + 0, __a + 4); + __cond_swap(__a + 1, __a + 5); + __cond_swap(__a + 2, __a + 6); + __cond_swap(__a + 3, __a + 7); + __cond_swap(__a + 1, __a + 4); + __cond_swap(__a + 3, __a + 6); + __cond_swap(__a + 2, __a + 4); + __cond_swap(__a + 3, __a + 5); + __cond_swap(__a + 3, __a + 4); +} + +template +_LIBCPP_HIDE_FROM_ABI void __sort1to8(_RandomAccessIterator __a, + typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __len, + _ConditionalSwap __cond_swap) { + switch (__len) { + case 0: + case 1: + return; + case 2: + __sort2(__a, __cond_swap); + return; + case 3: + __sort3(__a, __cond_swap); + return; + case 4: + __sort4(__a, __cond_swap); + return; + case 5: + __sort5(__a, __cond_swap); + return; + case 6: + __sort6(__a, __cond_swap); + return; + case 7: + __sort7(__a, __cond_swap); + return; + case 8: + __sort8(__a, __cond_swap); + return; + } + // ignore +} +template +_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_HIDE_FROM_ABI void __sort3(_RandomAccessIterator __a0, _RandomAccessIterator __a1, + _RandomAccessIterator __a2, + _ConditionalSwap __cond_swap) { + __cond_swap(__a1, __a2); + __cond_swap(__a0, __a2); + __cond_swap(__a0, __a1); +} + +// stable, 2-3 compares, 0-2 swaps template -_LIBCPP_HIDDEN -unsigned -__sort5(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, - _ForwardIterator __x4, _ForwardIterator __x5, _Compare __c) -{ - unsigned __r = _VSTD::__sort4<_Compare>(__x1, __x2, __x3, __x4, __c); - if (__c(*__x5, *__x4)) +_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_HIDE_FROM_ABI unsigned +__sort3_with_number_of_swaps(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) { + unsigned __r = 0; + if (!__c(*__y, *__x)) // if x <= y + { + if (!__c(*__z, *__y)) // if y <= z + return __r; // x <= y && y <= z + // x <= y && y > z + swap(*__y, *__z); // x <= z && y < z + __r = 1; + if (__c(*__y, *__x)) // if x > y { - swap(*__x4, *__x5); - ++__r; - if (__c(*__x4, *__x3)) - { - swap(*__x3, *__x4); - ++__r; - if (__c(*__x3, *__x2)) - { - swap(*__x2, *__x3); - ++__r; - if (__c(*__x2, *__x1)) - { - swap(*__x1, *__x2); - ++__r; - } - } - } + swap(*__x, *__y); // x < y && y <= z + __r = 2; } + return __r; // x <= y && y < z + } + if (__c(*__z, *__y)) // x > y, if y > z + { + swap(*__x, *__z); // x < y && y < z + __r = 1; return __r; + } + swap(*__x, *__y); // x > y && y <= z + __r = 1; // x < y && x <= z + if (__c(*__z, *__y)) // if y > z + { + swap(*__y, *__z); // x <= y && y < z + __r = 2; + } + return __r; } -// Assumes size > 0 -template -_LIBCPP_CONSTEXPR_AFTER_CXX11 void -__selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) -{ - _BidirectionalIterator __lm1 = __last; - for (--__lm1; __first != __lm1; ++__first) - { - _BidirectionalIterator __i = _VSTD::min_element<_BidirectionalIterator, _Compare&>(__first, __last, __comp); - if (__i != __first) - swap(*__first, *__i); +} // namespace __sorting_network + +namespace __bitonic { +class __detail { +public: + enum { + __batch = 8, + __bitonic_batch = __batch * 2, + __small_sort_max = __bitonic_batch * 2, + }; +}; + +template +_LIBCPP_HIDE_FROM_ABI void __enforce_order(_RandomAccessIterator __first, _RandomAccessIterator __last, + _ConditionalSwap __cond_swap, _ReverseConditionalSwap __reverse_cond_swap) { + _RandomAccessIterator __i = __first; + while (__detail::__bitonic_batch <= __last - __i) { + __sorting_network::__sort8(__i, __cond_swap); + __sorting_network::__sort8(__i + __detail::__batch, __reverse_cond_swap); + __i += __detail::__bitonic_batch; + } + if (__detail::__batch <= __last - __i) { + __sorting_network::__sort8(__i, __cond_swap); + __i += __detail::__batch; + __sorting_network::__sort1to8(__i, __last - __i, __reverse_cond_swap); + } else { + __sorting_network::__sort1to8(__i, __last - __i, __cond_swap); + } +} + +class __construct { +public: + template + static inline void __op(_Type* __result, _Type&& __val) { + new ((void*)__result) _Type(_VSTD::move(__val)); + } +}; + +class __move_assign { +public: + template + static inline void __op(_Type* __result, _Type&& __val) { + *__result = _VSTD::move(__val); + } +}; + +template +_LIBCPP_HIDE_FROM_ABI void __forward_merge(_InputIterator __first, _InputIterator __last, _OutputIterator __result, + _Compare __comp) { + --__last; + // The len used here is one less than the actual length. This is so that the + // comparison is carried out against 0. The final move is done + // unconditionally at the end. + typename _VSTD::iterator_traits<_InputIterator>::difference_type __len = __last - __first; + for (; __len > 0; __len--) { + if (__comp(*__first, *__last)) { + _Copy::__op(&*__result, _VSTD::move(*__first++)); + } else { + _Copy::__op(&*__result, _VSTD::move(*__last--)); } + __result++; + } + _Copy::__op(&*__result, _VSTD::move(*__first)); } -template -void -__insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) -{ - typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; - if (__first != __last) - { - _BidirectionalIterator __i = __first; - for (++__i; __i != __last; ++__i) - { - _BidirectionalIterator __j = __i; - value_type __t(_VSTD::move(*__j)); - for (_BidirectionalIterator __k = __i; __k != __first && __comp(__t, *--__k); --__j) - *__j = _VSTD::move(*__k); - *__j = _VSTD::move(__t); - } +template +_LIBCPP_HIDE_FROM_ABI void __backward_merge(_InputIterator __first, _InputIterator __last, _OutputIterator __result, + _Compare __comp) { + --__last; + __result += __last - __first; + // The len used here is one less than the actual length. This is so that the + // comparison is carried out against 0. The final move is done + // unconditionally at the end. + typename _VSTD::iterator_traits<_InputIterator>::difference_type __len = __last - __first; + for (; __len > 0; __len--) { + if (__comp(*__first, *__last)) { + _Copy::__op(&*__result, _VSTD::move(*__first++)); + } else { + _Copy::__op(&*__result, _VSTD::move(*__last--)); } + __result--; + } + _Copy::__op(&*__result, _VSTD::move(*__first)); } -template -void -__insertion_sort_3(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) -{ - typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; - _RandomAccessIterator __j = __first+2; - _VSTD::__sort3<_Compare>(__first, __first+1, __j, __comp); - for (_RandomAccessIterator __i = __j+1; __i != __last; ++__i) - { - if (__comp(*__i, *__j)) - { - value_type __t(_VSTD::move(*__i)); - _RandomAccessIterator __k = __j; - __j = __i; - do - { - *__j = _VSTD::move(*__k); - __j = __k; - } while (__j != __first && __comp(__t, *--__k)); - *__j = _VSTD::move(__t); - } - __j = __i; +template +inline _LIBCPP_HIDE_FROM_ABI bool +__small_sort(_RandomAccessIterator __first, + typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __len, + typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type* __buff, _ConditionalSwap __cond_swap, + _ReverseConditionalSwap __reverse_cond_swap) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + if (__len > __detail::__small_sort_max) { + return false; + } + _RandomAccessIterator __last = __first + __len; + __enforce_order(__first, __last, __cond_swap, __reverse_cond_swap); + if (__len <= __detail::__batch) { + // sorted. + return true; + } + auto __comp = __cond_swap.get(); + if (__len <= __detail::__bitonic_batch) { + // single bitonic order merge. + __forward_merge<__bitonic::__construct>(__first, __last, __buff, __comp); + copy(_VSTD::make_move_iterator(__buff), _VSTD::make_move_iterator(__buff + __len), __first); + for (auto __iter = __buff; __iter < __buff + __len; __iter++) { + (*__iter).~value_type(); } + return true; + } + // double bitonic order merge. + __forward_merge<__construct>(__first, __first + __detail::__bitonic_batch, __buff, __comp); + __backward_merge<__construct>(__first + __detail::__bitonic_batch, __last, __buff + __detail::__bitonic_batch, + __comp); + __forward_merge<__move_assign>(__buff, __buff + __len, __first, __comp); + for (auto __iter = __buff; __iter < __buff + __len; __iter++) { + (*__iter).~value_type(); + } + return true; } +} // namespace __bitonic -template -bool -__insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) -{ - switch (__last - __first) - { - case 0: - case 1: - return true; - case 2: - if (__comp(*--__last, *__first)) - swap(*__first, *__last); - return true; - case 3: - _VSTD::__sort3<_Compare>(__first, __first+1, --__last, __comp); - return true; - case 4: - _VSTD::__sort4<_Compare>(__first, __first+1, __first+2, --__last, __comp); - return true; - case 5: - _VSTD::__sort5<_Compare>(__first, __first+1, __first+2, __first+3, --__last, __comp); - return true; +namespace __bitsetsort { +struct __64bit_set { + typedef uint64_t __storage_t; + enum { __block_size = 64 }; + static __storage_t __blsr(__storage_t x) { + // _blsr_u64 can be used here but it did not make any performance + // difference in practice. + return x ^ (x & -x); + } + static int __clz(__storage_t x) { return __builtin_clzll(x); } + static int __ctz(__storage_t x) { return __builtin_ctzll(x); } +}; + +struct __32bit_set { + typedef uint32_t __storage_t; + enum { __block_size = 32 }; + static __storage_t __blsr(__storage_t x) { + // _blsr_u32 can be used here but it did not make any performance + // difference in practice. + return x ^ (x & -x); + } + static int __clz(__storage_t x) { return __builtin_clzl(x); } + static int __ctz(__storage_t x) { return __builtin_ctzl(x); } +}; + +template +struct __set_selector { + typedef __64bit_set __set; +}; + +template <> +struct __set_selector<4> { + typedef __32bit_set __set; +}; + +template +inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(_RandomAccessIterator __first, _RandomAccessIterator __last, + typename _Bitset::__storage_t& __left_bitset, + typename _Bitset::__storage_t& __right_bitset) { + while (__left_bitset != 0 & __right_bitset != 0) { + int tz_left = _Bitset::__ctz(__left_bitset); + __left_bitset = _Bitset::__blsr(__left_bitset); + int tz_right = _Bitset::__ctz(__right_bitset); + __right_bitset = _Bitset::__blsr(__right_bitset); + _VSTD::iter_swap(__first + tz_left, __last - tz_right); + } +} + +template +_LIBCPP_HIDE_FROM_ABI _VSTD::pair<_RandomAccessIterator, bool> +__bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type difference_type; + typedef typename _Bitset::__storage_t __storage_t; + _RandomAccessIterator __begin = __first; + value_type __pivot = _VSTD::move(*__first); + + // Check if pivot is less than the last element. Checking this first avoids + // comparing the first and the last iterators on each iteration as done in the + // else part. + if (__comp(__pivot, *(__last - 1))) { + // Guarded. + while (!__comp(__pivot, *++__first)) { } - typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; - _RandomAccessIterator __j = __first+2; - _VSTD::__sort3<_Compare>(__first, __first+1, __j, __comp); - const unsigned __limit = 8; - unsigned __count = 0; - for (_RandomAccessIterator __i = __j+1; __i != __last; ++__i) - { - if (__comp(*__i, *__j)) - { - value_type __t(_VSTD::move(*__i)); - _RandomAccessIterator __k = __j; - __j = __i; - do - { - *__j = _VSTD::move(*__k); - __j = __k; - } while (__j != __first && __comp(__t, *--__k)); - *__j = _VSTD::move(__t); - if (++__count == __limit) - return ++__i == __last; - } - __j = __i; + } else { + while (++__first < __last && !__comp(__pivot, *__first)) { } - return true; + } + + if (__first < __last) { + // It will be always guarded because __bitset_sort will do the + // median-of-three before calling this. + while (__comp(__pivot, *--__last)) { + } + } + bool __already_partitioned = __first >= __last; + if (!__already_partitioned) { + _VSTD::iter_swap(__first, __last); + ++__first; + } + + // In [__first, __last) __last is not inclusive. From now one, it uses last + // minus one to be inclusive on both sides. + _RandomAccessIterator __lm1 = __last - 1; + __storage_t __left_bitset = 0; + __storage_t __right_bitset = 0; + + // Reminder: length = __lm1 - __first + 1. + while (__lm1 - __first >= 2 * _Bitset::__block_size - 1) { + if (__left_bitset == 0) { + // Possible vectorization. With a proper "-march" flag, the following loop + // will be compiled into a set of SIMD instructions. + _RandomAccessIterator __iter = __first; + for (int __j = 0; __j < _Bitset::__block_size;) { + __left_bitset |= (static_cast<__storage_t>(__comp(__pivot, *__iter)) << __j); + __j++; + __iter++; + } + } + if (__right_bitset == 0) { + // Possible vectorization. With a proper "-march" flag, the following loop + // will be compiled into a set of SIMD instructions. + _RandomAccessIterator __iter = __lm1; + for (int __j = 0; __j < _Bitset::__block_size;) { + __right_bitset |= (static_cast<__storage_t>(!__comp(__pivot, *__iter)) << __j); + __j++; + __iter--; + } + } + __swap_bitmap_pos<_Bitset>(__first, __lm1, __left_bitset, __right_bitset); + __first += (__left_bitset == 0) ? _Bitset::__block_size : 0; + __lm1 -= (__right_bitset == 0) ? _Bitset::__block_size : 0; + } + // Now, we have a less-than a block on each side. + difference_type __remaining_len = __lm1 - __first + 1; + difference_type __l_size; + difference_type __r_size; + if (__left_bitset == 0 && __right_bitset == 0) { + __l_size = __remaining_len / 2; + __r_size = __remaining_len - __l_size; + } else if (__left_bitset == 0) { + // We know at least one side is a full block. + __l_size = __remaining_len - _Bitset::__block_size; + __r_size = _Bitset::__block_size; + } else { // if (__right_bitset == 0) + __l_size = _Bitset::__block_size; + __r_size = __remaining_len - _Bitset::__block_size; + } + if (__left_bitset == 0) { + _RandomAccessIterator __iter = __first; + for (int j = 0; j < __l_size; j++) { + __left_bitset |= (static_cast<__storage_t>(__comp(__pivot, *(__iter))) << j); + __iter++; + } + } + if (__right_bitset == 0) { + _RandomAccessIterator __iter = __lm1; + for (int j = 0; j < __r_size; j++) { + __right_bitset |= (static_cast<__storage_t>(!__comp(__pivot, *(__iter))) << j); + --__iter; + } + } + __swap_bitmap_pos<_Bitset>(__first, __lm1, __left_bitset, __right_bitset); + __first += (__left_bitset == 0) ? __l_size : 0; + __lm1 -= (__right_bitset == 0) ? __r_size : 0; + + if (__left_bitset) { + // Swap within the left side. + // Need to find set positions in the reverse order. + while (__left_bitset != 0) { + int __tz_left = _Bitset::__block_size - 1 - _Bitset::__clz(__left_bitset); + __left_bitset &= (static_cast<__storage_t>(1) << __tz_left) - 1; + _RandomAccessIterator it = __first + __tz_left; + if (it != __lm1) { + _VSTD::iter_swap(it, __lm1); + } + --__lm1; + } + __first = __lm1 + 1; + } else if (__right_bitset) { + // Swap within the right side. + // Need to find set positions in the reverse order. + while (__right_bitset != 0) { + int __tz_right = _Bitset::__block_size - 1 - _Bitset::__clz(__right_bitset); + __right_bitset &= (static_cast<__storage_t>(1) << __tz_right) - 1; + _RandomAccessIterator it = __lm1 - __tz_right; + if (it != __first) { + _VSTD::iter_swap(it, __first); + } + ++__first; + } + } + + _RandomAccessIterator __pivot_pos = __first - 1; + if (__begin != __pivot_pos) { + *__begin = _VSTD::move(*__pivot_pos); + } + *__pivot_pos = _VSTD::move(__pivot); + return _VSTD::make_pair(__pivot_pos, __already_partitioned); } -template -void -__insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterator __last1, - typename iterator_traits<_BidirectionalIterator>::value_type* __first2, _Compare __comp) -{ - typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; - if (__first1 != __last1) - { - __destruct_n __d(0); - unique_ptr __h(__first2, __d); - value_type* __last2 = __first2; - ::new ((void*)__last2) value_type(_VSTD::move(*__first1)); - __d.template __incr(); - for (++__last2; ++__first1 != __last1; ++__last2) - { - value_type* __j2 = __last2; - value_type* __i2 = __j2; - if (__comp(*__first1, *--__i2)) - { - ::new ((void*)__j2) value_type(_VSTD::move(*__i2)); - __d.template __incr(); - for (--__j2; __i2 != __first2 && __comp(*__first1, *--__i2); --__j2) - *__j2 = _VSTD::move(*__i2); - *__j2 = _VSTD::move(*__first1); - } - else - { - ::new ((void*)__j2) value_type(_VSTD::move(*__first1)); - __d.template __incr(); - } - } - __h.release(); +template +inline _LIBCPP_HIDE_FROM_ABI bool __partial_insertion_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + if (__first == __last) + return true; + + const unsigned __limit = 8; + unsigned __count = 0; + _RandomAccessIterator __j = __first; + for (_RandomAccessIterator __i = __j + 1; __i != __last; ++__i) { + if (__comp(*__i, *__j)) { + value_type __t(_VSTD::move(*__i)); + _RandomAccessIterator __k = __j; + __j = __i; + do { + *__j = _VSTD::move(*__k); + __j = __k; + } while (__j != __first && __comp(__t, *--__k)); + *__j = _VSTD::move(__t); + if (++__count == __limit) + return ++__i == __last; } + __j = __i; + } + return true; } template -void -__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) -{ - typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; - typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; - const difference_type __limit = is_trivially_copy_constructible::value && - is_trivially_copy_assignable::value ? 30 : 6; - while (true) - { - __restart: - difference_type __len = __last - __first; - switch (__len) - { - case 0: - case 1: - return; - case 2: - if (__comp(*--__last, *__first)) - swap(*__first, *__last); - return; - case 3: - _VSTD::__sort3<_Compare>(__first, __first+1, --__last, __comp); - return; - case 4: - _VSTD::__sort4<_Compare>(__first, __first+1, __first+2, --__last, __comp); - return; - case 5: - _VSTD::__sort5<_Compare>(__first, __first+1, __first+2, __first+3, --__last, __comp); - return; - } - if (__len <= __limit) - { - _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp); - return; - } - // __len > 5 - _RandomAccessIterator __m = __first; - _RandomAccessIterator __lm1 = __last; - --__lm1; - unsigned __n_swaps; - { - difference_type __delta; - if (__len >= 1000) - { - __delta = __len/2; - __m += __delta; - __delta /= 2; - __n_swaps = _VSTD::__sort5<_Compare>(__first, __first + __delta, __m, __m+__delta, __lm1, __comp); - } - else - { - __delta = __len/2; - __m += __delta; - __n_swaps = _VSTD::__sort3<_Compare>(__first, __m, __lm1, __comp); - } - } - // *__m is median - // partition [__first, __m) < *__m and *__m <= [__m, __last) - // (this inhibits tossing elements equivalent to __m around unnecessarily) - _RandomAccessIterator __i = __first; - _RandomAccessIterator __j = __lm1; - // j points beyond range to be tested, *__m is known to be <= *__lm1 - // The search going up is known to be guarded but the search coming down isn't. - // Prime the downward search with a guard. - if (!__comp(*__i, *__m)) // if *__first == *__m - { - // *__first == *__m, *__first doesn't go in first part - // manually guard downward moving __j against __i - while (true) - { - if (__i == --__j) - { - // *__first == *__m, *__m <= all other elements - // Parition instead into [__first, __i) == *__first and *__first < [__i, __last) - ++__i; // __first + 1 - __j = __last; - if (!__comp(*__first, *--__j)) // we need a guard if *__first == *(__last-1) - { - while (true) - { - if (__i == __j) - return; // [__first, __last) all equivalent elements - if (__comp(*__first, *__i)) - { - swap(*__i, *__j); - ++__n_swaps; - ++__i; - break; - } - ++__i; - } - } - // [__first, __i) == *__first and *__first < [__j, __last) and __j == __last - 1 - if (__i == __j) - return; - while (true) - { - while (!__comp(*__first, *__i)) - ++__i; - while (__comp(*__first, *--__j)) - ; - if (__i >= __j) - break; - swap(*__i, *__j); - ++__n_swaps; - ++__i; - } - // [__first, __i) == *__first and *__first < [__i, __last) - // The first part is sorted, sort the second part - // _VSTD::__sort<_Compare>(__i, __last, __comp); - __first = __i; - goto __restart; - } - if (__comp(*__j, *__m)) - { - swap(*__i, *__j); - ++__n_swaps; - break; // found guard for downward moving __j, now use unguarded partition - } - } - } - // It is known that *__i < *__m - ++__i; - // j points beyond range to be tested, *__m is known to be <= *__lm1 - // if not yet partitioned... - if (__i < __j) - { - // known that *(__i - 1) < *__m - // known that __i <= __m - while (true) - { - // __m still guards upward moving __i - while (__comp(*__i, *__m)) - ++__i; - // It is now known that a guard exists for downward moving __j - while (!__comp(*--__j, *__m)) - ; - if (__i > __j) - break; - swap(*__i, *__j); - ++__n_swaps; - // It is known that __m != __j - // If __m just moved, follow it - if (__m == __i) - __m = __j; - ++__i; - } - } - // [__first, __i) < *__m and *__m <= [__i, __last) - if (__i != __m && __comp(*__m, *__i)) - { - swap(*__i, *__m); - ++__n_swaps; - } - // [__first, __i) < *__i and *__i <= [__i+1, __last) - // If we were given a perfect partition, see if insertion sort is quick... - if (__n_swaps == 0) - { - bool __fs = _VSTD::__insertion_sort_incomplete<_Compare>(__first, __i, __comp); - if (_VSTD::__insertion_sort_incomplete<_Compare>(__i+1, __last, __comp)) - { - if (__fs) - return; - __last = __i; - continue; - } - else - { - if (__fs) - { - __first = ++__i; - continue; - } - } - } - // sort smaller range with recursive call and larger with tail recursion elimination - if (__i - __first < __last - __i) - { - _VSTD::__sort<_Compare>(__first, __i, __comp); - // _VSTD::__sort<_Compare>(__i+1, __last, __comp); - __first = ++__i; - } - else - { - _VSTD::__sort<_Compare>(__i+1, __last, __comp); - // _VSTD::__sort<_Compare>(__first, __i, __comp); - __last = __i; +void __bitsetsort_loop(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type* __buff, + typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __limit) { + _LIBCPP_CONSTEXPR_AFTER_CXX11 int __ninther_threshold = 128; + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type difference_type; + __sorting_network::__conditional_swap<_RandomAccessIterator, _Compare> __cond_swap(__comp); + __sorting_network::__reverse_conditional_swap<_RandomAccessIterator, _Compare> __reverse_cond_swap(__comp); + while (true) { + if (__limit == 0) { + // Fallback to heap sort as Introsort suggests. + _VSTD::make_heap(__first, __last, __comp); + _VSTD::sort_heap(__first, __last, __comp); + return; + } + __limit--; + difference_type __len = __last - __first; + if (__len <= __bitonic::__detail::__batch) { + __sorting_network::__sort1to8(__first, __len, __cond_swap); + return; + } else if (__len <= __bitonic::__detail::__small_sort_max) { + __bitonic::__small_sort(__first, __len, __buff, __cond_swap, __reverse_cond_swap); + return; + } + difference_type __half_len = __len / 2; + if (__len > __ninther_threshold) { + __sorting_network::__sort3(__first, __first + __half_len, __last - 1, __cond_swap); + __sorting_network::__sort3(__first + 1, __first + (__half_len - 1), __last - 2, __cond_swap); + __sorting_network::__sort3(__first + 2, __first + (__half_len + 1), __last - 3, __cond_swap); + __sorting_network::__sort3(__first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), + __cond_swap); + _VSTD::iter_swap(__first, __first + __half_len); + } else { + __sorting_network::__sort3(__first + __half_len, __first, __last - 1, __cond_swap); + } + auto __ret = __bitset_partition<__64bit_set>(__first, __last, __comp); + if (__ret.second) { + bool __left = __partial_insertion_sort(__first, __ret.first, __comp); + if (__partial_insertion_sort(__ret.first + 1, __last, __comp)) { + if (__left) + return; + __last = __ret.first; + continue; + } else { + if (__left) { + __first = ++__ret.first; + continue; } + } } + + // Sort smaller range with recursive call and larger with tail recursion + // elimination. + if (__ret.first - __first < __last - __ret.first) { + __bitsetsort_loop<_Compare>(__first, __ret.first, __comp, __buff, __limit); + __first = ++__ret.first; + } else { + __bitsetsort_loop<_Compare>(__ret.first + 1, __last, __comp, __buff, __limit); + __last = __ret.first; + } + } } -template -inline _LIBCPP_INLINE_VISIBILITY -void -__sort(_Tp** __first, _Tp** __last, __less<_Tp*>&) -{ - __less __comp; - _VSTD::__sort<__less&, uintptr_t*>((uintptr_t*)__first, (uintptr_t*)__last, __comp); +template +inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) { + _Number __log2 = 0; + while (__n > 1) { + __log2++; + __n >>= 1; + } + return __log2; } -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, char*>(char*, char*, __less&)) -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, wchar_t*>(wchar_t*, wchar_t*, __less&)) -#endif -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, signed char*>(signed char*, signed char*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, unsigned char*>(unsigned char*, unsigned char*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, short*>(short*, short*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, unsigned short*>(unsigned short*, unsigned short*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, int*>(int*, int*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, unsigned*>(unsigned*, unsigned*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, long*>(long*, long*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, unsigned long*>(unsigned long*, unsigned long*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, long long*>(long long*, long long*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, unsigned long long*>(unsigned long long*, unsigned long long*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, float*>(float*, float*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, double*>(double*, double*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less&, long double*>(long double*, long double*, __less&)) - -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, char*>(char*, char*, __less&)) -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, wchar_t*>(wchar_t*, wchar_t*, __less&)) -#endif -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, signed char*>(signed char*, signed char*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, unsigned char*>(unsigned char*, unsigned char*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, short*>(short*, short*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, unsigned short*>(unsigned short*, unsigned short*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, int*>(int*, int*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, unsigned*>(unsigned*, unsigned*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, long*>(long*, long*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, unsigned long*>(unsigned long*, unsigned long*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, long long*>(long long*, long long*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, unsigned long long*>(unsigned long long*, unsigned long long*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, float*>(float*, float*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, double*>(double*, double*, __less&)) -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, long double*>(long double*, long double*, __less&)) - -_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS unsigned __sort5<__less&, long double*>(long double*, long double*, long double*, long double*, long double*, __less&)) +template +inline _LIBCPP_HIDE_FROM_ABI void __bitsetsort_internal(_RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type difference_type; + typename _VSTD::aligned_storage::type __buff[__bitonic::__detail::__small_sort_max]; + + // 2*log2 comes from Introsort https://reviews.llvm.org/D36423. + difference_type __depth_limit = 2 * __log2i(__last - __first); + __bitsetsort_loop(__first, __last, __comp, reinterpret_cast(&__buff[0]), __depth_limit); +} +} // namespace __bitsetsort template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) -{ - typedef typename __comp_ref_type<_Compare>::type _Comp_ref; - if (__libcpp_is_constant_evaluated()) { - _VSTD::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp)); - } else { - _VSTD::__sort<_Comp_ref>(_VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), _Comp_ref(__comp)); - } +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void sort(_RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { + typedef typename __comp_ref_type<_Compare>::type _Comp_ref; + if (__libcpp_is_constant_evaluated()) { + _VSTD::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp)); + } else { + __bitsetsort::__bitsetsort_internal<_Comp_ref>(_VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), + _Comp_ref(__comp)); + } } template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -sort(_RandomAccessIterator __first, _RandomAccessIterator __last) -{ - _VSTD::sort(__first, __last, __less::value_type>()); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void sort(_RandomAccessIterator __first, + _RandomAccessIterator __last) { + _VSTD::sort(__first, __last, __less::value_type>()); } _LIBCPP_END_NAMESPACE_STD Index: libcxx/include/__algorithm/stable_sort.h =================================================================== --- libcxx/include/__algorithm/stable_sort.h +++ libcxx/include/__algorithm/stable_sort.h @@ -25,6 +25,49 @@ _LIBCPP_BEGIN_NAMESPACE_STD +template +void __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) { + typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; + if (__first != __last) { + _BidirectionalIterator __i = __first; + for (++__i; __i != __last; ++__i) { + _BidirectionalIterator __j = __i; + value_type __t(_VSTD::move(*__j)); + for (_BidirectionalIterator __k = __i; __k != __first && __comp(__t, *--__k); --__j) + *__j = _VSTD::move(*__k); + *__j = _VSTD::move(__t); + } + } +} + +template +void __insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterator __last1, + typename iterator_traits<_BidirectionalIterator>::value_type* __first2, _Compare __comp) { + typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; + if (__first1 != __last1) { + __destruct_n __d(0); + unique_ptr __h(__first2, __d); + value_type* __last2 = __first2; + ::new ((void*)__last2) value_type(_VSTD::move(*__first1)); + __d.template __incr(); + for (++__last2; ++__first1 != __last1; ++__last2) { + value_type* __j2 = __last2; + value_type* __i2 = __j2; + if (__comp(*__first1, *--__i2)) { + ::new ((void*)__j2) value_type(_VSTD::move(*__i2)); + __d.template __incr(); + for (--__j2; __i2 != __first2 && __comp(*__first1, *--__i2); --__j2) + *__j2 = _VSTD::move(*__i2); + *__j2 = _VSTD::move(*__first1); + } else { + ::new ((void*)__j2) value_type(_VSTD::move(*__first1)); + __d.template __incr(); + } + } + __h.release(); + } +} + template void __merge_move_construct(_InputIterator1 __first1, _InputIterator1 __last1, Index: libcxx/src/CMakeLists.txt =================================================================== --- libcxx/src/CMakeLists.txt +++ libcxx/src/CMakeLists.txt @@ -2,7 +2,6 @@ # Get sources set(LIBCXX_SOURCES - algorithm.cpp any.cpp atomic.cpp barrier.cpp @@ -19,6 +18,7 @@ include/atomic_support.h include/config_elast.h include/refstring.h + legacy-sort.cpp memory.cpp mutex.cpp mutex_destructor.cpp Index: libcxx/src/legacy-sort.cpp =================================================================== --- /dev/null +++ libcxx/src/legacy-sort.cpp @@ -0,0 +1,395 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// This file contains the legacy implementation of std::sort that used to ship +// with libc++. Since we used to explicitly instantiate some specializations of +// the sorting functions in the built library (to improve code size), we can't +// just remove those symbols without breaking ABI. +// +// Hence, the old std::sort implementation remains in this file and we keep +// exporting the necessary symbols to keep our ABI stable, however programs +// compiled against newer versions of libc++ will never actually rely on those +// symbols. +// + +#include "algorithm" +#include "memory" +#include "utility" + +_LIBCPP_BEGIN_NAMESPACE_STD + +// stable, 2-3 compares, 0-2 swaps + +template +_LIBCPP_CONSTEXPR_AFTER_CXX11 unsigned __sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, + _Compare __c) { + unsigned __r = 0; + if (!__c(*__y, *__x)) // if x <= y + { + if (!__c(*__z, *__y)) // if y <= z + return __r; // x <= y && y <= z + // x <= y && y > z + swap(*__y, *__z); // x <= z && y < z + __r = 1; + if (__c(*__y, *__x)) // if x > y + { + swap(*__x, *__y); // x < y && y <= z + __r = 2; + } + return __r; // x <= y && y < z + } + if (__c(*__z, *__y)) // x > y, if y > z + { + swap(*__x, *__z); // x < y && y < z + __r = 1; + return __r; + } + swap(*__x, *__y); // x > y && y <= z + __r = 1; // x < y && x <= z + if (__c(*__z, *__y)) // if y > z + { + swap(*__y, *__z); // x <= y && y < z + __r = 2; + } + return __r; +} // x <= y && y <= z + +// stable, 3-6 compares, 0-5 swaps + +template +unsigned __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, + _Compare __c) { + unsigned __r = _VSTD::__sort3<_Compare>(__x1, __x2, __x3, __c); + if (__c(*__x4, *__x3)) { + swap(*__x3, *__x4); + ++__r; + if (__c(*__x3, *__x2)) { + swap(*__x2, *__x3); + ++__r; + if (__c(*__x2, *__x1)) { + swap(*__x1, *__x2); + ++__r; + } + } + } + return __r; +} + +// stable, 4-10 compares, 0-9 swaps + +template +_LIBCPP_HIDDEN unsigned __sort5(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, + _ForwardIterator __x4, _ForwardIterator __x5, _Compare __c) { + unsigned __r = _VSTD::__sort4<_Compare>(__x1, __x2, __x3, __x4, __c); + if (__c(*__x5, *__x4)) { + swap(*__x4, *__x5); + ++__r; + if (__c(*__x4, *__x3)) { + swap(*__x3, *__x4); + ++__r; + if (__c(*__x3, *__x2)) { + swap(*__x2, *__x3); + ++__r; + if (__c(*__x2, *__x1)) { + swap(*__x1, *__x2); + ++__r; + } + } + } + } + return __r; +} + +template +void __insertion_sort_3(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; + _RandomAccessIterator __j = __first + 2; + _VSTD::__sort3<_Compare>(__first, __first + 1, __j, __comp); + for (_RandomAccessIterator __i = __j + 1; __i != __last; ++__i) { + if (__comp(*__i, *__j)) { + value_type __t(_VSTD::move(*__i)); + _RandomAccessIterator __k = __j; + __j = __i; + do { + *__j = _VSTD::move(*__k); + __j = __k; + } while (__j != __first && __comp(__t, *--__k)); + *__j = _VSTD::move(__t); + } + __j = __i; + } +} + +template +bool __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + switch (__last - __first) { + case 0: + case 1: + return true; + case 2: + if (__comp(*--__last, *__first)) + swap(*__first, *__last); + return true; + case 3: + _VSTD::__sort3<_Compare>(__first, __first + 1, --__last, __comp); + return true; + case 4: + _VSTD::__sort4<_Compare>(__first, __first + 1, __first + 2, --__last, __comp); + return true; + case 5: + _VSTD::__sort5<_Compare>(__first, __first + 1, __first + 2, __first + 3, --__last, __comp); + return true; + } + typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; + _RandomAccessIterator __j = __first + 2; + _VSTD::__sort3<_Compare>(__first, __first + 1, __j, __comp); + const unsigned __limit = 8; + unsigned __count = 0; + for (_RandomAccessIterator __i = __j + 1; __i != __last; ++__i) { + if (__comp(*__i, *__j)) { + value_type __t(_VSTD::move(*__i)); + _RandomAccessIterator __k = __j; + __j = __i; + do { + *__j = _VSTD::move(*__k); + __j = __k; + } while (__j != __first && __comp(__t, *--__k)); + *__j = _VSTD::move(__t); + if (++__count == __limit) + return ++__i == __last; + } + __j = __i; + } + return true; +} + +template +void __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; + typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; + const difference_type __limit = + is_trivially_copy_constructible::value && is_trivially_copy_assignable::value ? 30 : 6; + while (true) { + __restart: + difference_type __len = __last - __first; + switch (__len) { + case 0: + case 1: + return; + case 2: + if (__comp(*--__last, *__first)) + swap(*__first, *__last); + return; + case 3: + _VSTD::__sort3<_Compare>(__first, __first + 1, --__last, __comp); + return; + case 4: + _VSTD::__sort4<_Compare>(__first, __first + 1, __first + 2, --__last, __comp); + return; + case 5: + _VSTD::__sort5<_Compare>(__first, __first + 1, __first + 2, __first + 3, --__last, __comp); + return; + } + if (__len <= __limit) { + _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp); + return; + } + // __len > 5 + _RandomAccessIterator __m = __first; + _RandomAccessIterator __lm1 = __last; + --__lm1; + unsigned __n_swaps; + { + difference_type __delta; + if (__len >= 1000) { + __delta = __len / 2; + __m += __delta; + __delta /= 2; + __n_swaps = _VSTD::__sort5<_Compare>(__first, __first + __delta, __m, __m + __delta, __lm1, __comp); + } else { + __delta = __len / 2; + __m += __delta; + __n_swaps = _VSTD::__sort3<_Compare>(__first, __m, __lm1, __comp); + } + } + // *__m is median + // partition [__first, __m) < *__m and *__m <= [__m, __last) + // (this inhibits tossing elements equivalent to __m around unnecessarily) + _RandomAccessIterator __i = __first; + _RandomAccessIterator __j = __lm1; + // j points beyond range to be tested, *__m is known to be <= *__lm1 + // The search going up is known to be guarded but the search coming down isn't. + // Prime the downward search with a guard. + if (!__comp(*__i, *__m)) // if *__first == *__m + { + // *__first == *__m, *__first doesn't go in first part + // manually guard downward moving __j against __i + while (true) { + if (__i == --__j) { + // *__first == *__m, *__m <= all other elements + // Parition instead into [__first, __i) == *__first and *__first < [__i, __last) + ++__i; // __first + 1 + __j = __last; + if (!__comp(*__first, *--__j)) // we need a guard if *__first == *(__last-1) + { + while (true) { + if (__i == __j) + return; // [__first, __last) all equivalent elements + if (__comp(*__first, *__i)) { + swap(*__i, *__j); + ++__n_swaps; + ++__i; + break; + } + ++__i; + } + } + // [__first, __i) == *__first and *__first < [__j, __last) and __j == __last - 1 + if (__i == __j) + return; + while (true) { + while (!__comp(*__first, *__i)) + ++__i; + while (__comp(*__first, *--__j)) + ; + if (__i >= __j) + break; + swap(*__i, *__j); + ++__n_swaps; + ++__i; + } + // [__first, __i) == *__first and *__first < [__i, __last) + // The first part is sorted, sort the second part + // _VSTD::__sort<_Compare>(__i, __last, __comp); + __first = __i; + goto __restart; + } + if (__comp(*__j, *__m)) { + swap(*__i, *__j); + ++__n_swaps; + break; // found guard for downward moving __j, now use unguarded partition + } + } + } + // It is known that *__i < *__m + ++__i; + // j points beyond range to be tested, *__m is known to be <= *__lm1 + // if not yet partitioned... + if (__i < __j) { + // known that *(__i - 1) < *__m + // known that __i <= __m + while (true) { + // __m still guards upward moving __i + while (__comp(*__i, *__m)) + ++__i; + // It is now known that a guard exists for downward moving __j + while (!__comp(*--__j, *__m)) + ; + if (__i > __j) + break; + swap(*__i, *__j); + ++__n_swaps; + // It is known that __m != __j + // If __m just moved, follow it + if (__m == __i) + __m = __j; + ++__i; + } + } + // [__first, __i) < *__m and *__m <= [__i, __last) + if (__i != __m && __comp(*__m, *__i)) { + swap(*__i, *__m); + ++__n_swaps; + } + // [__first, __i) < *__i and *__i <= [__i+1, __last) + // If we were given a perfect partition, see if insertion sort is quick... + if (__n_swaps == 0) { + bool __fs = _VSTD::__insertion_sort_incomplete<_Compare>(__first, __i, __comp); + if (_VSTD::__insertion_sort_incomplete<_Compare>(__i + 1, __last, __comp)) { + if (__fs) + return; + __last = __i; + continue; + } else { + if (__fs) { + __first = ++__i; + continue; + } + } + } + // sort smaller range with recursive call and larger with tail recursion elimination + if (__i - __first < __last - __i) { + _VSTD::__sort<_Compare>(__first, __i, __comp); + // _VSTD::__sort<_Compare>(__i+1, __last, __comp); + __first = ++__i; + } else { + _VSTD::__sort<_Compare>(__i + 1, __last, __comp); + // _VSTD::__sort<_Compare>(__first, __i, __comp); + __last = __i; + } + } +} + +template _LIBCPP_FUNC_VIS void __sort<__less&, char*>(char*, char*, __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, wchar_t*>(wchar_t*, wchar_t*, __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, signed char*>(signed char*, signed char*, + __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, unsigned char*>(unsigned char*, unsigned char*, + __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, short*>(short*, short*, __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, unsigned short*>(unsigned short*, unsigned short*, + __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, int*>(int*, int*, __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, unsigned*>(unsigned*, unsigned*, __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, long*>(long*, long*, __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, unsigned long*>(unsigned long*, unsigned long*, + __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, long long*>(long long*, long long*, __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, unsigned long long*>(unsigned long long*, + unsigned long long*, + __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, float*>(float*, float*, __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, double*>(double*, double*, __less&); +template _LIBCPP_FUNC_VIS void __sort<__less&, long double*>(long double*, long double*, + __less&); + +template _LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, char*>(char*, char*, __less&); +template _LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, wchar_t*>(wchar_t*, wchar_t*, + __less&); +template _LIBCPP_FUNC_VIS bool +__insertion_sort_incomplete<__less&, signed char*>(signed char*, signed char*, __less&); +template _LIBCPP_FUNC_VIS bool +__insertion_sort_incomplete<__less&, unsigned char*>(unsigned char*, unsigned char*, + __less&); +template _LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, short*>(short*, short*, __less&); +template _LIBCPP_FUNC_VIS bool +__insertion_sort_incomplete<__less&, unsigned short*>(unsigned short*, unsigned short*, + __less&); +template _LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, int*>(int*, int*, __less&); +template _LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, unsigned*>(unsigned*, unsigned*, + __less&); +template _LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, long*>(long*, long*, __less&); +template _LIBCPP_FUNC_VIS bool +__insertion_sort_incomplete<__less&, unsigned long*>(unsigned long*, unsigned long*, + __less&); +template _LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, long long*>(long long*, long long*, + __less&); +template _LIBCPP_FUNC_VIS bool +__insertion_sort_incomplete<__less&, unsigned long long*>(unsigned long long*, unsigned long long*, + __less&); +template _LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, float*>(float*, float*, __less&); +template _LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less&, double*>(double*, double*, __less&); +template _LIBCPP_FUNC_VIS bool +__insertion_sort_incomplete<__less&, long double*>(long double*, long double*, __less&); + +template _LIBCPP_FUNC_VIS unsigned __sort5<__less&, long double*>(long double*, long double*, long double*, + long double*, long double*, + __less&); + +_LIBCPP_END_NAMESPACE_STD