Index: benchmarks/algorithms.bench.cpp =================================================================== --- benchmarks/algorithms.bench.cpp +++ benchmarks/algorithms.bench.cpp @@ -58,5 +58,35 @@ BENCHMARK_CAPTURE(BM_Sort, single_element_strings, getDuplicateStringInputs)->Arg(TestNumInputs); +template +void BM_LowerBound(benchmark::State& st, GenInputs gen) { + using ValueType = typename decltype(gen(0))::value_type; + auto in = gen(st.range(0)); + std::sort(in.begin(), in.end()); + + const auto every_10_percentile = [&]() -> std::vector { + size_t step = in.size() / 10; + + if (step == 0) { + st.SkipWithError("Input doesn't contain enough elements"); + return {}; + } + + std::vector res; + for (size_t i = 0; i < in.size(); i += step) + res.push_back(&in[i]); + + return res; + }(); + + for (auto _ : st) + { + for (auto* test : every_10_percentile) + benchmark::DoNotOptimize(std::lower_bound(in.begin(), in.end(), *test)); + } +} + +BENCHMARK_CAPTURE(BM_LowerBound, random_int32, + getRandomIntegerInputs)->Arg(TestNumInputs); BENCHMARK_MAIN(); Index: include/algorithm =================================================================== --- include/algorithm +++ include/algorithm @@ -3193,17 +3193,41 @@ // partition_point +// Maybe casts to size_t (if the conversion from the type is safe). +// Implicit assumption - the cast will happen from the positive number. +// +// This is necessary because some operations are faster on the unsigned numbers, +// see Bug 39129. +template ::value> +struct __difference_type_to_size_t_cast { + typedef _Tp __type; +}; + +#if _LIBCPP_STD_VER >= 11 // There isn't a way to get numeric_limits::max() in C++03. +template +struct __difference_type_to_size_t_cast<_Tp, /*isNumeric = */ true> { + typedef typename + conditional< + std::numeric_limits<_Tp>::max() <= numeric_limits::max(), + size_t, + _Tp + >::type __type; +}; +#endif // _LIBCPP_STD_VER >= 11 + template _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator partition_point(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type; - difference_type __len = _VSTD::distance(__first, __last); + typedef typename iterator_traits<_ForwardIterator>::difference_type __difference_type; + typedef typename __difference_type_to_size_t_cast<__difference_type>::__type __len_type; + + __len_type __len = static_cast<__len_type>(_VSTD::distance(__first, __last)); while (__len != 0) { - difference_type __l2 = __len / 2; + __len_type __l2 = __len / 2; _ForwardIterator __m = __first; - _VSTD::advance(__m, __l2); + _VSTD::advance(__m, static_cast<__difference_type>(__l2)); if (__pred(*__m)) { __first = ++__m; @@ -4060,26 +4084,39 @@ // lower_bound +template +struct __less_than_t +{ + _Compare __comp_; // We can't use an empty base class optimization for comp + // since it might be a function pointer. + // + // Because this functor is compiled out completly, it doesn't matter. + const _Tp* __value_; + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 + __less_than_t(_Compare __comp, const _Tp& __value_) : + __comp_(__comp), + __value_(_VSTD::addressof(__value_)) {} + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 + bool operator()(_Reference __u) + { + return __comp_(__u, *__value_); + } +}; + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 +__less_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference> +__less_than(_Compare __comp, const _Tp& __value_) { + return __less_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference>(__comp, __value_); +} + template _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator __lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp) { - typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type; - difference_type __len = _VSTD::distance(__first, __last); - while (__len != 0) - { - difference_type __l2 = __len / 2; - _ForwardIterator __m = __first; - _VSTD::advance(__m, __l2); - if (__comp(*__m, __value_)) - { - __first = ++__m; - __len -= __l2 + 1; - } - else - __len = __l2; - } - return __first; + return _VSTD::partition_point(__first, __last, __less_than<_ForwardIterator>(__comp, __value_)); } template @@ -4108,26 +4145,40 @@ // upper_bound +template +struct __not_greater_than_t +{ + _Compare __comp_; // We can't use an empty base class optimization for comp + // since it might be a function pointer. + // + // Because this functor is compiled out completly, it doesn't matter. + const _Tp* __value_; + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 + __not_greater_than_t(_Compare __comp, const _Tp& __value_) : + __comp_(__comp), + __value_(_VSTD::addressof(__value_)) {} + + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 + bool operator()(_Reference __u) + { + return !__comp_(*__value_, __u); + } +}; + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 +__not_greater_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference> +__not_greater_than(_Compare __comp, const _Tp& __value_) +{ + return __not_greater_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference>(__comp, __value_); +} + template _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator __upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp) { - typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type; - difference_type __len = _VSTD::distance(__first, __last); - while (__len != 0) - { - difference_type __l2 = __len / 2; - _ForwardIterator __m = __first; - _VSTD::advance(__m, __l2); - if (__comp(__value_, *__m)) - __len = __l2; - else - { - __first = ++__m; - __len -= __l2 + 1; - } - } - return __first; + return _VSTD::partition_point(__first, __last, __not_greater_than<_ForwardIterator>(__comp, __value_)); } template Index: test/std/algorithms/alg.modifying.operations/alg.partitions/partition_point.pass.cpp =================================================================== --- test/std/algorithms/alg.modifying.operations/alg.partitions/partition_point.pass.cpp +++ test/std/algorithms/alg.modifying.operations/alg.partitions/partition_point.pass.cpp @@ -15,6 +15,7 @@ #include #include +#include #include "test_macros.h" #include "test_iterators.h" @@ -87,6 +88,25 @@ is_odd()) == forward_iterator(ia)); } + { + typedef typename std::__difference_type_to_size_t_cast::__type cast_int; + static_assert(std::is_same::value, ""); + + typedef typename std::__difference_type_to_size_t_cast::__type cast_ptrdiff; + static_assert(std::is_same::value, ""); + + typedef typename + std::__difference_type_to_size_t_cast::difference_type>::__type cast_vector_difference_type; + static_assert(std::is_same::value, ""); + + typedef typename std::__difference_type_to_size_t_cast<__int128_t>::__type cast_int128_t; + static_assert(std::is_same::value, ""); + + struct S {}; + typedef typename std::__difference_type_to_size_t_cast::__type cast_user_defined; + static_assert(std::is_same::value, ""); + } + #if TEST_STD_VER > 17 static_assert(test_constexpr()); #endif