Index: benchmarks/algorithms.bench.cpp
===================================================================
--- benchmarks/algorithms.bench.cpp
+++ benchmarks/algorithms.bench.cpp
@@ -58,5 +58,69 @@
 
 BENCHMARK_CAPTURE(BM_Sort, single_element_strings,
                   getDuplicateStringInputs)->Arg(TestNumInputs);
+template <class GenInputs, class Alg>
+void do_binary_search_benchmark(benchmark::State& st, GenInputs gen, Alg alg)
+{
+  using ValueType = typename decltype(gen(0))::value_type;
+  auto in = gen(st.range(0));
+  std::sort(in.begin(), in.end());
+
+  const auto every_10_percentile = [&]() -> std::vector<ValueType*> {
+    size_t step = in.size() / 10;
+
+    if (step == 0) {
+      st.SkipWithError("Input doesn't contain enough elements");
+      return {};
+    }
+
+    std::vector<ValueType*> res;
+    for (size_t i = 0; i < in.size(); i += step)
+      res.push_back(&in[i]);
+
+    return res;
+  }();
+
+  for (auto _ : st)
+  {
+    for (auto* test : every_10_percentile)
+      benchmark::DoNotOptimize(alg(in.begin(), in.end(), *test));
+  }
+}
+
+template <class GenInputs>
+void BM_LowerBound(benchmark::State& st, GenInputs gen)
+{
+  do_binary_search_benchmark(st, gen, [](auto f, auto l, const auto& v) {
+    return std::lower_bound(f, l, v);
+  });
+}
+
+BENCHMARK_CAPTURE(BM_LowerBound, random_int32, getRandomIntegerInputs<int32_t>)
+  ->Arg(TestNumInputs)                  // Small int32_t vector
+  ->Arg(TestNumInputs * TestNumInputs); // Big int32_t vector
+
+BENCHMARK_CAPTURE(BM_LowerBound, random_int64, getRandomIntegerInputs<int64_t>)
+  ->Arg(TestNumInputs); // Small int64_t vector. Should also represent pointers.
+
+BENCHMARK_CAPTURE(BM_LowerBound, random_strings, getRandomStringInputs)
+  ->Arg(TestNumInputs); // Small string vector. What happens if the comparison is not very cheap.
+
+template <class GenInputs>
+void BM_EqualRange(benchmark::State& st, GenInputs gen)
+{
+  do_binary_search_benchmark(st, gen, [](auto f, auto l, const auto& v) {
+    return std::equal_range(f, l, v);
+  });
+}
+
+BENCHMARK_CAPTURE(BM_EqualRange, random_int32, getRandomIntegerInputs<int32_t>)
+  ->Arg(TestNumInputs)                  // Small int32_t vector
+  ->Arg(TestNumInputs * TestNumInputs); // Big int32_t vector
+
+BENCHMARK_CAPTURE(BM_EqualRange, random_int64, getRandomIntegerInputs<int64_t>)
+  ->Arg(TestNumInputs); // Small int64_t vector. Should also represent pointers.
+
+BENCHMARK_CAPTURE(BM_EqualRange, random_strings, getRandomStringInputs)
+  ->Arg(TestNumInputs); // Small string vector. What happens if the comparison is not very cheap.
 
 BENCHMARK_MAIN();
Index: include/algorithm
===================================================================
--- include/algorithm
+++ include/algorithm
@@ -3193,6 +3193,47 @@
 
 // partition_point
 
+template <class _Tp, bool = is_integral<_Tp>::value>
+struct __do_half_as_unsigned {
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    _Tp operator()(const _Tp& __value_) const {
+        return __value_ / 2;
+    }
+};
+
+
+#if _LIBCPP_STD_VER >= 11 // There isn't an easy compile time way to get
+                          // numeric_limits<_Integral>::max() in C++03
+template <class _Integral>
+struct __do_half_as_unsigned<_Integral, /* is_integral =*/ true> {
+    typedef typename conditional<
+        std::numeric_limits<_Integral>::max() <= numeric_limits<size_t>::max(),
+        size_t,
+        _Integral
+    >::type __size_t_if_safe;
+
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    _Integral operator()(_Integral __value_) const {
+        return static_cast<_Integral>(static_cast<__size_t_if_safe>(__value_) / 2);
+    }
+};
+#endif // _LIBCPP_STD_VER >= 11
+
+
+// Casts to size_t to divide the number in half (if the type conversion is safe).
+// Implicit assumption - the cast will happen from a positive number.
+//
+// This is necessary because division by two is faster for unsigned numbers and it
+// matters (Bug 39129).
+//
+// This function needs to be constexpr because in C++20 algorithms that use it are
+// constexpr.
+template <class _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+_Tp __half_as_unsigned(const _Tp& __value_) {
+    return __do_half_as_unsigned<_Tp>()(__value_);
+}
+
 template <class _ForwardIterator, class _Predicate>
 _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator
 partition_point(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
@@ -3201,7 +3242,7 @@
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = __half_as_unsigned(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__pred(*__m))
@@ -4060,26 +4101,39 @@
 
 // lower_bound
 
+template <class _Compare, class _Tp, class _Reference>
+struct __less_than_t
+{
+    _Compare __comp_; // We can't use an empty base class optimization for comp
+                      // since it might be a function pointer.
+                      //
+                      // Because this functor is compiled out completely, it doesn't matter.
+    const _Tp* __value_;
+
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    __less_than_t(_Compare __comp, const _Tp& __value_) :
+        __comp_(__comp),
+        __value_(_VSTD::addressof(__value_)) {}
+
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    bool operator()(_Reference __u)
+    {
+        return __comp_(__u, *__value_);
+    }
+};
+
+template <class _Iterator, class _Compare, class _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+__less_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference>
+__less_than(_Compare __comp, const _Tp& __value_) {
+    return __less_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference>(__comp, __value_);
+}
+
 template <class _Compare, class _ForwardIterator, class _Tp>
 _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator
 __lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp)
 {
-    typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type;
-    difference_type __len = _VSTD::distance(__first, __last);
-    while (__len != 0)
-    {
-        difference_type __l2 = __len / 2;
-        _ForwardIterator __m = __first;
-        _VSTD::advance(__m, __l2);
-        if (__comp(*__m, __value_))
-        {
-            __first = ++__m;
-            __len -= __l2 + 1;
-        }
-        else
-            __len = __l2;
-    }
-    return __first;
+    return _VSTD::partition_point(__first, __last, __less_than<_ForwardIterator>(__comp, __value_));
 }
 
 template <class _ForwardIterator, class _Tp>
@@ -4108,26 +4162,40 @@
 
 // upper_bound
 
+template <class _Compare, class _Tp, class _Reference>
+struct __not_greater_than_t
+{
+    _Compare __comp_; // We can't use an empty base class optimization for comp
+                      // since it might be a function pointer.
+                      //
+                      // Because this functor is compiled out completely, it doesn't matter.
+    const _Tp* __value_;
+
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    __not_greater_than_t(_Compare __comp, const _Tp& __value_) :
+        __comp_(__comp),
+        __value_(_VSTD::addressof(__value_)) {}
+
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+    bool operator()(_Reference __u)
+    {
+        return !__comp_(*__value_, __u);
+    }
+};
+
+template <class _Iterator, class _Compare, class _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+__not_greater_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference>
+__not_greater_than(_Compare __comp, const _Tp& __value_)
+{
+    return __not_greater_than_t<_Compare, _Tp, typename iterator_traits<_Iterator>::reference>(__comp, __value_);
+}
+
 template <class _Compare, class _ForwardIterator, class _Tp>
 _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator
 __upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp)
 {
-    typedef typename iterator_traits<_ForwardIterator>::difference_type difference_type;
-    difference_type __len = _VSTD::distance(__first, __last);
-    while (__len != 0)
-    {
-        difference_type __l2 = __len / 2;
-        _ForwardIterator __m = __first;
-        _VSTD::advance(__m, __l2);
-        if (__comp(__value_, *__m))
-            __len = __l2;
-        else
-        {
-            __first = ++__m;
-            __len -= __l2 + 1;
-        }
-    }
-    return __first;
+    return _VSTD::partition_point(__first, __last, __not_greater_than<_ForwardIterator>(__comp, __value_));
 }
 
 template <class _ForwardIterator, class _Tp>
@@ -4164,7 +4232,7 @@
     difference_type __len = _VSTD::distance(__first, __last);
     while (__len != 0)
     {
-        difference_type __l2 = __len / 2;
+        difference_type __l2 = __half_as_unsigned(__len);
         _ForwardIterator __m = __first;
         _VSTD::advance(__m, __l2);
         if (__comp(*__m, __value_))
Index: test/libcxx/algorithms/half_as_unsigned.pass.cpp
===================================================================
--- /dev/null
+++ test/libcxx/algorithms/half_as_unsigned.pass.cpp
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// <algorithm>
+
+// template <class _Tp> _Tp __half_as_unsigned(const _Tp&);
+
+// __half_as_unsigned divides an integer number by 2 as an unsigned number
+// if it's safe to do so. It can be an important optimization for lower bound,
+// for example.
+
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <type_traits>
+
+#include "user_defined_integral.hpp"
+
+
+template <class T, class = void>
+struct do_unsigned_castable : std::false_type {};
+
+template <class T>
+struct do_unsigned_castable<T, typename std::__void_t<typename std::__do_half_as_unsigned<T>::__size_t_if_safe>::type> :
+    std::is_same<size_t, typename std::__do_half_as_unsigned<T>::__size_t_if_safe> {};
+
+template <class T>
+constexpr bool Unsigned_castable() { return do_unsigned_castable<T>::value; }
+
+int main()
+{
+    {
+        typedef int type;
+
+        static_assert(Unsigned_castable<type>(), "");
+        constexpr auto max_v = std::numeric_limits<type>::max();
+        static_assert(std::__half_as_unsigned(max_v) == max_v / 2, "");
+    }
+
+    {
+        typedef size_t type;
+
+        static_assert(Unsigned_castable<type>(), "");
+        constexpr auto max_v = std::numeric_limits<type>::max();
+        static_assert(std::__half_as_unsigned(max_v) == max_v / 2, "");
+    }
+
+    {
+        typedef UserDefinedIntegral<unsigned> type;
+
+        static_assert(!Unsigned_castable<type>(), "");
+        const auto max_v = type(std::numeric_limits<unsigned>::max());
+        assert(std::__half_as_unsigned(max_v) == max_v / 2);
+    }
+
+#if !defined(_LIBCPP_HAS_NO_INT128)
+    {
+        typedef __int128_t type;
+
+        static_assert(std::is_integral<type>::value, "");
+
+        static_assert(!Unsigned_castable<type>(), "");
+        constexpr auto max_v = std::numeric_limits<type>::max();
+        static_assert(std::__half_as_unsigned(max_v) == max_v / 2, "");
+    }
+#endif // !defined(_LIBCPP_HAS_NO_INT128)
+}