diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -162,6 +162,7 @@ #============================================================================== set(BENCHMARK_TESTS algorithms.partition_point.bench.cpp + algorithms/count.bench.cpp algorithms/equal.bench.cpp algorithms/find.bench.cpp algorithms/lower_bound.bench.cpp diff --git a/libcxx/benchmarks/algorithms/count.bench.cpp b/libcxx/benchmarks/algorithms/count.bench.cpp new file mode 100644 --- /dev/null +++ b/libcxx/benchmarks/algorithms/count.bench.cpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +static void bm_vector_bool_count(benchmark::State& state) { + std::vector vec1(state.range(), false); + + for (auto _ : state) { + benchmark::DoNotOptimize(vec1); + benchmark::DoNotOptimize(std::count(vec1.begin(), vec1.end(), true)); + } +} +BENCHMARK(bm_vector_bool_count)->DenseRange(1, 8)->Range(16, 1 << 20); + +static void bm_vector_bool_ranges_count(benchmark::State& state) { + std::vector vec1(state.range(), false); + + for (auto _ : state) { + benchmark::DoNotOptimize(vec1); + benchmark::DoNotOptimize(std::ranges::count(vec1.begin(), vec1.end(), true)); + } +} +BENCHMARK(bm_vector_bool_ranges_count)->DenseRange(1, 8)->Range(16, 1 << 20); + +BENCHMARK_MAIN(); diff --git a/libcxx/docs/ReleaseNotes/18.rst b/libcxx/docs/ReleaseNotes/18.rst --- a/libcxx/docs/ReleaseNotes/18.rst +++ b/libcxx/docs/ReleaseNotes/18.rst @@ -57,6 +57,9 @@ Improvements and New Features ----------------------------- +- ``std::ranges::count`` is now optimized for ``vector::iterator``, which + can lead up to 350x performance improvements. + - The library now provides a hardened mode under which common cases of library undefined behavior will be turned into a reliable program termination. Vendors can configure whether the hardened mode is enabled by default with the ``LIBCXX_HARDENING_MODE`` variable at CMake configuration time. Users can control whether the hardened mode is diff --git a/libcxx/include/__algorithm/count.h b/libcxx/include/__algorithm/count.h --- a/libcxx/include/__algorithm/count.h +++ b/libcxx/include/__algorithm/count.h @@ -10,26 +10,83 @@ #ifndef _LIBCPP___ALGORITHM_COUNT_H #define _LIBCPP___ALGORITHM_COUNT_H +#include <__algorithm/iterator_operations.h> +#include <__algorithm/min.h> +#include <__bit/invert_if.h> +#include <__bit/popcount.h> #include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD -template -_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 - typename iterator_traits<_InputIterator>::difference_type - count(_InputIterator __first, _InputIterator __last, const _Tp& __value) { - typename iterator_traits<_InputIterator>::difference_type __r(0); +// generic implementation +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename _IterOps<_AlgPolicy>::template __difference_type<_Iter> +__count(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) { + typename _IterOps<_AlgPolicy>::template __difference_type<_Iter> __r(0); for (; __first != __last; ++__first) - if (*__first == __value) + if (std::__invoke(__proj, *__first) == __value) ++__r; return __r; } +// __bit_iterator implementation +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __bit_iterator<_Cp, _IsConst>::difference_type +__count_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { + using _It = __bit_iterator<_Cp, _IsConst>; + using __storage_type = typename _It::__storage_type; + using difference_type = typename _It::difference_type; + + const int __bits_per_word = _It::__bits_per_word; + difference_type __r = 0; + // do first partial word + if (__first.__ctz_ != 0) { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = std::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __r = std::__libcpp_popcount(std::__invert_if(*__first.__seg_) & __m); + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) + __r += std::__libcpp_popcount(std::__invert_if(*__first.__seg_)); + // do last partial word + if (__n > 0) { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __r += std::__libcpp_popcount(std::__invert_if(*__first.__seg_) & __m); + } + return __r; +} + +template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<__bit_iterator<_Cp, _IsConst> > +__count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) { + if (__value) + return std::__count_bool(__first, static_cast(__last - __first)); + return std::__count_bool(__first, static_cast(__last - __first)); +} + +template +_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator> +count(_InputIterator __first, _InputIterator __last, const _Tp& __value) { + __identity __proj; + return std::__count<_ClassicAlgPolicy>(__first, __last, __value, __proj); +} + _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_COUNT_H diff --git a/libcxx/include/__algorithm/ranges_count.h b/libcxx/include/__algorithm/ranges_count.h --- a/libcxx/include/__algorithm/ranges_count.h +++ b/libcxx/include/__algorithm/ranges_count.h @@ -9,7 +9,8 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_COUNT_H #define _LIBCPP___ALGORITHM_RANGES_COUNT_H -#include <__algorithm/ranges_count_if.h> +#include <__algorithm/count.h> +#include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> #include <__functional/ranges_operations.h> @@ -36,16 +37,14 @@ requires indirect_binary_predicate, const _Type*> _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Iter> operator()(_Iter __first, _Sent __last, const _Type& __value, _Proj __proj = {}) const { - auto __pred = [&](auto&& __e) { return __e == __value; }; - return ranges::__count_if_impl(std::move(__first), std::move(__last), __pred, __proj); + return std::__count<_RangeAlgPolicy>(std::move(__first), std::move(__last), __value, __proj); } template requires indirect_binary_predicate, _Proj>, const _Type*> _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr range_difference_t<_Range> operator()(_Range&& __r, const _Type& __value, _Proj __proj = {}) const { - auto __pred = [&](auto&& __e) { return __e == __value; }; - return ranges::__count_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj); + return std::__count<_RangeAlgPolicy>(ranges::begin(__r), ranges::end(__r), __value, __proj); } }; } // namespace __count diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference --- a/libcxx/include/__bit_reference +++ b/libcxx/include/__bit_reference @@ -171,45 +171,6 @@ __bit_const_reference& operator=(const __bit_const_reference&) = delete; }; -// count - -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 typename __bit_iterator<_Cp, _IsConst>::difference_type -__count_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { - using _It = __bit_iterator<_Cp, _IsConst>; - using __storage_type = typename _It::__storage_type; - using difference_type = typename _It::difference_type; - - const int __bits_per_word = _It::__bits_per_word; - difference_type __r = 0; - // do first partial word - if (__first.__ctz_ != 0) { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = std::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __r = std::__libcpp_popcount(std::__invert_if(*__first.__seg_) & __m); - __n -= __dn; - ++__first.__seg_; - } - // do middle whole words - for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - __r += std::__libcpp_popcount(std::__invert_if(*__first.__seg_)); - // do last partial word - if (__n > 0) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __r += std::__libcpp_popcount(std::__invert_if(*__first.__seg_) & __m); - } - return __r; -} - -template -inline _LIBCPP_HIDE_FROM_ABI typename __bit_iterator<_Cp, _IsConst>::difference_type -count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value) { - if (static_cast(__value)) - return std::__count_bool(__first, static_cast(__last - __first)); - return std::__count_bool(__first, static_cast(__last - __first)); -} - // fill_n template @@ -1092,7 +1053,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, _IC> __find_bool(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); template - friend typename __bit_iterator<_Dp, _IC>::difference_type _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 + friend typename __bit_iterator<_Dp, _IC>::difference_type _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __count_bool(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); }; diff --git a/libcxx/include/bitset b/libcxx/include/bitset --- a/libcxx/include/bitset +++ b/libcxx/include/bitset @@ -122,6 +122,7 @@ */ +#include <__algorithm/count.h> #include <__algorithm/fill.h> #include <__algorithm/find.h> #include <__assert> // all public C++ headers provide the assertion handler @@ -1042,7 +1043,7 @@ size_t bitset<_Size>::count() const _NOEXCEPT { - return static_cast(_VSTD::__count_bool(base::__make_iter(0), _Size)); + return static_cast(std::count(base::__make_iter(0), base::__make_iter(_Size), true)); } template diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/count.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/count.pass.cpp --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/count.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/count.pass.cpp @@ -13,35 +13,50 @@ // constexpr Iter::difference_type // constexpr after C++17 // count(Iter first, Iter last, const T& value); +// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=20000000 +// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=70000000 + #include #include +#include #include "test_macros.h" #include "test_iterators.h" - -#if TEST_STD_VER > 17 -TEST_CONSTEXPR bool test_constexpr() { - int ia[] = {0, 1, 2, 2, 0, 1, 2, 3}; - int ib[] = {1, 2, 3, 4, 5, 6}; - return (std::count(std::begin(ia), std::end(ia), 2) == 3) - && (std::count(std::begin(ib), std::end(ib), 9) == 0) - ; +#include "type_algorithms.h" + +struct Test { + template + TEST_CONSTEXPR_CXX20 void operator()() { + int ia[] = {0, 1, 2, 2, 0, 1, 2, 3}; + const unsigned sa = sizeof(ia) / sizeof(ia[0]); + assert(std::count(Iter(ia), Iter(ia + sa), 2) == 3); + assert(std::count(Iter(ia), Iter(ia + sa), 7) == 0); + assert(std::count(Iter(ia), Iter(ia), 2) == 0); + } +}; + +TEST_CONSTEXPR_CXX20 bool test() { + types::for_each(types::cpp17_input_iterator_list(), Test()); + + if (!TEST_IS_CONSTANT_EVALUATED || TEST_STD_VER >= 20) { + std::vector vec(256 + 64); + for (ptrdiff_t i = 0; i != 256; ++i) { + for (size_t offset = 0; offset != 64; ++offset) { + std::fill(vec.begin(), vec.end(), false); + std::fill(vec.begin() + offset, vec.begin() + i + offset, true); + assert(std::count(vec.begin() + offset, vec.begin() + offset + 256, true) == i); + assert(std::count(vec.begin() + offset, vec.begin() + offset + 256, false) == 256 - i); + } } -#endif + } + + return true; +} -int main(int, char**) -{ - int ia[] = {0, 1, 2, 2, 0, 1, 2, 3}; - const unsigned sa = sizeof(ia)/sizeof(ia[0]); - assert(std::count(cpp17_input_iterator(ia), - cpp17_input_iterator(ia + sa), 2) == 3); - assert(std::count(cpp17_input_iterator(ia), - cpp17_input_iterator(ia + sa), 7) == 0); - assert(std::count(cpp17_input_iterator(ia), - cpp17_input_iterator(ia), 2) == 0); - -#if TEST_STD_VER > 17 - static_assert(test_constexpr()); +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); #endif return 0; diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/ranges.count.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/ranges.count.pass.cpp --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/ranges.count.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/ranges.count.pass.cpp @@ -10,6 +10,9 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 +// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=20000000 +// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=70000000 + // template S, class T, class Proj = identity> // requires indirect_binary_predicate, const T*> // constexpr iter_difference_t @@ -23,6 +26,7 @@ #include #include #include +#include #include "almost_satisfies_types.h" #include "test_iterators.h" @@ -253,6 +257,18 @@ } } + { // check that __bit_iterator optimizations work as expected + std::vector vec(256 + 64); + for (ptrdiff_t i = 0; i != 256; ++i) { + for (size_t offset = 0; offset != 64; ++offset) { + std::fill(vec.begin(), vec.end(), false); + std::fill(vec.begin() + offset, vec.begin() + i + offset, true); + assert(std::ranges::count(vec.begin() + offset, vec.begin() + offset + 256, true) == i); + assert(std::ranges::count(vec.begin() + offset, vec.begin() + offset + 256, false) == 256 - i); + } + } + } + return true; } diff --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt --- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt +++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt @@ -9,7 +9,7 @@ # libc++ normally prefers the use the last stable release instead of the ToT. # For modules we need ToT since they are still under heavy development. So # temporary the ToT version is preferred. -find_package(Clang 17) +find_package(Clang 18) if (NOT Clang_FOUND) find_package(Clang 16) endif()