diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -176,6 +176,7 @@ algorithms/count.bench.cpp algorithms/equal.bench.cpp algorithms/find.bench.cpp + algorithms/for_each.bench.cpp algorithms/lower_bound.bench.cpp algorithms/make_heap.bench.cpp algorithms/make_heap_then_sort_heap.bench.cpp diff --git a/libcxx/benchmarks/algorithms/for_each.bench.cpp b/libcxx/benchmarks/algorithms/for_each.bench.cpp new file mode 100644 --- /dev/null +++ b/libcxx/benchmarks/algorithms/for_each.bench.cpp @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +static void bm_deque_for_each(benchmark::State& state) { + std::deque vec1(state.range(), '1'); + for (auto _ : state) { + benchmark::DoNotOptimize(vec1); + benchmark::DoNotOptimize( + std::for_each(vec1.begin(), vec1.end(), [](char& v) { v = std::clamp(v, (char)10, (char)100); })); + } +} +BENCHMARK(bm_deque_for_each)->DenseRange(1, 8)->Range(16, 1 << 20); + +BENCHMARK_MAIN(); diff --git a/libcxx/docs/ReleaseNotes/18.rst b/libcxx/docs/ReleaseNotes/18.rst --- a/libcxx/docs/ReleaseNotes/18.rst +++ b/libcxx/docs/ReleaseNotes/18.rst @@ -55,6 +55,9 @@ - ``std::ranges::count`` is now optimized for ``vector::iterator``, which can lead up to 350x performance improvements. +- ``std::for_each`` has been optimized for segmented iterators like ``std::deque::iterator`` in C++23 and + later, which can lead up to 40x performance improvements. + - The library now provides a hardened mode under which common cases of library undefined behavior will be turned into a reliable program termination. Vendors can configure whether the hardened mode is enabled by default with the ``LIBCXX_HARDENING_MODE`` variable at CMake configuration time. Users can control whether the hardened mode is diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h --- a/libcxx/include/__algorithm/for_each.h +++ b/libcxx/include/__algorithm/for_each.h @@ -10,7 +10,13 @@ #ifndef _LIBCPP___ALGORITHM_FOR_EACH_H #define _LIBCPP___ALGORITHM_FOR_EACH_H +#include <__algorithm/for_each_segment.h> #include <__config> +#include <__iterator/segmented_iterator.h> +#include <__ranges/movable_box.h> +#include <__type_traits/enable_if.h> +#include <__utility/in_place.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -19,14 +25,28 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function for_each(_InputIterator __first, - _InputIterator __last, - _Function __f) { +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function +for_each(_InputIterator __first, _InputIterator __last, _Function __f) { for (; __first != __last; ++__first) __f(*__first); return __f; } +// __movable_box is available in C++20, but is actually a copyable-box, so optimization is only correct in C++23 +#if _LIBCPP_STD_VER >= 23 +template + requires __is_segmented_iterator<_SegmentedIterator>::value +_LIBCPP_HIDE_FROM_ABI constexpr _Function +for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function __func) { + ranges::__movable_box<_Function> __wrapped_func(in_place, std::move(__func)); + std::__for_each_segment(__first, __last, [&](auto __lfirst, auto __llast) { + __wrapped_func = + ranges::__movable_box<_Function>(in_place, std::for_each(__lfirst, __llast, std::move(*__wrapped_func))); + }); + return std::move(*__wrapped_func); +} +#endif // _LIBCPP_STD_VER >= 23 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ALGORITHM_FOR_EACH_H diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each.pass.cpp @@ -0,0 +1,98 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template Function> +// constexpr Function // constexpr since C++20 +// for_each(Iter first, Iter last, Function f); + +#include +#include +#include +#if __has_include() +# include +#endif +#include + +#include "test_macros.h" +#include "test_iterators.h" + +struct for_each_test { + TEST_CONSTEXPR for_each_test(int c) : count(c) {} + + // for_each functors only have to be move constructible + for_each_test(const for_each_test&) = delete; + for_each_test(for_each_test&&) = default; + for_each_test& operator=(const for_each_test&) = delete; + for_each_test& operator=(for_each_test&&) = delete; + + int count; + TEST_CONSTEXPR_CXX14 void operator()(int& i) { + ++i; + ++count; + } +}; + +struct Test { + template + TEST_CONSTEXPR_CXX20 void operator()() { + int sizes[] = {0, 1, 6}; + for (const int size : sizes) { + int ia[] = {0, 1, 2, 3, 4, 5}; + for_each_test f = std::for_each(Iter(ia), Iter(ia + size), for_each_test(0)); + assert(f.count == size); + for (int i = 0; i < size; ++i) + assert(ia[i] == static_cast(i + 1)); + } + } +}; + +TEST_CONSTEXPR_CXX20 bool test() { + types::for_each(types::cpp17_input_iterator_list(), Test()); + +#if TEST_STD_VER >= 20 + { // Make sure that the segmented iterator optimization works during constant evaluation + std::vector> vecs = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + auto v = std::views::join(vecs); + std::for_each(v.begin(), v.end(), [i = 0](int& a) mutable { assert(a == ++i); }); + } +#endif + + return true; +} + +struct deque_test { + std::deque* d_; + int* i_; + + deque_test(std::deque& d, int& i) : d_(&d), i_(&i) {} + + void operator()(int& v) { + assert(&(*d_)[*i_] == &v); + ++*i_; + } +}; + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 20 + static_assert(test()); +#endif + + // check that segmented iterators work properly + int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; + for (const int size : sizes) { + std::deque d(size); + int index = 0; + + std::for_each(d.begin(), d.end(), deque_test(d, index)); + } + + return 0; +} diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/test.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/test.pass.cpp deleted file mode 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/test.pass.cpp +++ /dev/null @@ -1,56 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// template Function> -// requires CopyConstructible -// constexpr Function // constexpr after C++17 -// for_each(Iter first, Iter last, Function f); - -#include -#include - -#include "test_macros.h" -#include "test_iterators.h" - -#if TEST_STD_VER > 17 -TEST_CONSTEXPR bool test_constexpr() { - int ia[] = {1, 3, 6, 7}; - int expected[] = {3, 5, 8, 9}; - - std::for_each(std::begin(ia), std::end(ia), [](int &a) { a += 2; }); - return std::equal(std::begin(ia), std::end(ia), std::begin(expected)) - ; - } -#endif - -struct for_each_test -{ - for_each_test(int c) : count(c) {} - int count; - void operator()(int& i) {++i; ++count;} -}; - -int main(int, char**) -{ - int ia[] = {0, 1, 2, 3, 4, 5}; - const unsigned s = sizeof(ia)/sizeof(ia[0]); - for_each_test f = std::for_each(cpp17_input_iterator(ia), - cpp17_input_iterator(ia+s), - for_each_test(0)); - assert(f.count == s); - for (unsigned i = 0; i < s; ++i) - assert(ia[i] == static_cast(i+1)); - -#if TEST_STD_VER > 17 - static_assert(test_constexpr()); -#endif - - return 0; -} diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt --- a/libcxx/utils/data/ignore_format.txt +++ b/libcxx/utils/data/ignore_format.txt @@ -10,7 +10,6 @@ libcxx/include/__algorithm/fill_n.h libcxx/include/__algorithm/find_end.h libcxx/include/__algorithm/find_first_of.h -libcxx/include/__algorithm/for_each.h libcxx/include/__algorithm/for_each_n.h libcxx/include/__algorithm/generate.h libcxx/include/__algorithm/generate_n.h @@ -1152,7 +1151,6 @@ libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp -libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/test.pass.cpp libcxx/test/std/algorithms/alg.nonmodifying/alg.is_permutation/is_permutation.pass.cpp libcxx/test/std/algorithms/alg.nonmodifying/alg.is_permutation/is_permutation_pred.pass.cpp libcxx/test/std/algorithms/alg.nonmodifying/alg.is_permutation/ranges.is_permutation.pass.cpp