diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -806,9 +806,11 @@ config_define(1 _LIBCPP_PSTL_CPU_BACKEND_SERIAL) elseif(LIBCXX_PSTL_CPU_BACKEND STREQUAL "std_thread") config_define(1 _LIBCPP_PSTL_CPU_BACKEND_THREAD) +elseif(LIBCXX_PSTL_CPU_BACKEND STREQUAL "libdispatch") + config_define(1 _LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH) else() message(FATAL_ERROR "LIBCXX_PSTL_CPU_BACKEND is set to ${LIBCXX_PSTL_CPU_BACKEND}, which is not a valid backend. - Valid backends are: serial, std_thread") + Valid backends are: serial, std_thread and libdispatch") endif() if (LIBCXX_ABI_DEFINES) diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -77,6 +77,7 @@ __algorithm/pstl_backends/cpu_backends/fill.h __algorithm/pstl_backends/cpu_backends/find_if.h __algorithm/pstl_backends/cpu_backends/for_each.h + __algorithm/pstl_backends/cpu_backends/libdispatch.h __algorithm/pstl_backends/cpu_backends/merge.h __algorithm/pstl_backends/cpu_backends/serial.h __algorithm/pstl_backends/cpu_backends/thread.h diff --git a/libcxx/include/__algorithm/pstl_backend.h b/libcxx/include/__algorithm/pstl_backend.h --- a/libcxx/include/__algorithm/pstl_backend.h +++ b/libcxx/include/__algorithm/pstl_backend.h @@ -100,7 +100,8 @@ }; # endif -# if defined(_LIBCPP_PSTL_CPU_BACKEND_SERIAL) || defined(_LIBCPP_PSTL_CPU_BACKEND_THREAD) +# if defined(_LIBCPP_PSTL_CPU_BACKEND_SERIAL) || defined(_LIBCPP_PSTL_CPU_BACKEND_THREAD) || \ + defined(_LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH) template <> struct __select_backend { using type = __cpu_backend_tag; diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h --- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/backend.h @@ -15,6 +15,8 @@ # include <__algorithm/pstl_backends/cpu_backends/serial.h> #elif defined(_LIBCPP_PSTL_CPU_BACKEND_THREAD) # include <__algorithm/pstl_backends/cpu_backends/thread.h> +#elif defined(_LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH) +# include <__algorithm/pstl_backends/cpu_backends/libdispatch.h> #else # error "Invalid CPU backend choice" #endif diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h new file mode 100644 --- /dev/null +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/libdispatch.h @@ -0,0 +1,234 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H +#define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H + +#include <__algorithm/lower_bound.h> +#include <__algorithm/upper_bound.h> +#include <__atomic/atomic.h> +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__memory/construct_at.h> +#include <__memory/unique_ptr.h> +#include <__numeric/transform_reduce.h> +#include <__utility/move.h> +#include +#include + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace __par_backend { +inline namespace __libdispatch { + +_LIBCPP_FUNC_VIS void +__dispatch_apply(size_t __chunk_count, void* __context, void (*__func)(void* __context, size_t __chunk)); + +template +_LIBCPP_HIDE_FROM_ABI void __dispatch_apply(size_t __chunk_count, _Func __func) { + __libdispatch::__dispatch_apply(__chunk_count, &__func, [](void* __context, size_t __chunk) { + (*static_cast<_Func*>(__context))(__chunk); + }); +} + +// Preliminary size of each chunk: requires further discussion +constexpr std::size_t __default_chunk_size = 2048; + +struct __chunk_partitions { + size_t __chunk_count_; + size_t __chunk_size_; + size_t __first_chunk_size_; +}; + +template +_LIBCPP_HIDE_FROM_ABI __chunk_partitions +__partition_chunks(_RandomAccessIterator __first, _RandomAccessIterator __last) { + using _Size = __iter_diff_t<_RandomAccessIterator>; + __chunk_partitions __partitions; + + auto& __chunk_size = __partitions.__chunk_size_; + auto& __first_chunk_size = __partitions.__first_chunk_size_; + auto& __n_chunks = __partitions.__chunk_count_; + auto __requested_chunk_size = __default_chunk_size; + + const _Size __first_chunk_size_ = __last - __first; + if (__first_chunk_size_ < __requested_chunk_size) { + __chunk_size = __first_chunk_size_; + __first_chunk_size = __first_chunk_size_; + __n_chunks = 1; + return __partitions; + } + + __n_chunks = (__first_chunk_size_ / __requested_chunk_size) + 1; + __chunk_size = __first_chunk_size_ / __n_chunks; + __first_chunk_size = __chunk_size; + const _Size __n_leftover_items = __first_chunk_size_ - (__n_chunks * __chunk_size); + + if (__n_leftover_items == __chunk_size) { + __n_chunks += 1; + return __partitions; + } else if (__n_leftover_items == 0) { + __first_chunk_size = __chunk_size; + return __partitions; + } + + const _Size __n_extra_items_per_chunk = __n_leftover_items / __n_chunks; + const _Size __n_final_leftover_items = __n_leftover_items - (__n_extra_items_per_chunk * __n_chunks); + + __chunk_size += __n_extra_items_per_chunk; + __first_chunk_size = __chunk_size + __n_final_leftover_items; + return __partitions; +} + +template +_LIBCPP_HIDE_FROM_ABI void __parallel_for(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __f) { + // Create an executor over the range that will execute f in + // chunks over the range. + + auto __partitions = __libdispatch::__partition_chunks(__first, __last); + + // Perform the chunked execution. + __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) { + auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_; + auto __index = + __chunk == 0 + ? 0 + : (__chunk * __partitions.__chunk_size_) + (__partitions.__first_chunk_size_ - __partitions.__chunk_size_); + __f(__first + __index, __first + __index + __this_chunk_size); + }); +} + +template +_LIBCPP_HIDE_FROM_ABI void __parallel_invoke(_Func1&& __f1, _Func2&& __f2) { + __libdispatch::__dispatch_apply(2, [&](size_t __index) { + if (__index == 0) + __f1(); + else + __f2(); + }); +} + +template +_LIBCPP_HIDE_FROM_ABI void __parallel_merge_body( + std::size_t __size_x, + std::size_t __size_y, + _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, + _RandomAccessIterator2 __ys, + _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, + _Compare __comp, + _LeafMerge __leaf_merge) { + if (__size_x + __size_y <= __default_chunk_size) { + __leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp); + return; + } + + _RandomAccessIterator1 __xm; + _RandomAccessIterator2 __ym; + + if (__size_x < __size_y) { + __ym = __ys + (__size_y / 2); + __xm = std::upper_bound(__xs, __xe, *__ym, __comp); + } else { + __xm = __xs + (__size_x / 2); + __ym = std::lower_bound(__ys, __ye, *__xm, __comp); + } + + auto __zm = __zs + (__xm - __xs) + (__ym - __ys); + + __libdispatch::__parallel_invoke( + [__xs, __xm, __ys, __ym, __zs, __comp, &__leaf_merge]() { + __libdispatch::__parallel_merge_body( + __xm - __xs, __ym - __ys, __xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge); + }, + [__xm, __xe, __ym, __ye, __zm, __comp, &__leaf_merge]() { + __libdispatch::__parallel_merge_body( + __xe - __xm, __ye - __ym, __xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge); + }); +} + +template +_LIBCPP_HIDE_FROM_ABI void __parallel_merge( + _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, + _RandomAccessIterator2 __ys, + _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, + _Compare __comp, + _LeafMerge __leaf_merge) { + std::size_t __size_x = __xe - __xs; + std::size_t __size_y = __ye - __ys; + + if (__size_x + __size_y <= __default_chunk_size) { + __leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp); + return; + } + + _RandomAccessIterator1 __xm; + _RandomAccessIterator2 __ym; + + if (__size_x < __size_y) { + __ym = __ys + (__size_y / 2); + __xm = std::upper_bound(__xs, __xe, *__ym, __comp); + } else { + __xm = __xs + (__size_x / 2); + __ym = std::lower_bound(__ys, __ye, *__xm, __comp); + } + + auto __zm = __zs + (__xm - __xs) + (__ym - __ys); + + __libdispatch::__parallel_invoke( + [__xs, __xm, __ys, __ym, __zs, __comp, &__leaf_merge]() { + __parallel_merge_body(__xm - __xs, __ym - __ys, __xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge); + }, + [__xm, __xe, __ym, __ye, __zm, __comp, &__leaf_merge]() { + __parallel_merge_body(__xe - __xm, __ye - __ym, __xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge); + }); +} + +template +_LIBCPP_HIDE_FROM_ABI _Value __parallel_transform_reduce( + _RandomAccessIterator __first, + _RandomAccessIterator __last, + _UnaryOp __unary_op, + _Value __init, + _Combiner __combiner, + _Reduction __reduction) { + auto __partitions = __libdispatch::__partition_chunks(__first, __last); + unique_ptr<_Value> __values(::operator new(__partitions.__chunk_count_, align_val_t{alignof(_Value)}, nothrow)); + if (__values == nullptr) + std::__throw_pstl_bad_alloc(); + + __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) { + auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_; + auto __index = + __chunk == 0 + ? 0 + : (__chunk * __partitions.__chunk_size_) + (__partitions.__first_chunk_size_ - __partitions.__chunk_size_); + std::__construct_at(__values + __index, __f(__first + __index, __first + __index + __this_chunk_size)); + }); + return std::transform_reduce(__values, __values + __partitions.__chunk_count_, __init, __unary_op, __reduction); +} + +_LIBCPP_HIDE_FROM_ABI inline void __cancel_execution() {} + +} // namespace __libdispatch +} // namespace __par_backend + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_LIBDISPATCH_H diff --git a/libcxx/include/__config_site.in b/libcxx/include/__config_site.in --- a/libcxx/include/__config_site.in +++ b/libcxx/include/__config_site.in @@ -35,6 +35,7 @@ // PSTL backends #cmakedefine _LIBCPP_PSTL_CPU_BACKEND_SERIAL #cmakedefine _LIBCPP_PSTL_CPU_BACKEND_THREAD +#cmakedefine _LIBCPP_PSTL_CPU_BACKEND_LIBDISPATCH // __USE_MINGW_ANSI_STDIO gets redefined on MinGW #ifdef __clang__ diff --git a/libcxx/include/__utility/terminate_on_exception.h b/libcxx/include/__utility/terminate_on_exception.h --- a/libcxx/include/__utility/terminate_on_exception.h +++ b/libcxx/include/__utility/terminate_on_exception.h @@ -11,6 +11,7 @@ #include <__config> #include <__exception/terminate.h> +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -26,6 +27,8 @@ _LIBCPP_HIDE_FROM_ABI auto __terminate_on_exception(_Func __func) { try { return __func(); + } catch (const __pstl_bad_alloc&) { + throw; } catch (...) { std::terminate(); } diff --git a/libcxx/include/new b/libcxx/include/new --- a/libcxx/include/new +++ b/libcxx/include/new @@ -172,6 +172,20 @@ _LIBCPP_NORETURN _LIBCPP_FUNC_VIS void __throw_bad_alloc(); // not in C++ spec +#if _LIBCPP_STD_VER >= 17 +struct _LIBCPP_EXCEPTION_ABI __pstl_bad_alloc : bad_alloc { + const char* what() const noexcept override; +}; + +[[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void __throw_pstl_bad_alloc() { +#ifndef _LIBCPP_HAS_NO_EXCEPTIONS + throw __pstl_bad_alloc{}; +#else + _LIBCPP_VERBOSE_ABORT("PSTL allocation failed in -fno-exceptions mode"); +#endif +} +#endif // _LIBCPP_STD_VER >= 17 + _LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY void __throw_bad_array_new_length() { diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -313,6 +313,11 @@ experimental/memory_resource.cpp ) +if (LIBCXX_PSTL_CPU_BACKEND STREQUAL "libdispatch") + set(LIBCXX_EXPERIMENTAL_SOURCES ${LIBCXX_EXPERIMENTAL_SOURCES} + pstl/libdispatch.cpp) +endif() + add_library(cxx_experimental STATIC ${LIBCXX_EXPERIMENTAL_SOURCES}) target_link_libraries(cxx_experimental PUBLIC cxx-headers) if (LIBCXX_ENABLE_SHARED) diff --git a/libcxx/src/new.cpp b/libcxx/src/new.cpp --- a/libcxx/src/new.cpp +++ b/libcxx/src/new.cpp @@ -48,6 +48,10 @@ #endif // !LIBSTDCXX +const char* __pstl_bad_alloc::what() const noexcept { + return "PSTL memory allocation failed"; +} + } // std #if !defined(__GLIBCXX__) && \ diff --git a/libcxx/src/pstl/libdispatch.cpp b/libcxx/src/pstl/libdispatch.cpp new file mode 100644 --- /dev/null +++ b/libcxx/src/pstl/libdispatch.cpp @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <__algorithm/pstl_backends/cpu_backends/libdispatch.h> +#include <__config> +#include + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace __par_backend::inline __libdispatch { + +void __dispatch_apply(size_t chunk_count, void* context, void (*func)(void* context, size_t chunk)) { + ::dispatch_apply_f(chunk_count, DISPATCH_APPLY_AUTO, context, func); +} + +} // namespace __par_backend::inline __libdispatch + +_LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -120,6 +120,7 @@ #include <__algorithm/pstl_backends/cpu_backends/fill.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/fill.h'}} #include <__algorithm/pstl_backends/cpu_backends/find_if.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/find_if.h'}} #include <__algorithm/pstl_backends/cpu_backends/for_each.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/for_each.h'}} +#include <__algorithm/pstl_backends/cpu_backends/libdispatch.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/libdispatch.h'}} #include <__algorithm/pstl_backends/cpu_backends/merge.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/merge.h'}} #include <__algorithm/pstl_backends/cpu_backends/serial.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/serial.h'}} #include <__algorithm/pstl_backends/cpu_backends/thread.h> // expected-error@*:* {{use of private header from outside its module: '__algorithm/pstl_backends/cpu_backends/thread.h'}}