diff --git a/pstl/CMakeLists.txt b/pstl/CMakeLists.txt --- a/pstl/CMakeLists.txt +++ b/pstl/CMakeLists.txt @@ -50,6 +50,9 @@ message(STATUS "Parallel STL uses the omp backend") target_compile_options(ParallelSTL INTERFACE "-fopenmp=libomp") set(_PSTL_PAR_BACKEND_OPENMP ON) +elseif (PSTL_PARALLEL_BACKEND STREQUAL "gcd") + message(STATUS "Parallel STL uses the gcd backend") + set(_PSTL_PAR_BACKEND_GCD ON) else() message(FATAL_ERROR "Requested unknown Parallel STL backend '${PSTL_PARALLEL_BACKEND}'.") endif() diff --git a/pstl/CREDITS.txt b/pstl/CREDITS.txt --- a/pstl/CREDITS.txt +++ b/pstl/CREDITS.txt @@ -18,4 +18,4 @@ N: Christopher Nelson E: nadiasvertex@gmail.com -D: Add support for an OpenMP backend. +D: Add support for the OpenMP and Grand Central Dispatch backends. diff --git a/pstl/include/__pstl_config_site.in b/pstl/include/__pstl_config_site.in --- a/pstl/include/__pstl_config_site.in +++ b/pstl/include/__pstl_config_site.in @@ -12,6 +12,7 @@ #cmakedefine _PSTL_PAR_BACKEND_SERIAL #cmakedefine _PSTL_PAR_BACKEND_TBB #cmakedefine _PSTL_PAR_BACKEND_OPENMP +#cmakedefine _PSTL_PAR_BACKEND_GCD #cmakedefine _PSTL_HIDE_FROM_ABI_PER_TU #endif // __PSTL_CONFIG_SITE diff --git a/pstl/include/pstl/internal/execution_impl.h b/pstl/include/pstl/internal/execution_impl.h --- a/pstl/include/pstl/internal/execution_impl.h +++ b/pstl/include/pstl/internal/execution_impl.h @@ -39,11 +39,16 @@ struct __openmp_backend_tag { }; +struct __gcd_backend_tag +{ +}; #if defined(_PSTL_PAR_BACKEND_TBB) using __par_backend_tag = __tbb_backend_tag; #elif defined(_PSTL_PAR_BACKEND_OPENMP) using __par_backend_tag = __openmp_backend_tag; +#elif defined(_PSTL_PAR_BACKEND_GCD) +using __par_backend_tag = __gcd_backend_tag; #elif defined(_PSTL_PAR_BACKEND_SERIAL) using __par_backend_tag = __serial_backend_tag; #else diff --git a/pstl/include/pstl/internal/gcd/parallel_for.h b/pstl/include/pstl/internal/gcd/parallel_for.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/gcd/parallel_for.h @@ -0,0 +1,42 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_GCD_PARALLEL_FOR_H +#define _PSTL_INTERNAL_GCD_PARALLEL_FOR_H + +#include "util.h" + +namespace __pstl +{ +namespace __gcd_backend +{ + +//------------------------------------------------------------------------ +// Notation: +// Evaluation of brick f[i,j) for each subrange [i,j) of [first, last) +//------------------------------------------------------------------------ + +template +void +__parallel_for(___pstl::__internal::__gcd_backend_tag /* __tag */, ExecutionPolicy&&, _Index __first, _Index __last, + _Fp __f) +{ + // Create an executor over the range that will execute f in + // chunks over the range. + __pstl::__gcd_backend::__chunked_executor __for_loop(__first, __last, __f); + + // Perform the chunked execution. + __for_loop(); +} + +} // namespace __gcd_backend +} // namespace __pstl + +#endif diff --git a/pstl/include/pstl/internal/gcd/parallel_for_each.h b/pstl/include/pstl/internal/gcd/parallel_for_each.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/gcd/parallel_for_each.h @@ -0,0 +1,46 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_GCD_PARALLEL_FOR_EACH_H +#define _PSTL_INTERNAL_GCD_PARALLEL_FOR_EACH_H + +#include "util.h" + +namespace __pstl +{ +namespace __gcd_backend +{ + +template +void +__parallel_for_each(__pstl::__internal::__gcd_backend_tag /* __tag */, _ExecutionPolicy&&, _ForwardIterator __first, + _ForwardIterator __last, _Fp __f) +{ + std::size_t __count = std::distance(__first, __last); + using _data_type = std::tuple<_ForwardIt, _UnaryFunction>; + _data_type __data(__first, std::move(__f)); + + // dispatch_apply_f blocks until all iterations are complete. Therefore, it + // is safe to pass a pointer to the stack allocated data above. + ::dispatch_apply_f(__count, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), &__data, + [](void* __ctx, std::size_t __index) + { + auto* __d = static_cast<_data_type*>(__ctx); + // Get the data item to operate on + auto __elem_it = std::next(std::get<0>(*__d), __index); + // Apply the function to the argument + std::get<1> (*__d)(*(__elem_it)); + }); +} + +} // namespace __gcd_backend +} // namespace __pstl + +#endif diff --git a/pstl/include/pstl/internal/gcd/parallel_invoke.h b/pstl/include/pstl/internal/gcd/parallel_invoke.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/gcd/parallel_invoke.h @@ -0,0 +1,46 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_GCD_PARALLEL_INVOKE_H +#define _PSTL_INTERNAL_GCD_PARALLEL_INVOKE_H + +#include "util.h" + +namespace __pstl +{ +namespace __gcd_backend +{ + +template +void +__parallel_invoke(__pstl::__internal::__gcd_backend_tag /* __tag */, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +{ + using _ValueType = std::tuple<_F1, _F2>; + + _ValueType __data(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); + ::dispatch_apply_f(2, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), &__data, + [](void* __ctx, std::size_t __index) + { + auto* __d = static_cast<_ValueType*>(__ctx); + if (__index == 0) + { + std::get<0> (*__d)(); + } + else + { + std::get<1> (*__d)(); + } + }); +} + +} // namespace __gcd_backend +} // namespace __pstl + +#endif diff --git a/pstl/include/pstl/internal/gcd/parallel_merge.h b/pstl/include/pstl/internal/gcd/parallel_merge.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/gcd/parallel_merge.h @@ -0,0 +1,77 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_GCD_PARALLEL_MERGE_H +#define _PSTL_INTERNAL_GCD_PARALLEL_MERGE_H + +#include "util.h" + +namespace __pstl +{ +namespace __gcd_backend +{ +template +void +__parallel_merge_body(std::size_t __size_x, std::size_t __size_y, _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) +{ + + if (__size_x + __size_y <= gcd_backend::default_chunk_size) + { + __leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp); + return; + } + + _RandomAccessIterator1 __xm; + _RandomAccessIterator2 __ym; + + if (__size_x < __size_y) + { + __ym = __ys + (__size_y / 2); + __xm = std::upper_bound(__xs, __xe, *__ym, __comp); + } + else + { + __xm = __xs + (__size_x / 2); + __ym = std::lower_bound(__ys, __ye, *__xm, __comp); + } + + auto zm = __zs + (__xm - __xs) + (__ym - __ys); + + __pstl::__gcd_backend::__parallel_invoke( + [__xs, __xm, __ys, __ym, __zs, __comp, &__leaf_merge]() + { __parallel_merge_body(__xm - __xs, __ym - __ys, __xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge); }, + [__xm, __xe, __ym, __ye, zm, __comp, &__leaf_merge]() + { __parallel_merge_body(__xe - __xm, __ye - __ym, __xm, __xe, __ym, __ye, zm, __comp, __leaf_merge); }); +} + +template +void +__parallel_merge(__pstl::__internal::__gcd_backend_tag /* __tag */, _ExecutionPolicy&& /*__exec*/, + _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, + _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) +{ + std::size_t __size_x = __xe - __xs; + std::size_t __size_y = __ye - __ys; + + /* + * Run the merge in parallel by chunking it up. + */ + __pstl::__gcd_backend::__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, + __leaf_merge); +} + +} // namespace __gcd_backend +} // namespace __pstl + +#endif diff --git a/pstl/include/pstl/internal/gcd/parallel_reduce.h b/pstl/include/pstl/internal/gcd/parallel_reduce.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/gcd/parallel_reduce.h @@ -0,0 +1,40 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_GCD_PARALLEL_REDUCE_H +#define _PSTL_INTERNAL_GCD_PARALLEL_REDUCE_H + +#include "util.h" + +namespace __pstl +{ +namespace __gcd_backend +{ + +//------------------------------------------------------------------------ +// Notation: +// r(i,j,init) returns __reduction of init with __reduction over [i,j) +// c(x,y) combines values x and y that were the result of r +//------------------------------------------------------------------------ + +template +_Value +__parallel_reduce(__pstl::__internal::__gcd_backend_tag /* __tag */, _ExecutionPolicy&& /*__exec*/, + _RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity, _RealBody __real_body, + _Reduction __reduction) +{ + __pstl::__gcd_backend::__chunked_reduce_executor __reduce(__first, __last, __real_body, __reduction, __identity); + + return __reduce(); +} + +} // namespace __gcd_backend +} // namespace __pstl +#endif diff --git a/pstl/include/pstl/internal/gcd/parallel_stable_partial_sort.h b/pstl/include/pstl/internal/gcd/parallel_stable_partial_sort.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/gcd/parallel_stable_partial_sort.h @@ -0,0 +1,34 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_GCD_PARALLEL_PARTIAL_STABLE_SORT_H +#define _PSTL_INTERNAL_GCD_PARALLEL_PARTIAL_STABLE_SORT_H + +#include "util.h" + +namespace __pstl +{ +namespace __gcd_backend +{ + +template +void +__parallel_stable_partial_sort(__pstl::__internal::__gcd_backend_tag /* __tag */, _RandomAccessIterator __xs, + _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, + std::size_t /* __nsort */) +{ + // TODO: Parallel partial sort needs to be implemented. + // TODO: The partial sort implementation should be shared with the other backends. + __leaf_sort(__xs, __xe, __comp); +} + +} // namespace __gcd_backend +} // namespace __pstl +#endif diff --git a/pstl/include/pstl/internal/gcd/parallel_stable_sort.h b/pstl/include/pstl/internal/gcd/parallel_stable_sort.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/gcd/parallel_stable_sort.h @@ -0,0 +1,123 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_GCD_PARALLEL_STABLE_SORT_H +#define _PSTL_INTERNAL_GCD_PARALLEL_STABLE_SORT_H + +#include "util.h" +#include "parallel_merge.h" + +namespace __pstl +{ +namespace __gcd_backend +{ + +namespace __sort_details +{ + +template +_Iterator2 +__parallel_move_range(_Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2) +{ + std::size_t __size = std::distance(__first1, __last1); + + // Perform serial moving of small chunks + + if (__size <= __default_chunk_size) + { + return std::move(__first1, __last1, __first2); + } + + // Perform parallel moving of larger chunks + __pstl::__gcd_backend::__chunked_executor __exec(__first1, __last1, + [__first1, __first2](auto __chunk_first, auto __chunk_last) + { + auto __chunk_offset = std::distance(__first1, __chunk_first); + auto __output = std::next(__first2, __chunk_offset); + std::move(__chunk_first, __chunk_last, __output); + }); + + // Perform the chunked execution. + __exec(); + + return __first2 + __size; +} + +template +struct __move_range +{ + _Iterator2 + operator()(_Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2) + { + return __pstl::__gcd_backend::__parallel_move_range(__first1, __last1, __first2); + } +}; +} // namespace __sort_details + +template +void +__parallel_stable_sort_body(_RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, + _LeafSort __leaf_sort) +{ + using _ValueType = typename std::iterator_traits<_RandomAccessIterator>::value_type; + using _VecType = typename std::vector<_ValueType>; + using _OutputIterator = typename _VecType::iterator; + using _MoveValueType = typename utils::__move_value<_RandomAccessIterator, _OutputIterator>; + using _MoveRangeType = __sort_details::__move_range<_RandomAccessIterator, _OutputIterator>; + + std::size_t __size = __xe - __xs; + if (__size <= __default_chunk_size) + { + __leaf_sort(__xs, __xe, __comp); + } + else + { + auto __mid = __xs + (__size / 2); + __pstl::__gcd_backend::__parallel_invoke( + [__xs, __mid, &__comp, &__leaf_sort]() { __parallel_stable_sort_body(__xs, __mid, __comp, __leaf_sort); }, + [__mid, __xe, &__comp, &__leaf_sort]() { __parallel_stable_sort_body(__mid, __xe, __comp, __leaf_sort); }); + + // Perform a parallel __merge of the sorted ranges into __output. + _VecType __output(__size); + _MoveValueType __move_value; + _MoveRangeType __move_range; + __utils::__serial_move_merge __merge(__size); + __parallel_merge_body( + std::distance(__xs, __mid), std::distance(__mid, __xe), __xs, __mid, __mid, __xe, __output.begin(), __comp, + [&__merge, &__move_value, &__move_range](_RandomAccessIterator __as, _RandomAccessIterator __ae, + _RandomAccessIterator __bs, _RandomAccessIterator __be, + _OutputIterator __cs, _Compare __comp) + { __merge(__as, __ae, __bs, __be, __cs, __comp, __move_value, __move_value, __move_range, __move_range); }); + + // Move the values from __output back in the original source range. + __sort_details::__parallel_move_range(__output.begin(), __output.end(), __xs); + } +} + +template +void +__parallel_stable_sort(__pstl::__internal::__gcd_backend_tag __tag, _RandomAccessIterator __xs, + _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, std::size_t __nsort = 0) +{ + + auto __count = static_cast(__xe - __xs); + if (__count <= __nsort) + { + __pstl::__gcd_backend::__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort); + } + else + { + __pstl::__gcd_backend::__parallel_stable_partial_sort(__tag, __xs, __xe, __comp, __leaf_sort, __nsort); + } +} + +} // namespace __gcd_backend +} // namespace __pstl +#endif diff --git a/pstl/include/pstl/internal/gcd/parallel_transform_reduce.h b/pstl/include/pstl/internal/gcd/parallel_transform_reduce.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/gcd/parallel_transform_reduce.h @@ -0,0 +1,58 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_GCD_PARALLEL_TRANSFORM_REDUCE_H +#define _PSTL_INTERNAL_GCD_PARALLEL_TRANSFORM_REDUCE_H + +#include "util.h" + +namespace __pstl +{ +namespace __gcd_backend +{ + +//------------------------------------------------------------------------ +// parallel_transform_reduce +// +// Notation: +// r(i,j,init) returns reduction of init with reduction over [i,j) +// u(i) returns f(i,i+1,identity) for a hypothetical left identity element +// of r c(x,y) combines values x and y that were the result of r or u +//------------------------------------------------------------------------ + +template +_Value +__parallel_transform_reduce(__pstl::__internal::__gcd_backend_tag /* __tag */, _ExecutionPolicy&&, + _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryOp __unary_op, + _Value __init, _Combiner __combiner, _Reduction __reduction) +{ + + __pstl::__gcd_backend::__chunked_reduce_executor __reduce( + __first, __last, + [&__unary_op, &__reduction](auto __begin, auto __end) + { + std::vector<_Value> __xforms; + __xforms.reserve(std::distance(__begin, __end)); + for (auto __item = __begin; __item != __end; ++__item) + { + __xforms.emplace_back(__unary_op(*__item)); + } + return __reduction(__xforms.begin(), __xforms.end()); + }, + __combiner, __init); + + return __reduce(); +} + +} // namespace __gcd_backend +} // namespace __pstl + +#endif // _PSTL_INTERNAL_GCD_PARALLEL_TRANSFORM_REDUCE_H diff --git a/pstl/include/pstl/internal/gcd/parallel_transform_scan.h b/pstl/include/pstl/internal/gcd/parallel_transform_scan.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/gcd/parallel_transform_scan.h @@ -0,0 +1,33 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_GCD_PARALLEL_TRANSFORM_SCAN_H +#define _PSTL_INTERNAL_GCD_PARALLEL_TRANSFORM_SCAN_H + +#include "util.h" + +namespace __pstl +{ +namespace __gcd_backend +{ + +template +_Tp +__parallel_transform_scan(__pstl::__internal::__gcd_backend_tag /* __tag */, _ExecutionPolicy&&, _Index __n, + _Up /* __u */, _Tp __init, _Cp /* __combine */, _Rp /* __brick_reduce */, _Sp __scan) +{ + // TODO: parallelize this function. + return __scan(_Index(0), __n, __init); +} + +} // namespace __gcd_backend +} // namespace __pstl + +#endif // _PSTL_INTERNAL_GCD_PARALLEL_TRANSFORM_SCAN_H diff --git a/pstl/include/pstl/internal/gcd/util.h b/pstl/include/pstl/internal/gcd/util.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/gcd/util.h @@ -0,0 +1,205 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_INTERNAL_GCD_UTIL_H +#define _PSTL_INTERNAL_GCD_UTIL_H + +#include +#include +#include +#include + +#include + +namespace __pstl +{ +namespace __gcd_backend +{ + +// Preliminary size of each chunk: requires further discussion +constexpr std::size_t __default_chunk_size = 2048; + +// The iteration space partitioner according to __requested_chunk_size +template +void +__chunk_partitioner(_RandomAccessIterator __first, _RandomAccessIterator __last, _Size& __n_chunks, _Size& __chunk_size, + _Size& __first_chunk_size, _Size __requested_chunk_size = __default_chunk_size) +{ + /* + * This algorithm improves distribution of elements in chunks by avoiding + * small tail chunks. The leftover elements that do not fit neatly into + * the chunk size are redistributed to early chunks. This improves + * utilization of the processor's prefetch and reduces the number of + * tasks needed by 1. + */ + + const _Size __first_chunk_size_ = __last - __first; + if (__first_chunk_size_ < __requested_chunk_size) + { + __chunk_size = __first_chunk_size_; + __first_chunk_size = __first_chunk_size_; + __n_chunks = 1; + return; + } + + __n_chunks = (__first_chunk_size_ / __requested_chunk_size) + 1; + __chunk_size = __first_chunk_size_ / __n_chunks; + __first_chunk_size = __chunk_size; + const _Size __n_leftover_items = __first_chunk_size_ - (__n_chunks * __chunk_size); + + if (__n_leftover_items == __chunk_size) + { + __n_chunks += 1; + return; + } + else if (__n_leftover_items == 0) + { + __first_chunk_size = __chunk_size; + return; + } + + const _Size __n_extra_items_per_chunk = __n_leftover_items / __n_chunks; + const _Size __n_final_leftover_items = __n_leftover_items - (__n_extra_items_per_chunk * __n_chunks); + + __chunk_size += __n_extra_items_per_chunk; + __first_chunk_size = __chunk_size + __n_final_leftover_items; +} + +template +struct __chunked_executor +{ + std::size_t __n_chunks_{0}, __chunk_size_{0}, __first_chunk_size_{0}; + _RandomAccessIterator __first_, __last_; + _Func _f; + + /** + * __chunked_executor constructor initializes a grand central dispatch queue + * dispatcher that runs the given function using chunks. + * + * @param __first the __first item to consider + * @param __last the __last item to consider + * @param __func a function that will receive iterators for a chunk to process. + */ + __chunked_executor(_RandomAccessIterator __first, _RandomAccessIterator __last, _Func __func) + : __first_(std::move(__first)), __last_(std::move(__last)), _f(std::move(__func)) + { + __pstl::__gcd_backend::__chunk_partitioner(__first_, __last_, __n_chunks_, __chunk_size_, __first_chunk_size_); + } + + /** + * operator() executes _Func over the range. + */ + void + operator()() + { + // dispatch_apply_f blocks until all iterations are complete. Therefore, it + // is safe to pass a pointer to the stack allocated data above. + ::dispatch_apply_f(__n_chunks_, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), this, + [](void* __ctx, std::size_t __chunk) + { + auto* __d = static_cast<__chunked_executor<_RandomAccessIterator, _Func>*>(__ctx); + auto __this_chunk_size = __chunk == 0 ? __d->__first_chunk_size_ : __d->__chunk_size_; + auto __index = __chunk == 0 ? 0 + : (__chunk * __d->__chunk_size_) + + (__d->__first_chunk_size_ - __d->__chunk_size_); + auto __begin = __d->__first_ + __index; + auto __end = __begin + __this_chunk_size; + __d->_f(__begin, __end); + }); + } +}; + +template +struct __chunked_reduce_executor +{ + _RandomAccessIterator __first_, __last_; + _Func __func_; + _Reducer __reducer_; + _Value __identity_; + + /** + * __chunked_reduce_executor constructor initializes a grand central dispatch + * queue dispatcher that runs the given function using chunks. Each chunk + * provides a return value that is reduced with other results. + * + * @param __first the __first item to consider + * @param __last the __last item to consider + * @param __func a function that will receive iterators for a chunk to process. + * @param __reducer a function that will receive values from the chunks to reduce. + * @param __identity a value that serves as an identity for the reduce + * oepration. + */ + __chunked_reduce_executor(_RandomAccessIterator __first, _RandomAccessIterator __last, _Func __func, + _Reducer __reducer, _Value __identity) + : __first_(std::move(__first)), __last_(std::move(__last)), __func_(std::move(__func)), + __reducer_(std::move(__reducer)), __identity_(std::move(__identity)) + { + } + + struct reduce_chunk + { + _RandomAccessIterator __first_; + _RandomAccessIterator __last_; + _Func __func_; + _Reducer __reducer_; + _Value __identity_; + _Value __result_; + + reduce_chunk(_RandomAccessIterator __first, _RandomAccessIterator __last, _Func __func, _Reducer __reducer, + _Value __identity) + : __first_(std::move(__first)), __last_(std::move(__last)), __func_(std::move(__func)), + __reducer_(std::move(__reducer)), __identity_(std::move(__identity)), __result_(__identity_) + { + } + + void + operator()() + { + std::size_t __size = std::distance(__first_, __last_); + + if (__size <= __default_chunk_size) + { + __result_ = __func_(__first_, __last_, __identity_); + } + else + { + auto __middle = __first_ + (__size / 2); + std::array __chunks{ + reduce_chunk(__first_, __middle, __func_, __reducer_, __identity_), + reduce_chunk(__middle, __last_, __func_, __reducer_, __identity_), + }; + + // dispatch_apply_f blocks until all iterations are complete. Therefore, + // it is safe to pass a pointer to the stack allocated data above. + ::dispatch_apply_f(2, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), __chunks.data(), + [](void* __ctx, ::std::size_t __chunk) + { + auto* __d = static_cast(__ctx); + __d[__chunk](); + }); + + __result_ = __reducer_(__chunks[0].__result_, __chunks[1].__result_); + } + } + }; + + _Value + operator()() + { + reduce_chunk __root(__first_, __last_, __func_, __reducer_, __identity_); + __root(); + return __root.__result_; + } +}; + +} // namespace __gcd_backend +} // namespace __pstl + +#endif diff --git a/pstl/include/pstl/internal/parallel_backend.h b/pstl/include/pstl/internal/parallel_backend.h --- a/pstl/include/pstl/internal/parallel_backend.h +++ b/pstl/include/pstl/internal/parallel_backend.h @@ -30,6 +30,12 @@ { namespace __par_backend = __omp_backend; } +#elif defined(_PSTL_PAR_BACKEND_GCD) +# include "parallel_backend_gcd.h" +namespace __pstl +{ +namespace __par_backend = __gcd_backend; +} #else _PSTL_PRAGMA_MESSAGE("Parallel backend was not specified"); #endif diff --git a/pstl/include/pstl/internal/parallel_backend_gcd.h b/pstl/include/pstl/internal/parallel_backend_gcd.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/parallel_backend_gcd.h @@ -0,0 +1,49 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_PARALLEL_BACKEND_GCD_H +#define _PSTL_PARALLEL_BACKEND_GCD_H + +//------------------------------------------------------------------------ +// parallel_invoke +//------------------------------------------------------------------------ + +#include "./gcd/parallel_invoke.h" + +//------------------------------------------------------------------------ +// parallel_for +//------------------------------------------------------------------------ + +#include "./gcd/parallel_for.h" + +//------------------------------------------------------------------------ +// parallel_for_each +//------------------------------------------------------------------------ + +#include "./gcd/parallel_for_each.h" + +//------------------------------------------------------------------------ +// parallel_reduce +//------------------------------------------------------------------------ + +#include "./gcd/parallel_reduce.h" + +//------------------------------------------------------------------------ +// parallel_stable_sort +//------------------------------------------------------------------------ + +#include "./gcd/parallel_stable_partial_sort.h" +#include "./gcd/parallel_stable_sort.h" + +//------------------------------------------------------------------------ +// parallel_merge +//------------------------------------------------------------------------ +#include "./gcd/parallel_merge.h" +#endif diff --git a/pstl/include/pstl/internal/pstl_config.h b/pstl/include/pstl/internal/pstl_config.h --- a/pstl/include/pstl/internal/pstl_config.h +++ b/pstl/include/pstl/internal/pstl_config.h @@ -18,7 +18,8 @@ #define _PSTL_VERSION_MINOR ((_PSTL_VERSION % 1000) / 10) #define _PSTL_VERSION_PATCH (_PSTL_VERSION % 10) -#if !defined(_PSTL_PAR_BACKEND_SERIAL) && !defined(_PSTL_PAR_BACKEND_TBB) && !defined(_PSTL_PAR_BACKEND_OPENMP) +#if !defined(_PSTL_PAR_BACKEND_SERIAL) && !defined(_PSTL_PAR_BACKEND_TBB) && \ + !defined(_PSTL_PAR_BACKEND_OPENMP) && !defined(_PSTL_PAR_BACKEND_GCD) # error "A parallel backend must be specified" #endif