diff --git a/libcxx/cmake/Modules/DefineLinkerScript.cmake b/libcxx/cmake/Modules/DefineLinkerScript.cmake --- a/libcxx/cmake/Modules/DefineLinkerScript.cmake +++ b/libcxx/cmake/Modules/DefineLinkerScript.cmake @@ -31,7 +31,7 @@ set(link_libraries) if (interface_libs) foreach(lib IN LISTS interface_libs) - if ("${lib}" STREQUAL "cxx-headers") + if ("${lib}" MATCHES "cxx-headers|ParallelSTL") continue() endif() # If ${lib} is not a target, we use a dummy target which we know will diff --git a/pstl/CMakeLists.txt b/pstl/CMakeLists.txt --- a/pstl/CMakeLists.txt +++ b/pstl/CMakeLists.txt @@ -16,7 +16,7 @@ project(ParallelSTL VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH} LANGUAGES CXX) -set(PSTL_PARALLEL_BACKEND "serial" CACHE STRING "Threading backend to use. Valid choices are 'serial' and 'tbb'. The default is 'serial'.") +set(PSTL_PARALLEL_BACKEND "serial" CACHE STRING "Threading backend to use. Valid choices are 'serial', 'omp', and 'tbb'. The default is 'serial'.") set(PSTL_HIDE_FROM_ABI_PER_TU OFF CACHE BOOL "Whether to constrain ABI-unstable symbols to each translation unit (basically, mark them with C's static keyword).") set(_PSTL_HIDE_FROM_ABI_PER_TU ${PSTL_HIDE_FROM_ABI_PER_TU}) # For __pstl_config_site @@ -43,6 +43,10 @@ message(STATUS "Parallel STL uses TBB ${TBB_VERSION} (interface version: ${TBB_INTERFACE_VERSION})") target_link_libraries(ParallelSTL INTERFACE TBB::tbb) set(_PSTL_PAR_BACKEND_TBB ON) +elseif (PSTL_PARALLEL_BACKEND STREQUAL "omp") + message(STATUS "Parallel STL uses the omp backend") + target_compile_options(ParallelSTL INTERFACE "-fopenmp=libomp") + set(_PSTL_PAR_BACKEND_OMP ON) else() message(FATAL_ERROR "Requested unknown Parallel STL backend '${PSTL_PARALLEL_BACKEND}'.") endif() diff --git a/pstl/CREDITS.txt b/pstl/CREDITS.txt --- a/pstl/CREDITS.txt +++ b/pstl/CREDITS.txt @@ -15,3 +15,7 @@ N: Thomas Rodgers E: trodgers@redhat.com D: Identifier name transformation for inclusion in a Standard C++ library. + +N: Christopher Nelson +E: nadiasvertex@gmail.com +D: Add support for an OpenMP backend. \ No newline at end of file diff --git a/pstl/include/__pstl_config_site.in b/pstl/include/__pstl_config_site.in --- a/pstl/include/__pstl_config_site.in +++ b/pstl/include/__pstl_config_site.in @@ -11,6 +11,7 @@ #cmakedefine _PSTL_PAR_BACKEND_SERIAL #cmakedefine _PSTL_PAR_BACKEND_TBB +#cmakedefine _PSTL_PAR_BACKEND_OMP #cmakedefine _PSTL_HIDE_FROM_ABI_PER_TU #endif // __PSTL_CONFIG_SITE diff --git a/pstl/include/pstl/internal/omp/parallel_for.h b/pstl/include/pstl/internal/omp/parallel_for.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/omp/parallel_for.h @@ -0,0 +1,62 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
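Editor's note: __parallel_for_body below partitions the iteration space into chunks and hands each chunk to an OpenMP task via taskloop. A self-contained sketch of that idiom follows; the helper name and the fixed chunk size are illustrative only (the patch sizes chunks with __chunk_partitioner), and it assumes compilation with OpenMP enabled.

```c++
#include <cstddef>

// Illustrative only: split [0, n) into fixed-size chunks and make each chunk
// one OpenMP task. The patch's __parallel_for_body does the same, but asks
// __chunk_partitioner for the chunk layout.
template <class Body>
void chunked_taskloop_for(std::size_t n, Body body)
{
    constexpr std::size_t chunk_size = 2048; // assumed, for illustration
    const std::size_t n_chunks = (n + chunk_size - 1) / chunk_size;
#pragma omp taskloop untied mergeable
    for (std::size_t c = 0; c < n_chunks; ++c)
    {
        const std::size_t first = c * chunk_size;
        const std::size_t last = first + chunk_size < n ? first + chunk_size : n;
        for (std::size_t i = first; i < last; ++i)
            body(i); // the real code passes a [first, last) pair instead
    }
}
```

As in the patch, the caller is expected to wrap this in an `omp parallel` / `omp single nowait` pair when no parallel region is active yet.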
+// +//===----------------------------------------------------------------------===// + +#ifndef PSTL_INTERNAL_OMP_PARALLEL_FOR_H +#define PSTL_INTERNAL_OMP_PARALLEL_FOR_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +void +__parallel_for_body(_Index __first, _Index __last, _Fp __f) +{ + // initial partition of the iteration space into chunks + auto __policy = __omp_backend::__chunk_partitioner(__first, __last); + + // To avoid over-subscription we use taskloop for the nested parallelism + _PSTL_PRAGMA(omp taskloop untied mergeable) + for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk) + { + __omp_backend::__process_chunk(__policy, __first, __chunk, __f); + } +} + +//------------------------------------------------------------------------ +// Notation: +// Evaluation of brick f[i,j) for each subrange [i,j) of [first, last) +//------------------------------------------------------------------------ + +template +void +__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +{ + if (omp_in_parallel()) + { + // we don't create a nested parallel region in an existing parallel + // region: just create tasks + __pstl::__omp_backend::__parallel_for_body(__first, __last, __f); + } + else + { + // in any case (nested or non-nested) one parallel region is created and + // only one thread creates a set of tasks + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) { __pstl::__omp_backend::__parallel_for_body(__first, __last, __f); } + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // PSTL_INTERNAL_OMP_PARALLEL_FOR_H diff --git a/pstl/include/pstl/internal/omp/parallel_for_each.h b/pstl/include/pstl/internal/omp/parallel_for_each.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/omp/parallel_for_each.h @@ -0,0 +1,59 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H +#define PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +void +__parallel_for_each_body(_ForwardIterator __first, _ForwardIterator __last, _Fp __f) +{ + using DifferenceType = typename ::std::iterator_traits<_ForwardIterator>::difference_type; + // TODO: Think of an approach to remove the std::distance call + auto __size = ::std::distance(__first, __last); + + _PSTL_PRAGMA(omp taskloop untied mergeable) + for (DifferenceType __index = 0; __index < __size; ++__index) + { + // TODO: Think of an approach to remove the increment here each time. 
+ auto __iter = ::std::next(__first, __index); + __f(*__iter); + } +} + +template +void +__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Fp __f) +{ + if (omp_in_parallel()) + { + // we don't create a nested parallel region in an existing parallel + // region: just create tasks + __pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f); + } + else + { + // in any case (nested or non-nested) one parallel region is created and + // only one thread creates a set of tasks + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) { __pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f); } + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H diff --git a/pstl/include/pstl/internal/omp/parallel_invoke.h b/pstl/include/pstl/internal/omp/parallel_invoke.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/omp/parallel_invoke.h @@ -0,0 +1,50 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H +#define PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +void +__parallel_invoke_body(_F1&& __f1, _F2&& __f2) +{ + _PSTL_PRAGMA(omp taskgroup) + { + _PSTL_PRAGMA(omp task untied mergeable) { std::forward<_F1>(__f1)(); } + _PSTL_PRAGMA(omp task untied mergeable) { std::forward<_F2>(__f2)(); } + } +} + +template +void +__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +{ + if (omp_in_parallel()) + { + __parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); + } + else + { + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + __parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H diff --git a/pstl/include/pstl/internal/omp/parallel_merge.h b/pstl/include/pstl/internal/omp/parallel_merge.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/omp/parallel_merge.h @@ -0,0 +1,94 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
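Editor's note: __parallel_for, __parallel_for_each and __parallel_invoke above all share one shape: if the caller is already inside a parallel region, only spawn tasks; otherwise open a single parallel region, let one thread spawn the tasks, and join. A standalone sketch of that shape in plain OpenMP, with illustrative names and assuming -fopenmp:

```c++
#include <omp.h>

// Run two callables concurrently. Illustrative helper, not part of the patch.
template <class F1, class F2>
void invoke_pair_tasks(F1& f1, F2& f2)
{
#pragma omp taskgroup
    {
#pragma omp task untied mergeable
        f1();
#pragma omp task untied mergeable
        f2();
    }
}

template <class F1, class F2>
void invoke_pair(F1 f1, F2 f2)
{
    if (omp_in_parallel())
    {
        invoke_pair_tasks(f1, f2); // already parallel: just add tasks
    }
    else
    {
#pragma omp parallel
#pragma omp single nowait
        invoke_pair_tasks(f1, f2); // one region, one task producer
    }
}
```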
+// +//===----------------------------------------------------------------------===// + +#ifndef PSTL_INTERNAL_OMP_PARALLEL_MERGE_H +#define PSTL_INTERNAL_OMP_PARALLEL_MERGE_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +void +__parallel_merge_body(std::size_t __size_x, std::size_t __size_y, _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) +{ + + if (__size_x + __size_y <= __omp_backend::__default_chunk_size) + { + __leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp); + return; + } + + _RandomAccessIterator1 __xm; + _RandomAccessIterator2 __ym; + + if (__size_x < __size_y) + { + __ym = __ys + (__size_y / 2); + __xm = ::std::upper_bound(__xs, __xe, *__ym, __comp); + } + else + { + __xm = __xs + (__size_x / 2); + __ym = ::std::lower_bound(__ys, __ye, *__xm, __comp); + } + + auto __zm = __zs + (__xm - __xs) + (__ym - __ys); + + _PSTL_PRAGMA(omp task untied mergeable default(none) + firstprivate(__xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge)) + __parallel_merge_body((__xm - __xs), (__ym - __ys), __xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge); + + _PSTL_PRAGMA(omp task untied mergeable default(none) + firstprivate(__xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge)) + __parallel_merge_body((__xe - __xm), (__ye - __ym), __xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge); + + _PSTL_PRAGMA(omp taskwait) +} + +template +void +__parallel_merge(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, + _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, + _LeafMerge __leaf_merge) + +{ + std::size_t __size_x = __xe - __xs; + std::size_t __size_y = __ye - __ys; + + /* + * Run the merge in parallel by chunking it up. Use the smaller range (if any) as the iteration range, and the + * larger range as the search range. + */ + + if (omp_in_parallel()) + { + __parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge); + } + else + { + _PSTL_PRAGMA(omp parallel) + { + _PSTL_PRAGMA(omp single nowait) + __parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge); + } + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // PSTL_INTERNAL_OMP_PARALLEL_MERGE_H diff --git a/pstl/include/pstl/internal/omp/parallel_reduce.h b/pstl/include/pstl/internal/omp/parallel_reduce.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/omp/parallel_reduce.h @@ -0,0 +1,68 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
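Editor's note: the recursive merge above bisects the larger sorted input at its midpoint and binary-searches the matching position in the other input, so the two halves of the output can be produced by independent tasks. A hedged standalone sketch of just that split computation, with illustrative names:

```c++
#include <algorithm>
#include <utility>

// Compute the split points used by __parallel_merge_body: bisect the larger
// sorted range and binary-search the corresponding position in the other.
// Illustrative helper, not part of the patch.
template <class It1, class It2, class Compare>
std::pair<It1, It2> merge_split(It1 xs, It1 xe, It2 ys, It2 ye, Compare comp)
{
    if (xe - xs < ye - ys)
    {
        It2 ym = ys + (ye - ys) / 2;                       // midpoint of the larger range
        return {std::upper_bound(xs, xe, *ym, comp), ym};  // matching cut in the other
    }
    It1 xm = xs + (xe - xs) / 2;
    return {xm, std::lower_bound(ys, ye, *xm, comp)};
}
```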
+// +//===----------------------------------------------------------------------===// + +#ifndef PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H +#define PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +_Value +__parallel_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity, + _RealBody __real_body, _Reduction __reduce) +{ + auto __middle = __first + ((__last - __first) / 2); + _Value __v1(__identity), __v2(__identity); + __parallel_invoke_body( + [&]() { __v1 = __parallel_reduce_body(__first, __middle, __identity, __real_body, __reduce); }, + [&]() { __v2 = __parallel_reduce_body(__middle, __last, __identity, __real_body, __reduce); }); + + return __reduce(__v1, __v2); +} + +//------------------------------------------------------------------------ +// Notation: +// r(i,j,init) returns reduction of init with reduction over [i,j) +// c(x,y) combines values x and y that were the result of r +//------------------------------------------------------------------------ + +template +_Value +__parallel_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity, + _RealBody __real_body, _Reduction __reduction) +{ + // We don't create a nested parallel region in an existing parallel region: + // just create tasks. + if (omp_in_parallel()) + { + return __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction); + } + + // In any case (nested or non-nested) one parallel region is created and only + // one thread creates a set of tasks. + _Value __res = __identity; + + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + { + __res = __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction); + } + + return __res; +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H diff --git a/pstl/include/pstl/internal/omp/parallel_scan.h b/pstl/include/pstl/internal/omp/parallel_scan.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/omp/parallel_scan.h @@ -0,0 +1,136 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
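Editor's note: the strict scan in the file below uses an upsweep/downsweep scheme (reduce each tile, then hand each tile its exclusive carry-in). As a reference point for what it computes, here is a serial equivalent expressed against the same index-based __reduce/__scan callback interface; names are illustrative and the real code also reports the grand total through __apex.

```c++
#include <cstddef>

// Serial reference for the tiled scan below: reduce each tile, then scan it
// with the combined sum of everything before it. Illustrative only.
template <class T, class Reduce, class Combine, class Scan>
void strict_scan_reference(std::size_t n, std::size_t tile, T initial,
                           Reduce reduce, Combine combine, Scan scan)
{
    T carry = initial;
    for (std::size_t i = 0; i < n; i += tile)
    {
        const std::size_t len = i + tile < n ? tile : n - i;
        scan(i, len, carry);                     // scan this tile with its carry-in
        carry = combine(carry, reduce(i, len));  // fold in the tile's own sum
    }
}
```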
+// +//===----------------------------------------------------------------------===// + +#ifndef PSTL_INTERNAL_OMP_PARALLEL_SCAN_H +#define PSTL_INTERNAL_OMP_PARALLEL_SCAN_H + +#include "parallel_invoke.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +_Index +__split(_Index __m) +{ + _Index __k = 1; + while (2 * __k < __m) + __k *= 2; + return __k; +} + +template +void +__upsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Rp __reduce, _Cp __combine) +{ + if (__m == 1) + __r[0] = __reduce(__i * __tilesize, __lastsize); + else + { + _Index __k = __split(__m); + __omp_backend::__parallel_invoke_body( + [=] { __omp_backend::__upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); }, + [=] { + __omp_backend::__upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, __combine); + }); + if (__m == 2 * __k) + __r[__m - 1] = __combine(__r[__k - 1], __r[__m - 1]); + } +} + +template +void +__downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Tp __initial, _Cp __combine, + _Sp __scan) +{ + if (__m == 1) + __scan(__i * __tilesize, __lastsize, __initial); + else + { + const _Index __k = __split(__m); + __omp_backend::__parallel_invoke_body( + [=] { __omp_backend::__downsweep(__i, __k, __tilesize, __r, __tilesize, __initial, __combine, __scan); }, + // Assumes that __combine never throws. + // TODO: Consider adding a requirement for user functors to be constant. + [=, &__combine] + { + __omp_backend::__downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, + __combine(__initial, __r[__k - 1]), __combine, __scan); + }); + } +} + +template +void +__parallel_strict_scan_body(_Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) +{ + _Index __p = omp_get_num_threads(); + const _Index __slack = 4; + _Index __tilesize = (__n - 1) / (__slack * __p) + 1; + _Index __m = (__n - 1) / __tilesize; + __buffer<_Tp> __buf(__m + 1); + _Tp* __r = __buf.get(); + + __omp_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, __combine); + + std::size_t __k = __m + 1; + _Tp __t = __r[__k - 1]; + while ((__k &= __k - 1)) + { + __t = __combine(__r[__k - 1], __t); + } + + __apex(__combine(__initial, __t)); + __omp_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial, + __combine, __scan); +} + +template +void +__parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, + _Ap __apex) +{ + if (__n <= __default_chunk_size) + { + _Tp __sum = __initial; + if (__n) + { + __sum = __combine(__sum, __reduce(_Index(0), __n)); + } + __apex(__sum); + if (__n) + { + __scan(_Index(0), __n, __initial); + } + return; + } + + if (omp_in_parallel()) + { + __pstl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine, + __scan, __apex); + } + else + { + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + { + __pstl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine, + __scan, __apex); + } + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // PSTL_INTERNAL_OMP_PARALLEL_SCAN_H diff --git a/pstl/include/pstl/internal/omp/parallel_stable_partial_sort.h b/pstl/include/pstl/internal/omp/parallel_stable_partial_sort.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/omp/parallel_stable_partial_sort.h @@ -0,0 +1,32 @@ +// -*- C++ 
-*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H +#define PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +void +__parallel_stable_partial_sort(_RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, + _LeafSort __leaf_sort, std::size_t /* __nsort */) +{ + // TODO: "Parallel partial sort needs to be implemented."); + __leaf_sort(__xs, __xe, __comp); +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H diff --git a/pstl/include/pstl/internal/omp/parallel_stable_sort.h b/pstl/include/pstl/internal/omp/parallel_stable_sort.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/omp/parallel_stable_sort.h @@ -0,0 +1,157 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H +#define PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +namespace __sort_details +{ +struct __move_value +{ + template + void + operator()(Iterator __x, OutputIterator __z) const + { + *__z = ::std::move(*__x); + } +}; + +template +OutputIterator +__parallel_move_range(Iterator __first1, Iterator __last1, OutputIterator __d_first) +{ + std::size_t __size = std::distance(__first1, __last1); + + // Perform serial moving of small chunks + + if (__size <= __default_chunk_size) + { + return ::std::move(__first1, __last1, __d_first); + } + + // Perform parallel moving of larger chunks + auto __policy = __omp_backend::__chunk_partitioner(__first1, __last1); + + _PSTL_PRAGMA(omp taskloop) + for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk) + { + __omp_backend::__process_chunk(__policy, __first1, __chunk, + [&](auto __chunk_first, auto __chunk_last) + { + auto __chunk_offset = ::std::distance(__first1, __chunk_first); + auto __output = __d_first + __chunk_offset; + ::std::move(__chunk_first, __chunk_last, __output); + }); + } + + return __d_first + __size; +} + +struct __move_range +{ + template + OutputIterator + operator()(Iterator __first1, Iterator __last1, OutputIterator __d_first) const + { + return __parallel_move_range(__first1, __last1, __d_first); + } +}; +} // namespace __sort_details + +template +void +__parallel_stable_sort_body(_RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, + _LeafSort __leaf_sort) +{ + using _ValueType = typename std::iterator_traits<_RandomAccessIterator>::value_type; + using _VecType = typename std::vector<_ValueType>; + using _OutputIterator = typename _VecType::iterator; + using _MoveValue = typename __omp_backend::__sort_details::__move_value; + using _MoveRange = __omp_backend::__sort_details::__move_range; + + if 
(__should_run_serial(__xs, __xe)) + { + __leaf_sort(__xs, __xe, __comp); + } + else + { + std::size_t __size = __xe - __xs; + auto __mid = __xs + (__size / 2); + __parallel_invoke_body([&]() { __parallel_stable_sort_body(__xs, __mid, __comp, __leaf_sort); }, + [&]() { __parallel_stable_sort_body(__mid, __xe, __comp, __leaf_sort); }); + + // Perform a parallel merge of the sorted ranges into __output. + _VecType __output(__size); + _MoveValue __move_value; + _MoveRange __move_range; + __utils::__serial_move_merge __merge(__size); + __parallel_merge_body( + __mid - __xs, __xe - __mid, __xs, __mid, __mid, __xe, __output.begin(), __comp, + [&__merge, &__move_value, &__move_range](_RandomAccessIterator __as, _RandomAccessIterator __ae, + _RandomAccessIterator __bs, _RandomAccessIterator __be, + _OutputIterator __cs, _Compare __comp) + { __merge(__as, __ae, __bs, __be, __cs, __comp, __move_value, __move_value, __move_range, __move_range); }); + + // Move the values from __output back in the original source range. + __omp_backend::__sort_details::__parallel_move_range(__output.begin(), __output.end(), __xs); + } +} + +template +void +__parallel_stable_sort(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator __xs, _RandomAccessIterator __xe, + _Compare __comp, _LeafSort __leaf_sort, std::size_t __nsort = 0) +{ + auto __count = static_cast(__xe - __xs); + if (__count <= __default_chunk_size || __nsort < __count) + { + __leaf_sort(__xs, __xe, __comp); + return; + } + + // TODO: the partial sort implementation should + // be shared with the other backends. + + if (omp_in_parallel()) + { + if (__count <= __nsort) + { + __parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort); + } + else + { + __parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort); + } + } + else + { + _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + if (__count <= __nsort) + { + __parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort); + } + else + { + __parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort); + } + } +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H diff --git a/pstl/include/pstl/internal/omp/parallel_transform_reduce.h b/pstl/include/pstl/internal/omp/parallel_transform_reduce.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/omp/parallel_transform_reduce.h @@ -0,0 +1,116 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
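Editor's note: __transform_reduce_body below keeps one accumulator per thread because the initial value and the combiner are arbitrary functors, which rules out a plain OpenMP reduction clause. For contrast, the built-in special case could be written directly; a hypothetical sketch (names and the plus-reduction are assumptions, not part of the patch):

```c++
#include <cstddef>

// Simple special case only: plus-reduction with an OpenMP reduction clause.
// The patch's version keeps a per-thread accumulator instead, so it can
// support arbitrary combiner and initial-value functors.
inline double sum_of_squares(const double* first, std::size_t n)
{
    double sum = 0.0;
#pragma omp parallel for reduction(+ : sum)
    for (std::size_t i = 0; i < n; ++i)
        sum += first[i] * first[i];
    return sum;
}
```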
+// +//===----------------------------------------------------------------------===// + +#ifndef PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H +#define PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +//------------------------------------------------------------------------ +// parallel_transform_reduce +// +// Notation: +// r(i,j,init) returns reduction of init with reduction over [i,j) +// u(i) returns f(i,i+1,identity) for a hypothetical left identity element +// of r c(x,y) combines values x and y that were the result of r or u +//------------------------------------------------------------------------ + +template +auto +__transform_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryOp __unary_op, _Value __init, + _Combiner __combiner, _Reduction __reduction) +{ + using _Size = std::size_t; + const _Size __num_threads = omp_get_num_threads(); + const _Size __size = __last - __first; + + // Initial partition of the iteration space into chunks. If the range is too small, + // this will result in a nonsense policy, so we check on the size as well below. + auto __policy = __omp_backend::__chunk_partitioner(__first + __num_threads, __last); + + if (__size <= __num_threads || __policy.__n_chunks < 2) + { + return __reduction(__first, __last, __init); + } + + // Here, we cannot use OpenMP UDR because we must store the init value in + // the combiner and it will be used several times. Although there should be + // the only one; we manually generate the identity elements for each thread. + ::std::vector<_Value> __accums; + __accums.reserve(__num_threads); + + // initialize accumulators for all threads + for (_Size __i = 0; __i < __num_threads; ++__i) + { + __accums.emplace_back(__unary_op(__first + __i)); + } + + // main loop + _PSTL_PRAGMA(omp taskloop shared(__accums)) + for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk) + { + __omp_backend::__process_chunk(__policy, __first + __num_threads, __chunk, + [&](auto __chunk_first, auto __chunk_last) + { + auto __thread_num = omp_get_thread_num(); + __accums[__thread_num] = + __reduction(__chunk_first, __chunk_last, __accums[__thread_num]); + }); + } + + // combine by accumulators + for (_Size __i = 0; __i < __num_threads; ++__i) + { + __init = __combiner(__init, __accums[__i]); + } + + return __init; +} + +template _Value + __parallel_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _UnaryOp __unary_op, _Value __init, _Combiner __combiner, _Reduction __reduction) +{ + _Value __result = __init; + if (omp_in_parallel()) + { + // We don't create a nested parallel region in an existing parallel + // region: just create tasks + __result = __pstl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner, + __reduction); + } + else + { + // Create a parallel region, and a single thread will create tasks + // for the region. 
+ _PSTL_PRAGMA(omp parallel) + _PSTL_PRAGMA(omp single nowait) + { + __result = __pstl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner, + __reduction); + } + } + + return __result; +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H diff --git a/pstl/include/pstl/internal/omp/parallel_transform_scan.h b/pstl/include/pstl/internal/omp/parallel_transform_scan.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/omp/parallel_transform_scan.h @@ -0,0 +1,32 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H +#define PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H + +#include "util.h" + +namespace __pstl +{ +namespace __omp_backend +{ + +template +_Tp +__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce, + _Sp __scan) +{ + // TODO: parallelize this function. + return __scan(_Index(0), __n, __init); +} + +} // namespace __omp_backend +} // namespace __pstl +#endif // PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H diff --git a/pstl/include/pstl/internal/omp/util.h b/pstl/include/pstl/internal/omp/util.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/omp/util.h @@ -0,0 +1,177 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef PSTL_INTERNAL_OMP_UTIL_H +#define PSTL_INTERNAL_OMP_UTIL_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../parallel_backend_utils.h" +#include "../unseq_backend_simd.h" +#include "../utils.h" + +#if !defined(_OPENMP) +# error _OPENMP not defined. +#endif + +// Portability "#pragma" definition +#ifdef _MSC_VER +# define _PSTL_PRAGMA(x) __pragma(x) +#else +# define _PSTL_PRAGMA(x) _Pragma(# x) +#endif + +_PSTL_HIDE_FROM_ABI_PUSH + +namespace __pstl +{ +namespace __omp_backend +{ + +//------------------------------------------------------------------------ +// use to cancel execution +//------------------------------------------------------------------------ +inline void +__cancel_execution() +{ + // TODO: Figure out how to make cancelation work. 
+} + +//------------------------------------------------------------------------ +// raw buffer +//------------------------------------------------------------------------ + +template +class __buffer +{ + std::allocator<_Tp> __allocator_; + _Tp* __ptr_; + const std::size_t __buf_size_; + __buffer(const __buffer&) = delete; + void + operator=(const __buffer&) = delete; + + public: + __buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {} + + operator bool() const { return __ptr_ != nullptr; } + + _Tp* + get() const + { + return __ptr_; + } + ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); } +}; + +// Preliminary size of each chunk: requires further discussion +inline constexpr std::size_t __default_chunk_size = 2048; + +// Convenience function to determine when we should run serial. +template ::value, bool> = true> +constexpr auto +__should_run_serial(_Iterator __first, _Iterator __last) -> bool +{ + using _difference_type = typename ::std::iterator_traits<_Iterator>::difference_type; + auto __size = ::std::distance(__first, __last); + return __size <= static_cast<_difference_type>(__default_chunk_size); +} + +template ::value, bool> = true> +constexpr auto +__should_run_serial(_Index __first, _Index __last) -> bool +{ + using _difference_type = _Index; + auto __size = __last - __first; + return __size <= static_cast<_difference_type>(__default_chunk_size); +} + +struct __chunk_metrics +{ + std::size_t __n_chunks; + std::size_t __chunk_size; + std::size_t __first_chunk_size; +}; + +// The iteration space partitioner according to __requested_chunk_size +template +auto +__chunk_partitioner(_RandomAccessIterator __first, _RandomAccessIterator __last, + _Size __requested_chunk_size = __default_chunk_size) -> __chunk_metrics +{ + /* + * This algorithm improves distribution of elements in chunks by avoiding + * small tail chunks. The leftover elements that do not fit neatly into + * the chunk size are redistributed to early chunks. This improves + * utilization of the processor's prefetch and reduces the number of + * tasks needed by 1. + */ + + const _Size __n = __last - __first; + _Size __n_chunks = 0; + _Size __chunk_size = 0; + _Size __first_chunk_size = 0; + if (__n < __requested_chunk_size) + { + __chunk_size = __n; + __first_chunk_size = __n; + __n_chunks = 1; + return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size}; + } + + __n_chunks = (__n / __requested_chunk_size) + 1; + __chunk_size = __n / __n_chunks; + __first_chunk_size = __chunk_size; + const _Size __n_leftover_items = __n - (__n_chunks * __chunk_size); + + if (__n_leftover_items == __chunk_size) + { + __n_chunks += 1; + return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size}; + } + else if (__n_leftover_items == 0) + { + __first_chunk_size = __chunk_size; + return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size}; + } + + const _Size __n_extra_items_per_chunk = __n_leftover_items / __n_chunks; + const _Size __n_final_leftover_items = __n_leftover_items - (__n_extra_items_per_chunk * __n_chunks); + + __chunk_size += __n_extra_items_per_chunk; + __first_chunk_size = __chunk_size + __n_final_leftover_items; + + return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size}; +} + +template +void +__process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chunk_index, _F __f) +{ + auto __this_chunk_size = __chunk_index == 0 ? __metrics.__first_chunk_size : __metrics.__chunk_size; + auto __index = __chunk_index == 0 ? 
0 + : (__chunk_index * __metrics.__chunk_size) + + (__metrics.__first_chunk_size - __metrics.__chunk_size); + auto __first = __base + __index; + auto __last = __first + __this_chunk_size; + __f(__first, __last); +} + +} // namespace __omp_backend +} // namespace __pstl + +#endif // PSTL_INTERNAL_OMP_UTIL_H diff --git a/pstl/include/pstl/internal/parallel_backend.h b/pstl/include/pstl/internal/parallel_backend.h --- a/pstl/include/pstl/internal/parallel_backend.h +++ b/pstl/include/pstl/internal/parallel_backend.h @@ -24,6 +24,12 @@ { namespace __par_backend = __tbb_backend; } +#elif defined(_PSTL_PAR_BACKEND_OMP) +# include "parallel_backend_omp.h" +namespace __pstl +{ +namespace __par_backend = __omp_backend; +} #else _PSTL_PRAGMA_MESSAGE("Parallel backend was not specified"); #endif diff --git a/pstl/include/pstl/internal/parallel_backend_omp.h b/pstl/include/pstl/internal/parallel_backend_omp.h new file mode 100644 --- /dev/null +++ b/pstl/include/pstl/internal/parallel_backend_omp.h @@ -0,0 +1,58 @@ +// -*- C++ -*- +// -*-===----------------------------------------------------------------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef _PSTL_PARALLEL_BACKEND_OMP_H +#define _PSTL_PARALLEL_BACKEND_OMP_H + +//------------------------------------------------------------------------ +// parallel_invoke +//------------------------------------------------------------------------ + +#include "./omp/parallel_invoke.h" + +//------------------------------------------------------------------------ +// parallel_for +//------------------------------------------------------------------------ + +#include "./omp/parallel_for.h" + +//------------------------------------------------------------------------ +// parallel_for_each +//------------------------------------------------------------------------ + +#include "./omp/parallel_for_each.h" + +//------------------------------------------------------------------------ +// parallel_reduce +//------------------------------------------------------------------------ + +#include "./omp/parallel_reduce.h" +#include "./omp/parallel_transform_reduce.h" + +//------------------------------------------------------------------------ +// parallel_scan +//------------------------------------------------------------------------ + +#include "./omp/parallel_scan.h" +#include "./omp/parallel_transform_scan.h" + +//------------------------------------------------------------------------ +// parallel_stable_sort +//------------------------------------------------------------------------ + +#include "./omp/parallel_stable_partial_sort.h" +#include "./omp/parallel_stable_sort.h" + +//------------------------------------------------------------------------ +// parallel_merge +//------------------------------------------------------------------------ +#include "./omp/parallel_merge.h" + +#endif //_PSTL_PARALLEL_BACKEND_OMP_H diff --git a/pstl/include/pstl/internal/pstl_config.h b/pstl/include/pstl/internal/pstl_config.h --- a/pstl/include/pstl/internal/pstl_config.h +++ b/pstl/include/pstl/internal/pstl_config.h @@ -12,31 +12,33 @@ #include <__pstl_config_site> -// The version is XYYZ, where X is major, YY is minor, and Z is patch (i.e. 
X.YY.Z) +// The version is XYYZ, where X is major, YY is minor, and Z is patch (i.e. +// X.YY.Z) #define _PSTL_VERSION 14000 #define _PSTL_VERSION_MAJOR (_PSTL_VERSION / 1000) #define _PSTL_VERSION_MINOR ((_PSTL_VERSION % 1000) / 10) #define _PSTL_VERSION_PATCH (_PSTL_VERSION % 10) -#if !defined(_PSTL_PAR_BACKEND_SERIAL) && !defined(_PSTL_PAR_BACKEND_TBB) -# error "A parallel backend must be specified" +#if !defined(_PSTL_PAR_BACKEND_SERIAL) && !defined(_PSTL_PAR_BACKEND_TBB) && \ + !defined(_PSTL_PAR_BACKEND_OMP) +#error "A parallel backend must be specified" #endif // Check the user-defined macro for warnings #if defined(PSTL_USAGE_WARNINGS) -# define _PSTL_USAGE_WARNINGS +#define _PSTL_USAGE_WARNINGS #endif #if !defined(_PSTL_ASSERT) -# include -# define _PSTL_ASSERT(pred) (assert((pred))) +#include +#define _PSTL_ASSERT(pred) (assert((pred))) #endif // Portability "#pragma" definition #ifdef _MSC_VER -# define _PSTL_PRAGMA(x) __pragma(x) +#define _PSTL_PRAGMA(x) __pragma(x) #else -# define _PSTL_PRAGMA(x) _Pragma(# x) +#define _PSTL_PRAGMA(x) _Pragma(#x) #endif #define _PSTL_STRING_AUX(x) #x @@ -44,106 +46,117 @@ #define _PSTL_STRING_CONCAT(x, y) x #y #ifdef _PSTL_HIDE_FROM_ABI_PER_TU -# define _PSTL_HIDE_FROM_ABI_PUSH \ - _Pragma("clang attribute push(__attribute__((internal_linkage)), apply_to=any(function,record))") -# define _PSTL_HIDE_FROM_ABI_POP _Pragma("clang attribute pop") +#define _PSTL_HIDE_FROM_ABI_PUSH \ + _Pragma("clang attribute push(__attribute__((internal_linkage)), " \ + "apply_to=any(function,record))") +#define _PSTL_HIDE_FROM_ABI_POP _Pragma("clang attribute pop") #else -# define _PSTL_HIDE_FROM_ABI_PUSH /* nothing */ -# define _PSTL_HIDE_FROM_ABI_POP /* nothing */ +#define _PSTL_HIDE_FROM_ABI_PUSH /* nothing */ +#define _PSTL_HIDE_FROM_ABI_POP /* nothing */ #endif -// note that when ICC or Clang is in use, _PSTL_GCC_VERSION might not fully match -// the actual GCC version on the system. -#define _PSTL_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +// note that when ICC or Clang is in use, _PSTL_GCC_VERSION might not fully +// match the actual GCC version on the system. 
+#define _PSTL_GCC_VERSION \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) #if defined(__clang__) // according to clang documentation, version can be vendor specific -# define _PSTL_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#define _PSTL_CLANG_VERSION \ + (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) #endif // Enable SIMD for compilers that support OpenMP 4.0 -#if (defined(_OPENMP) && _OPENMP >= 201307) || \ - (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1600) || \ - (!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 40900) || \ +#if (defined(_OPENMP) && _OPENMP >= 201307) || \ + (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1600) || \ + (!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 40900) || \ defined(__clang__) -# define _PSTL_PRAGMA_SIMD _PSTL_PRAGMA(omp simd) -# define _PSTL_PRAGMA_DECLARE_SIMD _PSTL_PRAGMA(omp declare simd) -# define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _PSTL_PRAGMA(omp simd reduction(PRM)) +#define _PSTL_PRAGMA_SIMD _PSTL_PRAGMA(omp simd) +#define _PSTL_PRAGMA_DECLARE_SIMD _PSTL_PRAGMA(omp declare simd) +#define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _PSTL_PRAGMA(omp simd reduction(PRM)) #elif !defined(_MSC_VER) //#pragma simd -# define _PSTL_PRAGMA_SIMD _PSTL_PRAGMA(simd) -# define _PSTL_PRAGMA_DECLARE_SIMD -# define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _PSTL_PRAGMA(simd reduction(PRM)) -#else //no simd -# define _PSTL_PRAGMA_SIMD -# define _PSTL_PRAGMA_DECLARE_SIMD -# define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) -#endif //Enable SIMD +#define _PSTL_PRAGMA_SIMD _PSTL_PRAGMA(simd) +#define _PSTL_PRAGMA_DECLARE_SIMD +#define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _PSTL_PRAGMA(simd reduction(PRM)) +#else // no simd +#define _PSTL_PRAGMA_SIMD +#define _PSTL_PRAGMA_DECLARE_SIMD +#define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) +#endif // Enable SIMD #if defined(__INTEL_COMPILER) -# define _PSTL_PRAGMA_FORCEINLINE _PSTL_PRAGMA(forceinline) +#define _PSTL_PRAGMA_FORCEINLINE _PSTL_PRAGMA(forceinline) #else -# define _PSTL_PRAGMA_FORCEINLINE +#define _PSTL_PRAGMA_FORCEINLINE #endif #if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900 -# define _PSTL_PRAGMA_SIMD_SCAN(PRM) _PSTL_PRAGMA(omp simd reduction(inscan, PRM)) -# define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan inclusive(PRM)) -# define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan exclusive(PRM)) +#define _PSTL_PRAGMA_SIMD_SCAN(PRM) \ + _PSTL_PRAGMA(omp simd reduction(inscan, PRM)) +#define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) \ + _PSTL_PRAGMA(omp scan inclusive(PRM)) +#define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) \ + _PSTL_PRAGMA(omp scan exclusive(PRM)) #else -# define _PSTL_PRAGMA_SIMD_SCAN(PRM) -# define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) -# define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) +#define _PSTL_PRAGMA_SIMD_SCAN(PRM) +#define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) +#define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) #endif -// Should be defined to 1 for environments with a vendor implementation of C++17 execution policies -#define _PSTL_CPP17_EXECUTION_POLICIES_PRESENT (_MSC_VER >= 1912) +// Should be defined to 1 for environments with a vendor implementation of C++17 +// execution policies +#define _PSTL_CPP17_EXECUTION_POLICIES_PRESENT \ + (_MSC_VER >= 1912 && _MSVC_LANG >= 201703L) || \ + (_GLIBCXX_RELEASE >= 9 && __GLIBCXX__ >= 20190503 && \ + __cplusplus >= 201703L) -#if (defined(_MSC_VER) && _MSC_VER >= 1900) || \ - __cplusplus >= 201300L || \ +#if (defined(_MSC_VER) && _MSC_VER >= 1900) || 
__cplusplus >= 201300L || \ __cpp_lib_robust_nonmodifying_seq_ops == 201304 -# define _PSTL_CPP14_2RANGE_MISMATCH_EQUAL_PRESENT +#define _PSTL_CPP14_2RANGE_MISMATCH_EQUAL_PRESENT #endif -#if (defined(_MSC_VER) && _MSC_VER >= 1900) || \ - __cplusplus >= 201402L || \ +#if (defined(_MSC_VER) && _MSC_VER >= 1900) || __cplusplus >= 201402L || \ __cpp_lib_make_reverse_iterator == 201402 -# define _PSTL_CPP14_MAKE_REVERSE_ITERATOR_PRESENT +#define _PSTL_CPP14_MAKE_REVERSE_ITERATOR_PRESENT #endif #if (defined(_MSC_VER) && _MSC_VER >= 1900) || __cplusplus >= 201402L -# define _PSTL_CPP14_INTEGER_SEQUENCE_PRESENT +#define _PSTL_CPP14_INTEGER_SEQUENCE_PRESENT #endif -#if (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1700) || \ - (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918) || \ +#if (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1700) || \ + (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918) || \ __cplusplus >= 201402L -# define _PSTL_CPP14_VARIABLE_TEMPLATES_PRESENT +#define _PSTL_CPP14_VARIABLE_TEMPLATES_PRESENT #endif #if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1800 -# define _PSTL_EARLYEXIT_PRESENT -# define _PSTL_MONOTONIC_PRESENT +#define _PSTL_EARLYEXIT_PRESENT +#define _PSTL_MONOTONIC_PRESENT #endif -#if (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900) || \ - (!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 40900) || \ +#if (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900) || \ + (!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 40900) || \ (defined(_OPENMP) && _OPENMP >= 201307) -# define _PSTL_UDR_PRESENT +#define _PSTL_UDR_PRESENT #endif -#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900 && __INTEL_COMPILER_BUILD_DATE >= 20180626 -# define _PSTL_UDS_PRESENT +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900 && \ + __INTEL_COMPILER_BUILD_DATE >= 20180626 +#define _PSTL_UDS_PRESENT #endif #if defined(_PSTL_EARLYEXIT_PRESENT) -# define _PSTL_PRAGMA_SIMD_EARLYEXIT _PSTL_PRAGMA(omp simd early_exit) +#define _PSTL_PRAGMA_SIMD_EARLYEXIT _PSTL_PRAGMA(omp simd early_exit) #else -# define _PSTL_PRAGMA_SIMD_EARLYEXIT +#define _PSTL_PRAGMA_SIMD_EARLYEXIT #endif #if defined(_PSTL_MONOTONIC_PRESENT) -# define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(PRM) _PSTL_PRAGMA(omp ordered simd monotonic(PRM)) -# define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(PRM1, PRM2) _PSTL_PRAGMA(omp ordered simd monotonic(PRM1, PRM2)) +#define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(PRM) \ + _PSTL_PRAGMA(omp ordered simd monotonic(PRM)) +#define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(PRM1, PRM2) \ + _PSTL_PRAGMA(omp ordered simd monotonic(PRM1, PRM2)) #else -# define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(PRM) -# define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(PRM1, PRM2) +#define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(PRM) +#define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(PRM1, PRM2) #endif // Declaration of reduction functor, where @@ -153,46 +166,52 @@ // omp_out - refers to the final value of the combiner operator // omp_priv - refers to the private copy of the initial value // omp_orig - refers to the original variable to be reduced -#define _PSTL_PRAGMA_DECLARE_REDUCTION(NAME, OP) \ - _PSTL_PRAGMA(omp declare reduction(NAME:OP : omp_out(omp_in)) initializer(omp_priv = omp_orig)) +#define _PSTL_PRAGMA_DECLARE_REDUCTION(NAME, OP) \ + _PSTL_PRAGMA(omp declare reduction(NAME:OP \ + : omp_out(omp_in)) \ + initializer(omp_priv = omp_orig)) #if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1600 -# define _PSTL_PRAGMA_VECTOR_UNALIGNED _PSTL_PRAGMA(vector unaligned) 
+#define _PSTL_PRAGMA_VECTOR_UNALIGNED _PSTL_PRAGMA(vector unaligned) #else -# define _PSTL_PRAGMA_VECTOR_UNALIGNED +#define _PSTL_PRAGMA_VECTOR_UNALIGNED #endif // Check the user-defined macro to use non-temporal stores #if defined(PSTL_USE_NONTEMPORAL_STORES) && (__INTEL_COMPILER >= 1600) -# define _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED _PSTL_PRAGMA(vector nontemporal) +#define _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED _PSTL_PRAGMA(vector nontemporal) #else -# define _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED +#define _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED #endif -#if defined(_MSC_VER) || defined(__INTEL_COMPILER) // the preprocessors don't type a message location -# define _PSTL_PRAGMA_LOCATION __FILE__ ":" _PSTL_STRING(__LINE__) ": [Parallel STL message]: " +#if defined(_MSC_VER) || \ + defined( \ + __INTEL_COMPILER) // the preprocessors don't type a message location +#define _PSTL_PRAGMA_LOCATION \ + __FILE__ ":" _PSTL_STRING(__LINE__) ": [Parallel STL message]: " #else -# define _PSTL_PRAGMA_LOCATION " [Parallel STL message]: " +#define _PSTL_PRAGMA_LOCATION " [Parallel STL message]: " #endif -#define _PSTL_PRAGMA_MESSAGE_IMPL(x) _PSTL_PRAGMA(message(_PSTL_STRING_CONCAT(_PSTL_PRAGMA_LOCATION, x))) +#define _PSTL_PRAGMA_MESSAGE_IMPL(x) \ + _PSTL_PRAGMA(message(_PSTL_STRING_CONCAT(_PSTL_PRAGMA_LOCATION, x))) #if defined(_PSTL_USAGE_WARNINGS) -# define _PSTL_PRAGMA_MESSAGE(x) _PSTL_PRAGMA_MESSAGE_IMPL(x) -# define _PSTL_PRAGMA_MESSAGE_POLICIES(x) _PSTL_PRAGMA_MESSAGE_IMPL(x) +#define _PSTL_PRAGMA_MESSAGE(x) _PSTL_PRAGMA_MESSAGE_IMPL(x) +#define _PSTL_PRAGMA_MESSAGE_POLICIES(x) _PSTL_PRAGMA_MESSAGE_IMPL(x) #else -# define _PSTL_PRAGMA_MESSAGE(x) -# define _PSTL_PRAGMA_MESSAGE_POLICIES(x) +#define _PSTL_PRAGMA_MESSAGE(x) +#define _PSTL_PRAGMA_MESSAGE_POLICIES(x) #endif // broken macros -#if (defined(__GLIBCXX__) && __GLIBCXX__ < 20150716) || \ +#if (defined(__GLIBCXX__) && __GLIBCXX__ < 20150716) || \ (defined(_MSC_VER) && _MSC_VER < 1800) -# define _PSTL_CPP11_STD_ROTATE_BROKEN +#define _PSTL_CPP11_STD_ROTATE_BROKEN #endif #if defined(__INTEL_COMPILER) && __INTEL_COMPILER == 1800 -# define _PSTL_ICC_18_OMP_SIMD_BROKEN +#define _PSTL_ICC_18_OMP_SIMD_BROKEN #endif #endif /* _PSTL_CONFIG_H */ diff --git a/pstl/include/pstl/internal/utils.h b/pstl/include/pstl/internal/utils.h --- a/pstl/include/pstl/internal/utils.h +++ b/pstl/include/pstl/internal/utils.h @@ -12,6 +12,7 @@ #include <new> #include <iterator> +#include <type_traits> _PSTL_HIDE_FROM_ABI_PUSH @@ -21,7 +22,7 @@ { template <typename _Fp> -typename std::result_of<_Fp()>::type +typename std::invoke_result<_Fp>::type __except_handler(_Fp __f) { try @@ -63,14 +64,14 @@ } template <typename _F1, typename _F2> -typename std::result_of<_F1()>::type +typename std::invoke_result<_F1>::type __invoke_if_else(std::true_type, _F1 __f1, _F2) { return __f1(); } template <typename _F1, typename _F2> -typename std::result_of<_F2()>::type +typename std::invoke_result<_F2>::type __invoke_if_else(std::false_type, _F1, _F2 __f2) { return __f2();
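Editor's note: for anyone trying the backend out, once the library is configured with PSTL_PARALLEL_BACKEND=omp and built with OpenMP enabled (the CMake change above adds -fopenmp=libomp), ordinary parallel-policy calls dispatch to the new __omp_backend. A minimal, hypothetical smoke test:

```c++
#include <execution>
#include <numeric>
#include <vector>

// Hypothetical smoke test: a par-policy reduction that exercises the OpenMP
// backend when the library was configured with PSTL_PARALLEL_BACKEND=omp.
int main()
{
    std::vector<long> v(1 << 20, 1);
    long sum = std::reduce(std::execution::par, v.begin(), v.end(), 0L);
    return sum == (1L << 20) ? 0 : 1;
}
```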