Index: include/pstl/internal/algorithm_impl.h =================================================================== --- include/pstl/internal/algorithm_impl.h +++ include/pstl/internal/algorithm_impl.h @@ -65,10 +65,10 @@ _IsVector __is_vector, /*parallel=*/std::true_type) { return internal::__except_handler([&]() { - return internal::parallel_or(std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__pred, __is_vector](_ForwardIterator __i, _ForwardIterator __j) { - return internal::__brick_any_of(__i, __j, __pred, __is_vector); - }); + return internal::__parallel_or(std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__pred, __is_vector](_ForwardIterator __i, _ForwardIterator __j) { + return internal::__brick_any_of(__i, __j, __pred, __is_vector); + }); }); } #endif @@ -122,10 +122,10 @@ /*parallel=*/std::true_type) { internal::__except_handler([&]() { - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__f, __is_vector](_ForwardIterator __i, _ForwardIterator __j) { - internal::__brick_walk1(__i, __j, __f, __is_vector); - }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__f, __is_vector](_ForwardIterator __i, _ForwardIterator __j) { + internal::__brick_walk1(__i, __j, __f, __is_vector); + }); }); } #endif @@ -145,8 +145,8 @@ /*parallel=*/std::true_type) { internal::__except_handler([&]() { - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__brick](_ForwardIterator __i, _ForwardIterator __j) { __brick(__i, __j); }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__brick](_ForwardIterator __i, _ForwardIterator __j) { __brick(__i, __j); }); }); } #endif @@ -205,7 +205,7 @@ /*is_parallel=*/std::true_type) { return internal::__except_handler([&]() { - par_backend::parallel_for( + par_backend::__parallel_for( std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j - __i); }); return __first + __n; @@ -269,10 +269,11 @@ _ForwardIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/std::true_type) { return internal::__except_handler([&]() { - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__f, __first1, __first2, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) { - internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, __is_vector); - }); + par_backend::__parallel_for( + std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__f, __first1, __first2, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) { + internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, __is_vector); + }); return __first2 + (__last1 - __first1); }); } @@ -312,7 +313,7 @@ _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/std::true_type) { return __except_handler([&]() { - par_backend::parallel_for( + par_backend::__parallel_for( std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __brick(__i, __j, __first2 + (__i - __first1)); @@ -329,7 +330,7 @@ _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/std::true_type) { return __except_handler([&]() { - par_backend::parallel_for( + par_backend::__parallel_for( std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __brick(__i, __j - __i, __first2 + (__i - __first1)); @@ -388,7 +389,7 @@ /*parallel=*/std::true_type) { return internal::__except_handler([&]() { - par_backend::parallel_for( + par_backend::__parallel_for( std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__f, __first1, __first2, __first3, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f, @@ -438,7 +439,7 @@ /*is_parallel=*/std::true_type) { return internal::__except_handler([&]() { - return !internal::parallel_or( + return !internal::__parallel_or( std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__brick_equal(__i, __j, __first2 + (__i - __first1), __p, __is_vector); @@ -486,12 +487,12 @@ /*is_parallel=*/std::true_type) { return internal::__except_handler([&]() { - return internal::parallel_find(std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__pred, __is_vector](_ForwardIterator __i, _ForwardIterator __j) { - return internal::__brick_find_if(__i, __j, __pred, __is_vector); - }, - std::less::difference_type>(), - /*is_first=*/true); + return internal::__parallel_find(std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__pred, __is_vector](_ForwardIterator __i, _ForwardIterator __j) { + return internal::__brick_find_if(__i, __j, __pred, __is_vector); + }, + std::less::difference_type>(), + /*is_first=*/true); }); } #endif @@ -638,7 +639,7 @@ else { return __except_handler([&]() { - return internal::parallel_find( + return internal::__parallel_find( std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __s_first, __s_last, __pred, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) { return internal::find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, __is_vector); @@ -687,7 +688,7 @@ _IsVector __is_vector, /*is_parallel=*/std::true_type) noexcept { return __except_handler([&]() { - return internal::parallel_find( + return internal::__parallel_find( std::forward<_ExecutionPolicy>(__exec), __first, __last, [__s_first, __s_last, __pred, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) { return internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, __is_vector); @@ -744,7 +745,7 @@ else { return __except_handler([&]() { - return internal::parallel_find( + return internal::__parallel_find( std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __s_first, __s_last, __pred, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) { return internal::find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true, __is_vector); @@ -803,7 +804,7 @@ else { return __except_handler([&__exec, __first, __last, __count, &__value, __pred, __is_vector]() { - return internal::parallel_find( + return internal::__parallel_find( std::forward<_ExecutionPolicy>(__exec), __first, __last, [__last, __count, &__value, __pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { return internal::find_subrange(__i, __j, __last, __count, __value, __pred, __is_vector); @@ -1029,7 +1030,7 @@ const _DifferenceType __n = __last - __first; if (_DifferenceType(1) < __n) { - par_backend::buffer __mask_buf(__n); + par_backend::__buffer __mask_buf(__n); return __except_handler([&__exec, __n, __first, __result, __is_vector, __pred, &__mask_buf]() { bool* __mask = __mask_buf.get(); _DifferenceType __m{}; @@ -1090,7 +1091,7 @@ { typedef typename std::iterator_traits<_ForwardIterator>::difference_type _SizeType; return __except_handler([&]() { - return par_backend::parallel_reduce( + return par_backend::__parallel_reduce( std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), [__pred, __is_vector](_ForwardIterator __begin, _ForwardIterator __end, _SizeType __value) -> _SizeType { return __value + __brick_count(__begin, __end, __pred, __is_vector); @@ -1140,11 +1141,11 @@ typedef typename std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType; typedef typename std::iterator_traits<_ForwardIterator>::value_type _Tp; _DifferenceType __n = __last - __first; - par_backend::buffer __mask_buf(__n); + par_backend::__buffer __mask_buf(__n); // 1. find a first iterator that should be removed return __except_handler([&]() { bool* __mask = __mask_buf.get(); - _DifferenceType __min = par_backend::parallel_reduce( + _DifferenceType __min = par_backend::__parallel_reduce( std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n, [__first, __mask, &__calc_mask, __is_vector](_DifferenceType __i, _DifferenceType __j, _DifferenceType __local_min) -> _DifferenceType { @@ -1177,7 +1178,7 @@ __n -= __min; __first += __min; - par_backend::buffer<_Tp> __buf(__n); + par_backend::__buffer<_Tp> __buf(__n); _Tp* __result = __buf.get(); __mask += __min; _DifferenceType __m{}; @@ -1199,10 +1200,10 @@ [&__m](_DifferenceType __total) { __m = __total; }); // 3. Elements from result are moved to [first, last) - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, - [__result, __first, __is_vector](_Tp* __i, _Tp* __j) { - __brick_move(__i, __j, __first + (__i - __result), __is_vector); - }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, + [__result, __first, __is_vector](_Tp* __i, _Tp* __j) { + __brick_move(__i, __j, __first + (__i - __result), __is_vector); + }); return __first + __m; }); } @@ -1303,7 +1304,7 @@ const _DifferenceType __n = __last - __first; if (_DifferenceType(2) < __n) { - par_backend::buffer __mask_buf(__n); + par_backend::__buffer __mask_buf(__n); if (_DifferenceType(2) < __n) { return internal::__except_handler([&__exec, __n, __first, __result, __pred, __is_vector, &__mask_buf]() { @@ -1410,7 +1411,7 @@ __pattern_reverse(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, _IsVector __is_vector, /*is_parallel=*/std::true_type) { - par_backend::parallel_for( + par_backend::__parallel_for( std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, [__is_vector, __first, __last](_BidirectionalIterator __inner_first, _BidirectionalIterator __inner_last) { __brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), __is_vector); @@ -1457,12 +1458,12 @@ _OutputIterator __d_first, _IsVector __is_vector, /*is_parallel=*/std::true_type) { auto __len = __last - __first; - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__is_vector, __first, __len, __d_first](_BidirectionalIterator __inner_first, - _BidirectionalIterator __inner_last) { - __brick_reverse_copy(__inner_first, __inner_last, - __d_first + (__len - (__inner_last - __first)), __is_vector); - }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__is_vector, __first, __len, __d_first](_BidirectionalIterator __inner_first, + _BidirectionalIterator __inner_last) { + __brick_reverse_copy(__inner_first, __inner_last, + __d_first + (__len - (__inner_last - __first)), __is_vector); + }); return __d_first + __len; } #endif @@ -1542,48 +1543,49 @@ auto __m = __middle - __first; if (__m <= __n / 2) { - par_backend::buffer<_Tp> __buf(__n - __m); + par_backend::__buffer<_Tp> __buf(__n - __m); return __except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() { _Tp* __result = __buf.get(); - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __middle, __last, - [__middle, __result, __is_vector](_ForwardIterator __b, _ForwardIterator __e) { - __brick_uninitialized_move(__b, __e, __result + (__b - __middle), - __is_vector); - }); - - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __middle, - [__last, __middle, __is_vector](_ForwardIterator __b, _ForwardIterator __e) { - __brick_move(__b, __e, __b + (__last - __middle), __is_vector); - }); - - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __result, __result + (__n - __m), - [__first, __result, __is_vector](_Tp* __b, _Tp* __e) { - __brick_move(__b, __e, __first + (__b - __result), __is_vector); - }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __middle, __last, + [__middle, __result, __is_vector](_ForwardIterator __b, _ForwardIterator __e) { + __brick_uninitialized_move(__b, __e, __result + (__b - __middle), + __is_vector); + }); + + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __middle, + [__last, __middle, __is_vector](_ForwardIterator __b, _ForwardIterator __e) { + __brick_move(__b, __e, __b + (__last - __middle), __is_vector); + }); + + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __result, __result + (__n - __m), + [__first, __result, __is_vector](_Tp* __b, _Tp* __e) { + __brick_move(__b, __e, __first + (__b - __result), __is_vector); + }); return __first + (__last - __middle); }); } else { - par_backend::buffer<_Tp> __buf(__m); + par_backend::__buffer<_Tp> __buf(__m); return __except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() { _Tp* __result = __buf.get(); - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __middle, - [__first, __result, __is_vector](_ForwardIterator __b, _ForwardIterator __e) { - __brick_uninitialized_move(__b, __e, __result + (__b - __first), __is_vector); - }); - - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __middle, __last, - [__first, __middle, __is_vector](_ForwardIterator __b, _ForwardIterator __e) { - __brick_move(__b, __e, __first + (__b - __middle), __is_vector); - }); - - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, - [__n, __m, __first, __result, __is_vector](_Tp* __b, _Tp* __e) { - __brick_move(__b, __e, __first + ((__n - __m) + (__b - __result)), - __is_vector); - }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __middle, + [__first, __result, __is_vector](_ForwardIterator __b, _ForwardIterator __e) { + __brick_uninitialized_move(__b, __e, __result + (__b - __first), + __is_vector); + }); + + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __middle, __last, + [__first, __middle, __is_vector](_ForwardIterator __b, _ForwardIterator __e) { + __brick_move(__b, __e, __first + (__b - __middle), __is_vector); + }); + + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, + [__n, __m, __first, __result, __is_vector](_Tp* __b, _Tp* __e) { + __brick_move(__b, __e, __first + ((__n - __m) + (__b - __result)), + __is_vector); + }); return __first + (__last - __middle); }); @@ -1627,7 +1629,7 @@ _ForwardIterator __last, _OutputIterator __result, _IsVector __is_vector, /*is_parallel=*/std::true_type) { - par_backend::parallel_for( + par_backend::__parallel_for( std::forward<_ExecutionPolicy>(__exec), __first, __last, [__first, __last, __middle, __result, __is_vector](_ForwardIterator __b, _ForwardIterator __e) { if (__b > __middle) @@ -1733,7 +1735,7 @@ __true_false, __true_false, __broken, __broken, __all_false, __broken, __broken, __broken, __true_false, __broken}; - __init = par_backend::parallel_reduce( + __init = par_backend::__parallel_reduce( std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, [&__pred, &__table, __is_vector](_ForwardIterator __i, _ForwardIterator __j, _ReduceType __value) -> _ReduceType { @@ -1748,7 +1750,7 @@ // find first element that don't satisfy pred _ForwardIterator __x = __brick_find_if(__i + 1, __j, __not_pred<_UnaryPredicate>(__pred), __is_vector); - + if (__x != __j) { // find first element after "x" that satisfy pred @@ -1862,7 +1864,7 @@ // then we should swap the false part of left range and last part of true part of right range else if (__size2 > __size1) { - par_backend::parallel_for( + par_backend::__parallel_for( std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, [__val1, __val2, __size1, __is_vector](_ForwardIterator __i, _ForwardIterator __j) { __brick_swap_ranges(__i, __j, (__val2.__pivot - __size1) + (__i - __val1.__pivot), __is_vector); @@ -1872,7 +1874,7 @@ // else we should swap the first part of false part of left range and true part of right range else { - par_backend::parallel_for( + par_backend::__parallel_for( std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, [__val1, __val2, __is_vector](_ForwardIterator __i, _ForwardIterator __j) { __brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), __is_vector); @@ -1881,18 +1883,17 @@ } }; - _PartitionRange __result = - par_backend::parallel_reduce(std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, - [__pred, __is_vector, __reductor](_ForwardIterator __i, _ForwardIterator __j, - _PartitionRange __value) -> _PartitionRange { - //1. serial partition - _ForwardIterator __pivot = - __brick_partition(__i, __j, __pred, __is_vector); + _PartitionRange __result = par_backend::__parallel_reduce( + std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + [__pred, __is_vector, __reductor](_ForwardIterator __i, _ForwardIterator __j, + _PartitionRange __value) -> _PartitionRange { + //1. serial partition + _ForwardIterator __pivot = __brick_partition(__i, __j, __pred, __is_vector); - // 2. merging of two ranges (left and right respectively) - return __reductor(__value, {__i, __pivot, __j}); - }, - __reductor); + // 2. merging of two ranges (left and right respectively) + return __reductor(__value, {__i, __pivot, __j}); + }, + __reductor); return __result.__pivot; }); } @@ -1966,7 +1967,7 @@ } }; - _PartitionRange __result = par_backend::parallel_reduce( + _PartitionRange __result = par_backend::__parallel_reduce( std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, [&__pred, __is_vector, __reductor](_BidirectionalIterator __i, _BidirectionalIterator __j, _PartitionRange __value) -> _PartitionRange { @@ -2029,7 +2030,7 @@ const _DifferenceType __n = __last - __first; if (_DifferenceType(1) < __n) { - par_backend::buffer __mask_buf(__n); + par_backend::__buffer __mask_buf(__n); return internal::__except_handler( [&__exec, __n, __first, __out_true, __out_false, __is_vector, __pred, &__mask_buf]() { bool* __mask = __mask_buf.get(); @@ -2077,10 +2078,10 @@ _IsVector /*is_vector*/, /*is_parallel=*/std::true_type, /*is_move_constructible=*/std::true_type) { __except_handler([&]() { - par_backend::parallel_stable_sort(std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - [](_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { std::sort(__first, __last, __comp); }, - __last - __first); + par_backend::__parallel_stable_sort(std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) { std::sort(__first, __last, __comp); }, + __last - __first); }); } #endif @@ -2104,9 +2105,9 @@ _Compare __comp, _IsVector /*is_vector*/, /*is_parallel=*/std::true_type) { internal::__except_handler([&]() { - par_backend::parallel_stable_sort(std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - [](_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { std::stable_sort(__first, __last, __comp); }); + par_backend::__parallel_stable_sort(std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) { std::stable_sort(__first, __last, __comp); }); }); } #endif @@ -2132,7 +2133,7 @@ { const auto __n = __middle - __first; __except_handler([&]() { - par_backend::parallel_stable_sort( + par_backend::__parallel_stable_sort( std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [__n](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Compare __comp) { if (__n < __end - __begin) @@ -2174,7 +2175,7 @@ return __except_handler([&]() { if (__n2 >= __n1) { - par_backend::parallel_stable_sort( + par_backend::__parallel_stable_sort( std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, [__first, __d_first, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j, _Compare __comp) { @@ -2197,32 +2198,32 @@ { typedef typename std::iterator_traits<_ForwardIterator>::value_type _T1; typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _T2; - par_backend::buffer<_T1> __buf(__n1); + par_backend::__buffer<_T1> __buf(__n1); _T1* __r = __buf.get(); - par_backend::parallel_stable_sort(std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, - [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { - _ForwardIterator __it = __first + (__i - __r); - - // 1. Copy elements from input to raw memory - for (_T1* __k = __i; __k != __j; ++__k, ++__it) - { - ::new (__k) _T2(*__it); - } - - // 2. Sort elements in temporary buffer - if (__n2 < __j - __i) - std::partial_sort(__i, __i + __n2, __j, __comp); - else - std::sort(__i, __j, __comp); - }, - __n2); - - // 3. Move elements from temporary buffer to output - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, - [__r, __d_first, __is_vector](_T1* __i, _T1* __j) { - __brick_move(__i, __j, __d_first + (__i - __r), __is_vector); - }); + par_backend::__parallel_stable_sort(std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, + [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { + _ForwardIterator __it = __first + (__i - __r); + + // 1. Copy elements from input to raw memory + for (_T1* __k = __i; __k != __j; ++__k, ++__it) + { + ::new (__k) _T2(*__it); + } + + // 2. Sort elements in temporary __buffer + if (__n2 < __j - __i) + std::partial_sort(__i, __i + __n2, __j, __comp); + else + std::sort(__i, __j, __comp); + }, + __n2); + + // 3. Move elements from temporary __buffer to output + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, + [__r, __d_first, __is_vector](_T1* __i, _T1* __j) { + __brick_move(__i, __j, __d_first + (__i - __r), __is_vector); + }); return __d_first + __n2; } }); @@ -2267,7 +2268,7 @@ return __last; return internal::__except_handler([&]() { - return par_backend::parallel_reduce( + return par_backend::__parallel_reduce( std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, [__last, __pred, __is_vector, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __value) -> _RandomAccessIterator { @@ -2275,7 +2276,7 @@ // checking (compare_and_swap idiom) its __value at __first. if (__or_semantic && __value < __last) { //found - par_backend::cancel_execution(); + par_backend::__cancel_execution(); return __value; } @@ -2399,10 +2400,10 @@ /*is_parallel=*/std::true_type, _IsVector __is_vector) { return __except_handler([&__exec, __first, __last, &__value, __is_vector]() { - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, - [&__value, __is_vector](_ForwardIterator __begin, _ForwardIterator __end) { - internal::__brick_fill(__begin, __end, __value, __is_vector); - }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value, __is_vector](_ForwardIterator __begin, _ForwardIterator __end) { + internal::__brick_fill(__begin, __end, __value, __is_vector); + }); return __last; }); } @@ -2473,10 +2474,10 @@ /*is_parallel=*/std::true_type, _IsVector __is_vector) { return internal::__except_handler([&]() { - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__g, __is_vector](_ForwardIterator __begin, _ForwardIterator __end) { - internal::__brick_generate(__begin, __end, __g, __is_vector); - }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__g, __is_vector](_ForwardIterator __begin, _ForwardIterator __end) { + internal::__brick_generate(__begin, __end, __g, __is_vector); + }); return __last; }); } @@ -2614,7 +2615,7 @@ _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _OutputIterator __d_first, _Compare __comp, _IsVector __is_vector, /* is_parallel = */ std::true_type) { - par_backend::parallel_merge( + par_backend::__parallel_merge( std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, [__is_vector](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, _RandomAccessIterator2 __l2, _OutputIterator __f3, @@ -2665,7 +2666,7 @@ } typedef typename std::iterator_traits<_BidirectionalIterator>::value_type _Tp; auto __n = __last - __first; - par_backend::buffer<_Tp> __buf(__n); + par_backend::__buffer<_Tp> __buf(__n); _Tp* __r = __buf.get(); __except_handler([&]() { auto __move_values = [](_BidirectionalIterator __x, _Tp* __z) { @@ -2677,20 +2678,20 @@ return __brick_uninitialized_move(__first1, __last1, __first2, _IsVector()); }; - par_backend::parallel_merge( + par_backend::__parallel_merge( std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, [__n, __move_values, __move_sequences](_BidirectionalIterator __f1, _BidirectionalIterator __l1, _BidirectionalIterator __f2, _BidirectionalIterator __l2, _Tp* __f3, _Compare __comp) { - auto __func = par_backend::serial_move_merge( + auto __func = par_backend::__serial_move_merge( __n, __move_values, __move_sequences); __func(__f1, __l1, __f2, __l2, __f3, __comp); return __f3 + (__l1 - __f1) + (__l2 - __f2); }); - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, - [__r, __first, __is_vector](_Tp* __i, _Tp* __j) { - __brick_move(__i, __j, __first + (__i - __r), __is_vector); - }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, + [__r, __first, __is_vector](_Tp* __i, _Tp* __j) { + __brick_move(__i, __j, __first + (__i - __r), __is_vector); + }); }); } #endif @@ -2729,7 +2730,7 @@ return !__comp(*__first1, *__first2) && !__comp(*__first2, *__first1); return __except_handler([&]() { - return !internal::parallel_or( + return !internal::__parallel_or( std::forward<_ExecutionPolicy>(__exec), __first2, __last2, [__first1, __last1, __first2, __last2, &__comp](_ForwardIterator2 __i, _ForwardIterator2 __j) { assert(__j > __i); @@ -2771,9 +2772,9 @@ template _OutputIterator -parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - _SizeFunction __size_func, _SetOP __set_op, _IsVector __is_vector) +__parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, + _SizeFunction __size_func, _SetOP __set_op, _IsVector __is_vector) { typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; typedef typename std::iterator_traits<_OutputIterator>::value_type _T; @@ -2791,7 +2792,7 @@ const _DifferenceType __n1 = __last1 - __first1; const _DifferenceType __n2 = __last2 - __first2; - par_backend::buffer<_T> __buf(__size_func(__n1, __n2)); + par_backend::__buffer<_T> __buf(__size_func(__n1, __n2)); return __except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, __is_vector, __comp, __size_func, __set_op, &__buf]() { @@ -2895,15 +2896,16 @@ if (__left_bound_seq_1 == __last1) { //{1} < {2}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 - par_backend::parallel_invoke(std::forward<_ExecutionPolicy>(__exec), - [=] { - __pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __result, copy_range1, std::true_type()); - }, - [=] { - __pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first2, - __last2, __result + __n1, copy_range2, std::true_type()); - }); + par_backend::__parallel_invoke(std::forward<_ExecutionPolicy>(__exec), + [=] { + __pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __result, copy_range1, std::true_type()); + }, + [=] { + __pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first2, + __last2, __result + __n1, copy_range2, + std::true_type()); + }); return __result + __n1 + __n2; } @@ -2913,15 +2915,16 @@ if (__left_bound_seq_2 == __last2) { //{2} < {1}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 - par_backend::parallel_invoke(std::forward<_ExecutionPolicy>(__exec), - [=] { - __pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first2, - __last2, __result, copy_range2, std::true_type()); - }, - [=] { - __pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __result + __n2, copy_range1, std::true_type()); - }); + par_backend::__parallel_invoke(std::forward<_ExecutionPolicy>(__exec), + [=] { + __pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first2, + __last2, __result, copy_range2, std::true_type()); + }, + [=] { + __pattern_walk2_brick(std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __result + __n2, copy_range1, + std::true_type()); + }); return __result + __n1 + __n2; } @@ -2930,7 +2933,7 @@ { auto __res_or = __result; __result += __m1; //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) - par_backend::parallel_invoke( + par_backend::__parallel_invoke( std::forward<_ExecutionPolicy>(__exec), //do parallel copying of [first1; left_bound_seq_1) [=] { @@ -2938,10 +2941,10 @@ copy_range1, std::true_type()); }, [=, &__result] { - __result = parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, - __first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, - __set_union_op, __is_vector); + __result = __parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, + __first2, __last2, __result, __comp, + [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __set_union_op, __is_vector); }); return __result; } @@ -2952,7 +2955,7 @@ { auto __res_or = __result; __result += __m2; //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) - par_backend::parallel_invoke( + par_backend::__parallel_invoke( std::forward<_ExecutionPolicy>(__exec), //do parallel copying of [first2; left_bound_seq_2) [=] { @@ -2960,17 +2963,17 @@ copy_range2, std::true_type()); }, [=, &__result] { - __result = parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - __left_bound_seq_2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, - __set_union_op, __is_vector); + __result = __parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __left_bound_seq_2, __last2, __result, __comp, + [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __set_union_op, __is_vector); }); return __result; } - return parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op, - __is_vector); + return __parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op, + __is_vector); } #endif @@ -3102,21 +3105,21 @@ if (__m1 > __set_algo_cut_off) { //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) - return parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, - __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); }, - [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator2 __last2, _T* __result, _Compare __comp) { - return std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp); - }, - __is_vector); + return __parallel_set_op( + std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, __comp, + [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); }, + [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _T* __result, _Compare __comp) { + return std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp); + }, + __is_vector); } const auto __m2 = __last2 - __left_bound_seq_2 + __n1; if (__m2 > __set_algo_cut_off) { //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) - __result = parallel_set_op( + __result = __parallel_set_op( std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); }, [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, @@ -3215,13 +3218,13 @@ std::true_type()); if (__n1 + __n2 > __set_algo_cut_off) - return parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n; }, - [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator2 __last2, _T* __result, _Compare __comp) { - return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); - }, - __is_vector); + return __parallel_set_op(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n; }, + [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _T* __result, _Compare __comp) { + return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); + }, + __is_vector); // use serial algorithm return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); @@ -3358,7 +3361,7 @@ return __last; return internal::__except_handler([&]() { - return internal::parallel_find( + return internal::__parallel_find( std::forward<_ExecutionPolicy>(__exec), __first, __last, [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { return internal::is_heap_until_local(__first, __i - __first, __j - __first, __comp, __is_vector); @@ -3410,7 +3413,7 @@ return __last; return internal::__except_handler([&]() { - return par_backend::parallel_reduce( + return par_backend::__parallel_reduce( std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, __first, [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __init) -> _RandomAccessIterator { @@ -3468,7 +3471,7 @@ return internal::__except_handler([&]() { typedef std::pair<_ForwardIterator, _ForwardIterator> _Result; - return par_backend::parallel_reduce( + return par_backend::__parallel_reduce( std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, std::make_pair(__first, __first), [=](_ForwardIterator __begin, _ForwardIterator __end, _Result __init) -> _Result { const _Result __subresult = __brick_minmax_element(__begin, __end, __comp, __is_vector); @@ -3539,7 +3542,7 @@ { return internal::__except_handler([&]() { auto __n = std::min(__last1 - __first1, __last2 - __first2); - auto __result = internal::parallel_find( + auto __result = internal::__parallel_find( std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [__first1, __first2, __pred, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), @@ -3632,7 +3635,7 @@ --__last1; --__last2; auto __n = std::min(__last1 - __first1, __last2 - __first2); - auto __result = internal::parallel_find( + auto __result = internal::__parallel_find( std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, [__first1, __first2, &__comp, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) { return __brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), Index: include/pstl/internal/numeric_impl.h =================================================================== --- include/pstl/internal/numeric_impl.h +++ include/pstl/internal/numeric_impl.h @@ -72,7 +72,7 @@ _BinaryOperation2 __binary_op2, _IsVector __is_vector, /*is_parallel=*/std::true_type) { return internal::__except_handler([&]() { - return par_backend::parallel_transform_reduce( + return par_backend::__parallel_transform_reduce( std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable { return __binary_op2(*__i, *(__first2 + (__i - __first1))); @@ -134,7 +134,7 @@ /*is_parallel=*/std::true_type) { return __except_handler([&]() { - return par_backend::parallel_transform_reduce( + return par_backend::__parallel_transform_reduce( std::forward<_ExecutionPolicy>(__exec), __first, __last, [__unary_op](_ForwardIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, [__unary_op, __binary_op, __is_vector](_ForwardIterator __i, _ForwardIterator __j, _Tp __init) { @@ -239,7 +239,7 @@ typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; return internal::__except_handler([&]() { - par_backend::parallel_transform_scan( + par_backend::__parallel_transform_scan( std::forward<_ExecutionPolicy>(__exec), __last - __first, [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, __binary_op, @@ -349,15 +349,14 @@ typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; *__d_first = *__first; - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, - [&__op, __is_vector, __d_first, __first](_ForwardIterator1 __b, _ForwardIterator1 __e) { - _ForwardIterator2 __d_b = __d_first + (__b - __first); - __brick_walk3(__b, __e, __b + 1, __d_b + 1, - [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { - __z = __op(__y, __x); - }, - __is_vector); - }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, + [&__op, __is_vector, __d_first, __first](_ForwardIterator1 __b, _ForwardIterator1 __e) { + _ForwardIterator2 __d_b = __d_first + (__b - __first); + __brick_walk3(__b, __e, __b + 1, __d_b + 1, + [&__op](_ReferenceType1 __x, _ReferenceType1 __y, + _ReferenceType2 __z) { __z = __op(__y, __x); }, + __is_vector); + }); return __d_first + (__last - __first); } #endif Index: include/pstl/internal/parallel_backend_tbb.h =================================================================== --- include/pstl/internal/parallel_backend_tbb.h +++ include/pstl/internal/parallel_backend_tbb.h @@ -40,18 +40,18 @@ would make the span be at least O(N). */ // tbb::allocator can improve performance in some cases. template -class buffer +class __buffer { tbb::tbb_allocator<_Tp> _M_allocator; _Tp* _M_ptr; const std::size_t _M_buf_size; - buffer(const buffer&) = delete; + __buffer(const __buffer&) = delete; void - operator=(const buffer&) = delete; + operator=(const __buffer&) = delete; public: //! Try to obtain buffer of given size to store objects of _Tp type - buffer(std::size_t n) : _M_allocator(), _M_ptr(_M_allocator.allocate(n)), _M_buf_size(n) {} + __buffer(std::size_t n) : _M_allocator(), _M_ptr(_M_allocator.allocate(n)), _M_buf_size(n) {} //! True if buffer was successfully obtained, zero otherwise. operator bool() const { return _M_ptr != NULL; } //! Return pointer to buffer, or NULL if buffer could not be obtained. @@ -61,12 +61,12 @@ return _M_ptr; } //! Destroy buffer - ~buffer() { _M_allocator.deallocate(_M_ptr, _M_buf_size); } + ~__buffer() { _M_allocator.deallocate(_M_ptr, _M_buf_size); } }; // Wrapper for tbb::task inline void -cancel_execution() +__cancel_execution() { tbb::task::self().group()->cancel_group_execution(); } @@ -76,11 +76,11 @@ //------------------------------------------------------------------------ template -class parallel_for_body +class __parallel_for_body { public: - parallel_for_body(const _RealBody& __body) : _M_body(__body) {} - parallel_for_body(const parallel_for_body& __body) : _M_body(__body._M_body) {} + __parallel_for_body(const _RealBody& __body) : _M_body(__body) {} + __parallel_for_body(const __parallel_for_body& __body) : _M_body(__body._M_body) {} void operator()(const tbb::blocked_range<_Index>& __range) const { @@ -95,18 +95,19 @@ // wrapper over tbb::parallel_for template void -parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) { - tbb::this_task_arena::isolate( - [=]() { tbb::parallel_for(tbb::blocked_range<_Index>(__first, __last), parallel_for_body<_Index, _Fp>(__f)); }); + tbb::this_task_arena::isolate([=]() { + tbb::parallel_for(tbb::blocked_range<_Index>(__first, __last), __parallel_for_body<_Index, _Fp>(__f)); + }); } //! Evaluation of brick f[i,j) for each subrange [i,j) of [first,last) // wrapper over tbb::parallel_reduce template _Value -parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Value& __identity, - const _RealBody& __real_body, const _Reduction& __reduction) +__parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Value& __identity, + const _RealBody& __real_body, const _Reduction& __reduction) { return tbb::this_task_arena::isolate([__first, __last, &__identity, &__real_body, &__reduction]() -> _Value { return tbb::parallel_reduce( @@ -128,7 +129,7 @@ //------------------------------------------------------------------------ template -struct par_trans_red_body +struct __par_trans_red_body { alignas(_Tp) char _M_sum_storage[sizeof(_Tp)]; // Holds generalized non-commutative sum when has_sum==true _Rp _M_brick_reduce; // Most likely to have non-empty layout @@ -141,18 +142,18 @@ __TBB_ASSERT(_M_has_sum, "sum expected"); return *(_Tp*)_M_sum_storage; } - par_trans_red_body(_Up __u, _Tp __init, _Cp __c, _Rp __r) + __par_trans_red_body(_Up __u, _Tp __init, _Cp __c, _Rp __r) : _M_brick_reduce(__r), _M_u(__u), _M_combine(__c), _M_has_sum(true) { new (_M_sum_storage) _Tp(__init); } - par_trans_red_body(par_trans_red_body& __left, tbb::split) + __par_trans_red_body(__par_trans_red_body& __left, tbb::split) : _M_brick_reduce(__left._M_brick_reduce), _M_u(__left._M_u), _M_combine(__left._M_combine), _M_has_sum(false) { } - ~par_trans_red_body() + ~__par_trans_red_body() { // 17.6.5.12 tells us to not worry about catching exceptions from destructors. if (_M_has_sum) @@ -160,7 +161,7 @@ } void - join(par_trans_red_body& __rhs) + join(__par_trans_red_body& __rhs) { sum() = _M_combine(sum(), __rhs.sum()); } @@ -186,10 +187,10 @@ template _Tp -parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _Up __u, _Tp __init, _Cp __combine, - _Rp __brick_reduce) +__parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _Up __u, _Tp __init, _Cp __combine, + _Rp __brick_reduce) { - par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> __body(__u, __init, __combine, __brick_reduce); + __par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> __body(__u, __init, __combine, __brick_reduce); // The grain size of 3 is used in order to provide mininum 2 elements for each body tbb::this_task_arena::isolate( [__first, __last, &__body]() { tbb::parallel_reduce(tbb::blocked_range<_Index>(__first, __last, 3), __body); }); @@ -201,7 +202,7 @@ //------------------------------------------------------------------------ template -class trans_scan_body +class __trans_scan_body { alignas(_Tp) char _M_sum_storage[sizeof(_Tp)]; // Holds generalized non-commutative sum when has_sum==true _Rp _M_brick_reduce; // Most likely to have non-empty layout @@ -210,19 +211,19 @@ _Sp _M_scan; bool _M_has_sum; // Put last to minimize size of class public: - trans_scan_body(_Up __u, _Tp __init, _Cp __combine, _Rp __reduce, _Sp __scan) + __trans_scan_body(_Up __u, _Tp __init, _Cp __combine, _Rp __reduce, _Sp __scan) : _M_brick_reduce(__reduce), _M_u(__u), _M_combine(__combine), _M_scan(__scan), _M_has_sum(true) { new (_M_sum_storage) _Tp(__init); } - trans_scan_body(trans_scan_body& __b, tbb::split) + __trans_scan_body(__trans_scan_body& __b, tbb::split) : _M_brick_reduce(__b._M_brick_reduce), _M_u(__b._M_u), _M_combine(__b._M_combine), _M_scan(__b._M_scan), _M_has_sum(false) { } - ~trans_scan_body() + ~__trans_scan_body() { // 17.6.5.12 tells us to not worry about catching exceptions from destructors. if (_M_has_sum) @@ -259,7 +260,7 @@ } void - reverse_join(trans_scan_body& __a) + reverse_join(__trans_scan_body& __a) { if (_M_has_sum) { @@ -273,7 +274,7 @@ } void - assign(trans_scan_body& __b) + assign(__trans_scan_body& __b) { sum() = __b.sum(); } @@ -281,7 +282,7 @@ template _Index -split(_Index __m) +__split(_Index __m) { _Index __k = 1; while (2 * __k < __m) @@ -295,18 +296,18 @@ template void -upsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Rp __reduce, _Cp __combine) +__upsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Rp __reduce, _Cp __combine) { if (__m == 1) __r[0] = __reduce(__i * __tilesize, __lastsize); else { - _Index __k = split(__m); - tbb::parallel_invoke([=] { par_backend::upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); }, - [=] { - par_backend::upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, - __combine); - }); + _Index __k = __split(__m); + tbb::parallel_invoke( + [=] { par_backend::__upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); }, + [=] { + par_backend::__upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, __combine); + }); if (__m == 2 * __k) __r[__m - 1] = __combine(__r[__k - 1], __r[__m - 1]); } @@ -314,21 +315,21 @@ template void -downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Tp __initial, _Cp __combine, - _Sp __scan) +__downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Tp __initial, _Cp __combine, + _Sp __scan) { if (__m == 1) __scan(__i * __tilesize, __lastsize, __initial); else { - const _Index __k = par_backend::split(__m); + const _Index __k = par_backend::__split(__m); tbb::parallel_invoke( - [=] { par_backend::downsweep(__i, __k, __tilesize, __r, __tilesize, __initial, __combine, __scan); }, + [=] { par_backend::__downsweep(__i, __k, __tilesize, __r, __tilesize, __initial, __combine, __scan); }, // Assumes that __combine never throws. //TODO: Consider adding a requirement for user functors to be constant. [=, &__combine] { - par_backend::downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, - __combine(__initial, __r[__k - 1]), __combine, __scan); + par_backend::__downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, + __combine(__initial, __r[__k - 1]), __combine, __scan); }); } } @@ -345,7 +346,7 @@ // Thus callers can rely upon side effects in reduce. // combine must not throw an exception. // apex is called exactly once, after all calls to reduce and before all calls to scan. -// For example, it's useful for allocating a buffer used by scan but whose size is the sum of all reduction values. +// For example, it's useful for allocating a __buffer used by scan but whose size is the sum of all reduction values. // T must have a trivial constructor and destructor. template void @@ -358,10 +359,10 @@ const _Index __slack = 4; _Index __tilesize = (__n - 1) / (__slack * __p) + 1; _Index __m = (__n - 1) / __tilesize; - buffer<_Tp> __buf(__m + 1); + __buffer<_Tp> __buf(__m + 1); _Tp* __r = __buf.get(); - par_backend::upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, - __combine); + par_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, + __combine); // When __apex is a no-op and __combine has no side effects, a good optimizer // should be able to eliminate all code between here and __apex. // Alternatively, provide a default value for __apex that can be @@ -371,8 +372,8 @@ while ((__k &= __k - 1)) __t = __combine(__r[__k - 1], __t); __apex(__combine(__initial, __t)); - par_backend::downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial, - __combine, __scan); + par_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial, + __combine, __scan); return; } // Fewer than 2 elements in sequence, or out of memory. Handle has single block. @@ -387,10 +388,10 @@ template _Tp -parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce, - _Sp __scan) +__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce, + _Sp __scan) { - trans_scan_body<_Index, _Up, _Tp, _Cp, _Rp, _Sp> __body(__u, __init, __combine, __brick_reduce, __scan); + __trans_scan_body<_Index, _Up, _Tp, _Cp, _Rp, _Sp> __body(__u, __init, __combine, __brick_reduce, __scan); auto __range = tbb::blocked_range<_Index>(0, __n); tbb::this_task_arena::isolate([__range, &__body]() { tbb::parallel_scan(__range, __body); }); return __body.sum(); @@ -408,7 +409,7 @@ template -class merge_task : public tbb::task +class __merge_task : public tbb::task { /*override*/ tbb::task* execute(); @@ -420,9 +421,9 @@ _LeafMerge _M_leaf_merge; public: - merge_task(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, - _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, _Cleanup __cleanup, - _LeafMerge __leaf_merge) + __merge_task(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, + _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, _Cleanup __cleanup, + _LeafMerge __leaf_merge) : _M_xs(__xs), _M_xe(__xe), _M_ys(__ys), _M_ye(__ye), _M_zs(__zs), _M_comp(__comp), _M_cleanup(__cleanup), _M_leaf_merge(__leaf_merge) { @@ -434,8 +435,8 @@ template tbb::task* -merge_task<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, __M_Compare, _Cleanup, - _LeafMerge>::execute() +__merge_task<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, __M_Compare, _Cleanup, + _LeafMerge>::execute() { typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; @@ -467,7 +468,7 @@ } const _RandomAccessIterator3 __zm = _M_zs + ((__xm - _M_xs) + (__ym - _M_ys)); tbb::task* __right = new (tbb::task::allocate_additional_child_of(*parent())) - merge_task(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _M_cleanup, _M_leaf_merge); + __merge_task(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _M_cleanup, _M_leaf_merge); tbb::task::spawn(*__right); tbb::task::recycle_as_continuation(); _M_xe = __xm; @@ -477,7 +478,7 @@ } template -class stable_sort_task : public tbb::task +class __stable_sort_task : public tbb::task { public: typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; @@ -495,8 +496,8 @@ _SizeType _M_nsort; public: - stable_sort_task(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __zs, - int32_t __inplace, _Compare __comp, _LeafSort __leaf_sort, _SizeType __n) + __stable_sort_task(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __zs, + int32_t __inplace, _Compare __comp, _LeafSort __leaf_sort, _SizeType __n) : _M_xs(__xs), _M_xe(__xe), _M_zs(__zs), _M_comp(__comp), _M_leaf_sort(__leaf_sort), _M_inplace(__inplace), _M_nsort(__n) { @@ -504,7 +505,7 @@ }; //! Binary operator that does nothing -struct binary_no_op +struct __binary_no_op { template void operator()(_T, _T) @@ -516,7 +517,7 @@ template tbb::task* -stable_sort_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _LeafSort>::execute() +__stable_sort_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _LeafSort>::execute() { const _SizeType __n = _M_xe - _M_xs; const _SizeType __nmerge = _M_nsort > 0 ? _M_nsort : __n; @@ -525,7 +526,7 @@ { _M_leaf_sort(_M_xs, _M_xe, _M_comp); if (_M_inplace != 2) - init_buf(_M_xs, _M_xe, _M_zs, _M_inplace == 0); + __init_buf(_M_xs, _M_xe, _M_zs, _M_inplace == 0); return NULL; } else @@ -539,18 +540,19 @@ _RandomAccessIterator1 __first2) { return std::move(__first1, __last1, __first2); }; if (_M_inplace == 2) __m = new (allocate_continuation()) - merge_task<_RandomAccessIterator2, _RandomAccessIterator2, _RandomAccessIterator1, _Compare, - serial_destroy, serial_move_merge>( - _M_zs, __zm, __zm, __ze, _M_xs, _M_comp, serial_destroy(), - serial_move_merge(__nmerge, __move_values, - __move_sequences)); + __merge_task<_RandomAccessIterator2, _RandomAccessIterator2, _RandomAccessIterator1, _Compare, + __serial_destroy, + __serial_move_merge>( + _M_zs, __zm, __zm, __ze, _M_xs, _M_comp, __serial_destroy(), + __serial_move_merge(__nmerge, __move_values, + __move_sequences)); else if (_M_inplace) __m = new (allocate_continuation()) - merge_task<_RandomAccessIterator2, _RandomAccessIterator2, _RandomAccessIterator1, _Compare, - binary_no_op, serial_move_merge>( - _M_zs, __zm, __zm, __ze, _M_xs, _M_comp, binary_no_op(), - serial_move_merge(__nmerge, __move_values, - __move_sequences)); + __merge_task<_RandomAccessIterator2, _RandomAccessIterator2, _RandomAccessIterator1, _Compare, + __binary_no_op, __serial_move_merge>( + _M_zs, __zm, __zm, __ze, _M_xs, _M_comp, __binary_no_op(), + __serial_move_merge(__nmerge, __move_values, + __move_sequences)); else { auto __move_values = [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = std::move(*__x); }; @@ -559,15 +561,15 @@ return std::move(__first1, __last1, __first2); }; __m = new (allocate_continuation()) - merge_task<_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _Compare, - binary_no_op, serial_move_merge>( - _M_xs, __xm, __xm, _M_xe, _M_zs, _M_comp, binary_no_op(), - serial_move_merge(__nmerge, __move_values, - __move_sequences)); + __merge_task<_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _Compare, + __binary_no_op, __serial_move_merge>( + _M_xs, __xm, __xm, _M_xe, _M_zs, _M_comp, __binary_no_op(), + __serial_move_merge(__nmerge, __move_values, + __move_sequences)); } __m->set_ref_count(2); task* __right = new (__m->allocate_child()) - stable_sort_task(__xm, _M_xe, __zm, !_M_inplace, _M_comp, _M_leaf_sort, __nmerge); + __stable_sort_task(__xm, _M_xe, __zm, !_M_inplace, _M_comp, _M_leaf_sort, __nmerge); spawn(*__right); recycle_as_child_of(*__m); _M_xe = __xm; @@ -578,8 +580,8 @@ template void -parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, - _LeafSort __leaf_sort, std::size_t __nsort = 0) +__parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, + _LeafSort __leaf_sort, std::size_t __nsort = 0) { tbb::this_task_arena::isolate([=, &__nsort]() { //sorting based on task tree and parallel merge @@ -593,10 +595,10 @@ if (__n > __sort_cut_off) { assert(__nsort > 0 && __nsort <= __n); - buffer<_ValueType> __buf(__n); + __buffer<_ValueType> __buf(__n); using tbb::task; task::spawn_root_and_wait(*new (task::allocate_root()) - stable_sort_task<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>( + __stable_sort_task<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>( __xs, __xe, (_ValueType*)__buf.get(), 2, __comp, __leaf_sort, __nsort)); return; } @@ -612,9 +614,9 @@ template void -parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, - _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, - _LeafMerge __leaf_merge) +__parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, + _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, + _LeafMerge __leaf_merge) { typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; @@ -629,11 +631,11 @@ else { tbb::this_task_arena::isolate([=]() { - typedef merge_task<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, _Compare, - par_backend::binary_no_op, _LeafMerge> + typedef __merge_task<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, _Compare, + par_backend::__binary_no_op, _LeafMerge> _TaskType; tbb::task::spawn_root_and_wait(*new (tbb::task::allocate_root()) _TaskType( - __xs, __xe, __ys, __ye, __zs, __comp, par_backend::binary_no_op(), __leaf_merge)); + __xs, __xe, __ys, __ye, __zs, __comp, par_backend::__binary_no_op(), __leaf_merge)); }); } } @@ -643,7 +645,7 @@ //------------------------------------------------------------------------ template void -parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) { //TODO: a version of tbb::this_task_arena::isolate with variadic arguments pack should be added in the future tbb::this_task_arena::isolate([&]() { tbb::parallel_invoke(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); }); Index: include/pstl/internal/parallel_backend_utils.h =================================================================== --- include/pstl/internal/parallel_backend_utils.h +++ include/pstl/internal/parallel_backend_utils.h @@ -20,7 +20,7 @@ { //! Destroy sequence [xs,xe) -struct serial_destroy +struct __serial_destroy { template void @@ -37,13 +37,13 @@ //! Merge sequences [__xs,__xe) and [__ys,__ye) to output sequence [__zs,(__xe-__xs)+(__ye-__ys)), using std::move template -struct serial_move_merge +struct __serial_move_merge { const std::size_t _M_nmerge; _MoveValues _M_move_values; _MoveSequences _M_move_sequences; - explicit serial_move_merge(std::size_t __nmerge, _MoveValues __move_values, _MoveSequences __move_sequences) + explicit __serial_move_merge(std::size_t __nmerge, _MoveValues __move_values, _MoveSequences __move_sequences) : _M_nmerge(__nmerge), _M_move_values(__move_values), _M_move_sequences(__move_sequences) { } @@ -107,7 +107,7 @@ template void -init_buf(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _OutputIterator __zs, bool __bMove) +__init_buf(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _OutputIterator __zs, bool __bMove) { const _OutputIterator __ze = __zs + (__xe - __xs); typedef typename std::iterator_traits<_OutputIterator>::value_type _ValueType; @@ -125,8 +125,9 @@ } } +// TODO is this actually used anywhere? template -class stack +class __stack { typedef typename std::iterator_traits::value_type _ValueType; typedef typename std::iterator_traits<_ValueType*>::difference_type _DifferenceType; @@ -135,14 +136,14 @@ _ValueType* _M_ptr; _DifferenceType _M_maxsize; - stack(const stack&) = delete; + __stack(const __stack&) = delete; void - operator=(const stack&) = delete; + operator=(const __stack&) = delete; public: - stack(_DifferenceType __max_size) : _M_buf(__max_size), _M_maxsize(__max_size) { _M_ptr = _M_buf.get(); } + __stack(_DifferenceType __max_size) : _M_buf(__max_size), _M_maxsize(__max_size) { _M_ptr = _M_buf.get(); } - ~stack() + ~__stack() { assert(size() <= _M_maxsize); while (!empty()) Index: include/pstl/internal/parallel_impl.h =================================================================== --- include/pstl/internal/parallel_impl.h +++ include/pstl/internal/parallel_impl.h @@ -26,32 +26,32 @@ Each f[i,j) must return a value in [i,j). */ template _Index -parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) +__parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) { typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; _DifferenceType __initial_dist = __b_first ? __n : -1; std::atomic<_DifferenceType> __extremum(__initial_dist); // TODO: find out what is better here: parallel_for or parallel_reduce - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { - // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of - // why using a shared variable scales fairly well in this situation. - if (__comp(__i - __first, __extremum)) - { - _Index __res = __f(__i, __j); - // If not '__last' returned then we found what we want so put this to extremum - if (__res != __j) - { - const _DifferenceType __k = __res - __first; - for (_DifferenceType __old = __extremum; __comp(__k, __old); - __old = __extremum) - { - __extremum.compare_exchange_weak(__old, __k); - } - } - } - }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { + // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of + // why using a shared variable scales fairly well in this situation. + if (__comp(__i - __first, __extremum)) + { + _Index __res = __f(__i, __j); + // If not '__last' returned then we found what we want so put this to extremum + if (__res != __j) + { + const _DifferenceType __k = __res - __first; + for (_DifferenceType __old = __extremum; __comp(__k, __old); + __old = __extremum) + { + __extremum.compare_exchange_weak(__old, __k); + } + } + } + }); return __extremum != __initial_dist ? __first + __extremum : __last; } @@ -61,17 +61,17 @@ //! Return true if brick f[i,j) returns true for some subrange [i,j) of [first,last) template bool -parallel_or(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) +__parallel_or(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) { std::atomic __found(false); - par_backend::parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__f, &__found](_Index __i, _Index __j) { - if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) - { - __found.store(true, std::memory_order_relaxed); - par_backend::cancel_execution(); - } - }); + par_backend::__parallel_for(std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__f, &__found](_Index __i, _Index __j) { + if (!__found.load(std::memory_order_relaxed) && __f(__i, __j)) + { + __found.store(true, std::memory_order_relaxed); + par_backend::__cancel_execution(); + } + }); return __found; }