Index: libcxx/include/experimental/simd =================================================================== --- libcxx/include/experimental/simd +++ libcxx/include/experimental/simd @@ -1368,6 +1368,19 @@ }; #endif // !defined(_LIBCPP_HAS_NO_INT128) +#if _LIBCPP_STD_VER > 11 +using __simd_plus_op = std::plus<>; +#else +struct __simd_plus_op { + template + inline auto operator()(_T1&& __t, _T2&& __u) const + noexcept(noexcept(std::forward<_T1>(__t) + std::forward<_T2>(__u))) + -> decltype(std::forward<_T1>(__t) + std::forward<_T2>(__u)) { + return std::forward<_T1>(__t) + std::forward<_T2>(__u); + } +}; +#endif + _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD_ABI @@ -2071,9 +2084,54 @@ return 0; } +template +std::array::size() / 2, _Abi>>, 2> +__deinterleave_impl(const simd<_Tp, _Abi>& __v, + std::index_sequence<__indices...>) { + return {{__simd_shuffle<(2 * __indices)...>(__v, __v), + __simd_shuffle<(2 * __indices + 1)...>(__v, __v)}}; +} + +template +std::array::size() / 2, _Abi>>, 2> +__deinterleave(const simd<_Tp, _Abi>& __v) { + static_assert(simd<_Tp, _Abi>::size() % 2 == 0, ""); + return __deinterleave_impl( + __v, std::make_index_sequence::size() / 2>()); +} + // reductions [simd.reductions] template -typename _SimdType::value_type __reduce(const _SimdType& __v, _BinaryOp __op) { +typename std::enable_if<_SimdType::size() == 1, + typename _SimdType::value_type>::type +__reduce(const _SimdType& __v, _BinaryOp) { + return __v[0]; +} + +template +typename std::enable_if<(_SimdType::size() > 1 && is_simd<_SimdType>::value && + __floor_pow_of_2(_SimdType::size()) == + _SimdType::size()), + typename _SimdType::value_type>::type +__reduce(const _SimdType& __v, _BinaryOp __op) { + if (std::is_same<_BinaryOp, __simd_plus_op>::value) { + using _Tp = typename _SimdType::value_type; + if (std::is_integral<_Tp>::value && sizeof(_Tp) < 8) { + auto __arr = __deinterleave(__v); + return __reduce(__arr[0] + __arr[1], __op); + } + } + auto __arr = split_by<2>(__v); + return __reduce(__op(__arr[0], __arr[1]), __op); +} + +template +typename std::enable_if<(_SimdType::size() > 1 && + !(is_simd<_SimdType>::value && + __floor_pow_of_2(_SimdType::size()) == + _SimdType::size())), + typename _SimdType::value_type>::type +__reduce(const _SimdType& __v, _BinaryOp __op) { auto __acc = __v[0]; for (size_t __i = 1; __i < __v.size(); __i++) { __acc = __op(__acc, __v[__i]); @@ -2082,7 +2140,29 @@ } template -typename _SimdType::value_type __hmin(const _SimdType& __v) { +typename std::enable_if<_SimdType::size() == 1, + typename _SimdType::value_type>::type +__hmin(const _SimdType& __v) { + return __v[0]; +} + +template +typename std::enable_if<(_SimdType::size() > 1 && is_simd<_SimdType>::value && + __floor_pow_of_2(_SimdType::size()) == + _SimdType::size()), + typename _SimdType::value_type>::type +__hmin(const _SimdType& __v) { + auto __arr = split_by<2>(__v); + return __hmin(min(__arr[0], __arr[1])); +} + +template +typename std::enable_if<(_SimdType::size() > 1 && + !(is_simd<_SimdType>::value && + __floor_pow_of_2(_SimdType::size()) == + _SimdType::size())), + typename _SimdType::value_type>::type +__hmin(const _SimdType& __v) { auto __acc = __v[0]; for (size_t __i = 1; __i < __v.size(); __i++) { __acc = __acc > __v[__i] ? __v[__i] : __acc; @@ -2091,7 +2171,29 @@ } template -typename _SimdType::value_type __hmax(const _SimdType& __v) { +typename std::enable_if<_SimdType::size() == 1, + typename _SimdType::value_type>::type +__hmax(const _SimdType& __v) { + return __v[0]; +} + +template +typename std::enable_if<(_SimdType::size() > 1 && is_simd<_SimdType>::value && + __floor_pow_of_2(_SimdType::size()) == + _SimdType::size()), + typename _SimdType::value_type>::type +__hmax(const _SimdType& __v) { + auto __arr = split_by<2>(__v); + return __hmax(max(__arr[0], __arr[1])); +} + +template +typename std::enable_if<(_SimdType::size() > 1 && + !(is_simd<_SimdType>::value && + __floor_pow_of_2(_SimdType::size()) == + _SimdType::size())), + typename _SimdType::value_type>::type +__hmax(const _SimdType& __v) { auto __acc = __v[0]; for (size_t __i = 1; __i < __v.size(); __i++) { __acc = __acc < __v[__i] ? __v[__i] : __acc; @@ -2099,7 +2201,7 @@ return __acc; } -template > +template _Tp reduce(const simd<_Tp, _Abi>& __v, _BinaryOp __op = _BinaryOp()) { return __reduce(__v, __op); } @@ -3017,38 +3119,41 @@ template typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>& __w, - plus __op = {}) { + __simd_plus_op __op = {}) { return reduce(__w, typename _SimdType::value_type(0), __op); } +#if _LIBCPP_STD_VER > 11 + template typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>& __w, - multiplies __op) { + multiplies<> __op) { return reduce(__w, typename _SimdType::value_type(1), __op); } template typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>& __w, - bit_and __op) { + bit_and<> __op) { return reduce(__w, typename _SimdType::value_type(-1), __op); } template typename _SimdType::value_type -reduce(const const_where_expression<_MaskType, _SimdType>& __w, - bit_or __op) { +reduce(const const_where_expression<_MaskType, _SimdType>& __w, bit_or<> __op) { return reduce(__w, typename _SimdType::value_type(0), __op); } template typename _SimdType::value_type reduce(const const_where_expression<_MaskType, _SimdType>& __w, - bit_xor __op) { + bit_xor<> __op) { return reduce(__w, typename _SimdType::value_type(0), __op); } +#endif // _LIBCPP_STD_VER > 11 + template typename _SimdType::value_type hmin(const const_where_expression<_MaskType, _SimdType>& __w) { Index: libcxx/test/std/experimental/simd/simd.horizontal/hmax.pass.cpp =================================================================== --- libcxx/test/std/experimental/simd/simd.horizontal/hmax.pass.cpp +++ libcxx/test/std/experimental/simd/simd.horizontal/hmax.pass.cpp @@ -20,22 +20,47 @@ using namespace std::experimental::parallelism_v2; -void test_hmax_simd() { +template +void test_hmax_simd_power_of_2() { { int a[] = {2, 5, -4, 6}; - assert(hmax(fixed_size_simd(a, element_aligned_tag())) == 6); + assert(hmax(SimdType(a, element_aligned_tag())) == 6); } { int a[] = {6, 2, 5, -4}; - assert(hmax(fixed_size_simd(a, element_aligned_tag())) == 6); + assert(hmax(SimdType(a, element_aligned_tag())) == 6); } { int a[] = {-4, 6, 2, 5}; - assert(hmax(fixed_size_simd(a, element_aligned_tag())) == 6); + assert(hmax(SimdType(a, element_aligned_tag())) == 6); } { int a[] = {5, -4, 6, 2}; - assert(hmax(fixed_size_simd(a, element_aligned_tag())) == 6); + assert(hmax(SimdType(a, element_aligned_tag())) == 6); + } +} + +template +void test_hmax_simd() { + { + int a[] = {0, 2, 5, -4, 6}; + assert(hmax(SimdType(a, element_aligned_tag())) == 6); + } + { + int a[] = {6, 0, 2, 5, -4}; + assert(hmax(SimdType(a, element_aligned_tag())) == 6); + } + { + int a[] = {0, 6, 2, 5, -4}; + assert(hmax(SimdType(a, element_aligned_tag())) == 6); + } + { + int a[] = {0, -4, 6, 2, 5}; + assert(hmax(SimdType(a, element_aligned_tag())) == 6); + } + { + int a[] = {0, 5, -4, 6, 2}; + assert(hmax(SimdType(a, element_aligned_tag())) == 6); } } @@ -67,6 +92,10 @@ } int main() { - test_hmax_simd(); + test_hmax_simd_power_of_2>(); + test_hmax_simd_power_of_2< + simd>>>(); + test_hmax_simd>(); + test_hmax_simd>>>(); test_hmax_mask(); } Index: libcxx/test/std/experimental/simd/simd.horizontal/hmin.pass.cpp =================================================================== --- libcxx/test/std/experimental/simd/simd.horizontal/hmin.pass.cpp +++ libcxx/test/std/experimental/simd/simd.horizontal/hmin.pass.cpp @@ -20,22 +20,47 @@ using namespace std::experimental::parallelism_v2; -void test_hmin_simd() { +template +void test_hmin_simd_power_of_2() { { int a[] = {2, 5, -4, 6}; - assert(hmin(fixed_size_simd(a, element_aligned_tag())) == -4); + assert(hmin(SimdType(a, element_aligned_tag())) == -4); } { int a[] = {6, 2, 5, -4}; - assert(hmin(fixed_size_simd(a, element_aligned_tag())) == -4); + assert(hmin(SimdType(a, element_aligned_tag())) == -4); } { int a[] = {-4, 6, 2, 5}; - assert(hmin(fixed_size_simd(a, element_aligned_tag())) == -4); + assert(hmin(SimdType(a, element_aligned_tag())) == -4); } { int a[] = {5, -4, 6, 2}; - assert(hmin(fixed_size_simd(a, element_aligned_tag())) == -4); + assert(hmin(SimdType(a, element_aligned_tag())) == -4); + } +} + +template +void test_hmin_simd() { + { + int a[] = {0, 2, 5, -4, 6}; + assert(hmin(SimdType(a, element_aligned_tag())) == -4); + } + { + int a[] = {0, 6, 2, 5, -4}; + assert(hmin(SimdType(a, element_aligned_tag())) == -4); + } + { + int a[] = {-4, 0, 5, 6, 2}; + assert(hmin(SimdType(a, element_aligned_tag())) == -4); + } + { + int a[] = {0, -4, 6, 2, 5}; + assert(hmin(SimdType(a, element_aligned_tag())) == -4); + } + { + int a[] = {0, 5, -4, 6, 2}; + assert(hmin(SimdType(a, element_aligned_tag())) == -4); } } @@ -60,6 +85,12 @@ } int main() { - test_hmin_simd(); + test_hmin_simd_power_of_2>(); + test_hmin_simd_power_of_2< + simd>>>(); + test_hmin_simd>(); + test_hmin_simd>>>(); + test_hmin_simd>(); + test_hmin_simd>>>(); test_hmin_mask(); } Index: libcxx/test/std/experimental/simd/simd.horizontal/reduce.pass.cpp =================================================================== --- libcxx/test/std/experimental/simd/simd.horizontal/reduce.pass.cpp +++ libcxx/test/std/experimental/simd/simd.horizontal/reduce.pass.cpp @@ -38,30 +38,38 @@ #include #include +#include "test_macros.h" + using namespace std::experimental::parallelism_v2; inline int factorial(int n) { return n == 1 ? 1 : n * factorial(n - 1); } +template void test_reduce_simd() { - int n = (int)native_simd::size(); - assert(reduce(native_simd([](int i) { return i; })) == n * (n - 1) / 2); - assert(reduce(native_simd([](int i) { return i; }), std::plus()) == + int n = (int)SimdType::size(); + assert(reduce(SimdType([](int i) { return i; })) == n * (n - 1) / 2); + +#if TEST_STD_VER >= 14 + assert(reduce(SimdType([](int i) { return i; }), std::plus<>()) == n * (n - 1) / 2); - assert(reduce(native_simd([](int i) { return i + 1; }), - std::multiplies()) == factorial(n)); + assert(reduce(SimdType([](int i) { return i + 1; }), std::multiplies<>()) == + factorial(n)); +#endif } void test_reduce_mask() { { fixed_size_simd a([](int i) { return i; }); - assert(reduce(where(a < 2, a), 0, std::plus()) == 0 + 1); - assert(reduce(where(a >= 2, a), 1, std::multiplies()) == 2 * 3); assert(reduce(where(a >= 2, a)) == 2 + 3); - assert(reduce(where(a >= 2, a), std::plus()) == 2 + 3); - assert(reduce(where(a >= 2, a), std::multiplies()) == 2 * 3); - assert(reduce(where(a >= 2, a), std::bit_and()) == (2 & 3)); - assert(reduce(where(a >= 2, a), std::bit_or()) == (2 | 3)); - assert(reduce(where(a >= 2, a), std::bit_xor()) == (2 ^ 3)); +#if TEST_STD_VER >= 14 + assert(reduce(where(a < 2, a), 0, std::plus<>()) == 0 + 1); + assert(reduce(where(a >= 2, a), 1, std::multiplies<>()) == 2 * 3); + assert(reduce(where(a >= 2, a), std::plus<>()) == 2 + 3); + assert(reduce(where(a >= 2, a), std::multiplies<>()) == 2 * 3); + assert(reduce(where(a >= 2, a), std::bit_and<>()) == (2 & 3)); + assert(reduce(where(a >= 2, a), std::bit_or<>()) == (2 | 3)); + assert(reduce(where(a >= 2, a), std::bit_xor<>()) == (2 ^ 3)); +#endif } { fixed_size_simd_mask a; @@ -70,20 +78,24 @@ a[2] = true; a[3] = false; assert(reduce(where(fixed_size_simd_mask(true), a)) == true); +#if TEST_STD_VER >= 14 assert(reduce(where(fixed_size_simd_mask(true), a), - std::plus()) == true); + std::plus<>()) == true); assert(reduce(where(fixed_size_simd_mask(true), a), - std::multiplies()) == false); + std::multiplies<>()) == false); assert(reduce(where(fixed_size_simd_mask(true), a), - std::bit_and()) == false); + std::bit_and<>()) == false); assert(reduce(where(fixed_size_simd_mask(true), a), - std::bit_or()) == true); + std::bit_or<>()) == true); assert(reduce(where(fixed_size_simd_mask(true), a), - std::bit_xor()) == false); + std::bit_xor<>()) == false); +#endif } } int main() { - test_reduce_simd(); + test_reduce_simd>(); + test_reduce_simd>(); + test_reduce_simd>(); test_reduce_mask(); }