Index: libcxx/include/experimental/simd =================================================================== --- libcxx/include/experimental/simd +++ libcxx/include/experimental/simd @@ -968,12 +968,24 @@ _LIBCPP_INLINE_VAR constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value; #endif -template + +// NOTE: _Abis... is the extension proposed by P0820, allowing the APIs to +// propagate _StorageKind during transforming input type(s) to the output type. +template struct abi_for_size { using type = simd_abi::fixed_size<_Np>; }; -template -using abi_for_size_t = typename abi_for_size<_Tp, _Np>::type; + +template +struct abi_for_size<_Tp, _Np, __simd_abi<__kind, __old_size>...> { + using type = __simd_abi<__kind, _Np>; +}; + +template +using abi_for_size_t = typename abi_for_size<_Tp, _Np, _Abis...>::type; + +template +using rebind_abi_t = abi_for_size_t<_Tp, _Np, _Abis...>; template > struct simd_size; @@ -1080,32 +1092,137 @@ template simd_mask<_Tp> to_compatible(const fixed_size_simd_mask<_Tp, _Np>&) noexcept; +template +_TupleType __split_tuple_impl(_Tp** __buffers, + std::index_sequence<__indicies...>) { + return _TupleType(typename std::tuple_element<__indicies, _TupleType>::type( + __buffers[__indicies], element_aligned_tag())...); +} + template -tuple>...> split(const simd<_Tp, _Abi>&); +typename std::enable_if< + __variadic_sum(__sizes...) == simd<_Tp, _Abi>::size(), + tuple>...>>::type +split(const simd<_Tp, _Abi>& __v) { + _Tp __buffer[__v.size()]; + __v.copy_to(__buffer, element_aligned_tag()); + + _Tp* __buffers[sizeof...(__sizes)]; + { + size_t __offsets[] = {__sizes...}; + size_t __cur_offset = 0; + for (size_t __i = 0; __i < sizeof...(__sizes); __i++) { + __buffers[__i] = __buffer + __cur_offset; + __cur_offset += __offsets[__i]; + } + } + + return __split_tuple_impl< + tuple>...>>( + __buffers, std::make_index_sequence()); +} template -tuple>...> +tuple>...> split(const simd_mask<_Tp, _Abi>&); +// NOTE: P0820 extension +template +typename std::enable_if< + simd_size<_Tp, _Abi>::value % __array_size == 0, + array::value / __array_size, _Abi>>, + __array_size>>::type +split_by(const simd<_Tp, _Abi>& __v) { + array::value / __array_size, + _Abi>>, + __array_size> + __ret; + constexpr size_t __element_size = simd_size<_Tp, _Abi>::value / __array_size; + for (size_t __i = 0; __i < __v.size(); __i++) { + __ret[__i / __element_size][__i % __element_size] = __v[__i]; + } + return __ret; +} + template -array<_SimdType, simd_size::value / - _SimdType::size()> -split(const simd&); +typename std::enable_if< + is_simd<_SimdType>::value && + simd_size::value % + _SimdType::size() == + 0, + array<_SimdType, simd_size::value / + _SimdType::size()>>::type +split(const simd& __v) { + return split_by::value / + _SimdType::size()>(__v); +} + +template +array< + simd_mask<_Tp, rebind_abi_t<_Tp, simd_size<_Tp, _Abi>::value / __array_size, + _Abi>>, + __array_size> +split_by(const simd_mask<_Tp, _Abi>& x); template array<_SimdType, simd_size::value / _SimdType::size()> split(const simd_mask&); +template +void __concat_tuple_impl(_TupleType __vs, _Tp** __buffers, + std::index_sequence<__indicies...>) { + int __unused[] = {(std::get<__indicies>(__vs).copy_to(__buffers[__indicies], + element_aligned_tag()), + 0)...}; + (void)__unused; +} + template -simd<_Tp, abi_for_size_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...)>> -concat(const simd<_Tp, _Abis>&...); +simd<_Tp, + rebind_abi_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...), + _Abis...>> +concat(const simd<_Tp, _Abis>&... __vs) { + constexpr auto __size = + __variadic_sum(simd_size<_Tp, _Abis>::value...); + _Tp __buffer[__size]; + _Tp* __buffers[__size]; + { + size_t __offsets[] = {simd_size<_Tp, _Abis>::value...}; + size_t __cur_offset = 0; + for (size_t __i = 0; __i < __size; __i++) { + __buffers[__i] = __buffer + __cur_offset; + __cur_offset += __offsets[__i]; + } + } + __concat_tuple_impl(std::forward_as_tuple(__vs...), __buffers, + std::make_index_sequence()); + return simd<_Tp, rebind_abi_t<_Tp, __size, _Abis...>>(__buffer, + element_aligned_tag()); +} + +template +simd<_Tp, rebind_abi_t<_Tp, _Np * simd<_Tp, _Abi>::size(), _Abi>> +concat(const std::array, _Np>& __arr) { + simd<_Tp, rebind_abi_t<_Tp, _Np * simd<_Tp, _Abi>::size(), _Abi>> __v; + for (size_t __i = 0; __i < __v.size(); __i++) { + __v[__i] = + __arr[__i / simd<_Tp, _Abi>::size()][__i % simd<_Tp, _Abi>::size()]; + } + return __v; +} template -simd_mask<_Tp, - abi_for_size_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...)>> +simd_mask<_Tp, rebind_abi_t< + _Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...), + _Abis...>> concat(const simd_mask<_Tp, _Abis>&...); +template +simd_mask<_Tp, rebind_abi_t<_Tp, _Np, _Abi>> +concat(const std::array, _Np>&); + // reductions [simd.mask.reductions] template bool all_of(const simd_mask<_Tp, _Abi>&) noexcept; Index: libcxx/test/std/experimental/simd/simd.horizontal/concat.pass.cpp =================================================================== --- /dev/null +++ libcxx/test/std/experimental/simd/simd.horizontal/concat.pass.cpp @@ -0,0 +1,80 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++98, c++03 + +// +// +// template +// simd + ...)>> +// concat(const simd&...); +// +// template +// simd, Abi>> +// concat(const std::array, N>& __v); +// +// template +// simd_mask + ...)>> +// concat(const simd_mask&...); +// +// template +// simd_mask, Abi>> +// concat(const std::array, N>&); + +#include +#include +#include + +using namespace std::experimental::parallelism_v2; + +void test_concat() { + auto v = concat(fixed_size_simd([](int i) { return i; }), + fixed_size_simd([](int i) { return i + 1; }), + fixed_size_simd([](int i) { return i + 3; })); + static_assert(v.size() == 6, ""); + assert(v[0] == 0); + assert(v[1] == 1); + assert(v[2] == 2); + assert(v[3] == 3); + assert(v[4] == 4); + assert(v[5] == 5); +} + +void test_concat_array() { + std::array, 2> arr; + arr[0] = fixed_size_simd([](int) { return 0; }); + arr[1] = fixed_size_simd([](int) { return 1; }); + + auto v = concat(arr); + static_assert(v.size() == 4, ""); + assert(v[0] == 0); + assert(v[1] == 0); + assert(v[2] == 1); + assert(v[3] == 1); +} + +void compile_split_propagate_abi() { + static_assert(std::is_same(), simd()))::abi_type, + rebind_abi_t::size() * 2, + simd_abi::compatible>>::value, + ""); + + static_assert( + std::is_same(), + native_simd()))::abi_type, + rebind_abi_t::size() * 2, + simd_abi::native>>::value, + ""); +} + +int main() { + test_concat(); + test_concat_array(); +} Index: libcxx/test/std/experimental/simd/simd.horizontal/split.pass.cpp =================================================================== --- /dev/null +++ libcxx/test/std/experimental/simd/simd.horizontal/split.pass.cpp @@ -0,0 +1,125 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++98, c++03 + +// +// +// template +// tuple>...> split(const simd&); +// +// template +// tuple>...> split(const simd_mask&); +// +// template +// array / V::size()> split( +// const simd&); +// +// template +// array / V::size()> split( +// const simd_mask&); +// +// template +// array / n, A>>, n> split_by( +// const simd& x); +// +// template +// array / n, A>>, n> split_by( +// const simd_mask& x); + +#include +#include +#include + +using namespace std::experimental::parallelism_v2; + +void test_split() { + auto t = split<1, 2, 3>(fixed_size_simd([](int i) { return i; })); + static_assert(std::tuple_size::value == 3, ""); + + assert(std::get<0>(t).size() == 1); + assert(std::get<0>(t)[0] == 0); + + assert(std::get<1>(t).size() == 2); + assert(std::get<1>(t)[0] == 1); + assert(std::get<1>(t)[1] == 2); + + assert(std::get<2>(t).size() == 3); + assert(std::get<2>(t)[0] == 3); + assert(std::get<2>(t)[1] == 4); + assert(std::get<2>(t)[2] == 5); +} + +void test_split_array() { + { + auto arr = split_by<2>(fixed_size_simd([](int i) { return i; })); + static_assert(arr.size() == 2, ""); + + assert(arr[0].size() == 3); + assert(arr[0][0] == 0); + assert(arr[0][1] == 1); + assert(arr[0][2] == 2); + + assert(arr[1].size() == 3); + assert(arr[1][0] == 3); + assert(arr[1][1] == 4); + assert(arr[1][2] == 5); + } + { + auto arr = split>( + fixed_size_simd([](int i) { return i; })); + static_assert(arr.size() == 2, ""); + + assert(arr[0].size() == 3); + assert(arr[0][0] == 0); + assert(arr[0][1] == 1); + assert(arr[0][2] == 2); + + assert(arr[1].size() == 3); + assert(arr[1][0] == 3); + assert(arr[1][1] == 4); + assert(arr[1][2] == 5); + } +} + +void compile_split_propagate_abi() { + using compatible_simd_half = + simd::size() / 2, simd_abi::compatible>>; + using native_simd_half = + simd::size() / 2, + simd_abi::native>>; + + static_assert( + std::is_same< + decltype( + split::size() / 2, simd::size() / 2>(simd())), + std::tuple>::value, + ""); + + static_assert( + std::is_same::size() / 2, + native_simd::size() / 2>(native_simd())), + std::tuple>::value, + ""); + + static_assert(std::is_same(simd())), + std::array>::value, + ""); + + static_assert(std::is_same(native_simd())), + std::array>::value, + ""); +} + +int main() { + test_split(); + test_split_array(); +}