Index: libcxx/include/experimental/simd =================================================================== --- libcxx/include/experimental/simd +++ libcxx/include/experimental/simd @@ -685,6 +685,11 @@ friend struct simd_mask; public: + using __raw_type = std::array<_Tp, __num_element>; + + __simd_storage() = default; + void __assign(__raw_type __raw) { __storage_ = __raw; } + __raw_type __raw() const { return __storage_; } _Tp __get(size_t __index) const noexcept { return __storage_[__index]; }; void __set(size_t __index, _Tp __val) noexcept { __storage_[__index] = __val; @@ -702,6 +707,11 @@ friend struct simd_mask; public: + using __raw_type = _Tp; + + __simd_storage() = default; + void __assign(__raw_type __raw) { __storage_ = __raw; } + __raw_type __raw() const { return __storage_; } _Tp __get(size_t __index) const noexcept { return (&__storage_)[__index]; }; void __set(size_t __index, _Tp __val) noexcept { (&__storage_)[__index] = __val; @@ -804,6 +814,11 @@ friend struct simd_mask; public: + using __raw_type = _StorageType; + + __simd_storage() = default; + void __assign(__raw_type __raw) { __storage_ = __raw; } + __raw_type __raw() const { return __storage_; } _Tp __get(size_t __index) const noexcept { return __storage_[__index]; }; void __set(size_t __index, _Tp __val) noexcept { __storage_[__index] = __val; @@ -1036,12 +1051,22 @@ template _LIBCPP_INLINE_VAR constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value; -template + +template struct abi_for_size { using type = simd_abi::fixed_size<_Np>; }; -template -using abi_for_size_t = typename abi_for_size<_Tp, _Np>::type; + +template +struct abi_for_size<_Tp, _Np, __simd_abi<__kind, __old_size>...> { + using type = __simd_abi<__kind, _Np>; +}; + +template +using abi_for_size_t = typename abi_for_size<_Tp, _Np, _Abis...>::type; + +template +using rebind_abi_t = abi_for_size_t<_Tp, _Np, _Abis...>; template > struct simd_size; @@ -1146,32 +1171,184 @@ template simd_mask<_Tp> to_compatible(const fixed_size_simd_mask<_Tp, _Np>&) noexcept; +template +_TupleType __split_tuple_impl(_Tp** __buffers, + std::index_sequence<__indices...>) { + return _TupleType(typename std::tuple_element<__indices, _TupleType>::type( + __buffers[__indices], element_aligned_tag())...); +} + +#if !defined(_LIBCPP_HAS_NO_VECTOR_EXTENSION) && defined(_LIBCPP_COMPILER_CLANG) +template +simd<_Tp, __simd_abi<_StorageKind::_VecExt, sizeof...(__indices)>> +__simd_shuffle( + const simd<_Tp, __simd_abi<_StorageKind::_VecExt, __num_element>>& __v, + const simd<_Tp, __simd_abi<_StorageKind::_VecExt, __num_element>>& __u, + std::index_sequence<__indices...> = {}) { + simd<_Tp, __simd_abi<_StorageKind::_VecExt, sizeof...(__indices)>> __ret; + __ret.__s_.__assign(__builtin_shufflevector(__v.__s_.__raw(), + __u.__s_.__raw(), __indices...)); + return __ret; +} +#endif + +template +simd<_Tp, rebind_abi_t<_Tp, sizeof...(__indices), _Abi1, _Abi2>> +__simd_shuffle(const simd<_Tp, _Abi1>& __v, const simd<_Tp, _Abi2>& __u, + std::index_sequence<__indices...> = {}) { + simd<_Tp, rebind_abi_t<_Tp, sizeof...(__indices), _Abi1, _Abi2>> __ret; + size_t __i = 0; + for (size_t __index : {__indices...}) { + __ret[__i++] = + __index < __v.size() ? __v[__index] : __u[__index - __v.size()]; + } + return __ret; +} + +template +tuple< + simd<_Tp, rebind_abi_t<_Tp, __first_size, _Abi>>, + simd<_Tp, rebind_abi_t<_Tp, simd<_Tp, _Abi>::size() - __first_size, _Abi>>> +__split_to_two(const simd<_Tp, _Abi>& __v, std::index_sequence<__indices...>) { + static_assert(__first_size + sizeof...(__indices) == simd<_Tp, _Abi>::size(), + ""); + return std::make_tuple( + __simd_shuffle(__v, __v, std::make_index_sequence<__first_size>()), + __simd_shuffle<(__first_size + __indices)...>(__v, __v)); +} + +template +std::tuple> +__split_impl(const simd<_Tp, _Abi>& __v, + std::integral_constant) { + return std::make_tuple(__v); +} + +template +tuple>, + simd<_Tp, rebind_abi_t<_Tp, __rest, _Abi>>...> +__split_impl(const simd<_Tp, _Abi>& __v, + std::integral_constant, + std::integral_constant...) { + auto __res = __split_to_two<__first>( + __v, std::make_index_sequence::size() - __first>()); + return std::tuple_cat( + std::make_tuple(std::get<0>(__res)), + __split_impl(std::get<1>(__res), + std::integral_constant()...)); +} + template -tuple>...> split(const simd<_Tp, _Abi>&); +typename std::enable_if< + __variadic_sum(__sizes...) == simd<_Tp, _Abi>::size(), + tuple>...>>::type +split(const simd<_Tp, _Abi>& __v) { + return __split_impl(__v, std::integral_constant()...); +} template -tuple>...> +tuple>...> split(const simd_mask<_Tp, _Abi>&); +template +void __split_by_impl(const simd<_Tp, _Abi>& __v, + std::array, __array_size>* __arr, + std::index_sequence<__indices...>) { + auto __tp = split<(__indices, simd<_Tp, _Abi2>::size())...>(__v); + int __not_used[]{((*__arr)[__indices] = std::get<__indices>(__tp), 0)...}; + (void)__not_used; +} + +template +typename std::enable_if< + simd_size<_Tp, _Abi>::value % __array_size == 0, + array::value / __array_size, _Abi>>, + __array_size>>::type +split_by(const simd<_Tp, _Abi>& __v) { + array::value / __array_size, + _Abi>>, + __array_size> + __ret; + __split_by_impl(__v, &__ret, std::make_index_sequence<__array_size>()); + return __ret; +} + template -array<_SimdType, simd_size::value / - _SimdType::size()> -split(const simd&); +typename std::enable_if< + is_simd<_SimdType>::value && + simd_size::value % + _SimdType::size() == + 0, + array<_SimdType, simd_size::value / + _SimdType::size()>>::type +split(const simd& __v) { + return split_by::value / + _SimdType::size()>(__v); +} + +template +array< + simd_mask<_Tp, rebind_abi_t<_Tp, simd_size<_Tp, _Abi>::value / __array_size, + _Abi>>, + __array_size> +split_by(const simd_mask<_Tp, _Abi>& x); template array<_SimdType, simd_size::value / _SimdType::size()> split(const simd_mask&); +template +simd<_Tp, _Abi> __concat_variadic(const simd<_Tp, _Abi>& __v) { + return __v; +} + +template +simd<_Tp, rebind_abi_t<_Tp, + __variadic_sum(simd_size<_Tp, _Abi>::value, + simd_size<_Tp, _Abis>::value...), + _Abi, _Abis...>> +__concat_variadic(const simd<_Tp, _Abi>& __first, + const simd<_Tp, _Abis>&... __rest) { + return __simd_shuffle( + __first, __concat_variadic(__rest...), + std::make_index_sequence<__variadic_sum( + simd_size<_Tp, _Abi>::value, simd_size<_Tp, _Abis>::value...)>()); +} + template -simd<_Tp, abi_for_size_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...)>> -concat(const simd<_Tp, _Abis>&...); +simd<_Tp, + rebind_abi_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...), + _Abis...>> +concat(const simd<_Tp, _Abis>&... __vs) { + return __concat_variadic(__vs...); +} + +template +simd<_Tp, rebind_abi_t<_Tp, _Np * simd<_Tp, _Abi>::size(), _Abi>> +__concat_array(const std::array, _Np>& __arr, + std::index_sequence<__indices...>) { + return concat(__arr[__indices]...); +} + +template +simd<_Tp, rebind_abi_t<_Tp, _Np * simd<_Tp, _Abi>::size(), _Abi>> +concat(const std::array, _Np>& __arr) { + return __concat_array(__arr, std::make_index_sequence<_Np>()); +} template -simd_mask<_Tp, - abi_for_size_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...)>> +simd_mask<_Tp, rebind_abi_t< + _Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...), + _Abis...>> concat(const simd_mask<_Tp, _Abis>&...); +template +simd_mask<_Tp, rebind_abi_t<_Tp, _Np, _Abi>> +concat(const std::array, _Np>&); + // reductions [simd.mask.reductions] template bool all_of(const simd_mask<_Tp, _Abi>&) noexcept; @@ -1367,15 +1544,15 @@ std::is_unsigned<_Tp>::value); } - template + template static constexpr decltype( std::forward_as_tuple(std::declval<_Generator>()( - std::integral_constant())...), + std::integral_constant())...), bool()) - __can_generate(std::index_sequence<__indicies...>) { + __can_generate(std::index_sequence<__indices...>) { return !__variadic_sum( !__can_broadcast()( - std::integral_constant()))>()...); + std::integral_constant()))>()...); } template @@ -1383,11 +1560,11 @@ return false; } - template - void __generator_init(_Generator&& __g, std::index_sequence<__indicies...>) { - int __not_used[]{((*this)[__indicies] = - __g(std::integral_constant()), - 0)...}; + template + void __generator_init(_Generator&& __g, std::index_sequence<__indices...>) { + int __not_used[]{ + ((*this)[__indices] = __g(std::integral_constant()), + 0)...}; (void)__not_used; } @@ -1505,6 +1682,15 @@ friend mask_type operator<=(const simd&, const simd&); friend mask_type operator>(const simd&, const simd&); friend mask_type operator<(const simd&, const simd&); + +#if !defined(_LIBCPP_HAS_NO_VECTOR_EXTENSION) && defined(_LIBCPP_COMPILER_CLANG) + template + friend simd<_Up, __simd_abi<_StorageKind::_VecExt, sizeof...(__indices)>> + __simd_shuffle( + const simd<_Up, __simd_abi<_StorageKind::_VecExt, __num_element>>& __v, + const simd<_Up, __simd_abi<_StorageKind::_VecExt, __num_element>>& __u, + std::index_sequence<__indices...>); +#endif }; // [simd.mask.class] Index: libcxx/test/std/experimental/simd/simd.horizontal/concat.pass.cpp =================================================================== --- /dev/null +++ libcxx/test/std/experimental/simd/simd.horizontal/concat.pass.cpp @@ -0,0 +1,99 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++98, c++03 + +// +// +// template +// simd + ...)>> +// concat(const simd&...); +// +// template +// simd, Abi>> +// concat(const std::array, N>& __v); +// +// template +// simd_mask + ...)>> +// concat(const simd_mask&...); +// +// template +// simd_mask, Abi>> +// concat(const std::array, N>&); + +#include +#include +#include + +namespace ex = std::experimental::parallelism_v2; + +void test_concat() { + auto v = ex::concat(ex::fixed_size_simd([](int i) { return i; }), + ex::fixed_size_simd([](int i) { return i + 1; }), + ex::fixed_size_simd([](int i) { return i + 3; })); + static_assert(v.size() == 6, ""); + assert(v[0] == 0); + assert(v[1] == 1); + assert(v[2] == 2); + assert(v[3] == 3); + assert(v[4] == 4); + assert(v[5] == 5); +} + +void test_concat_array() { + std::array, 2> arr; + arr[0] = ex::fixed_size_simd([](int) { return 0; }); + arr[1] = ex::fixed_size_simd([](int) { return 1; }); + + auto v = ex::concat(arr); + static_assert(v.size() == 4, ""); + assert(v[0] == 0); + assert(v[1] == 0); + assert(v[2] == 1); + assert(v[3] == 1); +} + +void test_concat_native() { + std::array, 2> arr; + arr[0] = ex::native_simd([](int) { return 0; }); + arr[1] = ex::native_simd([](int) { return 1; }); + + auto v = ex::concat(arr); + auto n = ex::native_simd::size(); + assert(v.size() == n * 2); + for (size_t i = 0; i < n; i++) { + assert(v[i] == 0); + } + for (size_t i = n; i < 2 * n; i++) { + assert(v[i] == 1); + } +} + +void compile_split_propagate_abi() { + static_assert( + std::is_same(), ex::simd()))::abi_type, + ex::rebind_abi_t::size() * 2, + ex::simd_abi::compatible>>::value, + ""); + + static_assert( + std::is_same(), + ex::native_simd()))::abi_type, + ex::rebind_abi_t::size() * 2, + ex::simd_abi::native>>::value, + ""); +} + +int main() { + test_concat(); + test_concat_array(); + test_concat_native(); +} Index: libcxx/test/std/experimental/simd/simd.horizontal/split.pass.cpp =================================================================== --- /dev/null +++ libcxx/test/std/experimental/simd/simd.horizontal/split.pass.cpp @@ -0,0 +1,128 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++98, c++03 + +// +// +// template +// tuple>...> split(const simd&); +// +// template +// tuple>...> split(const simd_mask&); +// +// template +// array / V::size()> split( +// const simd&); +// +// template +// array / V::size()> split( +// const simd_mask&); +// +// template +// array / n, A>>, n> split_by( +// const simd& x); +// +// template +// array / n, A>>, n> split_by( +// const simd_mask& x); + +#include +#include +#include + +namespace ex = std::experimental::parallelism_v2; + +void test_split() { + auto t = + ex::split<1, 2, 3>(ex::fixed_size_simd([](int i) { return i; })); + static_assert(std::tuple_size::value == 3, ""); + + assert(std::get<0>(t).size() == 1); + assert(std::get<0>(t)[0] == 0); + + assert(std::get<1>(t).size() == 2); + assert(std::get<1>(t)[0] == 1); + assert(std::get<1>(t)[1] == 2); + + assert(std::get<2>(t).size() == 3); + assert(std::get<2>(t)[0] == 3); + assert(std::get<2>(t)[1] == 4); + assert(std::get<2>(t)[2] == 5); +} + +void test_split_array() { + { + auto arr = + ex::split_by<2>(ex::fixed_size_simd([](int i) { return i; })); + static_assert(arr.size() == 2, ""); + + assert(arr[0].size() == 3); + assert(arr[0][0] == 0); + assert(arr[0][1] == 1); + assert(arr[0][2] == 2); + + assert(arr[1].size() == 3); + assert(arr[1][0] == 3); + assert(arr[1][1] == 4); + assert(arr[1][2] == 5); + } + { + auto arr = ex::split>( + ex::fixed_size_simd([](int i) { return i; })); + static_assert(arr.size() == 2, ""); + + assert(arr[0].size() == 3); + assert(arr[0][0] == 0); + assert(arr[0][1] == 1); + assert(arr[0][2] == 2); + + assert(arr[1].size() == 3); + assert(arr[1][0] == 3); + assert(arr[1][1] == 4); + assert(arr[1][2] == 5); + } +} + +void compile_split_propagate_abi() { + using compatible_simd_half = + ex::simd::size() / 2, + ex::simd_abi::compatible>>; + using native_simd_half = + ex::simd::size() / 2, + ex::simd_abi::native>>; + + static_assert( + std::is_same< + decltype(ex::split::size() / 2, + ex::simd::size() / 2>(ex::simd())), + std::tuple>::value, + ""); + + static_assert( + std::is_same::size() / 2, + ex::native_simd::size() / 2>( + ex::native_simd())), + std::tuple>::value, + ""); + + static_assert(std::is_same(ex::simd())), + std::array>::value, + ""); + + static_assert(std::is_same(ex::native_simd())), + std::array>::value, + ""); +} + +int main() { + test_split(); + test_split_array(); +}