Index: libcxx/include/experimental/simd
===================================================================
--- libcxx/include/experimental/simd
+++ libcxx/include/experimental/simd
@@ -1238,25 +1238,36 @@
                 "Element type should be vectorizable");
 };
 
-template <class _Tp, class _Up = typename _Tp::value_type>
-struct memory_alignment;
+template <class _ValueType, class _Up, class _Flags>
+struct __memory_alignment_impl : std::integral_constant<size_t, alignof(_Up)> {
+};
 
-// TODO: May extend this after implementing vector_aligned.
 template <class _Tp, class _Abi, class _Up>
-struct memory_alignment<simd<_Tp, _Abi>, _Up>
+struct __memory_alignment_impl<simd<_Tp, _Abi>, _Up, vector_aligned_tag>
     : std::integral_constant<size_t, alignof(simd<_Up, _Abi>)> {};
 
-template <class _Tp, class _Abi>
-struct memory_alignment<simd_mask<_Tp, _Abi>, bool>
-    : std::integral_constant<size_t, alignof(simd<_Tp, _Abi>)> {};
+// TODO: Figure out a useful alignment based on simd_mask load and store
+// implementation. Currently, make sure that the buffer is suitable for aligned
+// SIMD load.
+template <class _Tp, class _Abi, class _Up>
+struct __memory_alignment_impl<simd_mask<_Tp, _Abi>, _Up, vector_aligned_tag>
    : std::integral_constant<size_t, alignof(simd<_Tp, _Abi>)> {};
+
+template <class _ValueType, class _Up, size_t __alignment>
+struct __memory_alignment_impl<_ValueType, _Up, overaligned_tag<__alignment>>
+    : std::integral_constant<size_t, __alignment> {};
+
+template <class _SimdType, class _Up = typename _SimdType::value_type>
+struct memory_alignment
+    : __memory_alignment_impl<_SimdType, _Up, vector_aligned_tag> {};
 
 #if _LIBCPP_STD_VER >= 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
 template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
 _LIBCPP_INLINE_VAR constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
 
-template <class _Tp, class _Up = typename _Tp::value_type>
+template <class _SimdType, class _Up = typename _SimdType::value_type>
 _LIBCPP_INLINE_VAR constexpr size_t memory_alignment_v =
-    memory_alignment<_Tp, _Up>::value;
+    memory_alignment<_SimdType, _Up>::value;
 #endif
 
 // class template simd [simd.class]
@@ -1932,6 +1943,22 @@
     (void)__unused;
   }
 
+  template <size_t __alignment, class _Up>
+  void __copy_from_impl(const _Up* __buffer
+                        __attribute__((align_value(__alignment)))) {
+    for (size_t __i = 0; __i < size(); __i++) {
+      (*this)[__i] = static_cast<_Tp>(__buffer[__i]);
+    }
+  }
+
+  template <size_t __alignment, class _Up>
+  void __copy_to_impl(_Up* __buffer
+                      __attribute__((align_value(__alignment)))) const {
+    for (size_t __i = 0; __i < size(); __i++) {
+      __buffer[__i] = static_cast<_Up>((*this)[__i]);
+    }
+  }
+
 public:
   // implicit type conversion constructor
@@ ... @@
   template <class _Up, class _Flags,
             class = typename std::enable_if<__vectorizable<_Up>()>::type,
             class = typename std::enable_if<
                 is_simd_flag_type<_Flags>::value>::type>
   simd(const _Up* __buffer, _Flags) {
-    // TODO: optimize for overaligned flags
-    for (size_t __i = 0; __i < size(); __i++) {
-      (*this)[__i] = static_cast<_Tp>(__buffer[__i]);
-    }
+    __copy_from_impl<__memory_alignment_impl<simd, _Up, _Flags>::value>(
+        __buffer);
   }
 
   // loads [simd.load]
@@ -2008,10 +2033,7 @@
   typename std::enable_if<__vectorizable<_Up>() &&
                           is_simd_flag_type<_Flags>::value>::type
   copy_to(_Up* __buffer, _Flags) const {
-    // TODO: optimize for overaligned flags
-    for (size_t __i = 0; __i < size(); __i++) {
-      __buffer[__i] = static_cast<_Up>((*this)[__i]);
-    }
+    __copy_to_impl<__memory_alignment_impl<simd, _Up, _Flags>::value>(__buffer);
   }
 
   // scalar access [simd.subscr]
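Note on the hunks above: the flag tag passed to the load constructor and to
copy_to is mapped at compile time to an alignment value by
__memory_alignment_impl, and that value is attached to the buffer pointer via
Clang's align_value attribute so the copy loop can be vectorized with aligned
accesses. A minimal standalone sketch of the trait dispatch; the tag types and
the alignment_for name here are illustrative stand-ins, not the header's own
definitions:

    #include <cstddef>
    #include <type_traits>

    // Stand-ins for the <experimental/simd> flag tags (illustrative only).
    struct element_aligned_tag {};
    template <std::size_t N> struct overaligned_tag {};

    // Same shape as __memory_alignment_impl: any unrecognized flag falls back
    // to the element alignment; overaligned_tag<N> propagates N verbatim.
    template <class ValueType, class U, class Flags>
    struct alignment_for : std::integral_constant<std::size_t, alignof(U)> {};

    template <class ValueType, class U, std::size_t N>
    struct alignment_for<ValueType, U, overaligned_tag<N>>
        : std::integral_constant<std::size_t, N> {};

    static_assert(alignment_for<void, float, element_aligned_tag>::value ==
                      alignof(float), "");
    static_assert(alignment_for<void, float, overaligned_tag<64>>::value == 64,
                  "");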
@@ -2265,6 +2287,24 @@
   friend struct __simd_mask_friend;
 
+  // Use a static function that takes the mask explicitly, only because
+  // Clang 3.8 crashes on a non-static member function.
+  template <size_t __alignment>
+  static void __copy_from_impl(simd_mask* __mask, const bool* __buffer
+                               __attribute__((align_value(__alignment)))) {
+    for (size_t __i = 0; __i < size(); __i++) {
+      (*__mask)[__i] = __buffer[__i];
+    }
+  }
+
+  // Use a static function that takes the mask explicitly, only because
+  // Clang 3.8 crashes on a non-static member function.
+  template <size_t __alignment>
+  static void __copy_to_impl(const simd_mask* __mask, bool* __buffer
+                             __attribute__((align_value(__alignment)))) {
+    for (size_t __i = 0; __i < size(); __i++) {
+      __buffer[__i] = (*__mask)[__i];
+    }
+  }
+
 public:
   using value_type = bool;
   using reference = __simd_reference<bool, _Abi>;
@@ -2300,10 +2340,8 @@
   template <class _Flags, class = typename std::enable_if<
                               is_simd_flag_type<_Flags>::value>::type>
   simd_mask(const value_type* __buffer, _Flags) {
-    // TODO: optimize for overaligned flags
-    for (size_t __i = 0; __i < size(); __i++) {
-      (*this)[__i] = __buffer[__i];
-    }
+    __copy_from_impl<__memory_alignment_impl<simd_mask, bool, _Flags>::value>(
+        this, __buffer);
   }
 
@@ -2336,10 +2374,8 @@
   template <class _Flags>
   typename std::enable_if<is_simd_flag_type<_Flags>::value>::type
   copy_to(value_type* __buffer, _Flags) const {
-    // TODO: optimize for overaligned flags
-    for (size_t __i = 0; __i < size(); __i++) {
-      __buffer[__i] = (*this)[__i];
-    }
+    __copy_to_impl<__memory_alignment_impl<simd_mask, bool, _Flags>::value>(
+        this, __buffer);
   }
 
   // scalar access [simd.mask.subscr]
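Note on align_value: it is a Clang attribute (GCC does not support it; the
closest portable hint is __builtin_assume_aligned). It asserts, without
emitting any runtime check, that the annotated pointer holds an address with
at least the stated alignment, which is what licenses the optimizer to use
aligned vector loads and stores for the loops above. A minimal sketch of the
same mechanism outside the header, assuming Clang; the function and its names
are hypothetical:

    #include <cstddef>

    // The attribute promises 32-byte alignment to the optimizer. Passing a
    // less-aligned pointer is undefined behavior, exactly as with the patched
    // copy_from/copy_to paths when the caller's flag overstates the
    // buffer's alignment.
    void scale_by_two(float* buffer __attribute__((align_value(32))),
                      std::size_t n) {
      for (std::size_t i = 0; i < n; i++) {
        buffer[i] *= 2.0f;
      }
    }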
@@ -2401,10 +2437,10 @@
   }
 };
 
-template <class _Tp, class _Abi, class _Up, class _Flags>
+template <size_t __alignment, class _Tp, class _Abi, class _Up>
 void __mask_copy_to(const simd<_Tp, _Abi>& __v, const simd_mask<_Tp, _Abi>& __m,
-                    _Up* __buffer, _Flags) {
-  // TODO: optimize for overaligned flags
+                    _Up* __buffer __attribute__((align_value(__alignment)))) {
+  // TODO: optimize based on bool's bit pattern.
   for (size_t __i = 0; __i < __v.size(); __i++) {
     if (__m[__i]) {
       __buffer[__i] = static_cast<_Up>(__v[__i]);
@@ -2412,9 +2448,10 @@
   }
 }
 
-template <class _Tp, class _Abi, class _Up, class _Flags>
+template <size_t __alignment, class _Tp, class _Abi, class _Up>
 void __mask_copy_to(const simd_mask<_Tp, _Abi>& __v,
-                    const simd_mask<_Tp, _Abi>& __m, _Up* __buffer, _Flags) {
+                    const simd_mask<_Tp, _Abi>& __m,
+                    _Up* __buffer __attribute__((align_value(__alignment)))) {
   // TODO: optimize based on bool's bit pattern.
   for (size_t __i = 0; __i < __v.size(); __i++) {
     if (__m[__i]) {
       __buffer[__i] = static_cast<_Up>(__v[__i]);
@@ -2423,17 +2460,19 @@
   }
 }
 
-template <class _Tp, class _Up, class _Flags>
-void __mask_copy_to(_Tp __val, bool __m, _Up* __buffer, _Flags) {
+template <size_t __alignment, class _Tp, class _Up>
+void __mask_copy_to(_Tp __val, bool __m,
+                    _Up* __buffer __attribute__((align_value(__alignment)))) {
   if (__m) {
     *__buffer = static_cast<_Up>(__val);
   }
 }
 
-template <class _Tp, class _Abi, class _Up, class _Flags>
+template <size_t __alignment, class _Tp, class _Abi, class _Up>
 void __mask_copy_from(simd<_Tp, _Abi>& __v, const simd_mask<_Tp, _Abi>& __m,
-                      const _Up* __buffer, _Flags) {
-  // TODO: optimize for overaligned flags
+                      const _Up* __buffer
+                      __attribute__((align_value(__alignment)))) {
+  // TODO: optimize based on bool's bit pattern.
   for (size_t __i = 0; __i < __v.size(); __i++) {
     if (__m[__i]) {
       __v[__i] = static_cast<_Tp>(__buffer[__i]);
@@ -2441,10 +2480,11 @@
   }
 }
 
-template <class _Tp, class _Abi, class _Up, class _Flags>
+template <size_t __alignment, class _Tp, class _Abi, class _Up>
 void __mask_copy_from(simd_mask<_Tp, _Abi>& __v,
-                      const simd_mask<_Tp, _Abi>& __m, const _Up* __buffer,
-                      _Flags) {
+                      const simd_mask<_Tp, _Abi>& __m,
+                      const _Up* __buffer
+                      __attribute__((align_value(__alignment)))) {
   // TODO: optimize based on bool's bit pattern.
   for (size_t __i = 0; __i < __v.size(); __i++) {
     if (__m[__i]) {
@@ -2453,8 +2493,10 @@
   }
 }
 
-template <class _Tp, class _Up, class _Flags>
-void __mask_copy_from(_Tp& __val, bool __m, const _Up* __buffer, _Flags) {
+template <size_t __alignment, class _Tp, class _Up>
+void __mask_copy_from(_Tp& __val, bool __m,
+                      const _Up* __buffer
+                      __attribute__((align_value(__alignment)))) {
   if (__m) {
     __val = static_cast<_Tp>(*__buffer);
   }
 }
@@ -2545,7 +2587,8 @@
   typename std::enable_if<is_simd_flag_type<_Flags>::value ||
                           !std::is_same<_Tp, bool>::value>::type
   copy_to(_Up* __buffer, _Flags) const&& {
-    __mask_copy_to(__v_, __m_, __buffer, _Flags());
+    __mask_copy_to<__memory_alignment_impl<_ValueType, _Up, _Flags>::value>(
+        __v_, __m_, __buffer);
   }
 };
@@ -2664,7 +2707,8 @@
   typename std::enable_if<is_simd_flag_type<_Flags>::value ||
                           !std::is_same<_Tp, bool>::value>::type
   copy_from(const _Up* __buffer, _Flags) {
-    __mask_copy_from(this->__v_, this->__m_, __buffer, _Flags());
+    __mask_copy_from<__memory_alignment_impl<_ValueType, _Up, _Flags>::value>(
+        this->__v_, this->__m_, __buffer);
   }
 };
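For reference, the caller-side contract these hunks implement. The names below
follow P0214 (element_aligned, vector_aligned, overaligned<N>, native_simd,
memory_alignment_v); this is an illustrative sketch assuming C++17 and that
this header revision exposes all of those names, which the diff itself does
not show:

    #include <experimental/simd>
    namespace ex = std::experimental;

    void demo() {
      using simd_t = ex::native_simd<float>;
      // memory_alignment_v names the alignment that vector_aligned promises.
      alignas(ex::memory_alignment_v<simd_t>) float buf[simd_t::size()] = {};
      simd_t v(buf, ex::vector_aligned);   // aligned load
      v.copy_to(buf, ex::vector_aligned);  // aligned store

      alignas(64) float buf64[simd_t::size()] = {};
      v.copy_to(buf64, ex::overaligned<64>);  // caller promises 64 bytes
    }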