Index: libcxx/include/experimental/__config =================================================================== --- libcxx/include/experimental/__config +++ libcxx/include/experimental/__config @@ -66,11 +66,15 @@ #define _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD_ABI \ } _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD -// TODO: support more targets +#if defined(__SSE2__) +#define _LIBCPP_MICROARCH_SSE2 #if defined(__AVX__) -#define _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES 32 -#else -#define _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES 16 +#define _LIBCPP_MICROARCH_AVX +#endif +#elif defined(__ALTIVEC__) || defined(__VSX__) +#define _LIBCPP_MICROARCH_ALTIVEC +#elif defined(__ARM_NEON) +#define _LIBCPP_MICROARCH_NEON #endif #endif Index: libcxx/include/experimental/simd =================================================================== --- libcxx/include/experimental/simd +++ libcxx/include/experimental/simd @@ -596,6 +596,17 @@ #include #include +#if defined(_LIBCPP_MICROARCH_SSE2) +#include +#if defined(_LIBCPP_MICROARCH_AVX) +#include +#endif +#elif defined(_LIBCPP_MICROARCH_ALTIVEC) +#include +#elif defined(_LIBCPP_MICROARCH_NEON) +#include +#endif + #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif @@ -680,59 +691,91 @@ _TYPE __attribute__((vector_size(sizeof(_TYPE) * _NUM_ELEMENT))); \ } -#define _SPECIALIZE_VEC_EXT_32(_TYPE) \ - _SPECIALIZE_VEC_EXT(_TYPE, 1); \ - _SPECIALIZE_VEC_EXT(_TYPE, 2); \ - _SPECIALIZE_VEC_EXT(_TYPE, 3); \ - _SPECIALIZE_VEC_EXT(_TYPE, 4); \ - _SPECIALIZE_VEC_EXT(_TYPE, 5); \ - _SPECIALIZE_VEC_EXT(_TYPE, 6); \ - _SPECIALIZE_VEC_EXT(_TYPE, 7); \ - _SPECIALIZE_VEC_EXT(_TYPE, 8); \ - _SPECIALIZE_VEC_EXT(_TYPE, 9); \ - _SPECIALIZE_VEC_EXT(_TYPE, 10); \ - _SPECIALIZE_VEC_EXT(_TYPE, 11); \ - _SPECIALIZE_VEC_EXT(_TYPE, 12); \ - _SPECIALIZE_VEC_EXT(_TYPE, 13); \ - _SPECIALIZE_VEC_EXT(_TYPE, 14); \ - _SPECIALIZE_VEC_EXT(_TYPE, 15); \ - _SPECIALIZE_VEC_EXT(_TYPE, 16); \ - _SPECIALIZE_VEC_EXT(_TYPE, 17); \ - _SPECIALIZE_VEC_EXT(_TYPE, 18); 
\ - _SPECIALIZE_VEC_EXT(_TYPE, 19); \ - _SPECIALIZE_VEC_EXT(_TYPE, 20); \ - _SPECIALIZE_VEC_EXT(_TYPE, 21); \ - _SPECIALIZE_VEC_EXT(_TYPE, 22); \ - _SPECIALIZE_VEC_EXT(_TYPE, 23); \ - _SPECIALIZE_VEC_EXT(_TYPE, 24); \ - _SPECIALIZE_VEC_EXT(_TYPE, 25); \ - _SPECIALIZE_VEC_EXT(_TYPE, 26); \ - _SPECIALIZE_VEC_EXT(_TYPE, 27); \ - _SPECIALIZE_VEC_EXT(_TYPE, 28); \ - _SPECIALIZE_VEC_EXT(_TYPE, 29); \ - _SPECIALIZE_VEC_EXT(_TYPE, 30); \ - _SPECIALIZE_VEC_EXT(_TYPE, 31); \ - _SPECIALIZE_VEC_EXT(_TYPE, 32); - -_SPECIALIZE_VEC_EXT_32(char); -_SPECIALIZE_VEC_EXT_32(char16_t); -_SPECIALIZE_VEC_EXT_32(char32_t); -_SPECIALIZE_VEC_EXT_32(wchar_t); -_SPECIALIZE_VEC_EXT_32(signed char); -_SPECIALIZE_VEC_EXT_32(signed short); -_SPECIALIZE_VEC_EXT_32(signed int); -_SPECIALIZE_VEC_EXT_32(signed long); -_SPECIALIZE_VEC_EXT_32(signed long long); -_SPECIALIZE_VEC_EXT_32(unsigned char); -_SPECIALIZE_VEC_EXT_32(unsigned short); -_SPECIALIZE_VEC_EXT_32(unsigned int); -_SPECIALIZE_VEC_EXT_32(unsigned long); -_SPECIALIZE_VEC_EXT_32(unsigned long long); -_SPECIALIZE_VEC_EXT_32(float); -_SPECIALIZE_VEC_EXT_32(double); -_SPECIALIZE_VEC_EXT_32(long double); - -#undef _SPECIALIZE_VEC_EXT_32 +#define _SPECIALIZE_VEC_EXT_FOR_SIZES(_TYPE) \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x01); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x02); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x03); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x04); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x05); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x06); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x07); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x08); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x09); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x0a); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x0b); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x0c); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x0d); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x0e); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x0f); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x10); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x11); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x12); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x13); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x14); \ + 
_SPECIALIZE_VEC_EXT(_TYPE, 0x15); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x16); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x17); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x18); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x19); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x1a); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x1b); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x1c); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x1d); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x1e); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x1f); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x20); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x21); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x22); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x23); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x24); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x25); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x26); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x27); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x28); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x29); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x2a); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x2b); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x2c); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x2d); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x2e); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x2f); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x30); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x31); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x32); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x33); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x34); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x35); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x36); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x37); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x38); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x39); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x3a); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x3b); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x3c); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x3d); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x3e); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x3f); \ + _SPECIALIZE_VEC_EXT(_TYPE, 0x40); + +_SPECIALIZE_VEC_EXT_FOR_SIZES(char); +_SPECIALIZE_VEC_EXT_FOR_SIZES(char16_t); +_SPECIALIZE_VEC_EXT_FOR_SIZES(char32_t); +_SPECIALIZE_VEC_EXT_FOR_SIZES(wchar_t); +_SPECIALIZE_VEC_EXT_FOR_SIZES(signed char); +_SPECIALIZE_VEC_EXT_FOR_SIZES(signed short); +_SPECIALIZE_VEC_EXT_FOR_SIZES(signed int); 
+_SPECIALIZE_VEC_EXT_FOR_SIZES(signed long); +_SPECIALIZE_VEC_EXT_FOR_SIZES(signed long long); +_SPECIALIZE_VEC_EXT_FOR_SIZES(unsigned char); +_SPECIALIZE_VEC_EXT_FOR_SIZES(unsigned short); +_SPECIALIZE_VEC_EXT_FOR_SIZES(unsigned int); +_SPECIALIZE_VEC_EXT_FOR_SIZES(unsigned long); +_SPECIALIZE_VEC_EXT_FOR_SIZES(unsigned long long); +_SPECIALIZE_VEC_EXT_FOR_SIZES(float); +_SPECIALIZE_VEC_EXT_FOR_SIZES(double); +_SPECIALIZE_VEC_EXT_FOR_SIZES(long double); + +#undef _SPECIALIZE_VEC_EXT_FOR_SIZES #undef _SPECIALIZE_VEC_EXT #endif @@ -761,6 +804,114 @@ #endif // _LIBCPP_HAS_NO_VECTOR_EXTENSION +template +struct __native_type_traits {}; + +#if defined(_LIBCPP_MICROARCH_SSE2) + +template +struct __native_type_traits<_Tp, 16> { + static_assert(16 % sizeof(_Tp) == 0, ""); + using type = __m128i; +}; + +template <> +struct __native_type_traits { + using type = __m128; +}; + +template <> +struct __native_type_traits { + using type = __m128d; +}; + +#if defined(_LIBCPP_MICROARCH_AVX) + +template +struct __native_type_traits<_Tp, 32> { + static_assert(32 % sizeof(_Tp) == 0, ""); + using type = __m256i; +}; + +template <> +struct __native_type_traits { + using type = __m256; +}; + +template <> +struct __native_type_traits { + using type = __m256d; +}; +#endif // _LIBCPP_MICROARCH_AVX +#elif defined(_LIBCPP_MICROARCH_ALTIVEC) + +#define _SPECIALIZE_ALTIVEC_TRAITS(_TYPE) \ + template <> \ + struct __native_type_traits<_TYPE, 16> { \ + using type = __vector _TYPE; \ + } + +_SPECIALIZE_ALTIVEC_TRAITS(char); +_SPECIALIZE_ALTIVEC_TRAITS(char16_t); +_SPECIALIZE_ALTIVEC_TRAITS(char32_t); +_SPECIALIZE_ALTIVEC_TRAITS(wchar_t); +_SPECIALIZE_ALTIVEC_TRAITS(signed char); +_SPECIALIZE_ALTIVEC_TRAITS(signed short); +_SPECIALIZE_ALTIVEC_TRAITS(signed int); +_SPECIALIZE_ALTIVEC_TRAITS(signed long); +_SPECIALIZE_ALTIVEC_TRAITS(signed long long); +_SPECIALIZE_ALTIVEC_TRAITS(unsigned char); +_SPECIALIZE_ALTIVEC_TRAITS(unsigned short); +_SPECIALIZE_ALTIVEC_TRAITS(unsigned int); 
+_SPECIALIZE_ALTIVEC_TRAITS(unsigned long); +_SPECIALIZE_ALTIVEC_TRAITS(unsigned long long); +_SPECIALIZE_ALTIVEC_TRAITS(float); +_SPECIALIZE_ALTIVEC_TRAITS(double); + +#undef _SPECIALIZE_ALTIVEC_TRAITS +#elif defined(_LIBCPP_MICROARCH_NEON) +#define _SPECIALIZE_NEON_TRAITS(_TYPE, _NUM_BYTES, _UNDERLYING_TYPE) \ + template <> \ + struct __native_type_traits<_TYPE, _NUM_BYTES> { \ + using type = _UNDERLYING_TYPE; \ + } + +_SPECIALIZE_NEON_TRAITS(char, 16, int8x16_t); +_SPECIALIZE_NEON_TRAITS(char16_t, 16, int16x8_t); +_SPECIALIZE_NEON_TRAITS(char32_t, 16, int32x4_t); +_SPECIALIZE_NEON_TRAITS(wchar_t, 16, int16x8_t); +_SPECIALIZE_NEON_TRAITS(signed char, 16, int8x16_t); +_SPECIALIZE_NEON_TRAITS(signed short, 16, int16x8_t); +_SPECIALIZE_NEON_TRAITS(signed int, 16, int32x4_t); +_SPECIALIZE_NEON_TRAITS(signed long, 16, int64x2_t); +_SPECIALIZE_NEON_TRAITS(signed long long, 16, int64x2_t); +_SPECIALIZE_NEON_TRAITS(unsigned char, 16, uint8x16_t); +_SPECIALIZE_NEON_TRAITS(unsigned short, 16, uint16x8_t); +_SPECIALIZE_NEON_TRAITS(unsigned int, 16, uint32x4_t); +_SPECIALIZE_NEON_TRAITS(unsigned long, 16, uint64x2_t); +_SPECIALIZE_NEON_TRAITS(unsigned long long, 16, uint64x2_t); +_SPECIALIZE_NEON_TRAITS(float, 16, float32x4_t); +_SPECIALIZE_NEON_TRAITS(double, 16, float64x2_t); +_SPECIALIZE_NEON_TRAITS(char, 8, int8x8_t); +_SPECIALIZE_NEON_TRAITS(char16_t, 8, int16x4_t); +_SPECIALIZE_NEON_TRAITS(char32_t, 8, int32x2_t); +_SPECIALIZE_NEON_TRAITS(wchar_t, 8, int16x4_t); +_SPECIALIZE_NEON_TRAITS(signed char, 8, int8x8_t); +_SPECIALIZE_NEON_TRAITS(signed short, 8, int16x4_t); +_SPECIALIZE_NEON_TRAITS(signed int, 8, int32x2_t); +_SPECIALIZE_NEON_TRAITS(signed long, 8, int64x1_t); +_SPECIALIZE_NEON_TRAITS(signed long long, 8, int64x1_t); +_SPECIALIZE_NEON_TRAITS(unsigned char, 8, uint8x8_t); +_SPECIALIZE_NEON_TRAITS(unsigned short, 8, uint16x4_t); +_SPECIALIZE_NEON_TRAITS(unsigned int, 8, uint32x2_t); +_SPECIALIZE_NEON_TRAITS(unsigned long, 8, uint64x1_t); 
+_SPECIALIZE_NEON_TRAITS(unsigned long long, 8, uint64x1_t); +_SPECIALIZE_NEON_TRAITS(float, 8, float32x2_t); +_SPECIALIZE_NEON_TRAITS(double, 8, float64x1_t); + +#undef _SPECIALIZE_NEON_TRAITS +#endif // _LIBCPP_MICROARCH_NEON + template class __simd_reference { static_assert(std::is_same<_Vp, _Tp>::value || @@ -964,7 +1115,7 @@ #if _LIBCPP_STD_VER >= 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) template -_LIBCPP_INLINE_VAR constexpr size_t max_fixed_size = 32; +_LIBCPP_INLINE_VAR constexpr size_t max_fixed_size = 64; #endif template @@ -982,7 +1133,13 @@ using compatible = __compatible<_Tp, 16 / sizeof(_Tp)>; template -using native = __native<_Tp, _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>; +using native = __native<_Tp, +#if defined(_LIBCPP_MICROARCH_AVX) + 32 +#else + 16 +#endif + / sizeof(_Tp)>; _LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD_ABI _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD @@ -1807,6 +1964,25 @@ std::make_index_sequence()); } + template + simd(typename __native_type_traits<_Up, sizeof(_Up) * size()>::type __r) { + __s_.__assign(typename __simd_storage<_Tp, _Abi>::__raw_type(__r)); + } + + template + typename __native_type_traits<_Up, sizeof(_Up) * size()>::type __raw() const { + return typename __native_type_traits<_Tp, sizeof(_Tp) * size()>::type( + __s_.__raw()); + } + + template ::type, + _RetType>::value>::type> + operator _RetType() const { + return __raw(); + } + // load constructor template < class _Up, class _Flags, @@ -2130,6 +2306,26 @@ } } + template + simd_mask(typename __native_type_traits<__element_type, + sizeof(_Up) * size()>::type __r) + : __s_(__r) {} + + template + typename __native_type_traits<__element_type, sizeof(_Up) * size()>::type + __raw() const { + return __s_.__raw(); + } + + template ::type, + _RetType>::value>::type> + operator _RetType() const { + return __raw(); + } + // loads [simd.mask.copy] template typename std::enable_if::value>::type Index: libcxx/test/std/experimental/simd/simd.abi/raw.pass.cpp 
=================================================================== --- /dev/null +++ libcxx/test/std/experimental/simd/simd.abi/raw.pass.cpp @@ -0,0 +1,122 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++98, c++03 + +// +// +// [simd.abi] + +#include +#include + +using namespace std::experimental::parallelism_v2; + +template +void compile_raw() { + static_assert(std::is_same().__raw()), + ExpectedRawType>::value, + ""); + (void)SimdType(ExpectedRawType()); + (void)static_cast(SimdType()); +} + +int main() { +#if defined(__AVX__) + compile_raw, __m256i>(); + compile_raw, __m256i>(); + compile_raw, __m256i>(); + compile_raw, __m256i>(); + compile_raw, __m256i>(); + compile_raw, __m256i>(); + compile_raw, __m256i>(); + compile_raw, __m256i>(); + compile_raw, __m256>(); + compile_raw, __m256d>(); +#elif defined(__SSE2__) + compile_raw, __m128i>(); + compile_raw, __m128i>(); + compile_raw, __m128i>(); + compile_raw, __m128i>(); + compile_raw, __m128i>(); + compile_raw, __m128i>(); + compile_raw, __m128i>(); + compile_raw, __m128i>(); + compile_raw, __m128>(); + compile_raw, __m128d>(); +#elif defined(__ALTIVEC__) || defined(__VSX__) + compile_raw, __vector char>(); + compile_raw, __vector char16_t>(); + compile_raw, __vector char32_t>(); + compile_raw, __vector wchar_t>(); + compile_raw, __vector signed char>(); + compile_raw, __vector signed short>(); + compile_raw, __vector signed int>(); + compile_raw, __vector signed long>(); + compile_raw, __vector signed long long>(); + compile_raw, __vector unsigned char>(); + compile_raw, __vector unsigned short>(); + compile_raw, __vector unsigned int>(); + compile_raw, __vector unsigned long>(); + compile_raw, 
__vector unsigned long long>(); + compile_raw, __vector float>(); + compile_raw, __vector double>(); +#elif defined(__ARM_NEON) + compile_raw, int8x16_t>(); + compile_raw, int16x8_t>(); + compile_raw, int32x4_t>(); + compile_raw, int64x2_t>(); + compile_raw, uint8x16_t>(); + compile_raw, uint16x8_t>(); + compile_raw, uint32x4_t>(); + compile_raw, uint64x2_t>(); + compile_raw, float32x4_t>(); + compile_raw, float64x2_t>(); + + compile_raw(native_simd())[0])>::type, + int8x8_t>(); + + compile_raw(native_simd())[0])>::type, + int16x4_t>(); + + compile_raw(native_simd())[0])>::type, + int32x2_t>(); + + compile_raw(native_simd())[0])>::type, + int64x1_t>(); + + compile_raw(native_simd())[0])>::type, + uint8x8_t>(); + + compile_raw(native_simd())[0])>::type, + uint16x4_t>(); + + compile_raw(native_simd())[0])>::type, + uint32x2_t>(); + + compile_raw(native_simd())[0])>::type, + uint64x1_t>(); + + compile_raw(native_simd())[0])>::type, + float32x2_t>(); + + compile_raw(native_simd())[0])>::type, + float64x1_t>(); +#endif +}