diff --git a/clang/lib/Headers/avx512bf16intrin.h b/clang/lib/Headers/avx512bf16intrin.h --- a/clang/lib/Headers/avx512bf16intrin.h +++ b/clang/lib/Headers/avx512bf16intrin.h @@ -13,8 +13,10 @@ #ifndef __AVX512BF16INTRIN_H #define __AVX512BF16INTRIN_H -typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64))); -typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32))); +typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64), + __deprecated__("use __m512i instead"))); +typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32), + __deprecated__("use __m256i instead"))); /// \typedef __bfloat16 /// A target specific type to represent the storage only brain floating-point @@ -56,10 +58,10 @@ /// A 512-bit vector of [16 x float]. /// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from /// conversion of __B, and higher 256 bits come from conversion of __A. -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtne2ps_pbh(__m512 __A, __m512 __B) { - return (__m512bh)__builtin_ia32_cvtne2ps2bf16_512((__v16sf) __A, - (__v16sf) __B); + return (__m512i)__builtin_ia32_cvtne2ps2bf16_512((__v16sf) __A, + (__v16sf) __B); } /// Convert Two Packed Single Data to One Packed BF16 Data. @@ -79,9 +81,9 @@ /// A 1 means conversion of __A or __B. A 0 means element from __W. /// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from /// conversion of __B, and higher 256 bits come from conversion of __A. 
-static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtne2ps_pbh(__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B) { - return (__m512bh)__builtin_ia32_selectw_512((__mmask32)__U, +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtne2ps_pbh(__m512i __W, __mmask32 __U, __m512 __A, __m512 __B) { + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtne2ps_pbh(__A, __B), (__v32hi)__W); } @@ -101,9 +103,9 @@ /// A 1 means conversion of __A or __B. A 0 means element is zero. /// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from /// conversion of __B, and higher 256 bits come from conversion of __A. -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtne2ps_pbh(__mmask32 __U, __m512 __A, __m512 __B) { - return (__m512bh)__builtin_ia32_selectw_512((__mmask32)__U, + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtne2ps_pbh(__A, __B), (__v32hi)_mm512_setzero_si512()); } @@ -117,9 +119,9 @@ /// \param __A /// A 512-bit vector of [16 x float]. /// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. -static __inline__ __m256bh __DEFAULT_FN_ATTRS512 +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtneps_pbh(__m512 __A) { - return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, + return (__m256i)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1); } @@ -138,11 +140,11 @@ /// A 16-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A. A 0 means element from __W. /// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. 
-static __inline__ __m256bh __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtneps_pbh(__m256bh __W, __mmask16 __U, __m512 __A) { - return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, - (__v16hi)__W, - (__mmask16)__U); +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtneps_pbh(__m256i __W, __mmask16 __U, __m512 __A) { + return (__m256i)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, + (__v16hi)__W, + (__mmask16)__U); } /// Convert Packed Single Data to Packed BF16 Data. @@ -157,9 +159,9 @@ /// A 16-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A. A 0 means element is zero. /// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. -static __inline__ __m256bh __DEFAULT_FN_ATTRS512 +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtneps_pbh(__mmask16 __U, __m512 __A) { - return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, + return (__m256i)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); } @@ -179,7 +181,7 @@ /// \returns A 512-bit vector of [16 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_dpbf16_ps(__m512 __D, __m512bh __A, __m512bh __B) { +_mm512_dpbf16_ps(__m512 __D, __m512i __A, __m512i __B) { return (__m512)__builtin_ia32_dpbf16ps_512((__v16sf) __D, (__v16si) __A, (__v16si) __B); @@ -203,7 +205,7 @@ /// \returns A 512-bit vector of [16 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_dpbf16_ps(__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B) { +_mm512_mask_dpbf16_ps(__m512 __D, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_dpbf16_ps(__D, __A, __B), (__v16sf)__D); @@ -227,7 +229,7 @@ /// \returns A 512-bit vector of [16 x float] comes from Dot Product of /// __A, __B and __D static __inline__ 
__m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) { +_mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512i __A, __m512i __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_dpbf16_ps(__D, __A, __B), (__v16sf)_mm512_setzero_si512()); @@ -240,7 +242,7 @@ /// \param __A /// A 256-bit vector of [16 x bfloat]. /// \returns A 512-bit vector of [16 x float] come from conversion of __A -static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) { +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256i __A) { return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32( (__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16)); } @@ -256,7 +258,7 @@ /// A 256-bit vector of [16 x bfloat]. /// \returns A 512-bit vector of [16 x float] come from conversion of __A static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) { +_mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256i __A) { return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32( (__m512i)_mm512_maskz_cvtepi16_epi32((__mmask16)__U, (__m256i)__A), 16)); } @@ -274,7 +276,7 @@ /// A 256-bit vector of [16 x bfloat]. 
/// \returns A 512-bit vector of [16 x float] come from conversion of __A static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtpbh_ps(__m512 __S, __mmask16 __U, __m256bh __A) { +_mm512_mask_cvtpbh_ps(__m512 __S, __mmask16 __U, __m256i __A) { return _mm512_castsi512_ps((__m512i)_mm512_mask_slli_epi32( (__m512i)__S, (__mmask16)__U, (__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16)); diff --git a/clang/lib/Headers/avx512vlbf16intrin.h b/clang/lib/Headers/avx512vlbf16intrin.h --- a/clang/lib/Headers/avx512vlbf16intrin.h +++ b/clang/lib/Headers/avx512vlbf16intrin.h @@ -13,7 +13,8 @@ #ifndef __AVX512VLBF16INTRIN_H #define __AVX512VLBF16INTRIN_H -typedef short __m128bh __attribute__((__vector_size__(16), __aligned__(16))); +typedef short __m128bh __attribute__((__vector_size__(16), __aligned__(16), + __deprecated__("use __m128i instead"))); #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ @@ -34,10 +35,10 @@ /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __B, and higher 64 bits come from conversion of __A. -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtne2ps_pbh(__m128 __A, __m128 __B) { - return (__m128bh)__builtin_ia32_cvtne2ps2bf16_128((__v4sf) __A, - (__v4sf) __B); + return (__m128i)__builtin_ia32_cvtne2ps2bf16_128((__v4sf) __A, + (__v4sf) __B); } /// Convert Two Packed Single Data to One Packed BF16 Data. @@ -57,9 +58,9 @@ /// A 1 means conversion of __A or __B. A 0 means element from __W. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __B, and higher 64 bits come from conversion of __A. 
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask_cvtne2ps_pbh(__m128bh __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128bh)__builtin_ia32_selectw_128((__mmask8)__U, +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtne2ps_pbh(__m128i __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_cvtne2ps_pbh(__A, __B), (__v8hi)__W); } @@ -79,9 +80,9 @@ /// A 1 means conversion of __A or __B. A 0 means element is zero. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __B, and higher 64 bits come from conversion of __A. -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtne2ps_pbh(__mmask8 __U, __m128 __A, __m128 __B) { - return (__m128bh)__builtin_ia32_selectw_128((__mmask8)__U, + return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_cvtne2ps_pbh(__A, __B), (__v8hi)_mm_setzero_si128()); } @@ -98,10 +99,10 @@ /// A 256-bit vector of [8 x float]. /// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from /// conversion of __B, and higher 128 bits come from conversion of __A. -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtne2ps_pbh(__m256 __A, __m256 __B) { - return (__m256bh)__builtin_ia32_cvtne2ps2bf16_256((__v8sf) __A, - (__v8sf) __B); + return (__m256i)__builtin_ia32_cvtne2ps2bf16_256((__v8sf) __A, + (__v8sf) __B); } /// Convert Two Packed Single Data to One Packed BF16 Data. @@ -121,9 +122,9 @@ /// A 1 means conversion of __A or __B. A 0 means element from __W. /// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from /// conversion of __B, and higher 128 bits come from conversion of __A. 
-static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtne2ps_pbh(__m256bh __W, __mmask16 __U, __m256 __A, __m256 __B) { - return (__m256bh)__builtin_ia32_selectw_256((__mmask16)__U, +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtne2ps_pbh(__m256i __W, __mmask16 __U, __m256 __A, __m256 __B) { + return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_cvtne2ps_pbh(__A, __B), (__v16hi)__W); } @@ -143,9 +144,9 @@ /// A 1 means conversion of __A or __B. A 0 means element is zero. /// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from /// conversion of __B, and higher 128 bits come from conversion of __A. -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtne2ps_pbh(__mmask16 __U, __m256 __A, __m256 __B) { - return (__m256bh)__builtin_ia32_selectw_256((__mmask16)__U, + return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_cvtne2ps_pbh(__A, __B), (__v16hi)_mm256_setzero_si256()); } @@ -160,9 +161,9 @@ /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __A, and higher 64 bits are 0. -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtneps_pbh(__m128 __A) { - return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, + return (__m128i)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); } @@ -182,11 +183,11 @@ /// A 1 means conversion of __A. A 0 means element from __W. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __A, and higher 64 bits are 0. 
-static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m128 __A) { - return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, - (__v8hi)__W, - (__mmask8)__U); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtneps_pbh(__m128i __W, __mmask8 __U, __m128 __A) { + return (__m128i)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, + (__v8hi)__W, + (__mmask8)__U); } /// Convert Packed Single Data to Packed BF16 Data. @@ -202,9 +203,9 @@ /// A 1 means conversion of __A. A 0 means element is zero. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __A, and higher 64 bits are 0. -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtneps_pbh(__mmask8 __U, __m128 __A) { - return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, + return (__m128i)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U); } @@ -218,9 +219,9 @@ /// \param __A /// A 256-bit vector of [8 x float]. /// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. -static __inline__ __m128bh __DEFAULT_FN_ATTRS256 +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtneps_pbh(__m256 __A) { - return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, + return (__m128i)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); } @@ -239,11 +240,11 @@ /// A 8-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A. A 0 means element from __W. /// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. 
-static __inline__ __m128bh __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m256 __A) { - return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, - (__v8hi)__W, - (__mmask8)__U); +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtneps_pbh(__m128i __W, __mmask8 __U, __m256 __A) { + return (__m128i)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, + (__v8hi)__W, + (__mmask8)__U); } /// Convert Packed Single Data to Packed BF16 Data. @@ -258,9 +259,9 @@ /// A 8-bit mask value specifying what is chosen for each element. /// A 1 means conversion of __A. A 0 means element is zero. /// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. -static __inline__ __m128bh __DEFAULT_FN_ATTRS256 +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtneps_pbh(__mmask8 __U, __m256 __A) { - return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, + return (__m128i)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U); } @@ -280,7 +281,7 @@ /// \returns A 128-bit vector of [4 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_dpbf16_ps(__m128 __D, __m128bh __A, __m128bh __B) { +_mm_dpbf16_ps(__m128 __D, __m128i __A, __m128i __B) { return (__m128)__builtin_ia32_dpbf16ps_128((__v4sf)__D, (__v4si)__A, (__v4si)__B); @@ -304,7 +305,7 @@ /// \returns A 128-bit vector of [4 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_dpbf16_ps(__m128 __D, __mmask8 __U, __m128bh __A, __m128bh __B) { +_mm_mask_dpbf16_ps(__m128 __D, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_dpbf16_ps(__D, __A, __B), (__v4sf)__D); @@ -328,7 +329,7 @@ /// \returns A 128-bit vector of [4 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m128 __DEFAULT_FN_ATTRS128 
-_mm_maskz_dpbf16_ps(__mmask8 __U, __m128 __D, __m128bh __A, __m128bh __B) { +_mm_maskz_dpbf16_ps(__mmask8 __U, __m128 __D, __m128i __A, __m128i __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_dpbf16_ps(__D, __A, __B), (__v4sf)_mm_setzero_si128()); @@ -349,7 +350,7 @@ /// \returns A 256-bit vector of [8 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_dpbf16_ps(__m256 __D, __m256bh __A, __m256bh __B) { +_mm256_dpbf16_ps(__m256 __D, __m256i __A, __m256i __B) { return (__m256)__builtin_ia32_dpbf16ps_256((__v8sf)__D, (__v8si)__A, (__v8si)__B); @@ -373,7 +374,7 @@ /// \returns A 256-bit vector of [8 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_dpbf16_ps(__m256 __D, __mmask8 __U, __m256bh __A, __m256bh __B) { +_mm256_mask_dpbf16_ps(__m256 __D, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_dpbf16_ps(__D, __A, __B), (__v8sf)__D); @@ -397,7 +398,7 @@ /// \returns A 256-bit vector of [8 x float] comes from Dot Product of /// __A, __B and __D static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpbf16_ps(__mmask8 __U, __m256 __D, __m256bh __A, __m256bh __B) { +_mm256_maskz_dpbf16_ps(__mmask8 __U, __m256 __D, __m256i __A, __m256i __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_dpbf16_ps(__D, __A, __B), (__v8sf)_mm256_setzero_si256()); @@ -428,7 +429,7 @@ /// \param __A /// A 128-bit vector of [4 x bfloat]. /// \returns A 128-bit vector of [4 x float] come from conversion of __A -static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128i __A) { return _mm_castsi128_ps( (__m128i)_mm_slli_epi32((__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16)); } @@ -440,7 +441,7 @@ /// \param __A /// A 128-bit vector of [8 x bfloat]. 
/// \returns A 256-bit vector of [8 x float] come from conversion of __A -static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) { +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128i __A) { return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32( (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16)); } @@ -456,7 +457,7 @@ /// A 128-bit vector of [4 x bfloat]. /// \returns A 128-bit vector of [4 x float] come from conversion of __A static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) { +_mm_maskz_cvtpbh_ps(__mmask8 __U, __m128i __A) { return _mm_castsi128_ps((__m128i)_mm_slli_epi32( (__m128i)_mm_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16)); } @@ -472,7 +473,7 @@ /// A 128-bit vector of [8 x bfloat]. /// \returns A 256-bit vector of [8 x float] come from conversion of __A static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) { +_mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128i __A) { return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32( (__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16)); } @@ -491,7 +492,7 @@ /// A 128-bit vector of [4 x bfloat]. /// \returns A 128-bit vector of [4 x float] come from conversion of __A static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) { +_mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128i __A) { return _mm_castsi128_ps((__m128i)_mm_mask_slli_epi32( (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16)); @@ -511,7 +512,7 @@ /// A 128-bit vector of [8 x bfloat]. 
/// \returns A 256-bit vector of [8 x float] come from conversion of __A static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128bh __A) { +_mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128i __A) { return _mm256_castsi256_ps((__m256i)_mm256_mask_slli_epi32( (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16)); diff --git a/clang/test/CodeGen/X86/avx512bf16-builtins.c b/clang/test/CodeGen/X86/avx512bf16-builtins.c --- a/clang/test/CodeGen/X86/avx512bf16-builtins.c +++ b/clang/test/CodeGen/X86/avx512bf16-builtins.c @@ -13,60 +13,60 @@ return _mm_cvtsbh_ss(A); } -__m512bh test_mm512_cvtne2ps_pbh(__m512 A, __m512 B) { +__m512i test_mm512_cvtne2ps_pbh(__m512 A, __m512 B) { // CHECK-LABEL: @test_mm512_cvtne2ps_pbh // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512 - // CHECK: ret <32 x i16> %{{.*}} + // CHECK: ret <8 x i64> %{{.*}} return _mm512_cvtne2ps_pbh(A, B); } -__m512bh test_mm512_maskz_cvtne2ps_pbh(__m512 A, __m512 B, __mmask32 U) { +__m512i test_mm512_maskz_cvtne2ps_pbh(__m512 A, __m512 B, __mmask32 U) { // CHECK-LABEL: @test_mm512_maskz_cvtne2ps_pbh // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - // CHECK: ret <32 x i16> %{{.*}} + // CHECK: ret <8 x i64> %{{.*}} return _mm512_maskz_cvtne2ps_pbh(U, A, B); } -__m512bh test_mm512_mask_cvtne2ps_pbh(__m512bh C, __mmask32 U, __m512 A, __m512 B) { +__m512i test_mm512_mask_cvtne2ps_pbh(__m512i C, __mmask32 U, __m512 A, __m512 B) { // CHECK-LABEL: @test_mm512_mask_cvtne2ps_pbh // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - // CHECK: ret <32 x i16> %{{.*}} + // CHECK: ret <8 x i64> %{{.*}} return _mm512_mask_cvtne2ps_pbh(C, U, A, B); } -__m256bh test_mm512_cvtneps_pbh(__m512 A) { +__m256i test_mm512_cvtneps_pbh(__m512 A) { // CHECK-LABEL: @test_mm512_cvtneps_pbh // CHECK: 
@llvm.x86.avx512bf16.cvtneps2bf16.512 - // CHECK: ret <16 x i16> %{{.*}} + // CHECK: ret <4 x i64> %{{.*}} return _mm512_cvtneps_pbh(A); } -__m256bh test_mm512_mask_cvtneps_pbh(__m256bh C, __mmask16 U, __m512 A) { +__m256i test_mm512_mask_cvtneps_pbh(__m256i C, __mmask16 U, __m512 A) { // CHECK-LABEL: @test_mm512_mask_cvtneps_pbh // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - // CHECK: ret <16 x i16> %{{.*}} + // CHECK: ret <4 x i64> %{{.*}} return _mm512_mask_cvtneps_pbh(C, U, A); } -__m256bh test_mm512_maskz_cvtneps_pbh(__m512 A, __mmask16 U) { +__m256i test_mm512_maskz_cvtneps_pbh(__m512 A, __mmask16 U) { // CHECK-LABEL: @test_mm512_maskz_cvtneps_pbh // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - // CHECK: ret <16 x i16> %{{.*}} + // CHECK: ret <4 x i64> %{{.*}} return _mm512_maskz_cvtneps_pbh(U, A); } -__m512 test_mm512_dpbf16_ps(__m512 D, __m512bh A, __m512bh B) { +__m512 test_mm512_dpbf16_ps(__m512 D, __m512i A, __m512i B) { // CHECK-LABEL: @test_mm512_dpbf16_ps // CHECK: @llvm.x86.avx512bf16.dpbf16ps.512 // CHECK: ret <16 x float> %{{.*}} return _mm512_dpbf16_ps(D, A, B); } -__m512 test_mm512_maskz_dpbf16_ps(__m512 D, __m512bh A, __m512bh B, __mmask16 U) { +__m512 test_mm512_maskz_dpbf16_ps(__m512 D, __m512i A, __m512i B, __mmask16 U) { // CHECK-LABEL: @test_mm512_maskz_dpbf16_ps // CHECK: @llvm.x86.avx512bf16.dpbf16ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} @@ -74,7 +74,7 @@ return _mm512_maskz_dpbf16_ps(U, D, A, B); } -__m512 test_mm512_mask_dpbf16_ps(__m512 D, __m512bh A, __m512bh B, __mmask16 U) { +__m512 test_mm512_mask_dpbf16_ps(__m512 D, __m512i A, __m512i B, __mmask16 U) { // CHECK-LABEL: @test_mm512_mask_dpbf16_ps // CHECK: @llvm.x86.avx512bf16.dpbf16ps.512 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} @@ -82,18 +82,18 @@ 
return _mm512_mask_dpbf16_ps(D, U, A, B); } -__m512 test_mm512_cvtpbh_ps(__m256bh A) { +__m512 test_mm512_cvtpbh_ps(__m256i A) { // CHECK-LABEL: @test_mm512_cvtpbh_ps - // CHECK: sext <16 x i16> %{{.*}} to <16 x i32> + // CHECK: bitcast <8 x i64> %{{.*}} to <16 x i32> // CHECK: @llvm.x86.avx512.pslli.d.512 // CHECK: bitcast <8 x i64> %{{.*}} to <16 x float> // CHECK: ret <16 x float> %{{.*}} return _mm512_cvtpbh_ps(A); } -__m512 test_mm512_maskz_cvtpbh_ps(__mmask16 M, __m256bh A) { +__m512 test_mm512_maskz_cvtpbh_ps(__mmask16 M, __m256i A) { // CHECK-LABEL: @test_mm512_maskz_cvtpbh_ps - // CHECK: sext <16 x i16> %{{.*}} to <16 x i32> + // CHECK: bitcast <8 x i64> %{{.*}} to <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} // CHECK: @llvm.x86.avx512.pslli.d.512 // CHECK: bitcast <8 x i64> %{{.*}} to <16 x float> @@ -101,9 +101,9 @@ return _mm512_maskz_cvtpbh_ps(M, A); } -__m512 test_mm512_mask_cvtpbh_ps(__m512 S, __mmask16 M, __m256bh A) { +__m512 test_mm512_mask_cvtpbh_ps(__m512 S, __mmask16 M, __m256i A) { // CHECK-LABEL: @test_mm512_mask_cvtpbh_ps - // CHECK: sext <16 x i16> %{{.*}} to <16 x i32> + // CHECK: bitcast <8 x i64> %{{.*}} to <16 x i32> // CHECK: @llvm.x86.avx512.pslli.d.512 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} // CHECK: bitcast <8 x i64> %{{.*}} to <16 x float> diff --git a/clang/test/CodeGen/X86/avx512bf16-error.c b/clang/test/CodeGen/X86/avx512bf16-error.c --- a/clang/test/CodeGen/X86/avx512bf16-error.c +++ b/clang/test/CodeGen/X86/avx512bf16-error.c @@ -11,3 +11,9 @@ __bfloat16 bar(__bfloat16 a, __bfloat16 b) { return a + b; } + +// expected-warning@+2 3 {{'__m128bh' is deprecated: use __m128i instead}} +// expected-note@* 3 {{'__m128bh' has been explicitly marked deprecated here}} +__m128bh baz(__m128bh a, __m128bh b) { + return a + b; +} diff --git a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c --- 
a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c @@ -4,127 +4,127 @@ #include -__m128bh test_mm_cvtne2ps2bf16(__m128 A, __m128 B) { +__m128i test_mm_cvtne2ps2bf16(__m128 A, __m128 B) { // CHECK-LABEL: @test_mm_cvtne2ps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128 - // CHECK: ret <8 x i16> %{{.*}} + // CHECK: ret <2 x i64> %{{.*}} return _mm_cvtne2ps_pbh(A, B); } -__m128bh test_mm_maskz_cvtne2ps2bf16(__m128 A, __m128 B, __mmask8 U) { +__m128i test_mm_maskz_cvtne2ps2bf16(__m128 A, __m128 B, __mmask8 U) { // CHECK-LABEL: @test_mm_maskz_cvtne2ps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - // CHECK: ret <8 x i16> %{{.*}} + // CHECK: ret <2 x i64> %{{.*}} return _mm_maskz_cvtne2ps_pbh(U, A, B); } -__m128bh test_mm_mask_cvtne2ps2bf16(__m128bh C, __mmask8 U, __m128 A, __m128 B) { +__m128i test_mm_mask_cvtne2ps2bf16(__m128i C, __mmask8 U, __m128 A, __m128 B) { // CHECK-LABEL: @test_mm_mask_cvtne2ps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - // CHECK: ret <8 x i16> %{{.*}} + // CHECK: ret <2 x i64> %{{.*}} return _mm_mask_cvtne2ps_pbh(C, U, A, B); } -__m256bh test_mm256_cvtne2ps2bf16(__m256 A, __m256 B) { +__m256i test_mm256_cvtne2ps2bf16(__m256 A, __m256 B) { // CHECK-LABEL: @test_mm256_cvtne2ps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256 - // CHECK: ret <16 x i16> %{{.*}} + // CHECK: ret <4 x i64> %{{.*}} return _mm256_cvtne2ps_pbh(A, B); } -__m256bh test_mm256_maskz_cvtne2ps2bf16(__m256 A, __m256 B, __mmask16 U) { +__m256i test_mm256_maskz_cvtne2ps2bf16(__m256 A, __m256 B, __mmask16 U) { // CHECK-LABEL: @test_mm256_maskz_cvtne2ps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - // CHECK: ret <16 x i16> %{{.*}} + // CHECK: ret <4 x i64> %{{.*}} return 
_mm256_maskz_cvtne2ps_pbh(U, A, B); } -__m256bh test_mm256_mask_cvtne2ps2bf16(__m256bh C, __mmask16 U, __m256 A, __m256 B) { +__m256i test_mm256_mask_cvtne2ps2bf16(__m256i C, __mmask16 U, __m256 A, __m256 B) { // CHECK-LABEL: @test_mm256_mask_cvtne2ps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.256 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - // CHECK: ret <16 x i16> %{{.*}} + // CHECK: ret <4 x i64> %{{.*}} return _mm256_mask_cvtne2ps_pbh(C, U, A, B); } -__m512bh test_mm512_cvtne2ps2bf16(__m512 A, __m512 B) { +__m512i test_mm512_cvtne2ps2bf16(__m512 A, __m512 B) { // CHECK-LABEL: @test_mm512_cvtne2ps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512 - // CHECK: ret <32 x i16> %{{.*}} + // CHECK: ret <8 x i64> %{{.*}} return _mm512_cvtne2ps_pbh(A, B); } -__m512bh test_mm512_maskz_cvtne2ps2bf16(__m512 A, __m512 B, __mmask32 U) { +__m512i test_mm512_maskz_cvtne2ps2bf16(__m512 A, __m512 B, __mmask32 U) { // CHECK-LABEL: @test_mm512_maskz_cvtne2ps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - // CHECK: ret <32 x i16> %{{.*}} + // CHECK: ret <8 x i64> %{{.*}} return _mm512_maskz_cvtne2ps_pbh(U, A, B); } -__m512bh test_mm512_mask_cvtne2ps2bf16(__m512bh C, __mmask32 U, __m512 A, __m512 B) { +__m512i test_mm512_mask_cvtne2ps2bf16(__m512i C, __mmask32 U, __m512 A, __m512 B) { // CHECK-LABEL: @test_mm512_mask_cvtne2ps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtne2ps2bf16.512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - // CHECK: ret <32 x i16> %{{.*}} + // CHECK: ret <8 x i64> %{{.*}} return _mm512_mask_cvtne2ps_pbh(C, U, A, B); } -__m128bh test_mm_cvtneps2bf16(__m128 A) { +__m128i test_mm_cvtneps2bf16(__m128 A) { // CHECK-LABEL: @test_mm_cvtneps2bf16 // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128 - // CHECK: ret <8 x i16> %{{.*}} + // CHECK: ret <2 x i64> %{{.*}} return _mm_cvtneps_pbh(A); } -__m128bh 
test_mm_mask_cvtneps2bf16(__m128bh C, __mmask8 U, __m128 A) { +__m128i test_mm_mask_cvtneps2bf16(__m128i C, __mmask8 U, __m128 A) { // CHECK-LABEL: @test_mm_mask_cvtneps2bf16 // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16. - // CHECK: ret <8 x i16> %{{.*}} + // CHECK: ret <2 x i64> %{{.*}} return _mm_mask_cvtneps_pbh(C, U, A); } -__m128bh test_mm_maskz_cvtneps2bf16(__m128 A, __mmask8 U) { +__m128i test_mm_maskz_cvtneps2bf16(__m128 A, __mmask8 U) { // CHECK-LABEL: @test_mm_maskz_cvtneps2bf16 // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128 - // CHECK: ret <8 x i16> %{{.*}} + // CHECK: ret <2 x i64> %{{.*}} return _mm_maskz_cvtneps_pbh(U, A); } -__m128bh test_mm256_cvtneps2bf16(__m256 A) { +__m128i test_mm256_cvtneps2bf16(__m256 A) { // CHECK-LABEL: @test_mm256_cvtneps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.256 - // CHECK: ret <8 x i16> %{{.*}} + // CHECK: ret <2 x i64> %{{.*}} return _mm256_cvtneps_pbh(A); } -__m128bh test_mm256_mask_cvtneps2bf16(__m128bh C, __mmask8 U, __m256 A) { +__m128i test_mm256_mask_cvtneps2bf16(__m128i C, __mmask8 U, __m256 A) { // CHECK-LABEL: @test_mm256_mask_cvtneps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.256 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - // CHECK: ret <8 x i16> %{{.*}} + // CHECK: ret <2 x i64> %{{.*}} return _mm256_mask_cvtneps_pbh(C, U, A); } -__m128bh test_mm256_maskz_cvtneps2bf16(__m256 A, __mmask8 U) { +__m128i test_mm256_maskz_cvtneps2bf16(__m256 A, __mmask8 U) { // CHECK-LABEL: @test_mm256_maskz_cvtneps2bf16 // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.256 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - // CHECK: ret <8 x i16> %{{.*}} + // CHECK: ret <2 x i64> %{{.*}} return _mm256_maskz_cvtneps_pbh(U, A); } -__m128 test_mm_dpbf16_ps(__m128 D, __m128bh A, __m128bh B) { +__m128 test_mm_dpbf16_ps(__m128 D, __m128i A, __m128i B) { // CHECK-LABEL: @test_mm_dpbf16_ps // CHECK: @llvm.x86.avx512bf16.dpbf16ps.128 // CHECK: ret <4 x float> %{{.*}} 
return _mm_dpbf16_ps(D, A, B); } -__m128 test_mm_maskz_dpbf16_ps(__m128 D, __m128bh A, __m128bh B, __mmask8 U) { +__m128 test_mm_maskz_dpbf16_ps(__m128 D, __m128i A, __m128i B, __mmask8 U) { // CHECK-LABEL: @test_mm_maskz_dpbf16_ps // CHECK: @llvm.x86.avx512bf16.dpbf16ps.128 // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} @@ -132,21 +132,21 @@ return _mm_maskz_dpbf16_ps(U, D, A, B); } -__m128 test_mm_mask_dpbf16_ps(__m128 D, __m128bh A, __m128bh B, __mmask8 U) { +__m128 test_mm_mask_dpbf16_ps(__m128 D, __m128i A, __m128i B, __mmask8 U) { // CHECK-LABEL: @test_mm_mask_dpbf16_ps // CHECK: @llvm.x86.avx512bf16.dpbf16ps.128 // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} // CHECK: ret <4 x float> %{{.*}} return _mm_mask_dpbf16_ps(D, U, A, B); } -__m256 test_mm256_dpbf16_ps(__m256 D, __m256bh A, __m256bh B) { +__m256 test_mm256_dpbf16_ps(__m256 D, __m256i A, __m256i B) { // CHECK-LABEL: @test_mm256_dpbf16_ps // CHECK: @llvm.x86.avx512bf16.dpbf16ps.256 // CHECK: ret <8 x float> %{{.*}} return _mm256_dpbf16_ps(D, A, B); } -__m256 test_mm256_maskz_dpbf16_ps(__m256 D, __m256bh A, __m256bh B, __mmask8 U) { +__m256 test_mm256_maskz_dpbf16_ps(__m256 D, __m256i A, __m256i B, __mmask8 U) { // CHECK-LABEL: @test_mm256_maskz_dpbf16_ps // CHECK: @llvm.x86.avx512bf16.dpbf16ps.256 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} @@ -154,7 +154,7 @@ return _mm256_maskz_dpbf16_ps(U, D, A, B); } -__m256 test_mm256_mask_dpbf16_ps(__m256 D, __m256bh A, __m256bh B, __mmask8 U) { +__m256 test_mm256_mask_dpbf16_ps(__m256 D, __m256i A, __m256i B, __mmask8 U) { // CHECK-LABEL: @test_mm256_mask_dpbf16_ps // CHECK: @llvm.x86.avx512bf16.dpbf16ps.256 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} @@ -169,7 +169,7 @@ return _mm_cvtness_sbh(A); } -__m128 test_mm_cvtpbh_ps(__m128bh A) { +__m128 test_mm_cvtpbh_ps(__m128i A) { // CHECK-LABEL: @test_mm_cvtpbh_ps // CHECK: sext <4 x i16> 
%{{.*}} to <4 x i32> // CHECK: @llvm.x86.sse2.pslli.d @@ -178,7 +178,7 @@ return _mm_cvtpbh_ps(A); } -__m256 test_mm256_cvtpbh_ps(__m128bh A) { +__m256 test_mm256_cvtpbh_ps(__m128i A) { // CHECK-LABEL: @test_mm256_cvtpbh_ps // CHECK: sext <8 x i16> %{{.*}} to <8 x i32> // CHECK: @llvm.x86.avx2.pslli.d @@ -187,7 +187,7 @@ return _mm256_cvtpbh_ps(A); } -__m128 test_mm_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) { +__m128 test_mm_maskz_cvtpbh_ps(__mmask8 M, __m128i A) { // CHECK-LABEL: @test_mm_maskz_cvtpbh_ps // CHECK: sext <4 x i16> %{{.*}} to <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} @@ -197,7 +197,7 @@ return _mm_maskz_cvtpbh_ps(M, A); } -__m256 test_mm256_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) { +__m256 test_mm256_maskz_cvtpbh_ps(__mmask8 M, __m128i A) { // CHECK-LABEL: @test_mm256_maskz_cvtpbh_ps // CHECK: sext <8 x i16> %{{.*}} to <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} @@ -207,7 +207,7 @@ return _mm256_maskz_cvtpbh_ps(M, A); } -__m128 test_mm_mask_cvtpbh_ps(__m128 S, __mmask8 M, __m128bh A) { +__m128 test_mm_mask_cvtpbh_ps(__m128 S, __mmask8 M, __m128i A) { // CHECK-LABEL: @test_mm_mask_cvtpbh_ps // CHECK: sext <4 x i16> %{{.*}} to <4 x i32> // CHECK: @llvm.x86.sse2.pslli.d @@ -217,7 +217,7 @@ return _mm_mask_cvtpbh_ps(S, M, A); } -__m256 test_mm256_mask_cvtpbh_ps(__m256 S, __mmask8 M, __m128bh A) { +__m256 test_mm256_mask_cvtpbh_ps(__m256 S, __mmask8 M, __m128i A) { // CHECK-LABEL: @test_mm256_mask_cvtpbh_ps // CHECK: sext <8 x i16> %{{.*}} to <8 x i32> // CHECK: @llvm.x86.avx2.pslli.d