diff --git a/clang/lib/Headers/avx512bf16intrin.h b/clang/lib/Headers/avx512bf16intrin.h
--- a/clang/lib/Headers/avx512bf16intrin.h
+++ b/clang/lib/Headers/avx512bf16intrin.h
@@ -232,7 +232,7 @@
 ///
 /// \param __A
 ///    A 256-bit vector of [16 x bfloat].
-/// \returns A 512-bit vector of [16 x float] come from convertion of __A
+/// \returns A 512-bit vector of [16 x float] come from conversion of __A
 static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) {
   return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32(
       (__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16));
@@ -247,7 +247,7 @@
 ///    bit is not set.
 /// \param __A
 ///    A 256-bit vector of [16 x bfloat].
-/// \returns A 512-bit vector of [16 x float] come from convertion of __A
+/// \returns A 512-bit vector of [16 x float] come from conversion of __A
 static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) {
   return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32(
@@ -265,7 +265,7 @@
 ///    A 16-bit mask.
 /// \param __A
 ///    A 256-bit vector of [16 x bfloat].
-/// \returns A 512-bit vector of [16 x float] come from convertion of __A
+/// \returns A 512-bit vector of [16 x float] come from conversion of __A
 static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtpbh_ps(__m512 __S, __mmask16 __U, __m256bh __A) {
   return _mm512_castsi512_ps((__m512i)_mm512_mask_slli_epi32(
diff --git a/clang/lib/Headers/avx512vlbf16intrin.h b/clang/lib/Headers/avx512vlbf16intrin.h
--- a/clang/lib/Headers/avx512vlbf16intrin.h
+++ b/clang/lib/Headers/avx512vlbf16intrin.h
@@ -420,18 +420,46 @@
   return __R[0];
 }
 
+/// Convert Packed BF16 Data to Packed float Data.
+///
+/// \headerfile
+///
+/// \param __A
+///    A 128-bit vector of [8 x bfloat]; only 4 of its elements are converted.
+/// \returns A 128-bit vector of [4 x float] come from conversion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) {
+  return _mm_castsi128_ps(
+      (__m128i)_mm_slli_epi32((__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data.
 ///
 /// \headerfile
 ///
 /// \param __A
 ///    A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] come from conversion of __A
 static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
   return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
       (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16));
 }
 
+/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
+///
+/// \headerfile
+///
+/// \param __U
+///    A 4-bit mask. Elements are zeroed out when the corresponding mask
+///    bit is not set.
+/// \param __A
+///    A 128-bit vector of [8 x bfloat]; only 4 of its elements are converted.
+/// \returns A 128-bit vector of [4 x float] come from conversion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
+  return _mm_castsi128_ps((__m128i)_mm_slli_epi32(
+      (__m128i)_mm_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data using zeroing mask.
 ///
 /// \headerfile
@@ -441,13 +469,33 @@
 ///    bit is not set.
 /// \param __A
 ///    A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] come from conversion of __A
 static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
   return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
       (__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
 }
 
+/// Convert Packed BF16 Data to Packed float Data using merging mask.
+///
+/// \headerfile
+///
+/// \param __S
+///    A 128-bit vector of [4 x float]. Elements are copied from __S when
+///    the corresponding mask bit is not set.
+/// \param __U
+///    A 4-bit mask. Elements are copied from __S when the corresponding
+///    mask bit is not set.
+/// \param __A
+///    A 128-bit vector of [8 x bfloat]; only 4 of its elements are converted.
+/// \returns A 128-bit vector of [4 x float] come from conversion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) {
+  return _mm_castsi128_ps((__m128i)_mm_mask_slli_epi32(
+      (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32((__m128i)__A),
+      16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data using merging mask.
 ///
 /// \headerfile
@@ -460,7 +508,7 @@
 ///    bit is not set.
 /// \param __A
 ///    A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] come from conversion of __A
 static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128bh __A) {
   return _mm256_castsi256_ps((__m256i)_mm256_mask_slli_epi32(
diff --git a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
--- a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
@@ -169,6 +169,15 @@
   return _mm_cvtness_sbh(A);
 }
 
+__m128 test_mm_cvtpbh_ps(__m128bh A) {
+  // CHECK-LABEL: @test_mm_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_cvtpbh_ps(A);
+}
+
 __m256 test_mm256_cvtpbh_ps(__m128bh A) {
   // CHECK-LABEL: @test_mm256_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -178,6 +187,16 @@
   return _mm256_cvtpbh_ps(A);
 }
 
+__m128 test_mm_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_maskz_cvtpbh_ps(M, A);
+}
+
 __m256 test_mm256_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -188,6 +207,16 @@
   return _mm256_maskz_cvtpbh_ps(M, A);
 }
 
+__m128 test_mm_mask_cvtpbh_ps(__m128 S, __mmask8 M, __m128bh A) {
+  // CHECK-LABEL: @test_mm_mask_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_mask_cvtpbh_ps(S, M, A);
+}
+
 __m256 test_mm256_mask_cvtpbh_ps(__m256 S, __mmask8 M, __m128bh A) {
   // CHECK-LABEL: @test_mm256_mask_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>