diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -5929,41 +5929,44 @@ (__v8di)_mm512_setzero_si512()); } -#define _mm512_ternarylogic_epi32(A, B, C, imm) \ - ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)-1)) - -#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \ - ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (__v16si)(__m512i)(C), (int)(imm), \ - (__mmask16)(U))) - -#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \ - ((__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), \ - (__v16si)(__m512i)(C), \ - (int)(imm), (__mmask16)(U))) - -#define _mm512_ternarylogic_epi64(A, B, C, imm) \ - ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)-1)) - -#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \ - ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)(U))) - -#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \ - ((__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), \ - (__v8di)(__m512i)(C), (int)(imm), \ - (__mmask8)(U))) +/// \enum _MM_TERNLOG_ENUM +/// A helper to represent the ternary logic operations among vector \a A, +/// \a B and \a C. The representation is passed to \a imm. +typedef enum { + _MM_TERNLOG_A = 0xF0, + _MM_TERNLOG_B = 0xCC, + _MM_TERNLOG_C = 0xAA +} _MM_TERNLOG_ENUM; + +#define _mm512_ternarylogic_epi32(A, B, C, imm) \ + ((__m512i)__builtin_ia32_pternlogd512_mask( \ + (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \ + (unsigned char)(imm), (__mmask16)-1)) + +#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \ + ((__m512i)__builtin_ia32_pternlogd512_mask( \ + (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \ + (unsigned char)(imm), (__mmask16)(U))) + +#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \ + ((__m512i)__builtin_ia32_pternlogd512_maskz( \ + (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \ + (unsigned char)(imm), (__mmask16)(U))) + +#define _mm512_ternarylogic_epi64(A, B, C, imm) \ + ((__m512i)__builtin_ia32_pternlogq512_mask( \ + (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \ + (unsigned char)(imm), (__mmask8)-1)) + +#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \ + ((__m512i)__builtin_ia32_pternlogq512_mask( \ + (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \ + (unsigned char)(imm), (__mmask8)(U))) + +#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \ + ((__m512i)__builtin_ia32_pternlogq512_maskz( \ + (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \ + (unsigned char)(imm), (__mmask8)(U))) #ifdef __x86_64__ #define _mm_cvt_roundsd_i64(A, R) \ diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -6525,79 +6525,65 @@ (__v4di)_mm256_setzero_si256()); } -#define _mm_ternarylogic_epi32(A, B, C, imm) \ - ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)-1)) - -#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ - ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U))) - -#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ - ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), \ - (__v4si)(__m128i)(C), (int)(imm), \ - (__mmask8)(U))) - -#define _mm256_ternarylogic_epi32(A, B, C, imm) \ - ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)-1)) - -#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ - ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U))) - -#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ - ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), \ - (__v8si)(__m256i)(C), (int)(imm), \ - (__mmask8)(U))) - -#define _mm_ternarylogic_epi64(A, B, C, imm) \ - ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)-1)) - -#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ - ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U))) - -#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ - ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), \ - (__v2di)(__m128i)(C), (int)(imm), \ - (__mmask8)(U))) - -#define _mm256_ternarylogic_epi64(A, B, C, imm) \ - ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)-1)) - -#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ - ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)(U))) - -#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ - ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), \ - (__v4di)(__m256i)(C), (int)(imm), \ - (__mmask8)(U))) - - +#define _mm_ternarylogic_epi32(A, B, C, imm) \ + ((__m128i)__builtin_ia32_pternlogd128_mask( \ + (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ + (unsigned char)(imm), (__mmask8)-1)) + +#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ + ((__m128i)__builtin_ia32_pternlogd128_mask( \ + (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ + (unsigned char)(imm), (__mmask8)(U))) + +#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ + ((__m128i)__builtin_ia32_pternlogd128_maskz( \ + (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ + (unsigned char)(imm), (__mmask8)(U))) + +#define _mm256_ternarylogic_epi32(A, B, C, imm) \ + ((__m256i)__builtin_ia32_pternlogd256_mask( \ + (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ + (unsigned char)(imm), (__mmask8)-1)) + +#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ + ((__m256i)__builtin_ia32_pternlogd256_mask( \ + (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ + (unsigned char)(imm), (__mmask8)(U))) + +#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ + ((__m256i)__builtin_ia32_pternlogd256_maskz( \ + (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ + (unsigned char)(imm), (__mmask8)(U))) + +#define _mm_ternarylogic_epi64(A, B, C, imm) \ + ((__m128i)__builtin_ia32_pternlogq128_mask( \ + (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ + (unsigned char)(imm), (__mmask8)-1)) + +#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ + ((__m128i)__builtin_ia32_pternlogq128_mask( \ + (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ + (unsigned char)(imm), (__mmask8)(U))) + +#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ + ((__m128i)__builtin_ia32_pternlogq128_maskz( \ + (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ + (unsigned char)(imm), (__mmask8)(U))) + +#define _mm256_ternarylogic_epi64(A, B, C, imm) \ + ((__m256i)__builtin_ia32_pternlogq256_mask( \ + (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ + (unsigned char)(imm), (__mmask8)-1)) + +#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ + ((__m256i)__builtin_ia32_pternlogq256_mask( \ + (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ + (unsigned char)(imm), (__mmask8)(U))) + +#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ + ((__m256i)__builtin_ia32_pternlogq256_maskz( \ + (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ + (unsigned char)(imm), (__mmask8)(U))) #define _mm256_shuffle_f32x4(A, B, imm) \ ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \ diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -6007,42 +6007,42 @@ __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: @test_mm512_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.pternlog.d.512 - return _mm512_ternarylogic_epi32(__A, __B, __C, 4); + // CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 240) + return _mm512_ternarylogic_epi32(__A, __B, __C, _MM_TERNLOG_A); } __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { // CHECK-LABEL: @test_mm512_mask_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.pternlog.d.512 + // CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 204) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} - return _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, 4); + return _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, _MM_TERNLOG_B); } __m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: @test_mm512_maskz_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.pternlog.d.512 + // CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 170) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> zeroinitializer - return _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, 4); + return _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, _MM_TERNLOG_C); } __m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: @test_mm512_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.512 - return _mm512_ternarylogic_epi64(__A, __B, __C, 4); + // CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 192) + return _mm512_ternarylogic_epi64(__A, __B, __C, _MM_TERNLOG_A & _MM_TERNLOG_B); } __m512i test_mm512_mask_ternarylogic_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) { // CHECK-LABEL: @test_mm512_mask_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.512 + // CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 238) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_mask_ternarylogic_epi64(__A, __U, __B, __C, 4); + return _mm512_mask_ternarylogic_epi64(__A, __U, __B, __C, _MM_TERNLOG_B | _MM_TERNLOG_C); } __m512i test_mm512_maskz_ternarylogic_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: @test_mm512_maskz_ternarylogic_epi64 - // CHECK: @llvm.x86.avx512.pternlog.q.512 + // CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 111) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> zeroinitializer - return _mm512_maskz_ternarylogic_epi64(__U, __A, __B, __C, 4); + return _mm512_maskz_ternarylogic_epi64(__U, __A, __B, __C, ~_MM_TERNLOG_A | (_MM_TERNLOG_B ^ _MM_TERNLOG_C)); } __m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) {