Patch summary: declares the masked alignd/alignq (128/256-bit) and palignr
(128/256/512-bit) builtins, adds the corresponding alignr_epi8 / alignr_epi32 /
alignr_epi64 intrinsic macros (plain, mask and maskz forms) to the AVX-512
headers, and adds CodeGen tests that check each macro lowers to the matching
llvm.x86.avx512.mask.{valign.d,valign.q,palignr}.* intrinsic.

Note: line breaks, indentation and blank context lines in this copy were
reconstructed from a whitespace-collapsed original; if context whitespace does
not match the tree, apply with `patch -l`.

Index: include/clang/Basic/BuiltinsX86.def
===================================================================
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -1056,6 +1056,10 @@
 TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_alignq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_alignd512_mask, "V16iV16iV16iIiV16iUs", "", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_alignd128_mask, "V4iV4iV4iIiV4iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignd256_mask, "V8iV8iV8iIiV8iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "", "avx512f")
 
@@ -1962,6 +1966,10 @@
 TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14ps256_mask, "V8fV8fV8fUc","","avx512vl")
 
+TARGET_BUILTIN(__builtin_ia32_palignr512_mask, "V64cV64cV64ciV64cULLi","","avx512bw")
+TARGET_BUILTIN(__builtin_ia32_palignr128_mask, "V16cV16cV16ciV16cUs","","avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_palignr256_mask, "V32cV32cV32ciV32cUi","","avx512bw,avx512vl")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
Index: lib/Headers/avx512bwintrin.h
===================================================================
--- lib/Headers/avx512bwintrin.h
+++ lib/Headers/avx512bwintrin.h
@@ -2057,6 +2057,28 @@
                (__v32hi) __B, __U);
 }
 
+#define _mm512_alignr_epi8( __A, __B, __N) __extension__ ({\
+  (__m512i) __builtin_ia32_palignr512_mask ((__v8di)( __A),\
+                 (__v8di)( __B), ( __N) * 8,\
+                 (__v8di) _mm512_setzero_si512 (),\
+                 (__mmask64) -1);\
+})
+
+#define _mm512_mask_alignr_epi8( __W, __U, __A, __B, __N) __extension__({\
+  (__m512i) __builtin_ia32_palignr512_mask ((__v8di)( __A),\
+                 (__v8di)( __B),\
+                 ( __N) * 8,\
+                 (__v8di)( __W),\
+                 (__mmask64)( __U));\
+})
+
+#define _mm512_maskz_alignr_epi8( __U, __A, __B, __N) __extension__({\
+  (__m512i) __builtin_ia32_palignr512_mask ((__v8di)( __A),\
+                 (__v8di)( __B),\
+                 ( __N) * 8,\
+                 (__v8di) _mm512_setzero_si512 (),\
+                 (__mmask64)( __U));\
+})
 
 #undef __DEFAULT_FN_ATTRS
 
Index: lib/Headers/avx512fintrin.h
===================================================================
--- lib/Headers/avx512fintrin.h
+++ lib/Headers/avx512fintrin.h
@@ -2429,12 +2429,40 @@
                                         (I), (__v8di)_mm512_setzero_si512(), \
                                         (__mmask8)-1); })
 
+#define _mm512_mask_alignr_epi64( __W, __U, __A, __B, __imm) __extension__({\
+  (__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)( __A),\
+                 (__v8di)(__m512i)( __B), ( __imm),\
+                 (__v8di)(__m512i)( __W),\
+                 (__mmask8)( __U));\
+})
+
+#define _mm512_maskz_alignr_epi64( __U, __A, __B, __imm) __extension__({\
+  (__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)( __A),\
+                 (__v8di)(__m512i)( __B), ( __imm),\
+                 (__v8di) _mm512_setzero_si512 (),\
+                 (__mmask8)( __U));\
+})
+
 #define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
   (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), \
                                          (I), (__v16si)_mm512_setzero_si512(), \
-                                         (__mmask16)-1); })
+                                         (__mmask16)-1);\
+})
+
+#define _mm512_mask_alignr_epi32( __W, __U, __A, __B, __imm) __extension__ ({\
+  (__m512i) __builtin_ia32_alignd512_mask((__v16si)(__m512i)( __A),\
+                 (__v16si)(__m512i)( __B), ( __imm),\
+                 (__v16si)(__m512i)( __W),\
+                 (__mmask16)( __U));\
+})
 
+#define _mm512_maskz_alignr_epi32( __U, __A, __B, __imm) __extension__({\
+  (__m512i) __builtin_ia32_alignd512_mask ((__v16si)(__m512i)( __A),\
+                 (__v16si)(__m512i)( __B), ( __imm),\
+                 (__v16si) _mm512_setzero_si512 (),\
+                 (__mmask16)( __U));\
+})
 /* Vector Extract */
 
 #define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
Index: lib/Headers/avx512vlbwintrin.h
===================================================================
--- lib/Headers/avx512vlbwintrin.h
+++ lib/Headers/avx512vlbwintrin.h
@@ -3172,7 +3172,39 @@
                (__v16hi) __B, __U);
 }
 
+#define _mm_mask_alignr_epi8( __W, __U, __A, __B, __N) __extension__ ({ \
+  (__m128i) __builtin_ia32_palignr128_mask ((__v2di)( __A),\
+                 (__v2di)( __B),\
+                 ( __N) * 8,\
+                 (__v2di)( __W),\
+                 (__mmask16)( __U));\
+})
+
+#define _mm_maskz_alignr_epi8( __U, __A, __B, __N) __extension__ ({ \
+  (__m128i) __builtin_ia32_palignr128_mask ((__v2di)( __A),\
+                 (__v2di)( __B),\
+                 ( __N) * 8,\
+                 (__v2di)\
+                 _mm_setzero_si128 (),\
+                 (__mmask16)( __U));\
+})
 
+#define _mm256_mask_alignr_epi8( __W, __U, __A, __B, __N) __extension__ ({ \
+  (__m256i) __builtin_ia32_palignr256_mask ((__v4di)( __A),\
+                 (__v4di)( __B),\
+                 ( __N) * 8,\
+                 (__v4di)( __W),\
+                 (__mmask32)( __U));\
+})
+
+#define _mm256_maskz_alignr_epi8( __U, __A, __B, __N) __extension__ ({ \
+  (__m256i) __builtin_ia32_palignr256_mask ((__v4di)( __A),\
+                 (__v4di)( __B),\
+                 ( __N) * 8,\
+                 (__v4di)\
+                 _mm256_setzero_si256 (),\
+                 (__mmask32)( __U));\
+})
 
 #undef __DEFAULT_FN_ATTRS
 
Index: lib/Headers/avx512vlintrin.h
===================================================================
--- lib/Headers/avx512vlintrin.h
+++ lib/Headers/avx512vlintrin.h
@@ -7765,6 +7765,90 @@
                 (__mmask8) __U);
 }
 
+#define _mm_alignr_epi32( __A, __B, __imm) __extension__ ({ \
+  (__m128i) __builtin_ia32_alignd128_mask ((__v4si)( __A),\
+                 (__v4si)( __B),( __imm),\
+                 (__v4si) _mm_setzero_si128 (),\
+                 (__mmask8) -1);\
+})
+
+#define _mm_mask_alignr_epi32( __W, __U, __A, __B, __imm) __extension__ ({ \
+  (__m128i) __builtin_ia32_alignd128_mask ((__v4si)( __A),\
+                 (__v4si)( __B),( __imm),\
+                 (__v4si)( __W),\
+                 (__mmask8)( __U));\
+})
+
+#define _mm_maskz_alignr_epi32( __U, __A, __B, __imm) __extension__ ({ \
+  (__m128i) __builtin_ia32_alignd128_mask ((__v4si)( __A),\
+                 (__v4si)( __B),( __imm),\
+                 (__v4si) _mm_setzero_si128 (),\
+                 (__mmask8)( __U));\
+})
+
+#define _mm256_alignr_epi32( __A, __B, __imm) __extension__ ({ \
+  (__m256i) __builtin_ia32_alignd256_mask ((__v8si)( __A),\
+                 (__v8si)( __B),( __imm),\
+                 (__v8si) _mm256_setzero_si256 (),\
+                 (__mmask8) -1);\
+})
+
+#define _mm256_mask_alignr_epi32( __W, __U, __A, __B, __imm) __extension__ ({ \
+  (__m256i) __builtin_ia32_alignd256_mask ((__v8si)( __A),\
+                 (__v8si)( __B),( __imm),\
+                 (__v8si)( __W),\
+                 (__mmask8)( __U));\
+})
+
+#define _mm256_maskz_alignr_epi32( __U, __A, __B, __imm) __extension__ ({ \
+  (__m256i) __builtin_ia32_alignd256_mask ((__v8si)( __A),\
+                 (__v8si)( __B),( __imm),\
+                 (__v8si) _mm256_setzero_si256 (),\
+                 (__mmask8)( __U));\
+})
+
+#define _mm_alignr_epi64( __A, __B, __imm) __extension__ ({ \
+  (__m128i) __builtin_ia32_alignq128_mask ((__v2di)( __A),\
+                 (__v2di)( __B),( __imm),\
+                 (__v2di) _mm_setzero_di (),\
+                 (__mmask8) -1);\
+})
+
+#define _mm_mask_alignr_epi64( __W, __U, __A, __B, __imm) __extension__ ({ \
+  (__m128i) __builtin_ia32_alignq128_mask ((__v2di)( __A),\
+                 (__v2di)( __B),( __imm),\
+                 (__v2di)( __W),\
+                 (__mmask8)( __U));\
+})
+
+#define _mm_maskz_alignr_epi64( __U, __A, __B, __imm) __extension__ ({ \
+  (__m128i) __builtin_ia32_alignq128_mask ((__v2di)( __A),\
+                 (__v2di)( __B),( __imm),\
+                 (__v2di) _mm_setzero_di (),\
+                 (__mmask8)( __U));\
+})
+
+#define _mm256_alignr_epi64( __A, __B, __imm) __extension__ ({ \
+  (__m256i) __builtin_ia32_alignq256_mask ((__v4di)( __A),\
+                 (__v4di)( __B),( __imm),\
+                 (__v4di) _mm256_setzero_si256 (),\
+                 (__mmask8) -1);\
+})
+
+#define _mm256_mask_alignr_epi64( __W, __U, __A, __B, __imm) __extension__ ({ \
+  (__m256i) __builtin_ia32_alignq256_mask ((__v4di)( __A),\
+                 (__v4di)( __B),( __imm),\
+                 (__v4di)( __W),\
+                 (__mmask8)( __U));\
+})
+
+#define _mm256_maskz_alignr_epi64( __U, __A, __B, __imm) __extension__ ({ \
+  (__m256i) __builtin_ia32_alignq256_mask ((__v4di)( __A),\
+                 (__v4di)( __B),( __imm),\
+                 (__v4di) _mm256_setzero_si256 (),\
+                 (__mmask8)( __U));\
+})
+
 #undef __DEFAULT_FN_ATTRS
 #undef __DEFAULT_FN_ATTRS_BOTH
 
Index: test/CodeGen/avx512bw-builtins.c
===================================================================
--- test/CodeGen/avx512bw-builtins.c
+++ test/CodeGen/avx512bw-builtins.c
@@ -1404,3 +1404,23 @@
   return _mm512_mask_testn_epi16_mask(__U, __A, __B);
 }
 
+__m512i test_mm512_alignr_epi8(__m512i __A,__m512i __B){
+  // CHECK-LABEL: @test_mm512_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.512
+  return _mm512_alignr_epi8(__A, __B, 2);
+}
+
+__m512i test_mm512_mask_alignr_epi8(__m512i __W, __mmask64 __U, __m512i __A,__m512i __B){
+  // CHECK-LABEL: @test_mm512_mask_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.512
+  return _mm512_mask_alignr_epi8(__W, __U, __A, __B, 2);
+}
+
+__m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A,__m512i __B){
+  // CHECK-LABEL: @test_mm512_maskz_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.512
+  return _mm512_maskz_alignr_epi8(__U, __A, __B, 2);
+}
+
+
+
Index: test/CodeGen/avx512f-builtins.c
===================================================================
--- test/CodeGen/avx512f-builtins.c
+++ test/CodeGen/avx512f-builtins.c
@@ -180,6 +180,20 @@
   return _mm512_alignr_epi32(a, b, 2);
 }
 
+__m512i test_mm512_mask_alignr_epi32(__m512i w, __mmask16 u, __m512i a, __m512i b)
+{
+  // CHECK-LABEL: @test_mm512_mask_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.512
+  return _mm512_mask_alignr_epi32(w, u, a, b, 2);
+}
+
+__m512i test_mm512_maskz_alignr_epi32( __mmask16 u, __m512i a, __m512i b)
+{
+  // CHECK-LABEL: @test_mm512_maskz_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.512
+  return _mm512_maskz_alignr_epi32(u, a, b, 2);
+}
+
 __m512i test_mm512_alignr_epi64(__m512i a, __m512i b)
 {
   // CHECK-LABEL: @test_mm512_alignr_epi64
@@ -187,6 +201,20 @@
   return _mm512_alignr_epi64(a, b, 2);
 }
 
+__m512i test_mm512_mask_alignr_epi64(__m512i w, __mmask8 u, __m512i a, __m512i b)
+{
+  // CHECK-LABEL: @test_mm512_mask_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.512
+  return _mm512_mask_alignr_epi64(w, u, a, b, 2);
+}
+
+__m512i test_mm512_maskz_alignr_epi64( __mmask8 u, __m512i a, __m512i b)
+{
+  // CHECK-LABEL: @test_mm512_maskz_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.512
+  return _mm512_maskz_alignr_epi64(u, a, b, 2);
+}
+
 __m512d test_mm512_broadcastsd_pd(__m128d a)
 {
   // CHECK-LABEL: @test_mm512_broadcastsd_pd
Index: test/CodeGen/avx512vl-builtins.c
===================================================================
--- test/CodeGen/avx512vl-builtins.c
+++ test/CodeGen/avx512vl-builtins.c
@@ -5278,3 +5278,75 @@
   // CHECK: @llvm.x86.avx512.rsqrt14.ps.256
   return _mm256_maskz_rsqrt14_ps(__U, __A);
 }
+
+__m128i test_mm_alignr_epi32(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.128
+  return _mm_alignr_epi32(__A, __B, 1);
+}
+
+__m128i test_mm_mask_alignr_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.128
+  return _mm_mask_alignr_epi32(__W, __U, __A, __B, 1);
+}
+
+__m128i test_mm_maskz_alignr_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.128
+  return _mm_maskz_alignr_epi32(__U, __A, __B, 1);
+}
+
+__m256i test_mm256_alignr_epi32(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.256
+  return _mm256_alignr_epi32(__A, __B, 1);
+}
+
+__m256i test_mm256_mask_alignr_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.256
+  return _mm256_mask_alignr_epi32(__W, __U, __A, __B, 1);
+}
+
+__m256i test_mm256_maskz_alignr_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.256
+  return _mm256_maskz_alignr_epi32(__U, __A, __B, 1);
+}
+
+__m128i test_mm_alignr_epi64(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.128
+  return _mm_alignr_epi64(__A, __B, 1);
+}
+
+__m128i test_mm_mask_alignr_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.128
+  return _mm_mask_alignr_epi64(__W, __U, __A, __B, 1);
+}
+
+__m128i test_mm_maskz_alignr_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.128
+  return _mm_maskz_alignr_epi64(__U, __A, __B, 1);
+}
+
+__m256i test_mm256_alignr_epi64(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.256
+  return _mm256_alignr_epi64(__A, __B, 1);
+}
+
+__m256i test_mm256_mask_alignr_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.256
+  return _mm256_mask_alignr_epi64(__W, __U, __A, __B, 1);
+}
+
+__m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.256
+  return _mm256_maskz_alignr_epi64(__U, __A, __B, 1);
+}
Index: test/CodeGen/avx512vlbw-builtins.c
===================================================================
--- test/CodeGen/avx512vlbw-builtins.c
+++ test/CodeGen/avx512vlbw-builtins.c
@@ -2172,3 +2172,27 @@
   return _mm256_mask_testn_epi16_mask(__U, __A, __B);
 }
 
+__m128i test_mm_mask_alignr_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.128
+  return _mm_mask_alignr_epi8(__W, __U, __A, __B, 2);
+}
+
+__m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.128
+  return _mm_maskz_alignr_epi8(__U, __A, __B, 2);
+}
+
+__m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.256
+  return _mm256_mask_alignr_epi8(__W, __U, __A, __B, 2);
+}
+
+__m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.256
+  return _mm256_maskz_alignr_epi8(__U, __A, __B, 2);
+}
+