Index: include/clang/Basic/BuiltinsX86.def =================================================================== --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -1794,6 +1794,25 @@ TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8f*V8fUc","","avx512vl") - +TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_mask, "V16fV16fV16fIiV16fUs","","avx512f") +TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_mask, "V8dV8dV8dIiV8dUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_mask, "V16iV16iV16iIiV16iUs","","avx512f") +TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_mask, "V8LLiV8LLiV8LLiIiV8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_shufpd512_mask, "V8dV8dV8dIiV8dUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_shufps512_mask, "V16fV16fV16fIiV16fUs","","avx512f") +TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256_mask, "V8fV8fV8fIiV8fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256_mask, "V4dV4dV4dIiV4dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256_mask, "V8iV8iV8iIiV8iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_shufpd128_mask, "V2dV2dV2dIiV2dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_shufpd256_mask, "V4dV4dV4dIiV4dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_shufps128_mask, "V4fV4fV4fIiV4fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_shufps256_mask, "V8fV8fV8fIiV8fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi","","avx512f") +TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi","","avx512f") +TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc","","avx512vl") 
+TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_rsqrt14ps256_mask, "V8fV8fV8fUc","","avx512vl") #undef BUILTIN #undef TARGET_BUILTIN Index: lib/Headers/avx512fintrin.h =================================================================== --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -3974,6 +3974,228 @@ __builtin_ia32_vcomiss ((__v4sf) (__A), (__v4sf) (__B), ( __P), ( __R));\ }) +#define _mm512_shuffle_f32x4( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f32x4_mask ((__v16sf)( __A),\ + (__v16sf)( __B),( __imm),\ + (__v16sf)\ + _mm512_undefined_ps (),\ + (__mmask16) -1);\ +}) + +#define _mm512_mask_shuffle_f32x4( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f32x4_mask ((__v16sf)( __A),\ + (__v16sf)( __B),( __imm),\ + (__v16sf)( __W),\ + (__mmask16)( __U));\ +}) + +#define _mm512_maskz_shuffle_f32x4( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f32x4_mask ((__v16sf)( __A),\ + (__v16sf)( __B),( __imm),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + (__mmask16)( __U));\ +}) + +#define _mm512_shuffle_f64x2( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f64x2_mask ((__v8df)( __A),\ + (__v8df)( __B),( __imm),\ + (__v8df)\ + _mm512_undefined_pd (),\ + (__mmask8) -1);\ +}) + +#define _mm512_mask_shuffle_f64x2( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f64x2_mask ((__v8df)( __A),\ + (__v8df)( __B),( __imm),\ + (__v8df)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm512_maskz_shuffle_f64x2( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f64x2_mask ((__v8df)( __A),\ + (__v8df)( __B),( __imm),\ + (__v8df)\ + _mm512_setzero_pd (),\ + (__mmask8)( __U));\ +}) + +#define _mm512_shuffle_i32x4( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i32x4_mask ((__v16si)( __A),\ + (__v16si)( __B),\ + ( __imm),\ + (__v16si)\ + _mm512_setzero_si512 (),\ + (__mmask16) -1);\ +}) + +#define 
_mm512_mask_shuffle_i32x4( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i32x4_mask ((__v16si)( __A),\ + (__v16si)( __B),\ + ( __imm),\ + (__v16si)( __W),\ + (__mmask16)( __U));\ +}) + +#define _mm512_maskz_shuffle_i32x4( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i32x4_mask ((__v16si)( __A),\ + (__v16si)( __B),\ + ( __imm),\ + (__v16si)\ + _mm512_setzero_si512 (),\ + (__mmask16)( __U));\ +}) + +#define _mm512_shuffle_i64x2( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i64x2_mask ((__v8di)( __A),\ + (__v8di)( __B),( __imm),\ + (__v8di)\ + _mm512_setzero_si512 (),\ + (__mmask8) -1);\ +}) + +#define _mm512_mask_shuffle_i64x2( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i64x2_mask ((__v8di)( __A),\ + (__v8di)( __B),( __imm),\ + (__v8di)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm512_maskz_shuffle_i64x2( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i64x2_mask ((__v8di)( __A),\ + (__v8di)( __B),( __imm),\ + (__v8di)\ + _mm512_setzero_si512 (),\ + (__mmask8)( __U));\ +}) + +#define _mm512_shuffle_pd( __M, __V, __imm) __extension__ ({ \ +__builtin_ia32_shufpd512_mask ((__v8df)( __M),\ + (__v8df)( __V),( __imm),\ + (__v8df)\ + _mm512_undefined_pd (),\ + (__mmask8) -1);\ +}) + +#define _mm512_mask_shuffle_pd( __W, __U, __M, __V, __imm) __extension__ ({ \ +__builtin_ia32_shufpd512_mask ((__v8df)( __M),\ + (__v8df)( __V),( __imm),\ + (__v8df)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm512_maskz_shuffle_pd( __U, __M, __V, __imm) __extension__ ({ \ +__builtin_ia32_shufpd512_mask ((__v8df)( __M),\ + (__v8df)( __V),( __imm),\ + (__v8df)\ + _mm512_setzero_pd (),\ + (__mmask8)( __U));\ +}) + +#define _mm512_shuffle_ps( __M, __V, __imm) __extension__ ({ \ +__builtin_ia32_shufps512_mask ((__v16sf)( __M),\ + (__v16sf)( __V),( __imm),\ + (__v16sf)\ + _mm512_undefined_ps (),\ + (__mmask16) -1);\ +}) + +#define _mm512_mask_shuffle_ps( __W, __U, __M, __V, __imm) __extension__ ({ \ 
+__builtin_ia32_shufps512_mask ((__v16sf)( __M),\ + (__v16sf)( __V),( __imm),\ + (__v16sf)( __W),\ + (__mmask16)( __U));\ +}) + +#define _mm512_maskz_shuffle_ps( __U, __M, __V, __imm) __extension__ ({ \ +__builtin_ia32_shufps512_mask ((__v16sf)( __M),\ + (__v16sf)( __V),( __imm),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + (__mmask16)( __U));\ +}) + +#define _mm_sqrt_round_sd( __A, __B, __R) __extension__ ({ \ +__builtin_ia32_sqrtsd_round_mask ((__v2df)( __B),\ + (__v2df)( __A),(__v2df) _mm_setzero_pd(),\ + (__mmask8) -1,\ + ( __R));\ +}) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __B, + (__v2df) __A, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_mask_sqrt_round_sd( __W, __U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_sqrtsd_round_mask ((__v2df)( __B),\ + (__v2df)( __A),(__v2df) __W,\ + (__mmask8) __U,\ + ( __R));\ +}) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __B, + (__v2df) __A, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_maskz_sqrt_round_sd( __U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_sqrtsd_round_mask ((__v2df)( __B),\ + (__v2df)( __A),(__v2df) _mm_setzero_pd(),\ + (__mmask8) __U,\ + ( __R));\ +}) + +#define _mm_sqrt_round_ss( __A, __B, __R) __extension__ ({ \ +__builtin_ia32_sqrtss_round_mask ((__v4sf)( __B),\ + (__v4sf)( __A),(__v4sf) _mm_setzero_ps(),\ + (__mmask8) -1,\ + ( __R));\ +}) + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __B, + (__v4sf) __A, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_mask_sqrt_round_ss( __W, 
__U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_sqrtss_round_mask ((__v4sf)( __B),\ + (__v4sf)( __A),(__v4sf) __W,\ + (__mmask8) __U,\ + ( __R));\ +}) + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __B, + (__v4sf) __A, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_maskz_sqrt_round_ss( __U, __A, __B, __R) __extension__ ({ \ +__builtin_ia32_sqrtss_round_mask ((__v4sf)( __B),\ + (__v4sf)( __A),(__v4sf) _mm_setzero_ps(),\ + (__mmask8) __U,\ + ( __R));\ +}) #undef __DEFAULT_FN_ATTRS Index: lib/Headers/avx512vlintrin.h =================================================================== --- lib/Headers/avx512vlintrin.h +++ lib/Headers/avx512vlintrin.h @@ -6360,6 +6360,275 @@ _mm256_setzero_ps (), (__mmask8) __U); } + +#define _mm256_shuffle_f32x4( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f32x4_256_mask ((__v8sf)( __A),\ + (__v8sf)( __B),\ + ( __imm),\ + (__v8sf)\ + _mm256_setzero_ps (),\ + (__mmask8) -1);\ +}) + +#define _mm256_mask_shuffle_f32x4( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f32x4_256_mask ((__v8sf)( __A),\ + (__v8sf)( __B),\ + ( __imm),\ + (__v8sf)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm256_maskz_shuffle_f32x4( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f32x4_256_mask ((__v8sf)( __A),\ + (__v8sf)( __B),\ + ( __imm),\ + (__v8sf)\ + _mm256_setzero_ps (),\ + (__mmask8)( __U));\ +}) + +#define _mm256_shuffle_f64x2( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f64x2_256_mask ((__v4df)( __A),\ + (__v4df)( __B),\ + ( __imm),\ + (__v4df)\ + _mm256_setzero_pd (),\ + (__mmask8) -1);\ +}) + +#define _mm256_mask_shuffle_f64x2( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f64x2_256_mask ((__v4df)( __A),\ + (__v4df)( __B),\ + ( __imm),\ + (__v4df)( __W),\ + (__mmask8)( __U));\ 
+}) + +#define _mm256_maskz_shuffle_f64x2( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_f64x2_256_mask ((__v4df)( __A),\ + (__v4df)( __B),\ + ( __imm),\ + (__v4df)\ + _mm256_setzero_pd (),\ + (__mmask8)( __U));\ +}) + +#define _mm256_shuffle_i32x4( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i32x4_256_mask ((__v8si)( __A),\ + (__v8si)( __B),\ + ( __imm),\ + (__v8si)\ + _mm256_setzero_si256 (),\ + (__mmask8) -1);\ +}) + +#define _mm256_mask_shuffle_i32x4( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i32x4_256_mask ((__v8si)( __A),\ + (__v8si)( __B),\ + ( __imm),\ + (__v8si)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm256_maskz_shuffle_i32x4( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i32x4_256_mask ((__v8si)( __A),\ + (__v8si)( __B),\ + ( __imm),\ + (__v8si)\ + _mm256_setzero_si256 (),\ + (__mmask8)( __U));\ +}) + +#define _mm256_shuffle_i64x2( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i64x2_256_mask ((__v4di)( __A),\ + (__v4di)( __B),\ + ( __imm),\ + (__v4di)\ + _mm256_setzero_si256 (),\ + (__mmask8) -1);\ +}) + +#define _mm256_mask_shuffle_i64x2( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i64x2_256_mask ((__v4di)( __A),\ + (__v4di)( __B),\ + ( __imm),\ + (__v4di)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm256_maskz_shuffle_i64x2( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shuf_i64x2_256_mask ((__v4di)( __A),\ + (__v4di)( __B),\ + ( __imm),\ + (__v4di)\ + _mm256_setzero_si256 (),\ + (__mmask8)( __U));\ +}) + +#define _mm_mask_shuffle_pd( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shufpd128_mask ((__v2df)( __A),\ + (__v2df)( __B),( __imm),\ + (__v2df)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm_maskz_shuffle_pd( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shufpd128_mask ((__v2df)( __A),\ + (__v2df)( __B),( __imm),\ + (__v2df)\ + _mm_setzero_pd (),\ + (__mmask8)( __U));\ +}) + +#define 
_mm256_mask_shuffle_pd( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shufpd256_mask ((__v4df)( __A),\ + (__v4df)( __B),( __imm),\ + (__v4df)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm256_maskz_shuffle_pd( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shufpd256_mask ((__v4df)( __A),\ + (__v4df)( __B),( __imm),\ + (__v4df)\ + _mm256_setzero_pd (),\ + (__mmask8)( __U));\ +}) + +#define _mm_mask_shuffle_ps( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shufps128_mask ((__v4sf)( __A),\ + (__v4sf)( __B),( __imm),\ + (__v4sf)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm_maskz_shuffle_ps( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shufps128_mask ((__v4sf)( __A),\ + (__v4sf)( __B),( __imm),\ + (__v4sf)\ + _mm_setzero_ps (),\ + (__mmask8)( __U));\ +}) + +#define _mm256_mask_shuffle_ps( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shufps256_mask ((__v8sf)( __A),\ + (__v8sf)( __B),( __imm),\ + (__v8sf)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm256_maskz_shuffle_ps( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_shufps256_mask ((__v8sf)( __A),\ + (__v8sf)( __B),( __imm),\ + (__v8sf)\ + _mm256_setzero_ps (),\ + (__mmask8)( __U));\ +}) + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_rsqrt14_pd (__m128d __A) +{ + return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) +{ + return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) +{ + return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_rsqrt14_pd (__m256d __A) +{ + return (__m256d) 
__builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) -1); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) +{ + return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, + (__v4df) __W, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) +{ + return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_rsqrt14_ps (__m128 __A) +{ + return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) +{ + return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) +{ + return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_rsqrt14_ps (__m256 __A) +{ + return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) -1); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) +{ + return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, + (__v8sf) __W, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) +{ + return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_BOTH Index: test/CodeGen/avx512f-builtins.c =================================================================== --- 
test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -2576,5 +2576,164 @@ return _mm512_kmov(__A); } +__m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_shuffle_f32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_shuffle_f32x4(__A, __B, 4); +} + +__m512 test_mm512_mask_shuffle_f32x4(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_shuffle_f32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_mask_shuffle_f32x4(__W, __U, __A, __B, 4); +} + +__m512 test_mm512_maskz_shuffle_f32x4(__mmask16 __U, __m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_shuffle_f32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_maskz_shuffle_f32x4(__U, __A, __B, 4); +} + +__m512d test_mm512_shuffle_f64x2(__m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_shuffle_f64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_shuffle_f64x2(__A, __B, 4); +} + +__m512d test_mm512_mask_shuffle_f64x2(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_mask_shuffle_f64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_mask_shuffle_f64x2(__W, __U, __A, __B, 4); +} + +__m512d test_mm512_maskz_shuffle_f64x2(__mmask8 __U, __m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_maskz_shuffle_f64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_maskz_shuffle_f64x2(__U, __A, __B, 4); +} + +__m512i test_mm512_shuffle_i32x4(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shuffle_i32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_shuffle_i32x4(__A, __B, 4); +} + +__m512i test_mm512_mask_shuffle_i32x4(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shuffle_i32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_mask_shuffle_i32x4(__W, __U, __A, __B, 4); +} + +__m512i test_mm512_maskz_shuffle_i32x4(__mmask16 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: 
@test_mm512_maskz_shuffle_i32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_maskz_shuffle_i32x4(__U, __A, __B, 4); +} + +__m512i test_mm512_shuffle_i64x2(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_shuffle_i64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_shuffle_i64x2(__A, __B, 4); +} + +__m512i test_mm512_mask_shuffle_i64x2(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_shuffle_i64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_mask_shuffle_i64x2(__W, __U, __A, __B, 4); +} + +__m512i test_mm512_maskz_shuffle_i64x2(__mmask8 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_shuffle_i64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm512_maskz_shuffle_i64x2(__U, __A, __B, 4); +} + +__m512d test_mm512_shuffle_pd(__m512d __M, __m512d __V) { + // CHECK-LABEL: @test_mm512_shuffle_pd + // CHECK: @llvm.x86.avx512.mask.shuf.pd.512 + return _mm512_shuffle_pd(__M, __V, 4); +} + +__m512d test_mm512_mask_shuffle_pd(__m512d __W, __mmask8 __U, __m512d __M, __m512d __V) { + // CHECK-LABEL: @test_mm512_mask_shuffle_pd + // CHECK: @llvm.x86.avx512.mask.shuf.pd.512 + return _mm512_mask_shuffle_pd(__W, __U, __M, __V, 4); +} + +__m512d test_mm512_maskz_shuffle_pd(__mmask8 __U, __m512d __M, __m512d __V) { + // CHECK-LABEL: @test_mm512_maskz_shuffle_pd + // CHECK: @llvm.x86.avx512.mask.shuf.pd.512 + return _mm512_maskz_shuffle_pd(__U, __M, __V, 4); +} + +__m512 test_mm512_shuffle_ps(__m512 __M, __m512 __V) { + // CHECK-LABEL: @test_mm512_shuffle_ps + // CHECK: @llvm.x86.avx512.mask.shuf.ps.512 + return _mm512_shuffle_ps(__M, __V, 4); +} + +__m512 test_mm512_mask_shuffle_ps(__m512 __W, __mmask16 __U, __m512 __M, __m512 __V) { + // CHECK-LABEL: @test_mm512_mask_shuffle_ps + // CHECK: @llvm.x86.avx512.mask.shuf.ps.512 + return _mm512_mask_shuffle_ps(__W, __U, __M, __V, 4); +} + +__m512 test_mm512_maskz_shuffle_ps(__mmask16 __U, __m512 __M, __m512 __V) { + // CHECK-LABEL: 
@test_mm512_maskz_shuffle_ps + // CHECK: @llvm.x86.avx512.mask.shuf.ps.512 + return _mm512_maskz_shuffle_ps(__U, __M, __V, 4); +} + +__m128d test_mm_sqrt_round_sd(__m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_sqrt_round_sd + // CHECK: @llvm.x86.avx512.mask.sqrt.sd + return _mm_sqrt_round_sd(__A, __B, 4); +} + +__m128d test_mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ + // CHECK: @llvm.x86.avx512.mask.sqrt.sd + return _mm_mask_sqrt_sd(__W,__U,__A,__B); +} + +__m128d test_mm_mask_sqrt_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ + // CHECK: @llvm.x86.avx512.mask.sqrt.sd + return _mm_mask_sqrt_round_sd(__W,__U,__A,__B,_MM_FROUND_CUR_DIRECTION); +} + +__m128d test_mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B){ + // CHECK: @llvm.x86.avx512.mask.sqrt.sd + return _mm_maskz_sqrt_sd(__U,__A,__B); +} + +__m128d test_mm_maskz_sqrt_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ + // CHECK: @llvm.x86.avx512.mask.sqrt.sd + return _mm_maskz_sqrt_round_sd(__U,__A,__B,_MM_FROUND_CUR_DIRECTION); +} + +__m128 test_mm_sqrt_round_ss(__m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_sqrt_round_ss + // CHECK: @llvm.x86.avx512.mask.sqrt.ss + return _mm_sqrt_round_ss(__A, __B, 4); +} + +__m128 test_mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ + // CHECK: @llvm.x86.avx512.mask.sqrt.ss + return _mm_mask_sqrt_ss(__W,__U,__A,__B); +} + +__m128 test_mm_mask_sqrt_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ + // CHECK: @llvm.x86.avx512.mask.sqrt.ss + return _mm_mask_sqrt_round_ss(__W,__U,__A,__B,_MM_FROUND_CUR_DIRECTION); +} + +__m128 test_mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B){ + // CHECK: @llvm.x86.avx512.mask.sqrt.ss + return _mm_maskz_sqrt_ss(__U,__A,__B); +} + +__m128 test_mm_maskz_sqrt_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ + // CHECK: @llvm.x86.avx512.mask.sqrt.ss + return _mm_maskz_sqrt_round_ss(__U,__A,__B,_MM_FROUND_CUR_DIRECTION); +} Index: 
test/CodeGen/avx512vl-builtins.c =================================================================== --- test/CodeGen/avx512vl-builtins.c +++ test/CodeGen/avx512vl-builtins.c @@ -4296,3 +4296,194 @@ return _mm256_maskz_loadu_ps(__U, __P); } +__m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_shuffle_f32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_shuffle_f32x4(__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256 test_mm256_mask_shuffle_f32x4(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_mask_shuffle_f32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_mask_shuffle_f32x4(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256 test_mm256_maskz_shuffle_f32x4(__mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_maskz_shuffle_f32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_maskz_shuffle_f32x4(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256d test_mm256_shuffle_f64x2(__m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_shuffle_f64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_shuffle_f64x2(__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256d test_mm256_mask_shuffle_f64x2(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_mask_shuffle_f64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_mask_shuffle_f64x2(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256d test_mm256_maskz_shuffle_f64x2(__mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_maskz_shuffle_f64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_maskz_shuffle_f64x2(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm256_shuffle_i32x4(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shuffle_i32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_shuffle_i32x4(__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm256_mask_shuffle_i32x4(__m256i __W, __mmask8 __U, 
__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shuffle_i32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_mask_shuffle_i32x4(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm256_maskz_shuffle_i32x4(__mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shuffle_i32x4 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_maskz_shuffle_i32x4(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm256_shuffle_i64x2(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_shuffle_i64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_shuffle_i64x2(__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm256_mask_shuffle_i64x2(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_shuffle_i64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_mask_shuffle_i64x2(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm256_maskz_shuffle_i64x2(__mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_shuffle_i64x2 + // CHECK: @llvm.x86.avx512.mask.shuf + return _mm256_maskz_shuffle_i64x2(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m128d test_mm_mask_shuffle_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_mask_shuffle_pd + // CHECK: @llvm.x86.avx512.mask.shuf.pd.128 + return _mm_mask_shuffle_pd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m128d test_mm_maskz_shuffle_pd(__mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_maskz_shuffle_pd + // CHECK: @llvm.x86.avx512.mask.shuf.pd.128 + return _mm_maskz_shuffle_pd(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256d test_mm256_mask_shuffle_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_mask_shuffle_pd + // CHECK: @llvm.x86.avx512.mask.shuf.pd.256 + return _mm256_mask_shuffle_pd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256d 
test_mm256_maskz_shuffle_pd(__mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_maskz_shuffle_pd + // CHECK: @llvm.x86.avx512.mask.shuf.pd.256 + return _mm256_maskz_shuffle_pd(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m128 test_mm_mask_shuffle_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_shuffle_ps + // CHECK: @llvm.x86.avx512.mask.shuf.ps.128 + return _mm_mask_shuffle_ps(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m128 test_mm_maskz_shuffle_ps(__mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_shuffle_ps + // CHECK: @llvm.x86.avx512.mask.shuf.ps.128 + return _mm_maskz_shuffle_ps(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256 test_mm256_mask_shuffle_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_mask_shuffle_ps + // CHECK: @llvm.x86.avx512.mask.shuf.ps.256 + return _mm256_mask_shuffle_ps(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m256 test_mm256_maskz_shuffle_ps(__mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_maskz_shuffle_ps + // CHECK: @llvm.x86.avx512.mask.shuf.ps.256 + return _mm256_maskz_shuffle_ps(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m128d test_mm_rsqrt14_pd(__m128d __A) { + // CHECK-LABEL: @test_mm_rsqrt14_pd + // CHECK: @llvm.x86.avx512.rsqrt14.pd.128 + return _mm_rsqrt14_pd(__A); +} + +__m128d test_mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A) { + // CHECK-LABEL: @test_mm_mask_rsqrt14_pd + // CHECK: @llvm.x86.avx512.rsqrt14.pd.128 + return _mm_mask_rsqrt14_pd(__W, __U, __A); +} + +__m128d test_mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A) { + // CHECK-LABEL: @test_mm_maskz_rsqrt14_pd + // CHECK: @llvm.x86.avx512.rsqrt14.pd.128 + return _mm_maskz_rsqrt14_pd(__U, __A); +} + +__m256d test_mm256_rsqrt14_pd(__m256d __A) { + // CHECK-LABEL: @test_mm256_rsqrt14_pd + // CHECK: @llvm.x86.avx512.rsqrt14.pd.256 + return _mm256_rsqrt14_pd(__A); +} + +__m256d 
test_mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A) { + // CHECK-LABEL: @test_mm256_mask_rsqrt14_pd + // CHECK: @llvm.x86.avx512.rsqrt14.pd.256 + return _mm256_mask_rsqrt14_pd(__W, __U, __A); +} + +__m256d test_mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A) { + // CHECK-LABEL: @test_mm256_maskz_rsqrt14_pd + // CHECK: @llvm.x86.avx512.rsqrt14.pd.256 + return _mm256_maskz_rsqrt14_pd(__U, __A); +} + +__m128 test_mm_rsqrt14_ps(__m128 __A) { + // CHECK-LABEL: @test_mm_rsqrt14_ps + // CHECK: @llvm.x86.avx512.rsqrt14.ps.128 + return _mm_rsqrt14_ps(__A); +} + +__m128 test_mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A) { + // CHECK-LABEL: @test_mm_mask_rsqrt14_ps + // CHECK: @llvm.x86.avx512.rsqrt14.ps.128 + return _mm_mask_rsqrt14_ps(__W, __U, __A); +} + +__m128 test_mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A) { + // CHECK-LABEL: @test_mm_maskz_rsqrt14_ps + // CHECK: @llvm.x86.avx512.rsqrt14.ps.128 + return _mm_maskz_rsqrt14_ps(__U, __A); +} + +__m256 test_mm256_rsqrt14_ps(__m256 __A) { + // CHECK-LABEL: @test_mm256_rsqrt14_ps + // CHECK: @llvm.x86.avx512.rsqrt14.ps.256 + return _mm256_rsqrt14_ps(__A); +} + +__m256 test_mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm256_mask_rsqrt14_ps + // CHECK: @llvm.x86.avx512.rsqrt14.ps.256 + return _mm256_mask_rsqrt14_ps(__W, __U, __A); +} + +__m256 test_mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm256_maskz_rsqrt14_ps + // CHECK: @llvm.x86.avx512.rsqrt14.ps.256 + return _mm256_maskz_rsqrt14_ps(__U, __A); +}