Index: lib/Headers/avx512vlbwintrin.h =================================================================== --- lib/Headers/avx512vlbwintrin.h +++ lib/Headers/avx512vlbwintrin.h @@ -1496,10 +1496,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi16_epi8 (__m128i __A) { - - return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, - (__v16qi) _mm_setzero_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v8hi)__A, __v8qi), + (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 Index: lib/Headers/avx512vlintrin.h =================================================================== --- lib/Headers/avx512vlintrin.h +++ lib/Headers/avx512vlintrin.h @@ -31,6 +31,10 @@ #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256))) +typedef short __v2hi __attribute__((__vector_size__(4))); +typedef char __v4qi __attribute__((__vector_size__(4))); +typedef char __v2qi __attribute__((__vector_size__(2))); + /* Integer compare */ #define _mm_cmpeq_epi32_mask(A, B) \ @@ -7341,9 +7345,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi8 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, - (__v16qi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -7371,9 +7375,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi8 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, - (__v16qi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v8si)__A, __v8qi), + (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 @@ -7400,9 +7405,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi16 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, - (__v8hi) _mm_setzero_si128 (), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -7456,9 +7461,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi8 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, - (__v16qi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -7485,9 +7490,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi8 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, - (__v16qi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 @@ -7514,9 +7519,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi32 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, - (__v4si)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -7571,9 +7575,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi16 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, - (__v8hi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, + 3, 3, 3, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -7601,9 +7605,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi16 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, - (__v8hi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 Index: test/CodeGen/avx512vl-builtins.c =================================================================== --- test/CodeGen/avx512vl-builtins.c +++ test/CodeGen/avx512vl-builtins.c @@ -8503,7 +8503,8 @@ __m128i test_mm_cvtepi32_epi8(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.128 + // CHECK: trunc <4 x i32> %{{.*}} to <4 x i8> + // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> return _mm_cvtepi32_epi8(__A); } @@ -8527,7 +8528,8 @@ __m128i test_mm256_cvtepi32_epi8(__m256i __A) { // CHECK-LABEL: @test_mm256_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.256 + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i8> + // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> return _mm256_cvtepi32_epi8(__A); } @@ -8551,7 +8553,8 @@ __m128i test_mm_cvtepi32_epi16(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.128 + // CHECK: trunc <4 x i32> %{{.*}} to <4 x i16> + // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> return _mm_cvtepi32_epi16(__A); } @@ -8599,7 +8602,8 @@ __m128i test_mm_cvtepi64_epi8(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.128 + // CHECK: trunc <2 x i64> %{{.*}} to <2 x i8> + // CHECK: shufflevector <2 x i8> %{{.*}}, <2 x i8> %{{.*}}, <16 x i32> return _mm_cvtepi64_epi8(__A); } @@ -8623,7 +8627,8 @@ __m128i test_mm256_cvtepi64_epi8(__m256i __A) { // CHECK-LABEL: @test_mm256_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.256 + // CHECK: trunc <4 x i64> %{{.*}} to <4 x i8> + // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> return _mm256_cvtepi64_epi8(__A); } @@ -8647,7 +8652,8 @@ __m128i test_mm_cvtepi64_epi32(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmov.qd.128 + // CHECK: trunc <2 x i64> %{{.*}} to <2 x i32> + // CHECK: shufflevector <2 x i32> %{{.*}}, <2 x i32> %{{.*}}, <4 x i32> return _mm_cvtepi64_epi32(__A); } @@ -8697,7 +8703,8 @@ __m128i test_mm_cvtepi64_epi16(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.128 + // CHECK: trunc <2 x i64> %{{.*}} to <2 x i16> + // CHECK: shufflevector <2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <8 x i32> return _mm_cvtepi64_epi16(__A); } @@ -8721,7 +8728,8 @@ __m128i test_mm256_cvtepi64_epi16(__m256i __A) { // CHECK-LABEL: @test_mm256_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.256 + // CHECK: trunc <4 x i64> %{{.*}} to <4 x i16> + // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> return _mm256_cvtepi64_epi16(__A); } Index: test/CodeGen/avx512vlbw-builtins.c =================================================================== --- test/CodeGen/avx512vlbw-builtins.c +++ test/CodeGen/avx512vlbw-builtins.c @@ -1792,7 +1792,8 @@ __m128i test_mm_cvtepi16_epi8(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.wb.128 + // CHECK: trunc <8 x i16> %{{.*}} to <8 x i8> + // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> return _mm_cvtepi16_epi8(__A); }