Index: clang/lib/Headers/avx512vlbwintrin.h =================================================================== --- clang/lib/Headers/avx512vlbwintrin.h +++ clang/lib/Headers/avx512vlbwintrin.h @@ -1495,10 +1495,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi8 (__m128i __A) { - - return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, - (__v16qi) _mm_setzero_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v8hi)__A, __v8qi), + (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15); } static __inline__ __m128i __DEFAULT_FN_ATTRS Index: clang/lib/Headers/avx512vlintrin.h =================================================================== --- clang/lib/Headers/avx512vlintrin.h +++ clang/lib/Headers/avx512vlintrin.h @@ -30,6 +30,10 @@ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"))) +typedef short __v2hi __attribute__((__vector_size__(4))); +typedef char __v4qi __attribute__((__vector_size__(4))); +typedef char __v2qi __attribute__((__vector_size__(2))); + /* Integer compare */ #define _mm_cmpeq_epi32_mask(A, B) \ @@ -7412,9 +7416,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi8 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, - (__v16qi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -7442,9 +7446,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi8 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, - (__v16qi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v8si)__A, __v8qi), + (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -7471,9 +7476,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi16 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, - (__v8hi) _mm_setzero_si128 (), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -7527,9 +7532,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi8 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, - (__v16qi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -7556,9 +7561,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi8 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, - (__v16qi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -7585,9 +7590,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi32 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, - (__v4si)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -7642,9 +7646,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi16 (__m128i __A) { - return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, - (__v8hi) _mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, + 3, 3, 3, 3); } static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -7672,9 +7676,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi16 (__m256i __A) { - return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, - (__v8hi)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector( + __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, + 2, 3, 4, 5, 6, 7); } static __inline__ __m128i __DEFAULT_FN_ATTRS Index: clang/test/CodeGen/avx512vl-builtins.c =================================================================== --- clang/test/CodeGen/avx512vl-builtins.c +++ clang/test/CodeGen/avx512vl-builtins.c @@ -6974,7 +6974,8 @@ __m128i test_mm_cvtepi32_epi8(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.128 + // CHECK: trunc <4 x i32> %{{.*}} to <4 x i8> + // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> return _mm_cvtepi32_epi8(__A); } @@ -6998,7 +6999,8 @@ __m128i test_mm256_cvtepi32_epi8(__m256i __A) { // CHECK-LABEL: @test_mm256_cvtepi32_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.db.256 + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i8> + // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> return _mm256_cvtepi32_epi8(__A); } @@ -7022,7 +7024,8 @@ __m128i test_mm_cvtepi32_epi16(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi32_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.dw.128 + // CHECK: trunc <4 x i32> %{{.*}} to <4 x i16> + // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> return _mm_cvtepi32_epi16(__A); } @@ -7070,7 +7073,8 @@ __m128i test_mm_cvtepi64_epi8(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.128 + // CHECK: trunc <2 x i64> %{{.*}} to <2 x i8> + // CHECK: shufflevector <2 x i8> %{{.*}}, <2 x i8> %{{.*}}, <16 x i32> return _mm_cvtepi64_epi8(__A); } @@ -7094,7 +7098,8 @@ __m128i test_mm256_cvtepi64_epi8(__m256i __A) { // CHECK-LABEL: @test_mm256_cvtepi64_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.qb.256 + // CHECK: trunc <4 x i64> %{{.*}} to <4 x i8> + // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> return _mm256_cvtepi64_epi8(__A); } @@ -7118,7 +7123,8 @@ __m128i test_mm_cvtepi64_epi32(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi64_epi32 - // CHECK: @llvm.x86.avx512.mask.pmov.qd.128 + // CHECK: trunc <2 x i64> %{{.*}} to <2 x i32> + // CHECK: shufflevector <2 x i32> %{{.*}}, <2 x i32> %{{.*}}, <4 x i32> return _mm_cvtepi64_epi32(__A); } @@ -7168,7 +7174,8 @@ __m128i test_mm_cvtepi64_epi16(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.128 + // CHECK: trunc <2 x i64> %{{.*}} to <2 x i16> + // CHECK: shufflevector <2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <8 x i32> return _mm_cvtepi64_epi16(__A); } @@ -7192,7 +7199,8 @@ __m128i test_mm256_cvtepi64_epi16(__m256i __A) { // CHECK-LABEL: @test_mm256_cvtepi64_epi16 - // CHECK: @llvm.x86.avx512.mask.pmov.qw.256 + // CHECK: trunc <4 x i64> %{{.*}} to <4 x i16> + // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> return _mm256_cvtepi64_epi16(__A); } Index: clang/test/CodeGen/avx512vlbw-builtins.c =================================================================== --- clang/test/CodeGen/avx512vlbw-builtins.c +++ clang/test/CodeGen/avx512vlbw-builtins.c @@ -1792,7 +1792,8 @@ __m128i test_mm_cvtepi16_epi8(__m128i __A) { // CHECK-LABEL: @test_mm_cvtepi16_epi8 - // CHECK: @llvm.x86.avx512.mask.pmov.wb.128 + // CHECK: trunc <8 x i16> %{{.*}} to <8 x i8> + // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> return _mm_cvtepi16_epi8(__A); }