Cast the results of the truncating _mm*_cvtepi*_epi* intrinsics, which are
built from __builtin_convertvector + __builtin_shufflevector, to __m128i; add
a __v2hi typedef to avx512vlintrin.h (used by _mm_cvtepi64_epi16); and update
the CodeGen tests to expect generic trunc + shufflevector IR instead of the
@llvm.x86.avx512.mask.pmov.* intrinsics.

NOTE: line breaks, blank context lines and indentation in this patch were
reconstructed from the hunk headers' line counts. Verify against the target
tree (or apply with whitespace-insensitive matching) before relying on it.

Index: clang/lib/Headers/avx512vlbwintrin.h
===================================================================
--- clang/lib/Headers/avx512vlbwintrin.h
+++ clang/lib/Headers/avx512vlbwintrin.h
@@ -1495,10 +1495,10 @@
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi16_epi8 (__m128i __A) {
-  return __builtin_shufflevector(__builtin_convertvector((__v8hi)__A, __v8qi),
-                                 (__v8qi) {0, 0, 0, 0, 0, 0, 0, 0},
-                                 0, 1, 2, 3, 4, 5, 6, 7,
-                                 8, 9, 10, 11, 12, 13, 14, 15);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v8hi)__A, __v8qi),
+      (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+      12, 13, 14, 15);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
Index: clang/lib/Headers/avx512vlintrin.h
===================================================================
--- clang/lib/Headers/avx512vlintrin.h
+++ clang/lib/Headers/avx512vlintrin.h
@@ -30,6 +30,7 @@
 
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
 
+typedef short __v2hi __attribute__((__vector_size__(4)));
 typedef char __v4qi __attribute__((__vector_size__(4)));
 typedef char __v2qi __attribute__((__vector_size__(2)));
 
@@ -7415,10 +7416,9 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi32_epi8 (__m128i __A)
 {
-  return __builtin_shufflevector(__builtin_convertvector((__v4si)__A, __v4qi),
-                                 (__v4qi) {0, 0, 0, 0},
-                                 0, 1, 2, 3, 4, 5, 6, 7,
-                                 7, 7, 7, 7, 7, 7, 7, 7);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
+      2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -7446,10 +7446,10 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_cvtepi32_epi8 (__m256i __A)
 {
-  return __builtin_shufflevector(__builtin_convertvector((__v8si)__A, __v8qi),
-                                 (__v8qi) {0, 0, 0, 0, 0, 0, 0, 0},
-                                 0, 1, 2, 3, 4, 5, 6, 7,
-                                 8, 9, 10, 11, 12, 13, 14, 15);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v8si)__A, __v8qi),
+      (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+      12, 13, 14, 15);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -7476,9 +7476,9 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi32_epi16 (__m128i __A)
 {
-  return __builtin_shufflevector(__builtin_convertvector((__v4si)__A, __v4hi),
-                                 (__v4hi) {0, 0, 0, 0},
-                                 0, 1, 2, 3, 4, 5, 6, 7);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
+      2, 3, 4, 5, 6, 7);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -7532,10 +7532,9 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi64_epi8 (__m128i __A)
 {
-  return __builtin_shufflevector(__builtin_convertvector((__v2di)__A, __v2qi),
-                                 (__v2qi) {0, 0},
-                                 0, 1, 2, 3, 3, 3, 3, 3,
-                                 3, 3, 3, 3, 3, 3, 3, 3);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
+      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -7562,10 +7561,9 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_cvtepi64_epi8 (__m256i __A)
 {
-  return __builtin_shufflevector(__builtin_convertvector((__v4di)__A, __v4qi),
-                                 (__v4qi) {0, 0, 0, 0},
-                                 0, 1, 2, 3, 4, 5, 6, 7,
-                                 7, 7, 7, 7, 7, 7, 7, 7);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
+      2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -7592,9 +7590,8 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi64_epi32 (__m128i __A)
 {
-  return __builtin_shufflevector(__builtin_convertvector((__v2di)__A, __v2si),
-                                 (__v2si) {0, 0},
-                                 0, 1, 2, 3);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -7649,9 +7646,9 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi64_epi16 (__m128i __A)
 {
-  return __builtin_shufflevector(__builtin_convertvector((__v2di)__A, __v2hi),
-                                 (__v2hi) {0, 0},
-                                 0, 1, 2, 3, 3, 3, 3, 3);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
+      3, 3, 3, 3);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -7679,9 +7676,9 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_cvtepi64_epi16 (__m256i __A)
 {
-  return __builtin_shufflevector(__builtin_convertvector((__v4di)__A, __v4hi),
-                                 (__v4hi) {0, 0, 0, 0},
-                                 0, 1, 2, 3, 4, 5, 6, 7);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
+      2, 3, 4, 5, 6, 7);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
Index: clang/test/CodeGen/avx512vl-builtins.c
===================================================================
--- clang/test/CodeGen/avx512vl-builtins.c
+++ clang/test/CodeGen/avx512vl-builtins.c
@@ -6974,7 +6974,8 @@
 
 __m128i test_mm_cvtepi32_epi8(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi32_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.db.128
+  // CHECK: trunc <4 x i32> %{{.*}} to <4 x i8>
+  // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32>
   return _mm_cvtepi32_epi8(__A);
 }
 
@@ -6998,7 +6999,8 @@
 
 __m128i test_mm256_cvtepi32_epi8(__m256i __A) {
   // CHECK-LABEL: @test_mm256_cvtepi32_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.db.256
+  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i8>
+  // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32>
   return _mm256_cvtepi32_epi8(__A);
 }
 
@@ -7022,7 +7024,8 @@
 
 __m128i test_mm_cvtepi32_epi16(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi32_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmov.dw.128
+  // CHECK: trunc <4 x i32> %{{.*}} to <4 x i16>
+  // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32>
   return _mm_cvtepi32_epi16(__A);
 }
 
@@ -7070,7 +7073,8 @@
 
 __m128i test_mm_cvtepi64_epi8(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi64_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.qb.128
+  // CHECK: trunc <2 x i64> %{{.*}} to <2 x i8>
+  // CHECK: shufflevector <2 x i8> %{{.*}}, <2 x i8> %{{.*}}, <16 x i32>
   return _mm_cvtepi64_epi8(__A);
 }
 
@@ -7094,7 +7098,8 @@
 
 __m128i test_mm256_cvtepi64_epi8(__m256i __A) {
   // CHECK-LABEL: @test_mm256_cvtepi64_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.qb.256
+  // CHECK: trunc <4 x i64> %{{.*}} to <4 x i8>
+  // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32>
   return _mm256_cvtepi64_epi8(__A);
 }
 
@@ -7118,7 +7123,8 @@
 
 __m128i test_mm_cvtepi64_epi32(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi64_epi32
-  // CHECK: @llvm.x86.avx512.mask.pmov.qd.128
+  // CHECK: trunc <2 x i64> %{{.*}} to <2 x i32>
+  // CHECK: shufflevector <2 x i32> %{{.*}}, <2 x i32> %{{.*}}, <4 x i32>
   return _mm_cvtepi64_epi32(__A);
 }
 
@@ -7168,7 +7174,8 @@
 
 __m128i test_mm_cvtepi64_epi16(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi64_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmov.qw.128
+  // CHECK: trunc <2 x i64> %{{.*}} to <2 x i16>
+  // CHECK: shufflevector <2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <8 x i32>
   return _mm_cvtepi64_epi16(__A);
 }
 
@@ -7192,7 +7199,8 @@
 
 __m128i test_mm256_cvtepi64_epi16(__m256i __A) {
   // CHECK-LABEL: @test_mm256_cvtepi64_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmov.qw.256
+  // CHECK: trunc <4 x i64> %{{.*}} to <4 x i16>
+  // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32>
   return _mm256_cvtepi64_epi16(__A);
 }
 
Index: clang/test/CodeGen/avx512vlbw-builtins.c
===================================================================
--- clang/test/CodeGen/avx512vlbw-builtins.c
+++ clang/test/CodeGen/avx512vlbw-builtins.c
@@ -1792,7 +1792,8 @@
 
 __m128i test_mm_cvtepi16_epi8(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi16_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.wb.128
+  // CHECK: trunc <8 x i16> %{{.*}} to <8 x i8>
+  // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32>
   return _mm_cvtepi16_epi8(__A);
 }
 