Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def =================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -266,8 +266,6 @@ TARGET_BUILTIN(__builtin_ia32_psubusb128, "V16cV16cV16c", "", "sse2") TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "", "sse2") TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "", "sse2") TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "", "sse2") TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "", "sse2") TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "", "sse2") @@ -522,8 +520,6 @@ TARGET_BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "", "avx2") TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "", "avx2") TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pavgb256, "V32cV32cV32c", "", "avx2") -TARGET_BUILTIN(__builtin_ia32_pavgw256, "V16sV16sV16s", "", "avx2") TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "", "avx2") TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "", "avx2") TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "", "avx2") @@ -1075,8 +1071,6 @@ TARGET_BUILTIN(__builtin_ia32_paddsw512_mask, "V32sV32sV32sV32sUi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", "", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pavgb512_mask, "V64cV64cV64cV64cULLi", "", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pavgw512_mask, "V32sV32sV32sV32sUi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmaxsb512_mask, "V64cV64cV64cV64cULLi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmaxsw512_mask, "V32sV32sV32sV32sUi", "", "avx512bw") 
TARGET_BUILTIN(__builtin_ia32_pmaxub512_mask, "V64cV64cV64cV64cULLi", "", "avx512bw") Index: cfe/trunk/lib/Headers/avx2intrin.h =================================================================== --- cfe/trunk/lib/Headers/avx2intrin.h +++ cfe/trunk/lib/Headers/avx2intrin.h @@ -145,13 +145,21 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_avg_epu8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); + typedef unsigned short __v32hu __attribute__((__vector_size__(64))); + return (__m256i)__builtin_convertvector( + ((__builtin_convertvector((__v32qu)__a, __v32hu) + + __builtin_convertvector((__v32qu)__b, __v32hu)) + 1) + >> 1, __v32qu); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_avg_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); + typedef unsigned int __v16su __attribute__((__vector_size__(64))); + return (__m256i)__builtin_convertvector( + ((__builtin_convertvector((__v16hu)__a, __v16su) + + __builtin_convertvector((__v16hu)__b, __v16su)) + 1) + >> 1, __v16hu); } static __inline__ __m256i __DEFAULT_FN_ATTRS Index: cfe/trunk/lib/Headers/avx512bwintrin.h =================================================================== --- cfe/trunk/lib/Headers/avx512bwintrin.h +++ cfe/trunk/lib/Headers/avx512bwintrin.h @@ -706,57 +706,55 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_avg_epu8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) -1); + typedef unsigned short __v64hu __attribute__((__vector_size__(128))); + return (__m512i)__builtin_convertvector( + ((__builtin_convertvector((__v64qu) __A, __v64hu) + + __builtin_convertvector((__v64qu) __B, __v64hu)) + 1) + >> 1, __v64qu); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) 
__builtin_ia32_pavgb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_avg_epu8(__A, __B), + (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_avg_epu8(__A, __B), + (__v64qi)_mm512_setzero_qi()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_avg_epu16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) -1); + typedef unsigned int __v32su __attribute__((__vector_size__(128))); + return (__m512i)__builtin_convertvector( + ((__builtin_convertvector((__v32hu) __A, __v32su) + + __builtin_convertvector((__v32hu) __B, __v32su)) + 1) + >> 1, __v32hu); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) __W, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_avg_epu16(__A, __B), + (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_avg_epu16(__A, __B), + (__v32hi) _mm512_setzero_hi()); } static __inline__ __m512i __DEFAULT_FN_ATTRS Index: cfe/trunk/lib/Headers/emmintrin.h =================================================================== --- cfe/trunk/lib/Headers/emmintrin.h 
+++ cfe/trunk/lib/Headers/emmintrin.h @@ -2258,7 +2258,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); + typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))); + return (__m128i)__builtin_convertvector( + ((__builtin_convertvector((__v16qu)__a, __v16hu) + + __builtin_convertvector((__v16qu)__b, __v16hu)) + 1) + >> 1, __v16qu); } /// \brief Computes the rounded avarages of corresponding elements of two @@ -2278,7 +2282,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); + typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))); + return (__m128i)__builtin_convertvector( + ((__builtin_convertvector((__v8hu)__a, __v8su) + + __builtin_convertvector((__v8hu)__b, __v8su)) + 1) + >> 1, __v8hu); } /// \brief Multiplies the corresponding elements of two 128-bit signed [8 x i16] Index: cfe/trunk/test/CodeGen/avx2-builtins.c =================================================================== --- cfe/trunk/test/CodeGen/avx2-builtins.c +++ cfe/trunk/test/CodeGen/avx2-builtins.c @@ -99,13 +99,25 @@ __m256i test_mm256_avg_epu8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_avg_epu8 - // CHECK: call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) + // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}} + // CHECK: add <32 x i16> %{{.*}}, + // CHECK: lshr <32 x i16> %{{.*}}, + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8> return _mm256_avg_epu8(a, b); } __m256i test_mm256_avg_epu16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_avg_epu16 - // CHECK: call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) + // 
CHECK-NOT: call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}} + // CHECK: add <16 x i32> %{{.*}}, + // CHECK: lshr <16 x i32> %{{.*}}, + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16> return _mm256_avg_epu16(a, b); } Index: cfe/trunk/test/CodeGen/avx512bw-builtins.c =================================================================== --- cfe/trunk/test/CodeGen/avx512bw-builtins.c +++ cfe/trunk/test/CodeGen/avx512bw-builtins.c @@ -638,32 +638,74 @@ } __m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_avg_epu8 - // CHECK: @llvm.x86.avx512.mask.pavg.b.512 + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.b.512 + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}} + // CHECK: add <64 x i16> %{{.*}}, + // CHECK: lshr <64 x i16> %{{.*}}, + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8> return _mm512_avg_epu8(__A,__B); } __m512i test_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_avg_epu8 - // CHECK: @llvm.x86.avx512.mask.pavg.b.512 + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.b.512 + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}} + // CHECK: add <64 x i16> %{{.*}}, + // CHECK: lshr <64 x i16> %{{.*}}, + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8> + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_mask_avg_epu8(__W,__U,__A,__B); } __m512i test_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_avg_epu8 - // CHECK: @llvm.x86.avx512.mask.pavg.b.512 + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.b.512 + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16> + // 
CHECK: zext <64 x i8> %{{.*}} to <64 x i16> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}} + // CHECK: add <64 x i16> %{{.*}}, + // CHECK: lshr <64 x i16> %{{.*}}, + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8> + // CHECK: store <64 x i8> zeroinitializer + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_maskz_avg_epu8(__U,__A,__B); } __m512i test_mm512_avg_epu16(__m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_avg_epu16 - // CHECK: @llvm.x86.avx512.mask.pavg.w.512 + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.w.512 + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}} + // CHECK: add <32 x i32> %{{.*}}, + // CHECK: lshr <32 x i32> %{{.*}}, + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16> return _mm512_avg_epu16(__A,__B); } __m512i test_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_mask_avg_epu16 - // CHECK: @llvm.x86.avx512.mask.pavg.w.512 + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.w.512 + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}} + // CHECK: add <32 x i32> %{{.*}}, + // CHECK: lshr <32 x i32> %{{.*}}, + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_avg_epu16(__W,__U,__A,__B); } __m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_avg_epu16 - // CHECK: @llvm.x86.avx512.mask.pavg.w.512 + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.w.512 + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}} + // CHECK: add <32 x i32> %{{.*}}, + // CHECK: lshr <32 x i32> %{{.*}}, + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16> + // CHECK: store <32 x 
i16> zeroinitializer + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_avg_epu16(__U,__A,__B); } __m512i test_mm512_max_epi8(__m512i __A, __m512i __B) { Index: cfe/trunk/test/CodeGen/avx512vlbw-builtins.c =================================================================== --- cfe/trunk/test/CodeGen/avx512vlbw-builtins.c +++ cfe/trunk/test/CodeGen/avx512vlbw-builtins.c @@ -1155,49 +1155,101 @@ } __m128i test_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_avg_epu8 - // CHECK: @llvm.x86.sse2.pavg.b + // CHECK-NOT: @llvm.x86.sse2.pavg.b + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16> + // CHECK: add <16 x i16> %{{.*}}, %{{.*}} + // CHECK: add <16 x i16> %{{.*}}, + // CHECK: lshr <16 x i16> %{{.*}}, + // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8> // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_mask_avg_epu8(__W,__U,__A,__B); } __m128i test_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_avg_epu8 - // CHECK: @llvm.x86.sse2.pavg.b + // CHECK-NOT: @llvm.x86.sse2.pavg.b + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16> + // CHECK: add <16 x i16> %{{.*}}, %{{.*}} + // CHECK: add <16 x i16> %{{.*}}, + // CHECK: lshr <16 x i16> %{{.*}}, + // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8> + // CHECK: store <2 x i64> zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} return _mm_maskz_avg_epu8(__U,__A,__B); } __m256i test_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_avg_epu8 - // CHECK: @llvm.x86.avx2.pavg.b + // CHECK-NOT: @llvm.x86.avx2.pavg.b + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}} + // CHECK: add <32 
x i16> %{{.*}}, + // CHECK: lshr <32 x i16> %{{.*}}, + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8> // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_mask_avg_epu8(__W,__U,__A,__B); } __m256i test_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_avg_epu8 - // CHECK: @llvm.x86.avx2.pavg.b + // CHECK-NOT: @llvm.x86.avx2.pavg.b + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}} + // CHECK: add <32 x i16> %{{.*}}, + // CHECK: lshr <32 x i16> %{{.*}}, + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8> + // CHECK: store <4 x i64> zeroinitializer // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} return _mm256_maskz_avg_epu8(__U,__A,__B); } __m128i test_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_avg_epu16 - // CHECK: @llvm.x86.sse2.pavg.w + // CHECK-NOT: @llvm.x86.sse2.pavg.w + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32> + // CHECK: add <8 x i32> %{{.*}}, %{{.*}} + // CHECK: add <8 x i32> %{{.*}}, + // CHECK: lshr <8 x i32> %{{.*}}, + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_avg_epu16(__W,__U,__A,__B); } __m128i test_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_avg_epu16 - // CHECK: @llvm.x86.sse2.pavg.w + // CHECK-NOT: @llvm.x86.sse2.pavg.w + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32> + // CHECK: add <8 x i32> %{{.*}}, %{{.*}} + // CHECK: add <8 x i32> %{{.*}}, + // CHECK: lshr <8 x i32> %{{.*}}, + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16> + // CHECK: store <2 x i64> zeroinitializer // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return 
_mm_maskz_avg_epu16(__U,__A,__B); } __m256i test_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_mask_avg_epu16 - // CHECK: @llvm.x86.avx2.pavg.w + // CHECK-NOT: @llvm.x86.avx2.pavg.w + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}} + // CHECK: add <16 x i32> %{{.*}}, + // CHECK: lshr <16 x i32> %{{.*}}, + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16> // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_avg_epu16(__W,__U,__A,__B); } __m256i test_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_avg_epu16 - // CHECK: @llvm.x86.avx2.pavg.w + // CHECK-NOT: @llvm.x86.avx2.pavg.w + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}} + // CHECK: add <16 x i32> %{{.*}}, + // CHECK: lshr <16 x i32> %{{.*}}, + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16> + // CHECK: store <4 x i64> zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_avg_epu16(__U,__A,__B); } Index: cfe/trunk/test/CodeGen/builtins-x86.c =================================================================== --- cfe/trunk/test/CodeGen/builtins-x86.c +++ cfe/trunk/test/CodeGen/builtins-x86.c @@ -160,8 +160,6 @@ tmp_V4s = __builtin_ia32_psubusw(tmp_V4s, tmp_V4s); tmp_V4s = __builtin_ia32_pmulhw(tmp_V4s, tmp_V4s); tmp_V4s = __builtin_ia32_pmulhuw(tmp_V4s, tmp_V4s); - tmp_V8c = __builtin_ia32_pavgb(tmp_V8c, tmp_V8c); - tmp_V4s = __builtin_ia32_pavgw(tmp_V4s, tmp_V4s); tmp_V8c = __builtin_ia32_pcmpeqb(tmp_V8c, tmp_V8c); tmp_V4s = __builtin_ia32_pcmpeqw(tmp_V4s, tmp_V4s); tmp_V2i = __builtin_ia32_pcmpeqd(tmp_V2i, tmp_V2i); @@ -201,8 +199,6 @@ tmp_V16c = __builtin_ia32_psubusb128(tmp_V16c, tmp_V16c); tmp_V8s = 
__builtin_ia32_psubusw128(tmp_V8s, tmp_V8s); tmp_V8s = __builtin_ia32_pmulhw128(tmp_V8s, tmp_V8s); - tmp_V16c = __builtin_ia32_pavgb128(tmp_V16c, tmp_V16c); - tmp_V8s = __builtin_ia32_pavgw128(tmp_V8s, tmp_V8s); tmp_V16c = __builtin_ia32_pmaxub128(tmp_V16c, tmp_V16c); tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s); tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c); Index: cfe/trunk/test/CodeGen/sse2-builtins.c =================================================================== --- cfe/trunk/test/CodeGen/sse2-builtins.c +++ cfe/trunk/test/CodeGen/sse2-builtins.c @@ -97,13 +97,25 @@ __m128i test_mm_avg_epu8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_avg_epu8 - // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16> + // CHECK: add <16 x i16> %{{.*}}, %{{.*}} + // CHECK: add <16 x i16> %{{.*}}, + // CHECK: lshr <16 x i16> %{{.*}}, + // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8> return _mm_avg_epu8(A, B); } __m128i test_mm_avg_epu16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_avg_epu16 - // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32> + // CHECK: add <8 x i32> %{{.*}}, %{{.*}} + // CHECK: add <8 x i32> %{{.*}}, + // CHECK: lshr <8 x i32> %{{.*}}, + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16> return _mm_avg_epu16(A, B); }