Index: include/clang/Basic/BuiltinsX86.def
===================================================================
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -503,14 +503,14 @@
 TARGET_BUILTIN(__builtin_ia32_movntdq256, "vV4LLi*V4LLi", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_movntpd256, "vd*V4d", "", "avx")
 TARGET_BUILTIN(__builtin_ia32_movntps256, "vf*V8f", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2d", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4f", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4d", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadps256, "V8fV8fC*V8f", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2dV2d", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4fV4f", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4dV4d", "", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8fV8f", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4LLi", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskloadps256, "V8fV8fC*V8i", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2LLiV2d", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4LLiV4d", "", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "", "avx")
 
 // AVX2
 TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "", "avx2")
Index: lib/Headers/avxintrin.h
===================================================================
--- lib/Headers/avxintrin.h
+++ lib/Headers/avxintrin.h
@@ -835,53 +835,53 @@
 /* Conditional load ops */
 static __inline __m128d __DEFAULT_FN_ATTRS
-_mm_maskload_pd(double const *__p, __m128d __m)
+_mm_maskload_pd(double const *__p, __m128i __m)
 {
-  return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m);
+  return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m);
 }
 
 static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_maskload_pd(double const *__p, __m256d __m)
+_mm256_maskload_pd(double const *__p, __m256i __m)
 {
   return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
-                                               (__v4df)__m);
+                                               (__v4di)__m);
 }
 
 static __inline __m128 __DEFAULT_FN_ATTRS
-_mm_maskload_ps(float const *__p, __m128 __m)
+_mm_maskload_ps(float const *__p, __m128i __m)
 {
-  return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m);
+  return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m);
 }
 
 static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_maskload_ps(float const *__p, __m256 __m)
+_mm256_maskload_ps(float const *__p, __m256i __m)
 {
-  return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m);
+  return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m);
 }
 
 /* Conditional store ops */
 static __inline void __DEFAULT_FN_ATTRS
-_mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a)
+_mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)
 {
-  __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a);
+  __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);
 }
 
 static __inline void __DEFAULT_FN_ATTRS
-_mm_maskstore_pd(double *__p, __m128d __m, __m128d __a)
+_mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)
 {
-  __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a);
+  __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);
 }
 
 static __inline void __DEFAULT_FN_ATTRS
-_mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a)
+_mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)
 {
-  __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a);
+  __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);
 }
 
 static __inline void __DEFAULT_FN_ATTRS
-_mm_maskstore_ps(float *__p, __m128 __m, __m128 __a)
+_mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
 {
-  __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a);
+  __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);
 }
 
 /* Cacheability support ops */
Index: test/CodeGen/builtins-x86.c
===================================================================
--- test/CodeGen/builtins-x86.c
+++ test/CodeGen/builtins-x86.c
@@ -465,14 +465,14 @@
   __builtin_ia32_movntdq256(tmp_V4LLip, tmp_V4LLi);
   __builtin_ia32_movntpd256(tmp_dp, tmp_V4d);
   __builtin_ia32_movntps256(tmp_fp, tmp_V8f);
-  tmp_V2d = __builtin_ia32_maskloadpd(tmp_V2dCp, tmp_V2d);
-  tmp_V4f = __builtin_ia32_maskloadps(tmp_V4fCp, tmp_V4f);
-  tmp_V4d = __builtin_ia32_maskloadpd256(tmp_V4dCp, tmp_V4d);
-  tmp_V8f = __builtin_ia32_maskloadps256(tmp_V8fCp, tmp_V8f);
-  __builtin_ia32_maskstorepd(tmp_V2dp, tmp_V2d, tmp_V2d);
-  __builtin_ia32_maskstoreps(tmp_V4fp, tmp_V4f, tmp_V4f);
-  __builtin_ia32_maskstorepd256(tmp_V4dp, tmp_V4d, tmp_V4d);
-  __builtin_ia32_maskstoreps256(tmp_V8fp, tmp_V8f, tmp_V8f);
+  tmp_V2d = __builtin_ia32_maskloadpd(tmp_V2dCp, tmp_V2LLi);
+  tmp_V4f = __builtin_ia32_maskloadps(tmp_V4fCp, tmp_V4i);
+  tmp_V4d = __builtin_ia32_maskloadpd256(tmp_V4dCp, tmp_V4LLi);
+  tmp_V8f = __builtin_ia32_maskloadps256(tmp_V8fCp, tmp_V8i);
+  __builtin_ia32_maskstorepd(tmp_V2dp, tmp_V2LLi, tmp_V2d);
+  __builtin_ia32_maskstoreps(tmp_V4fp, tmp_V4i, tmp_V4f);
+  __builtin_ia32_maskstorepd256(tmp_V4dp, tmp_V4LLi, tmp_V4d);
+  __builtin_ia32_maskstoreps256(tmp_V8fp, tmp_V8i, tmp_V8f);
 
 #ifdef USE_3DNOW
   tmp_V8c = __builtin_ia32_pavgusb(tmp_V8c, tmp_V8c);
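
For reference, a minimal usage sketch of the intrinsics with the corrected signatures (not part of the patch): the mask argument is an integer vector (__m128i/__m256i), and the sign bit of each mask element selects which lanes are loaded or stored. The file name and build command are illustrative; compile with -mavx.

/* maskload_demo.c - illustrative only; build with: clang -mavx maskload_demo.c */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  double src[4] = {1.0, 2.0, 3.0, 4.0};
  double dst[4] = {0.0, 0.0, 0.0, 0.0};

  /* An element with its sign bit set (-1LL) selects that lane; 0 skips it.
   * _mm256_set_epi64x takes elements from highest to lowest lane. */
  __m256i mask = _mm256_set_epi64x(0, -1, 0, -1);   /* select lanes 0 and 2 */

  __m256d v = _mm256_maskload_pd(src, mask);        /* v = {1.0, 0.0, 3.0, 0.0} */
  _mm256_maskstore_pd(dst, mask, v);                /* writes only lanes 0 and 2 */

  printf("%g %g %g %g\n", dst[0], dst[1], dst[2], dst[3]);  /* 1 0 3 0 */
  return 0;
}

Prior to this change, passing an __m128i/__m256i mask (as the Intel intrinsics reference specifies) required an implicit conversion to the floating-point vector parameter type; the corrected prototypes accept the integer mask directly.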