Index: __wmmintrin_aes.h =================================================================== --- __wmmintrin_aes.h +++ __wmmintrin_aes.h @@ -125,9 +125,9 @@ return (__m128i)__builtin_ia32_aesimc128((__v2di)__V); } -/// \brief Generates a round key for AES encyption, operating on 128-bit data +/// \brief Generates a round key for AES encyption, operating on 128-bit data /// specified in the first source operand and using an 8-bit round constant -/// specified by the second source operand, and writes the result to the +/// specified by the second source operand, and writes the result to the /// destination. /// /// \headerfile Index: __wmmintrin_pclmul.h =================================================================== --- __wmmintrin_pclmul.h +++ __wmmintrin_pclmul.h @@ -24,15 +24,15 @@ #define _WMMINTRIN_PCLMUL_H /// \brief Multiplies two 64-bit integer values, which are selected from source -/// operands using the immediate-value operand. The multiplication is a +/// operands using the immediate-value operand. The multiplication is a /// carry-less multiplication, and the 128-bit integer product is stored in /// the destination. /// /// \headerfile /// -/// \code +/// \code /// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I); -/// \endcode +/// \endcode /// /// This intrinsic corresponds to the \c VPCLMULQDQ instruction. /// Index: avx512fintrin.h =================================================================== --- avx512fintrin.h +++ avx512fintrin.h @@ -866,7 +866,7 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)); }) -static __inline __m512i +static __inline __m512i __DEFAULT_FN_ATTRS _mm512_max_epi32(__m512i __A, __m512i __B) { @@ -1425,7 +1425,7 @@ } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, (__v4sf) __B, @@ -1434,7 +1434,7 @@ } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) +_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, (__v4sf) __B, @@ -1453,7 +1453,7 @@ } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, (__v2df) __B, @@ -1462,7 +1462,7 @@ } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) +_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, (__v2df) __B, @@ -1533,7 +1533,7 @@ } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, (__v4sf) __B, @@ -1542,7 +1542,7 @@ } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) +_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, (__v4sf) __B, @@ -1561,7 +1561,7 @@ } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, (__v2df) __B, @@ -1570,7 +1570,7 @@ } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) +_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, (__v2df) __B, @@ -3223,7 +3223,7 @@ (__v16si)(__m512i)(B), (int)(I), \ (__v16si)_mm512_setzero_si512(), \ (__mmask16)-1); }) - + #define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\ (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \ (__v16si)(__m512i)(B), (int)(imm), \ @@ -3405,7 +3405,7 @@ _mm512_cvtepi32_pd(__m256i __A) { return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, - (__v8df) + (__v8df) _mm512_setzero_pd (), (__mmask8) -1); } @@ -3530,7 +3530,7 @@ _mm512_cvtph_ps(__m256i __A) { return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, - (__v16sf) + (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); @@ -3650,7 +3650,7 @@ (__mmask16) __U, _MM_FROUND_CUR_DIRECTION); } - + #define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ (__v8si)_mm256_setzero_si256(), \ @@ -3691,7 +3691,7 @@ (__mmask16)-1, (int)(R)); }) static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_cvtps_epu32 ( __m512 __A) +_mm512_cvtps_epu32 ( __m512 __A) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\ (__v16si)\ @@ -5184,7 +5184,7 @@ } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, (__v2df) __B, @@ -5200,7 +5200,7 @@ (__mmask8)(U), (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) +_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, (__v2df) __B, @@ -5229,7 +5229,7 @@ } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -5245,7 +5245,7 @@ (__mmask8)(U), (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) +_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -6571,7 +6571,7 @@ } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, (__v2df) __B, @@ -6587,7 +6587,7 @@ (__mmask8)(U), (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) +_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, (__v2df) __B, @@ -6618,7 +6618,7 @@ } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, (__v4sf) __B, @@ -6634,7 +6634,7 @@ (__mmask8)(U), (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) +_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, (__v4sf) __B, @@ -6795,7 +6795,7 @@ (__mmask8)-1, (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __B, (__v2df) __A, @@ -6811,7 +6811,7 @@ (__mmask8)(U), (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) +_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __B, (__v2df) __A, @@ -6833,7 +6833,7 @@ (__mmask8)-1, (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __B, (__v4sf) __A, @@ -6849,7 +6849,7 @@ (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) +_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, (__v4sf) __B, @@ -7880,7 +7880,7 @@ (__v8di)(__m512i)(v1), (int)(scale)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A, (__v4sf) __B, @@ -7896,7 +7896,7 @@ (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, (__v4sf) __B, @@ -7912,7 +7912,7 @@ _MM_FROUND_CUR_DIRECTION); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) +_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, (__v4sf) __X, @@ -7928,7 +7928,7 @@ (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A, -(__v4sf) __B, @@ -7944,7 +7944,7 @@ (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, (__v4sf) __B, @@ -7960,7 +7960,7 @@ (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) +_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, (__v4sf) __X, @@ -7976,7 +7976,7 @@ (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf) __A, (__v4sf) __B, @@ -7992,7 +7992,7 @@ (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, (__v4sf) __B, @@ -8008,7 +8008,7 @@ (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) +_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W, (__v4sf) __X, @@ -8024,7 +8024,7 @@ (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf) __A, -(__v4sf) __B, @@ -8040,7 +8040,7 @@ (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) +_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, (__v4sf) __B, @@ -8056,7 +8056,7 @@ _MM_FROUND_CUR_DIRECTION); }) static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) +_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W, (__v4sf) __X, @@ -8072,7 +8072,7 @@ (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A, (__v2df) __B, @@ -8088,7 +8088,7 @@ (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, (__v2df) __B, @@ -8104,7 +8104,7 @@ _MM_FROUND_CUR_DIRECTION); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) +_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, (__v2df) __X, @@ -8120,7 +8120,7 @@ (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A, -(__v2df) __B, @@ -8136,7 +8136,7 @@ (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, (__v2df) __B, @@ -8152,7 +8152,7 @@ (__mmask8)(U), (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) +_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, (__v2df) __X, @@ -8168,7 +8168,7 @@ (__mmask8)(U), (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_vfmaddsd3_mask ( -(__v2df) __A, (__v2df) __B, @@ -8184,7 +8184,7 @@ (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, (__v2df) __B, @@ -8200,7 +8200,7 @@ (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) +_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W, (__v2df) __X, @@ -8216,7 +8216,7 @@ (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_vfmaddsd3_mask ( -(__v2df) __A, -(__v2df) __B, @@ -8232,7 +8232,7 @@ (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) +_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, (__v2df) __B, @@ -8249,7 +8249,7 @@ _MM_FROUND_CUR_DIRECTION); }) static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) +_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) (__W), (__v2df) __X, @@ -9014,7 +9014,7 @@ #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ e8,e9,e10,e11,e12,e13,e14,e15) \ _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) - + static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_set_epi64 (long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, Index: avx512vldqintrin.h =================================================================== --- avx512vldqintrin.h +++ avx512vldqintrin.h @@ -1066,7 +1066,7 @@ _mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A, - (__v4df) __O, + (__v4df) __O, __M); } @@ -1138,7 +1138,7 @@ _mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A, - (__v4di) __O, + (__v4di) __O, __M); } Index: mwaitxintrin.h =================================================================== --- mwaitxintrin.h +++ mwaitxintrin.h @@ -42,6 +42,6 @@ __builtin_ia32_mwaitx(__extensions, __hints, __clock); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS #endif /* _MWAITXINTRIN_H */ Index: pmmintrin.h =================================================================== --- pmmintrin.h +++ pmmintrin.h @@ -31,9 +31,9 @@ __attribute__((__always_inline__, __nodebug__, __target__("sse3"))) /// \brief Loads data from an unaligned memory location to elements in a 128-bit -/// vector. If the address of the data is not 16-byte aligned, the -/// instruction may read two adjacent aligned blocks of memory to retrieve -/// the requested data. +/// vector. If the address of the data is not 16-byte aligned, the +/// instruction may read two adjacent aligned blocks of memory to retrieve +/// the requested data. /// /// \headerfile /// @@ -75,14 +75,14 @@ /// This intrinsic corresponds to the \c VHADDPS instruction. /// /// \param __a -/// A 128-bit vector of [4 x float] containing one of the source operands. -/// The horizontal sums of the values are stored in the lower bits of the +/// A 128-bit vector of [4 x float] containing one of the source operands. +/// The horizontal sums of the values are stored in the lower bits of the /// destination. /// \param __b -/// A 128-bit vector of [4 x float] containing one of the source operands. -/// The horizontal sums of the values are stored in the upper bits of the +/// A 128-bit vector of [4 x float] containing one of the source operands. +/// The horizontal sums of the values are stored in the upper bits of the /// destination. -/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of +/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of /// both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hadd_ps(__m128 __a, __m128 __b) @@ -98,14 +98,14 @@ /// This intrinsic corresponds to the \c VHSUBPS instruction. /// /// \param __a -/// A 128-bit vector of [4 x float] containing one of the source operands. -/// The horizontal differences between the values are stored in the lower +/// A 128-bit vector of [4 x float] containing one of the source operands. +/// The horizontal differences between the values are stored in the lower /// bits of the destination. /// \param __b -/// A 128-bit vector of [4 x float] containing one of the source operands. -/// The horizontal differences between the values are stored in the upper +/// A 128-bit vector of [4 x float] containing one of the source operands. +/// The horizontal differences between the values are stored in the upper /// bits of the destination. -/// \returns A 128-bit vector of [4 x float] containing the horizontal +/// \returns A 128-bit vector of [4 x float] containing the horizontal /// differences of both operands. static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hsub_ps(__m128 __a, __m128 __b) @@ -168,7 +168,7 @@ /// A 128-bit vector of [2 x double] containing the left source operand. /// \param __b /// A 128-bit vector of [2 x double] containing the right source operand. -/// \returns A 128-bit vector of [2 x double] containing the alternating sums +/// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_addsub_pd(__m128d __a, __m128d __b) @@ -176,7 +176,7 @@ return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } -/// \brief Horizontally adds the pairs of values contained in two 128-bit +/// \brief Horizontally adds the pairs of values contained in two 128-bit /// vectors of [2 x double]. /// /// \headerfile @@ -184,12 +184,12 @@ /// This intrinsic corresponds to the \c VHADDPD instruction. /// /// \param __a -/// A 128-bit vector of [2 x double] containing one of the source operands. -/// The horizontal sum of the values is stored in the lower bits of the +/// A 128-bit vector of [2 x double] containing one of the source operands. +/// The horizontal sum of the values is stored in the lower bits of the /// destination. /// \param __b -/// A 128-bit vector of [2 x double] containing one of the source operands. -/// The horizontal sum of the values is stored in the upper bits of the +/// A 128-bit vector of [2 x double] containing one of the source operands. +/// The horizontal sum of the values is stored in the upper bits of the /// destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of /// both operands. @@ -207,14 +207,14 @@ /// This intrinsic corresponds to the \c VHSUBPD instruction. /// /// \param __a -/// A 128-bit vector of [2 x double] containing one of the source operands. +/// A 128-bit vector of [2 x double] containing one of the source operands. /// The horizontal difference of the values is stored in the lower bits of /// the destination. /// \param __b -/// A 128-bit vector of [2 x double] containing one of the source operands. +/// A 128-bit vector of [2 x double] containing one of the source operands. /// The horizontal difference of the values is stored in the upper bits of /// the destination. -/// \returns A 128-bit vector of [2 x double] containing the horizontal +/// \returns A 128-bit vector of [2 x double] containing the horizontal /// differences of both operands. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hsub_pd(__m128d __a, __m128d __b) @@ -227,19 +227,19 @@ /// /// \headerfile /// -/// \code +/// \code /// __m128d _mm_loaddup_pd(double const * dp); -/// \endcode +/// \endcode /// /// This intrinsic corresponds to the \c VMOVDDUP instruction. /// /// \param dp /// A pointer to a double-precision value to be moved and duplicated. -/// \returns A 128-bit vector of [2 x double] containing the moved and +/// \returns A 128-bit vector of [2 x double] containing the moved and /// duplicated values. #define _mm_loaddup_pd(dp) _mm_load1_pd(dp) -/// \brief Moves and duplicates the double-precision value in the lower bits of +/// \brief Moves and duplicates the double-precision value in the lower bits of /// a 128-bit vector of [2 x double] to double-precision values stored in a /// 128-bit vector of [2 x double]. /// @@ -248,9 +248,9 @@ /// This intrinsic corresponds to the \c VMOVDDUP instruction. /// /// \param __a -/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits +/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits /// [127:64] and [63:0] of the destination. -/// \returns A 128-bit vector of [2 x double] containing the moved and +/// \returns A 128-bit vector of [2 x double] containing the moved and /// duplicated values. static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a) @@ -266,8 +266,8 @@ #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) #define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) -/// \brief Establishes a linear address memory range to be monitored and puts -/// the processor in the monitor event pending state. Data stored in the +/// \brief Establishes a linear address memory range to be monitored and puts +/// the processor in the monitor event pending state. Data stored in the /// monitored address range causes the processor to exit the pending state. /// /// \headerfile @@ -275,7 +275,7 @@ /// This intrinsic corresponds to the \c MONITOR instruction. /// /// \param __p -/// The memory range to be monitored. The size of the range is determined by +/// The memory range to be monitored. The size of the range is determined by /// CPUID function 0000_0005h. /// \param __extensions /// Optional extensions for the monitoring state. @@ -296,7 +296,7 @@ /// This intrinsic corresponds to the \c MWAIT instruction. /// /// \param __extensions -/// Optional extensions for the monitoring state, which may vary by +/// Optional extensions for the monitoring state, which may vary by /// processor. /// \param __hints /// Optional hints for the monitoring state, which may vary by processor.