diff --git a/clang/lib/Headers/__wmmintrin_pclmul.h b/clang/lib/Headers/__wmmintrin_pclmul.h --- a/clang/lib/Headers/__wmmintrin_pclmul.h +++ b/clang/lib/Headers/__wmmintrin_pclmul.h @@ -22,23 +22,23 @@ /// \headerfile /// /// \code -/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I); +/// __m128i _mm_clmulepi64_si128(__m128i X, __m128i Y, const int I); /// \endcode /// /// This intrinsic corresponds to the VPCLMULQDQ instruction. /// -/// \param __X +/// \param X /// A 128-bit vector of [2 x i64] containing one of the source operands. -/// \param __Y +/// \param Y /// A 128-bit vector of [2 x i64] containing one of the source operands. -/// \param __I +/// \param I /// An immediate value specifying which 64-bit values to select from the -/// operands. Bit 0 is used to select a value from operand \a __X, and bit -/// 4 is used to select a value from operand \a __Y: \n -/// Bit[0]=0 indicates that bits[63:0] of operand \a __X are used. \n -/// Bit[0]=1 indicates that bits[127:64] of operand \a __X are used. \n -/// Bit[4]=0 indicates that bits[63:0] of operand \a __Y are used. \n -/// Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used. +/// operands. Bit 0 is used to select a value from operand \a X, and bit +/// 4 is used to select a value from operand \a Y: \n +/// Bit[0]=0 indicates that bits[63:0] of operand \a X are used. \n +/// Bit[0]=1 indicates that bits[127:64] of operand \a X are used. \n +/// Bit[4]=0 indicates that bits[63:0] of operand \a Y are used. \n +/// Bit[4]=1 indicates that bits[127:64] of operand \a Y are used. /// \returns The 128-bit integer vector containing the result of the carry-less /// multiplication of the selected 64-bit values. 
#define _mm_clmulepi64_si128(X, Y, I) \ diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -1504,7 +1504,10 @@ /// 00: Bits [31:0] and [159:128] are copied from the selected operand. \n /// 01: Bits [63:32] and [191:160] are copied from the selected operand. \n /// 10: Bits [95:64] and [223:192] are copied from the selected operand. \n -/// 11: Bits [127:96] and [255:224] are copied from the selected operand. +/// 11: Bits [127:96] and [255:224] are copied from the selected operand. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 256-bit vector of [8 x float] containing the shuffled values. #define _mm256_shuffle_ps(a, b, mask) \ ((__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \ @@ -1953,12 +1956,16 @@ /// /// \headerfile /// +/// \code +/// int _mm256_extract_epi32(__m256i X, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A 256-bit vector of [8 x i32]. -/// \param __imm +/// \param N /// An immediate integer operand with bits [2:0] determining which vector /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 32 bits of extended @@ -1971,12 +1978,16 @@ /// /// \headerfile /// +/// \code +/// int _mm256_extract_epi16(__m256i X, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A 256-bit integer vector of [16 x i16]. -/// \param __imm +/// \param N /// An immediate integer operand with bits [3:0] determining which vector /// element is extracted and returned. 
/// \returns A 32-bit integer containing the extracted 16 bits of zero extended @@ -1990,12 +2001,16 @@ /// /// \headerfile /// +/// \code +/// int _mm256_extract_epi8(__m256i X, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A 256-bit integer vector of [32 x i8]. -/// \param __imm +/// \param N /// An immediate integer operand with bits [4:0] determining which vector /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 8 bits of zero extended @@ -2010,12 +2025,16 @@ /// /// \headerfile /// +/// \code +/// long long _mm256_extract_epi64(__m256i X, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VEXTRACTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A 256-bit integer vector of [4 x i64]. -/// \param __imm +/// \param N /// An immediate integer operand with bits [1:0] determining which vector /// element is extracted and returned. /// \returns A 64-bit integer containing the extracted 64 bits of extended @@ -2030,18 +2049,22 @@ /// /// \headerfile /// +/// \code +/// __m256i _mm256_insert_epi32(__m256i X, int I, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A vector of [8 x i32] to be used by the insert operation. -/// \param __b +/// \param I /// An integer value. The replacement value for the insert operation. -/// \param __imm +/// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. -/// \returns A copy of vector \a __a, after replacing its element indexed by -/// \a __imm with \a __b. +/// \returns A copy of vector \a X, after replacing its element indexed by +/// \a N with \a I. 
#define _mm256_insert_epi32(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \ (int)(I), (int)(N))) @@ -2053,18 +2076,22 @@ /// /// \headerfile /// +/// \code +/// __m256i _mm256_insert_epi16(__m256i X, int I, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A vector of [16 x i16] to be used by the insert operation. -/// \param __b +/// \param I /// An i16 integer value. The replacement value for the insert operation. -/// \param __imm +/// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. -/// \returns A copy of vector \a __a, after replacing its element indexed by -/// \a __imm with \a __b. +/// \returns A copy of vector \a X, after replacing its element indexed by +/// \a N with \a I. #define _mm256_insert_epi16(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \ (int)(I), (int)(N))) @@ -2075,18 +2102,22 @@ /// /// \headerfile /// +/// \code +/// __m256i _mm256_insert_epi8(__m256i X, int I, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A vector of [32 x i8] to be used by the insert operation. -/// \param __b +/// \param I /// An i8 integer value. The replacement value for the insert operation. -/// \param __imm +/// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. -/// \returns A copy of vector \a __a, after replacing its element indexed by -/// \a __imm with \a __b. +/// \returns A copy of vector \a X, after replacing its element indexed by +/// \a N with \a I. 
#define _mm256_insert_epi8(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \ (int)(I), (int)(N))) @@ -2098,18 +2129,22 @@ /// /// \headerfile /// +/// \code +/// __m256i _mm256_insert_epi64(__m256i X, long long I, const int N); +/// \endcode +/// /// This intrinsic corresponds to the VINSERTF128+COMPOSITE /// instruction. /// -/// \param __a +/// \param X /// A vector of [4 x i64] to be used by the insert operation. -/// \param __b +/// \param I /// A 64-bit integer value. The replacement value for the insert operation. -/// \param __imm +/// \param N /// An immediate integer specifying the index of the vector element to be /// replaced. -/// \returns A copy of vector \a __a, after replacing its element indexed by -/// \a __imm with \a __b. +/// \returns A copy of vector \a X, after replacing its element indexed by +/// \a N with \a I. #define _mm256_insert_epi64(X, I, N) \ ((__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \ (long long)(I), (int)(N))) diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h --- a/clang/lib/Headers/bmiintrin.h +++ b/clang/lib/Headers/bmiintrin.h @@ -47,6 +47,7 @@ /// An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 32-bit integer containing the number of trailing zero /// bits in the operand. +/// \see _mm_tzcnt_32 static __inline__ unsigned int __RELAXED_FN_ATTRS __tzcnt_u32(unsigned int __X) { @@ -63,6 +64,7 @@ /// An unsigned 32-bit integer whose trailing zeros are to be counted. /// \returns An 32-bit integer containing the number of trailing zero bits in /// the operand. +/// \see __tzcnt_u32 static __inline__ int __RELAXED_FN_ATTRS _mm_tzcnt_32(unsigned int __X) { @@ -83,6 +85,7 @@ /// An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An unsigned 64-bit integer containing the number of trailing zero /// bits in the operand. 
+/// \see _mm_tzcnt_64 static __inline__ unsigned long long __RELAXED_FN_ATTRS __tzcnt_u64(unsigned long long __X) { @@ -99,6 +102,7 @@ /// An unsigned 64-bit integer whose trailing zeros are to be counted. /// \returns An 64-bit integer containing the number of trailing zero bits in /// the operand. +/// \see __tzcnt_u64 static __inline__ long long __RELAXED_FN_ATTRS _mm_tzcnt_64(unsigned long long __X) { diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -4126,21 +4126,25 @@ /// /// \headerfile /// +/// \code +/// int _mm_extract_epi16(__m128i a, const int imm); +/// \endcode +/// /// This intrinsic corresponds to the VPEXTRW / PEXTRW instruction. /// -/// \param __a +/// \param a /// A 128-bit integer vector. -/// \param __imm -/// An immediate value. Bits [2:0] selects values from \a __a to be assigned +/// \param imm +/// An immediate value. Bits [2:0] select values from \a a to be assigned /// to bits[15:0] of the result. \n -/// 000: assign values from bits [15:0] of \a __a. \n -/// 001: assign values from bits [31:16] of \a __a. \n -/// 010: assign values from bits [47:32] of \a __a. \n -/// 011: assign values from bits [63:48] of \a __a. \n -/// 100: assign values from bits [79:64] of \a __a. \n -/// 101: assign values from bits [95:80] of \a __a. \n -/// 110: assign values from bits [111:96] of \a __a. \n -/// 111: assign values from bits [127:112] of \a __a. +/// 000: assign values from bits [15:0] of \a a. \n +/// 001: assign values from bits [31:16] of \a a. \n +/// 010: assign values from bits [47:32] of \a a. \n +/// 011: assign values from bits [63:48] of \a a. \n +/// 100: assign values from bits [79:64] of \a a. \n +/// 101: assign values from bits [95:80] of \a a. \n +/// 110: assign values from bits [111:96] of \a a. \n +/// 111: assign values from bits [127:112] of \a a. 
/// \returns An integer, whose lower 16 bits are selected from the 128-bit /// integer vector parameter and the remaining bits are assigned zeros. #define _mm_extract_epi16(a, imm) \ @@ -4154,18 +4158,22 @@ /// /// \headerfile /// +/// \code +/// __m128i _mm_insert_epi16(__m128i a, int b, const int imm); +/// \endcode +/// /// This intrinsic corresponds to the VPINSRW / PINSRW instruction. /// -/// \param __a +/// \param a /// A 128-bit integer vector of [8 x i16]. This vector is copied to the /// result and then one of the eight elements in the result is replaced by -/// the lower 16 bits of \a __b. -/// \param __b +/// the lower 16 bits of \a b. +/// \param b /// An integer. The lower 16 bits of this parameter are written to the -/// result beginning at an offset specified by \a __imm. -/// \param __imm +/// result beginning at an offset specified by \a imm. +/// \param imm /// An immediate value specifying the bit offset in the result at which the -/// lower 16 bits of \a __b are written. +/// lower 16 bits of \a b are written. /// \returns A 128-bit integer vector containing the constructed values. #define _mm_insert_epi16(a, b, imm) \ ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \ @@ -4213,7 +4221,10 @@ /// 00: assign values from bits [31:0] of \a a. \n /// 01: assign values from bits [63:32] of \a a. \n /// 10: assign values from bits [95:64] of \a a. \n -/// 11: assign values from bits [127:96] of \a a. +/// 11: assign values from bits [127:96] of \a a. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shuffle_epi32(a, imm) \ ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))) @@ -4244,6 +4255,9 @@ /// 01: assign values from bits [31:16] of \a a. \n /// 10: assign values from bits [47:32] of \a a. 
\n /// 11: assign values from bits [63:48] of \a a. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shufflelo_epi16(a, imm) \ ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))) @@ -4274,6 +4288,9 @@ /// 01: assign values from bits [95:80] of \a a. \n /// 10: assign values from bits [111:96] of \a a. \n /// 11: assign values from bits [127:112] of \a a. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 128-bit integer vector containing the shuffled values. #define _mm_shufflehi_epi16(a, imm) \ ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))) @@ -4617,6 +4634,9 @@ /// Bit[0] = 1: upper element of \a a copied to lower element of result. \n /// Bit[1] = 0: lower element of \a b copied to upper element of result. \n /// Bit[1] = 1: upper element of \a b copied to upper element of result. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE2 macro. +/// _MM_SHUFFLE2(b1, b0) can create a 2-bit mask of the form +/// [b1, b0]. /// \returns A 128-bit vector of [2 x double] containing the shuffled values. #define _mm_shuffle_pd(a, b, i) \ ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -1213,8 +1213,8 @@ /// This intrinsic corresponds to the VPMOVSXBW / PMOVSXBW instruction. /// /// \param __V -/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign- -/// extended to 16-bit values. +/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are +/// sign-extended to 16-bit values. 
/// \returns A 128-bit vector of [8 x i16] containing the sign-extended values. static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2086,7 +2086,7 @@ /// \headerfile /// /// \code -/// void _mm_prefetch(const void * a, const int sel); +/// void _mm_prefetch(const void *a, const int sel); /// \endcode /// /// This intrinsic corresponds to the PREFETCHNTA instruction. @@ -2360,7 +2360,10 @@ /// 00: assigned from bits [15:0] of \a a. \n /// 01: assigned from bits [31:16] of \a a. \n /// 10: assigned from bits [47:32] of \a a. \n -/// 11: assigned from bits [63:48] of \a a. +/// 11: assigned from bits [63:48] of \a a. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 64-bit integer vector containing the shuffled values. #define _mm_shuffle_pi16(a, n) \ ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))) @@ -2602,7 +2605,10 @@ /// 00: Bits [31:0] copied from the specified operand. \n /// 01: Bits [63:32] copied from the specified operand. \n /// 10: Bits [95:64] copied from the specified operand. \n -/// 11: Bits [127:96] copied from the specified operand. +/// 11: Bits [127:96] copied from the specified operand. \n +/// Note: To generate a mask, you can use the \c _MM_SHUFFLE macro. +/// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form +/// [b6, b4, b2, b0]. /// \returns A 128-bit vector of [4 x float] containing the shuffled values. #define _mm_shuffle_ps(a, b, mask) \ ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \