Index: lib/Headers/bmiintrin.h
===================================================================
--- lib/Headers/bmiintrin.h
+++ lib/Headers/bmiintrin.h
@@ -100,6 +100,7 @@
 ///    number of bits to be extracted.
 /// \returns An unsigned integer whose least significant bits contain the
 ///    extracted bits.
+/// \see _bextr_u32
 static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __bextr_u32(unsigned int __X, unsigned int __Y)
 {
@@ -124,6 +125,7 @@
 ///    Bits [7:0] specify the number of bits.
 /// \returns An unsigned integer whose least significant bits contain the
 ///    extracted bits.
+/// \see __bextr_u32
 static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
 {
@@ -261,6 +263,7 @@
 ///    the number of bits to be extracted.
 /// \returns An unsigned 64-bit integer whose least significant bits contain the
 ///    extracted bits.
+/// \see _bextr_u64
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __bextr_u64(unsigned long long __X, unsigned long long __Y)
 {
@@ -285,6 +288,7 @@
 ///    Bits [7:0] specify the number of bits.
 /// \returns An unsigned 64-bit integer whose least significant bits contain the
 ///    extracted bits.
+/// \see __bextr_u64
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
 {
Index: lib/Headers/lzcntintrin.h
===================================================================
--- lib/Headers/lzcntintrin.h
+++ lib/Headers/lzcntintrin.h
@@ -57,6 +57,7 @@
 ///    An unsigned 32-bit integer whose leading zeros are to be counted.
 /// \returns An unsigned 32-bit integer containing the number of leading zero
 ///    bits in the operand.
+/// \see _lzcnt_u32
 static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __lzcnt32(unsigned int __X)
 {
@@ -73,6 +74,7 @@
 ///    An unsigned 32-bit integer whose leading zeros are to be counted.
 /// \returns An unsigned 32-bit integer containing the number of leading zero
 ///    bits in the operand.
+/// \see __lzcnt32
 static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _lzcnt_u32(unsigned int __X)
 {
@@ -90,6 +92,7 @@
 ///    An unsigned 64-bit integer whose leading zeros are to be counted.
 /// \returns An unsigned 64-bit integer containing the number of leading zero
 ///    bits in the operand.
+/// \see _lzcnt_u64
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __lzcnt64(unsigned long long __X)
 {
@@ -106,6 +109,7 @@
 ///    An unsigned 64-bit integer whose leading zeros are to be counted.
 /// \returns An unsigned 64-bit integer containing the number of leading zero
 ///    bits in the operand.
+/// \see __lzcnt64
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _lzcnt_u64(unsigned long long __X)
 {
Index: lib/Headers/pmmintrin.h
===================================================================
--- lib/Headers/pmmintrin.h
+++ lib/Headers/pmmintrin.h
@@ -229,7 +229,7 @@
 /// \headerfile <x86intrin.h>
 ///
 /// \code
-/// __m128d _mm_loaddup_pd(double const * dp);
+/// __m128d _mm_loaddup_pd(double const *dp);
 /// \endcode
 ///
 /// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
Index: lib/Headers/smmintrin.h
===================================================================
--- lib/Headers/smmintrin.h
+++ lib/Headers/smmintrin.h
@@ -493,7 +493,7 @@
 /// \param __V2
 ///    A 128-bit vector of [16 x i8].
 /// \param __M
-///    A 128-bit vector operand, with mask bits 127, 119, 111 ... 7 specifying
+///    A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying
 ///    how the values are to be copied. The position of the mask bit corresponds
 ///    to the most significant bit of a copied value. When a mask bit is 0, the
 ///    corresponding 8-bit element in operand \a __V1 is copied to the same
@@ -1277,8 +1277,8 @@
 /// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.
 ///
 /// \param __V
-///    A 128-bit vector of [16 x i8]. The lower four 8-bit elements are sign-
-///    extended to 32-bit values.
+///    A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
+///    sign-extended to 32-bit values.
 /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi8_epi32(__m128i __V)
@@ -1298,8 +1298,8 @@
 /// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.
 ///
 /// \param __V
-///    A 128-bit vector of [16 x i8]. The lower two 8-bit elements are sign-
-///    extended to 64-bit values.
+///    A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
+///    sign-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi8_epi64(__m128i __V)
@@ -1319,8 +1319,8 @@
 /// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.
 ///
 /// \param __V
-///    A 128-bit vector of [8 x i16]. The lower four 16-bit elements are sign-
-///    extended to 32-bit values.
+///    A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
+///    sign-extended to 32-bit values.
 /// \returns A 128-bit vector of [4 x i32] containing the sign-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi16_epi32(__m128i __V)
@@ -1338,8 +1338,8 @@
 /// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.
 ///
 /// \param __V
-///    A 128-bit vector of [8 x i16]. The lower two 16-bit elements are sign-
-///    extended to 64-bit values.
+///    A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
+///    sign-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi16_epi64(__m128i __V)
@@ -1357,8 +1357,8 @@
 /// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.
 ///
 /// \param __V
-///    A 128-bit vector of [4 x i32]. The lower two 32-bit elements are sign-
-///    extended to 64-bit values.
+///    A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
+///    sign-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the sign-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi32_epi64(__m128i __V)
@@ -1377,8 +1377,8 @@
 /// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.
 ///
 /// \param __V
-///    A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are zero-
-///    extended to 16-bit values.
+///    A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are
+///    zero-extended to 16-bit values.
 /// \returns A 128-bit vector of [8 x i16] containing the zero-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi16(__m128i __V)
@@ -1396,8 +1396,8 @@
 /// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.
 ///
 /// \param __V
-///    A 128-bit vector of [16 x i8]. The lower four 8-bit elements are zero-
-///    extended to 32-bit values.
+///    A 128-bit vector of [16 x i8]. The lower four 8-bit elements are
+///    zero-extended to 32-bit values.
 /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi32(__m128i __V)
@@ -1415,8 +1415,8 @@
 /// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.
 ///
 /// \param __V
-///    A 128-bit vector of [16 x i8]. The lower two 8-bit elements are zero-
-///    extended to 64-bit values.
+///    A 128-bit vector of [16 x i8]. The lower two 8-bit elements are
+///    zero-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi64(__m128i __V)
@@ -1434,8 +1434,8 @@
 /// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.
 ///
 /// \param __V
-///    A 128-bit vector of [8 x i16]. The lower four 16-bit elements are zero-
-///    extended to 32-bit values.
+///    A 128-bit vector of [8 x i16]. The lower four 16-bit elements are
+///    zero-extended to 32-bit values.
 /// \returns A 128-bit vector of [4 x i32] containing the zero-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu16_epi32(__m128i __V)
@@ -1453,8 +1453,8 @@
 /// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.
 ///
 /// \param __V
-///    A 128-bit vector of [8 x i16]. The lower two 16-bit elements are zero-
-///    extended to 64-bit values.
+///    A 128-bit vector of [8 x i16]. The lower two 16-bit elements are
+///    zero-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu16_epi64(__m128i __V)
@@ -1472,8 +1472,8 @@
 /// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.
 ///
 /// \param __V
-///    A 128-bit vector of [4 x i32]. The lower two 32-bit elements are zero-
-///    extended to 64-bit values.
+///    A 128-bit vector of [4 x i32]. The lower two 32-bit elements are
+///    zero-extended to 64-bit values.
 /// \returns A 128-bit vector of [2 x i64] containing the zero-extended values.
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu32_epi64(__m128i __V)
@@ -1534,14 +1534,14 @@
 ///    \code
 ///    // M2 represents bit 2 of the immediate operand
 ///    // M10 represents bits [1:0] of the immediate operand
-///    i = M2 * 4
-///    j = M10 * 4
+///    i = M2 * 4;
+///    j = M10 * 4;
 ///    for (k = 0; k < 8; k = k + 1) {
-///      d0 = abs(X[i + k + 0] - Y[j + 0])
-///      d1 = abs(X[i + k + 1] - Y[j + 1])
-///      d2 = abs(X[i + k + 2] - Y[j + 2])
-///      d3 = abs(X[i + k + 3] - Y[j + 3])
-///      r[k] = d0 + d1 + d2 + d3
+///      d0 = abs(X[i + k + 0] - Y[j + 0]);
+///      d1 = abs(X[i + k + 1] - Y[j + 1]);
+///      d2 = abs(X[i + k + 2] - Y[j + 2]);
+///      d3 = abs(X[i + k + 3] - Y[j + 3]);
+///      r[k] = d0 + d1 + d2 + d3;
 ///    }
 ///    \endcode
 /// \returns A 128-bit integer vector containing the sums of the sets of
Index: lib/Headers/xmmintrin.h
===================================================================
--- lib/Headers/xmmintrin.h
+++ lib/Headers/xmmintrin.h
@@ -2495,10 +2495,14 @@
 ///
 ///    For example, the following expression checks if an overflow exception has
 ///    occurred:
+///    \code
 ///      ( _mm_getcsr() & _MM_EXCEPT_OVERFLOW )
+///    \endcode
 ///
 ///    The following expression gets the current rounding mode:
+///    \code
 ///      _MM_GET_ROUNDING_MODE()
+///    \endcode
 ///
 /// \headerfile <x86intrin.h>
 ///