Index: ammintrin.h =================================================================== --- ammintrin.h +++ ammintrin.h @@ -30,10 +30,47 @@ #include +/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit +/// operand, using the length and bit index specified in the immediate +/// bytes. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx); +/// \endcode +/// +/// \code +/// This intrinsic corresponds to the \c EXTRQ instruction. +/// \endcode +/// +/// \param x +/// The value from which bits are extracted. +/// \param len +/// Specifies the length at [5:0]. +/// \param idx +/// Specifies the index of the least significant bit at [5:0]. +/// \returns The bits extracted from the operand. #define _mm_extracti_si64(x, len, idx) \ ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \ (char)(len), (char)(idx))) +/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit +/// integer vector operand. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// This intrinsic corresponds to the \c EXTRQ instruction. +/// \endcode +/// +/// \param __x +/// The value from which bits are extracted. +/// \param __y +/// Specifies the index of the least significant bit at [13:8], +/// and the length at [5:0]. +/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted +/// from the operand. static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_extract_si64(__m128i __x, __m128i __y) { @@ -40,11 +77,59 @@ return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y); } +/// \brief Inserts bits from the lower 64 bits of the source integer vector +/// operand into the lower 64 bits of the destination integer vector +/// operand, using the length and bit index specified in the immediate +/// bytes. No other bits in the lower 64 bits of the destination are +/// modified. The upper 64 bits of the destination are undefined. 
+/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len, +/// const int idx); +/// \endcode +/// +/// \code +/// This intrinsic corresponds to the \c INSERTQ instruction. +/// \endcode +/// +/// \param x +/// The operand containing the bits used for the destination. +/// \param y +/// The source operand. +/// \param len +/// Immediate specifying the length at [5:0]. +/// \param idx +/// Immediate specifying the bit index at [5:0]. +/// \returns The result of inserting bits from operand y into +/// operand x. #define _mm_inserti_si64(x, y, len, idx) \ ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \ (__v2di)(__m128i)(y), \ (char)(len), (char)(idx))) +/// \brief Inserts bits from the lower 64 bits of the source integer vector +/// operand into the lower 64 bits of the destination integer vector +/// operand at the index specified by operand __y. No +/// other bits in the lower 64 bits of the destination are modified. The +/// upper 64 bits of the destination are undefined. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// This intrinsic corresponds to the \c INSERTQ instruction. +/// \endcode +/// +/// \param __x +/// The lower 64 bits of this operand are copied to the lower 64 +/// bits of the destination. +/// \param __y +/// The field beginning at bit 0 of this operand with length +/// specified by bits [69:64] is inserted into the destination +/// at the index specified by bits [77:72]. +/// \returns The result of inserting bits from operand __y into +/// operand __x. static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_insert_si64(__m128i __x, __m128i __y) { @@ -51,6 +136,21 @@ return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y); } +/// \brief Stores a 64-bit double-precision value in a 64-bit memory location. +/// To minimize caching, the data is flagged as non-temporal (unlikely to be +/// used again soon). 
+/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// This intrinsic corresponds to the \c MOVNTSD instruction. +/// \endcode +/// +/// \param __p +/// The 64-bit memory location used to store the register value. +/// \param __a +/// The 64-bit double-precision floating-point register value to +/// be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__)) _mm_stream_sd(double *__p, __m128d __a) { @@ -57,6 +157,21 @@ __builtin_ia32_movntsd(__p, (__v2df)__a); } +/// \brief Stores a 32-bit single-precision floating-point value in a 32-bit +/// memory location. To minimize caching, the data is flagged as +/// non-temporal (unlikely to be used again soon). +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// This intrinsic corresponds to the \c MOVNTSS instruction. +/// \endcode +/// +/// \param __p +/// The 32-bit memory location used to store the register value. +/// \param __a +/// The 32-bit single-precision floating-point register value to +/// be stored. static __inline__ void __attribute__((__always_inline__, __nodebug__)) _mm_stream_ss(float *__p, __m128 __a) {