diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -662,6 +662,14 @@ * Support intrinsic of ``_mm(256)_dpbssd(s)_epi32``. * Support intrinsic of ``_mm(256)_dpbsud(s)_epi32``. * Support intrinsic of ``_mm(256)_dpbuud(s)_epi32``. +- Support ISA of ``AVX-NE-CONVERT``. + * Support intrinsic of ``_mm(256)_bcstnebf16_ps``. + * Support intrinsic of ``_mm(256)_bcstnesh_ps``. + * Support intrinsic of ``_mm(256)_cvtneebf16_ps``. + * Support intrinsic of ``_mm(256)_cvtneeph_ps``. + * Support intrinsic of ``_mm(256)_cvtneobf16_ps``. + * Support intrinsic of ``_mm(256)_cvtneoph_ps``. + * Support intrinsic of ``_mm(256)_cvtneps_avx_pbh``. WebAssembly Support in Clang ---------------------------- diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -2116,6 +2116,22 @@ TARGET_HEADER_BUILTIN(__readgsdword, "UNiUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__readgsqword, "ULLiUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +// AVX-NE-CONVERT +TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps128, "V4fyC*", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps256, "V8fyC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vbcstnesh2ps128, "V4fxC*", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vbcstnesh2ps256, "V8fxC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneebf162ps128, "V4fV8yC*", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneebf162ps256, "V8fV16yC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneeph2ps128, "V4fV8xC*", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneeph2ps256, "V8fV16xC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneobf162ps128, "V4fV8yC*", "nV:128:", "avxneconvert") 
+TARGET_BUILTIN(__builtin_ia32_vcvtneobf162ps256, "V8fV16yC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneoph2ps128, "V4fV8xC*", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneoph2ps256, "V8fV16xC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16128, "V8yV4f", "nV:128:", "avx512bf16,avx512vl|avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16256, "V8yV8f", "nV:256:", "avx512bf16,avx512vl|avxneconvert") + TARGET_HEADER_BUILTIN(_InterlockedAnd64, "WiWiD*Wi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement64, "WiWiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange64, "WiWiD*Wi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4592,6 +4592,8 @@ def mno_avx512vp2intersect : Flag<["-"], "mno-avx512vp2intersect">, Group; def mavxifma : Flag<["-"], "mavxifma">, Group; def mno_avxifma : Flag<["-"], "mno-avxifma">, Group; +def mavxneconvert : Flag<["-"], "mavxneconvert">, Group; +def mno_avxneconvert : Flag<["-"], "mno-avxneconvert">, Group; def mavxvnniint8 : Flag<["-"], "mavxvnniint8">, Group; def mno_avxvnniint8 : Flag<["-"], "mno-avxvnniint8">, Group; def mavxvnni : Flag<["-"], "mavxvnni">, Group; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -142,6 +142,7 @@ bool HasCMPCCXADD = false; bool HasRAOINT = false; bool HasAVXVNNIINT8 = false; + bool HasAVXNECONVERT = false; bool HasKL = false; // For key locker bool HasWIDEKL = false; // For wide key locker bool HasHRESET = false; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -340,6 +340,8 @@ 
HasRAOINT = true; } else if (Feature == "+avxifma") { HasAVXIFMA = true; + } else if (Feature == "+avxneconvert") { + HasAVXNECONVERT= true; } else if (Feature == "+avxvnni") { HasAVXVNNI = true; } else if (Feature == "+avxvnniint8") { @@ -796,6 +798,8 @@ Builder.defineMacro("__RAOINT__"); if (HasAVXIFMA) Builder.defineMacro("__AVXIFMA__"); + if (HasAVXNECONVERT) + Builder.defineMacro("__AVXNECONVERT__"); if (HasAVXVNNI) Builder.defineMacro("__AVXVNNI__"); if (HasAVXVNNIINT8) @@ -923,6 +927,7 @@ .Case("avx512ifma", true) .Case("avx512vp2intersect", true) .Case("avxifma", true) + .Case("avxneconvert", true) .Case("avxvnni", true) .Case("avxvnniint8", true) .Case("bmi", true) @@ -1023,7 +1028,7 @@ .Case("avx512ifma", HasAVX512IFMA) .Case("avx512vp2intersect", HasAVX512VP2INTERSECT) .Case("avxifma", HasAVXIFMA) - .Case("avxvnni", HasAVXVNNI) + .Case("avxneconvert", HasAVXNECONVERT) .Case("avxvnni", HasAVXVNNI) .Case("avxvnniint8", HasAVXVNNIINT8) .Case("bmi", HasBMI) diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -144,6 +144,7 @@ avx512vpopcntdqvlintrin.h avxifmaintrin.h avxintrin.h + avxneconvertintrin.h avxvnniint8intrin.h avxvnniintrin.h bmi2intrin.h diff --git a/clang/lib/Headers/avx512vlbf16intrin.h b/clang/lib/Headers/avx512vlbf16intrin.h --- a/clang/lib/Headers/avx512vlbf16intrin.h +++ b/clang/lib/Headers/avx512vlbf16intrin.h @@ -160,12 +160,8 @@ /// A 128-bit vector of [4 x float]. /// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from /// conversion of __A, and higher 64 bits are 0. -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_cvtneps_pbh(__m128 __A) { - return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, - (__v8bf)_mm_undefined_si128(), - (__mmask8)-1); -} +#define _mm_cvtneps_pbh(A) \ + ((__m128bh)__builtin_ia32_vcvtneps2bf16128((__v4sf)(A))) /// Convert Packed Single Data to Packed BF16 Data. 
/// @@ -218,12 +214,8 @@ /// \param __A /// A 256-bit vector of [8 x float]. /// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. -static __inline__ __m128bh __DEFAULT_FN_ATTRS256 -_mm256_cvtneps_pbh(__m256 __A) { - return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, - (__v8bf)_mm_undefined_si128(), - (__mmask8)-1); -} +#define _mm256_cvtneps_pbh(A) \ + ((__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)(A))) /// Convert Packed Single Data to Packed BF16 Data. /// diff --git a/clang/lib/Headers/avxneconvertintrin.h b/clang/lib/Headers/avxneconvertintrin.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/avxneconvertintrin.h @@ -0,0 +1,484 @@ +/*===-------------- avxneconvertintrin.h - AVXNECONVERT --------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif // __IMMINTRIN_H + +#ifdef __SSE2__ + +#ifndef __AVXNECONVERTINTRIN_H +#define __AVXNECONVERTINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"), \ + __min_vector_width__(256))) + +/// Convert scalar BF16 (16-bit) floating-point element +/// stored at memory locations starting at location \a __A to a +/// single-precision (32-bit) floating-point, broadcast it to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. 
+/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm_bcstnebf16_ps(const void *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VBCSTNEBF162PS instruction. +/// +/// \param __A +/// A pointer to a 16-bit memory location. The address of the memory +/// location does not have to be aligned. +/// \returns +/// A 128-bit vector of [4 x float]. +/// +/// \code{.operation} +/// b := Convert_BF16_To_FP32(MEM[__A+15:__A]) +/// FOR j := 0 to 3 +/// m := j*32 +/// dst[m+31:m] := b +/// ENDFOR +/// dst[MAX:128] := 0 +/// \endcode +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_bcstnebf16_ps(const void *__A) { + return (__m128)__builtin_ia32_vbcstnebf162ps128((const __bf16 *)__A); +} + +/// Convert scalar BF16 (16-bit) floating-point element +/// stored at memory locations starting at location \a __A to a +/// single-precision (32-bit) floating-point, broadcast it to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. +/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm256_bcstnebf16_ps(const void *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VBCSTNEBF162PS instruction. +/// +/// \param __A +/// A pointer to a 16-bit memory location. The address of the memory +/// location does not have to be aligned. +/// \returns +/// A 256-bit vector of [8 x float]. +/// +/// \code{.operation} +/// b := Convert_BF16_To_FP32(MEM[__A+15:__A]) +/// FOR j := 0 to 7 +/// m := j*32 +/// dst[m+31:m] := b +/// ENDFOR +/// dst[MAX:256] := 0 +/// \endcode +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_bcstnebf16_ps(const void *__A) { + return (__m256)__builtin_ia32_vbcstnebf162ps256((const __bf16 *)__A); +} + +/// Convert scalar half-precision (16-bit) floating-point element +/// stored at memory locations starting at location \a __A to a +/// single-precision (32-bit) floating-point, broadcast it to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. 
+/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm_bcstnesh_ps(const void *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VBCSTNESH2PS instruction. +/// +/// \param __A +/// A pointer to a 16-bit memory location. The address of the memory +/// location does not have to be aligned. +/// \returns +/// A 128-bit vector of [4 x float]. +/// +/// \code{.operation} +/// b := Convert_FP16_To_FP32(MEM[__A+15:__A]) +/// FOR j := 0 to 3 +/// m := j*32 +/// dst[m+31:m] := b +/// ENDFOR +/// dst[MAX:128] := 0 +/// \endcode +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_bcstnesh_ps(const void *__A) { + return (__m128)__builtin_ia32_vbcstnesh2ps128((const _Float16 *)__A); +} + +/// Convert scalar half-precision (16-bit) floating-point element +/// stored at memory locations starting at location \a __A to a +/// single-precision (32-bit) floating-point, broadcast it to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. +/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm256_bcstnesh_ps(const void *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VBCSTNESH2PS instruction. +/// +/// \param __A +/// A pointer to a 16-bit memory location. The address of the memory +/// location does not have to be aligned. +/// \returns +/// A 256-bit vector of [8 x float]. +/// +/// \code{.operation} +/// b := Convert_FP16_To_FP32(MEM[__A+15:__A]) +/// FOR j := 0 to 7 +/// m := j*32 +/// dst[m+31:m] := b +/// ENDFOR +/// dst[MAX:256] := 0 +/// \endcode +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_bcstnesh_ps(const void *__A) { + return (__m256)__builtin_ia32_vbcstnesh2ps256((const _Float16 *)__A); +} + +/// Convert packed BF16 (16-bit) floating-point even-indexed elements +/// stored at memory locations starting at location \a __A to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. 
+/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm_cvtneebf16_ps(const __m128bh *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VCVTNEEBF162PS instruction. +/// +/// \param __A +/// A pointer to a 128-bit memory location containing 8 consecutive +/// BF16 (16-bit) floating-point values. +/// \returns +/// A 128-bit vector of [4 x float]. +/// +/// \code{.operation} +/// FOR j := 0 to 3 +/// k := j*2 +/// i := k*16 +/// m := j*32 +/// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) +/// ENDFOR +/// dst[MAX:128] := 0 +/// \endcode +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_cvtneebf16_ps(const __m128bh *__A) { + return (__m128)__builtin_ia32_vcvtneebf162ps128((const __v8bf *)__A); +} + +/// Convert packed BF16 (16-bit) floating-point even-indexed elements +/// stored at memory locations starting at location \a __A to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. +/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm256_cvtneebf16_ps(const __m256bh *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VCVTNEEBF162PS instruction. +/// +/// \param __A +/// A pointer to a 256-bit memory location containing 16 consecutive +/// BF16 (16-bit) floating-point values. +/// \returns +/// A 256-bit vector of [8 x float]. +/// +/// \code{.operation} +/// FOR j := 0 to 7 +/// k := j*2 +/// i := k*16 +/// m := j*32 +/// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) +/// ENDFOR +/// dst[MAX:256] := 0 +/// \endcode +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_cvtneebf16_ps(const __m256bh *__A) { + return (__m256)__builtin_ia32_vcvtneebf162ps256((const __v16bf *)__A); +} + +/// Convert packed half-precision (16-bit) floating-point even-indexed elements +/// stored at memory locations starting at location \a __A to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. 
+/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm_cvtneeph_ps(const __m128h *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VCVTNEEPH2PS instruction. +/// +/// \param __A +/// A pointer to a 128-bit memory location containing 8 consecutive +/// half-precision (16-bit) floating-point values. +/// \returns +/// A 128-bit vector of [4 x float]. +/// +/// \code{.operation} +/// FOR j := 0 to 3 +/// k := j*2 +/// i := k*16 +/// m := j*32 +/// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) +/// ENDFOR +/// dst[MAX:128] := 0 +/// \endcode +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_cvtneeph_ps(const __m128h *__A) { + return (__m128)__builtin_ia32_vcvtneeph2ps128((const __v8hf *)__A); +} + +/// Convert packed half-precision (16-bit) floating-point even-indexed elements +/// stored at memory locations starting at location \a __A to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. +/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm256_cvtneeph_ps(const __m256h *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VCVTNEEPH2PS instruction. +/// +/// \param __A +/// A pointer to a 256-bit memory location containing 16 consecutive +/// half-precision (16-bit) floating-point values. +/// \returns +/// A 256-bit vector of [8 x float]. +/// +/// \code{.operation} +/// FOR j := 0 to 7 +/// k := j*2 +/// i := k*16 +/// m := j*32 +/// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) +/// ENDFOR +/// dst[MAX:256] := 0 +/// \endcode +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_cvtneeph_ps(const __m256h *__A) { + return (__m256)__builtin_ia32_vcvtneeph2ps256((const __v16hf *)__A); +} + +/// Convert packed BF16 (16-bit) floating-point odd-indexed elements +/// stored at memory locations starting at location \a __A to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. 
+/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm_cvtneobf16_ps(const __m128bh *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VCVTNEOBF162PS instruction. +/// +/// \param __A +/// A pointer to a 128-bit memory location containing 8 consecutive +/// BF16 (16-bit) floating-point values. +/// \returns +/// A 128-bit vector of [4 x float]. +/// +/// \code{.operation} +/// FOR j := 0 to 3 +/// k := j*2+1 +/// i := k*16 +/// m := j*32 +/// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) +/// ENDFOR +/// dst[MAX:128] := 0 +/// \endcode +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_cvtneobf16_ps(const __m128bh *__A) { + return (__m128)__builtin_ia32_vcvtneobf162ps128((const __v8bf *)__A); +} + +/// Convert packed BF16 (16-bit) floating-point odd-indexed elements +/// stored at memory locations starting at location \a __A to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. +/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm256_cvtneobf16_ps(const __m256bh *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VCVTNEOBF162PS instruction. +/// +/// \param __A +/// A pointer to a 256-bit memory location containing 16 consecutive +/// BF16 (16-bit) floating-point values. +/// \returns +/// A 256-bit vector of [8 x float]. +/// +/// \code{.operation} +/// FOR j := 0 to 7 +/// k := j*2+1 +/// i := k*16 +/// m := j*32 +/// dst[m+31:m] := Convert_BF16_To_FP32(MEM[__A+i+15:__A+i]) +/// ENDFOR +/// dst[MAX:256] := 0 +/// \endcode +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_cvtneobf16_ps(const __m256bh *__A) { + return (__m256)__builtin_ia32_vcvtneobf162ps256((const __v16bf *)__A); +} + +/// Convert packed half-precision (16-bit) floating-point odd-indexed elements +/// stored at memory locations starting at location \a __A to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. 
+/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm_cvtneoph_ps(const __m128h *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VCVTNEOPH2PS instruction. +/// +/// \param __A +/// A pointer to a 128-bit memory location containing 8 consecutive +/// half-precision (16-bit) floating-point values. +/// \returns +/// A 128-bit vector of [4 x float]. +/// +/// \code{.operation} +/// FOR j := 0 to 3 +/// k := j*2+1 +/// i := k*16 +/// m := j*32 +/// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) +/// ENDFOR +/// dst[MAX:128] := 0 +/// \endcode +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_cvtneoph_ps(const __m128h *__A) { + return (__m128)__builtin_ia32_vcvtneoph2ps128((const __v8hf *)__A); +} + +/// Convert packed half-precision (16-bit) floating-point odd-indexed elements +/// stored at memory locations starting at location \a __A to packed +/// single-precision (32-bit) floating-point elements, and store the results in +/// \a dst. +/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm256_cvtneoph_ps(const __m256h *__A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VCVTNEOPH2PS instruction. +/// +/// \param __A +/// A pointer to a 256-bit memory location containing 16 consecutive +/// half-precision (16-bit) floating-point values. +/// \returns +/// A 256-bit vector of [8 x float]. +/// +/// \code{.operation} +/// FOR j := 0 to 7 +/// k := j*2+1 +/// i := k*16 +/// m := j*32 +/// dst[m+31:m] := Convert_FP16_To_FP32(MEM[__A+i+15:__A+i]) +/// ENDFOR +/// dst[MAX:256] := 0 +/// \endcode +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_cvtneoph_ps(const __m256h *__A) { + return (__m256)__builtin_ia32_vcvtneoph2ps256((const __v16hf *)__A); +} + +/// Convert packed single-precision (32-bit) floating-point elements in \a __A +/// to packed BF16 (16-bit) floating-point elements, and store the results in \a +/// dst. 
+/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm_cvtneps_avx_pbh(__m128 __A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VCVTNEPS2BF16 instruction. +/// +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \returns +/// A 128-bit vector of [8 x bfloat]. +/// +/// \code{.operation} +/// FOR j := 0 to 3 +/// dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) +/// ENDFOR +/// dst[MAX:128] := 0 +/// \endcode +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_cvtneps_avx_pbh(__m128 __A) { + return (__m128bh)__builtin_ia32_vcvtneps2bf16128((__v4sf)__A); +} + +/// Convert packed single-precision (32-bit) floating-point elements in \a __A +/// to packed BF16 (16-bit) floating-point elements, and store the results in \a +/// dst. +/// +/// \headerfile <immintrin.h> +/// +/// \code +/// _mm256_cvtneps_avx_pbh(__m256 __A); +/// \endcode +/// +/// This intrinsic corresponds to the \c VCVTNEPS2BF16 instruction. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \returns +/// A 128-bit vector of [8 x bfloat]. 
+/// +/// \code{.operation} +/// FOR j := 0 to 7 +/// dst.word[j] := Convert_FP32_To_BF16(__A.fp32[j]) +/// ENDFOR +/// dst[MAX:128] := 0 +/// \endcode +static __inline__ __m128bh __DEFAULT_FN_ATTRS256 +_mm256_cvtneps_avx_pbh(__m256 __A) { + return (__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)__A); +} + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif // __AVXNECONVERTINTRIN_H +#endif // __SSE2__ diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -210,6 +210,7 @@ /* Features in %edx for leaf 7 sub-leaf 1 */ #define bit_AVXVNNIINT8 0x00000010 +#define bit_AVXNECONVERT 0x00000020 #define bit_PREFETCHI 0x00004000 /* Features in %eax for leaf 13 sub-leaf 1 */ diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -264,6 +264,11 @@ #include #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVXNECONVERT__) +#include +#endif + #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDPID__) /// Returns the value of the IA32_TSC_AUX MSR (0xc0000103). 
diff --git a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c --- a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c @@ -75,7 +75,7 @@ __m128bh test_mm_cvtneps2bf16(__m128 A) { // CHECK-LABEL: @test_mm_cvtneps2bf16 - // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128 + // CHECK: @llvm.x86.vcvtneps2bf16128 // CHECK: ret <8 x bfloat> %{{.*}} return _mm_cvtneps_pbh(A); } @@ -96,7 +96,7 @@ __m128bh test_mm256_cvtneps2bf16(__m256 A) { // CHECK-LABEL: @test_mm256_cvtneps2bf16 - // CHECK: @llvm.x86.avx512bf16.cvtneps2bf16.256 + // CHECK: @llvm.x86.vcvtneps2bf16256 // CHECK: ret <8 x bfloat> %{{.*}} return _mm256_cvtneps_pbh(A); } diff --git a/clang/test/CodeGen/X86/avxneconvert-builtins.c b/clang/test/CodeGen/X86/avxneconvert-builtins.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/X86/avxneconvert-builtins.c @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx2 -target-feature +avxneconvert \ +// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s +// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avx2 -target-feature +avxneconvert \ +// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s + +#include +#include + +__m128 test_mm_bcstnebf16_ps(const void *__A) { + // CHECK-LABEL: @test_mm_bcstnebf16_ps( + // CHECK: call <4 x float> @llvm.x86.vbcstnebf162ps128(ptr %{{.*}}) + return _mm_bcstnebf16_ps(__A); +} + +__m256 test_mm256_bcstnebf16_ps(const void *__A) { + // CHECK-LABEL: @test_mm256_bcstnebf16_ps( + // CHECK: call <8 x float> @llvm.x86.vbcstnebf162ps256(ptr %{{.*}}) + return _mm256_bcstnebf16_ps(__A); +} + +__m128 test_mm_bcstnesh_ps(const void *__A) { + // CHECK-LABEL: @test_mm_bcstnesh_ps( + // CHECK: call <4 x float> @llvm.x86.vbcstnesh2ps128(ptr %{{.*}}) + return _mm_bcstnesh_ps(__A); +} + 
+__m256 test_mm256_bcstnesh_ps(const void *__A) { + // CHECK-LABEL: @test_mm256_bcstnesh_ps( + // CHECK: call <8 x float> @llvm.x86.vbcstnesh2ps256(ptr %{{.*}}) + return _mm256_bcstnesh_ps(__A); +} + +__m128 test_mm_cvtneebf16_ps(const __m128bh *__A) { + // CHECK-LABEL: @test_mm_cvtneebf16_ps( + // CHECK: call <4 x float> @llvm.x86.vcvtneebf162ps128(ptr %{{.*}}) + return _mm_cvtneebf16_ps(__A); +} + +__m256 test_mm256_cvtneebf16_ps(const __m256bh *__A) { + // CHECK-LABEL: @test_mm256_cvtneebf16_ps( + // CHECK: call <8 x float> @llvm.x86.vcvtneebf162ps256(ptr %{{.*}}) + return _mm256_cvtneebf16_ps(__A); +} + +__m128 test_mm_cvtneeph_ps(const __m128h *__A) { + // CHECK-LABEL: @test_mm_cvtneeph_ps( + // CHECK: call <4 x float> @llvm.x86.vcvtneeph2ps128(ptr %{{.*}}) + return _mm_cvtneeph_ps(__A); +} + +__m256 test_mm256_cvtneeph_ps(const __m256h *__A) { + // CHECK-LABEL: @test_mm256_cvtneeph_ps( + // CHECK: call <8 x float> @llvm.x86.vcvtneeph2ps256(ptr %{{.*}}) + return _mm256_cvtneeph_ps(__A); +} + +__m128 test_mm_cvtneobf16_ps(const __m128bh *__A) { + // CHECK-LABEL: @test_mm_cvtneobf16_ps( + // CHECK: call <4 x float> @llvm.x86.vcvtneobf162ps128(ptr %{{.*}}) + return _mm_cvtneobf16_ps(__A); +} + +__m256 test_mm256_cvtneobf16_ps(const __m256bh *__A) { + // CHECK-LABEL: @test_mm256_cvtneobf16_ps( + // CHECK: call <8 x float> @llvm.x86.vcvtneobf162ps256(ptr %{{.*}}) + return _mm256_cvtneobf16_ps(__A); +} + +__m128 test_mm_cvtneoph_ps(const __m128h *__A) { + // CHECK-LABEL: @test_mm_cvtneoph_ps( + // CHECK: call <4 x float> @llvm.x86.vcvtneoph2ps128(ptr %{{.*}}) + return _mm_cvtneoph_ps(__A); +} + +__m256 test_mm256_cvtneoph_ps(const __m256h *__A) { + // CHECK-LABEL: @test_mm256_cvtneoph_ps( + // CHECK: call <8 x float> @llvm.x86.vcvtneoph2ps256(ptr %{{.*}}) + return _mm256_cvtneoph_ps(__A); +} + +__m128bh test_mm_cvtneps_avx_pbh(__m128 __A) { + // CHECK-LABEL: @test_mm_cvtneps_avx_pbh( + // CHECK: call <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %{{.*}}) + 
return _mm_cvtneps_avx_pbh(__A); +} + +__m128bh test_mm256_cvtneps_avx_pbh(__m256 __A) { + // CHECK-LABEL: @test_mm256_cvtneps_avx_pbh( + // CHECK: call <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %{{.*}}) + return _mm256_cvtneps_avx_pbh(__A); +} diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -54,9 +54,9 @@ // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" // CHECK-NOT: tune-cpu -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" -// CHECK: #4 = {{.*}}"target-cpu"="i686" 
"target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" // CHECK-NOT: tune-cpu // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx" diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -337,6 +337,11 @@ // AVX-VNNIINT8: "-target-feature" "+avxvnniint8" // NO-AVX-VNNIINT8: "-target-feature" "-avxvnniint8" +// RUN: %clang --target=i386 -mavxneconvert %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVXNECONVERT %s +// RUN: %clang --target=i386 -mno-avxneconvert %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVXNECONVERT %s +// AVXNECONVERT: "-target-feature" "+avxneconvert" +// NO-AVXNECONVERT: "-target-feature" "-avxneconvert" + // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s // CRC32: "-target-feature" "+crc32" diff --git 
a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -634,6 +634,20 @@ // AVXVNNIINT8NOAVX2-NOT: #define __AVX2__ 1 // AVXVNNIINT8NOAVX2-NOT: #define __AVXVNNIINT8__ 1 +// RUN: %clang -target i386-unknown-unknown -march=atom -mavxneconvert -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXNECONVERT %s + +// AVXNECONVERT: #define __AVX2__ 1 +// AVXNECONVERT: #define __AVXNECONVERT__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mno-avxneconvert -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOAVXNECONVERT %s + +// NOAVXNECONVERT-NOT: #define __AVXNECONVERT__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mavxneconvert -mno-avx2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXNECONVERTNOAVX2 %s + +// AVXNECONVERTNOAVX2-NOT: #define __AVX2__ 1 +// AVXNECONVERTNOAVX2-NOT: #define __AVXNECONVERT__ 1 + // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s // CRC32: #define __CRC32__ 1 diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -138,13 +138,14 @@ Changes to the X86 Backend -------------------------- -* Support ISA of ``AVX-IFMA``. * Add support for the ``RDMSRLIST and WRMSRLIST`` instructions. * Add support for the ``WRMSRNS`` instruction. * Support ISA of ``AMX-FP16`` which contains ``tdpfp16ps`` instruction. * Support ISA of ``CMPCCXADD``. +* Support ISA of ``AVX-IFMA``. * Support ISA of ``AVX-VNNI-INT8``. +* Support ISA of ``AVX-NE-CONVERT``. 
Changes to the OCaml bindings ----------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -5234,6 +5234,34 @@ Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], [ImmArg>, ImmArg>, ImmArg>]>; +def int_x86_vbcstnebf162ps128 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vbcstnebf162ps256 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vbcstnesh2ps128 : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vbcstnesh2ps256 : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneebf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneebf162ps256 : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneeph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneeph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneobf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneobf162ps256 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneoph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneoph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps256">, + 
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneps2bf16128 : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16128">, + Intrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [ IntrNoMem ]>; +def int_x86_vcvtneps2bf16256 : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16256">, + Intrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [ IntrNoMem ]>; } //===----------------------------------------------------------------------===// // RAO-INT intrinsics diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -205,6 +205,7 @@ X86_FEATURE (AVX512FP16, "avx512fp16") X86_FEATURE (AMX_FP16, "amx-fp16") X86_FEATURE (CMPCCXADD, "cmpccxadd") +X86_FEATURE (AVXNECONVERT, "avxneconvert") X86_FEATURE (AVXVNNI, "avxvnni") X86_FEATURE (AVXIFMA, "avxifma") X86_FEATURE (AVXVNNIINT8, "avxvnniint8") diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1813,6 +1813,7 @@ Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave; Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave; + Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave; Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1); bool HasLeafD = MaxLevel >= 0xd && diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -582,11 +582,12 @@ constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesHRESET = {}; -constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2; constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {}; constexpr FeatureBitset ImpliedFeaturesCMPCCXADD = {}; constexpr 
FeatureBitset ImpliedFeaturesRAOINT = {}; +constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2; constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2; +constexpr FeatureBitset ImpliedFeaturesAVXNECONVERT = FeatureAVX2; constexpr FeatureBitset ImpliedFeaturesAVX512FP16 = FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL; // Key Locker Features diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -272,6 +272,9 @@ def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true", "Support RAO-INT instructions", []>; +def FeatureAVXNECONVERT : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true", + "Support AVX-NE-CONVERT instructions", + [FeatureAVX2]>; def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true", "Invalidate Process-Context Identifier">; def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true", diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2178,15 +2178,15 @@ } } - if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) { + if (!Subtarget.useSoftFloat() && + (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) { addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass); addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass); - addRegisterClass(MVT::v32bf16, &X86::VR512RegClass); // We set the type action of bf16 to TypeSoftPromoteHalf, but we don't // provide the method to promote BUILD_VECTOR. Set the operation action // Custom to do the customization later. 
setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom); - for (auto VT : { MVT::v8bf16, MVT::v16bf16, MVT::v32bf16 }) { + for (auto VT : {MVT::v8bf16, MVT::v16bf16}) { setF16Action(VT, Expand); setOperationAction(ISD::FADD, VT, Expand); setOperationAction(ISD::FSUB, VT, Expand); @@ -2197,6 +2197,16 @@ addLegalFPImmediate(APFloat::getZero(APFloat::BFloat())); } + if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) { + addRegisterClass(MVT::v32bf16, &X86::VR512RegClass); + setF16Action(MVT::v32bf16, Expand); + setOperationAction(ISD::FADD, MVT::v32bf16, Expand); + setOperationAction(ISD::FSUB, MVT::v32bf16, Expand); + setOperationAction(ISD::FMUL, MVT::v32bf16, Expand); + setOperationAction(ISD::FDIV, MVT::v32bf16, Expand); + setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom); + } + if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -12947,6 +12947,16 @@ def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), v8bf16x_info.ImmAllZerosV, VK4WM:$mask), (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>; + + def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))), + (VCVTNEPS2BF16Z128rr VR128X:$src)>; + def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))), + (VCVTNEPS2BF16Z128rm addr:$src)>; + + def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))), + (VCVTNEPS2BF16Z256rr VR256X:$src)>; + def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))), + (VCVTNEPS2BF16Z256rm addr:$src)>; } let Constraints = "$src1 = $dst" in { diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -993,6 +993,7 @@ def 
HasENQCMD : Predicate<"Subtarget->hasENQCMD()">; def HasAMXFP16 : Predicate<"Subtarget->hasAMXFP16()">; def HasCMPCCXADD : Predicate<"Subtarget->hasCMPCCXADD()">; +def HasAVXNECONVERT : Predicate<"Subtarget->hasAVXNECONVERT()">; def HasKL : Predicate<"Subtarget->hasKL()">; def HasRAOINT : Predicate<"Subtarget->hasRAOINT()">; def HasWIDEKL : Predicate<"Subtarget->hasWIDEKL()">; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8123,6 +8123,7 @@ X86GF2P8affineqb>, TAPD; } +// AVX-IFMA let Predicates = [HasAVXIFMA, NoVLX_Or_NoIFMA], Constraints = "$src1 = $dst", checkVEXPredicate = 1 in multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> { @@ -8161,6 +8162,7 @@ defm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, VEX_W, ExplicitVEXPrefix; defm VPMADD52LUQ : avx_ifma_rm<0xb4, "vpmadd52luq", x86vpmadd52l>, VEX_W, ExplicitVEXPrefix; +// AVX-VNNI-INT8 let Constraints = "$src1 = $dst" in multiclass avx_dotprod_rm<bits<8> Opc, string OpcodeStr, ValueType OpVT, RegisterClass RC, PatFrag MemOpFrag, @@ -8219,3 +8221,59 @@ i256mem, X86vpdpbsuds, SchedWriteVecIMul.YMM, 0>, VEX_L, T8XS; } + +// AVX-NE-CONVERT +multiclass AVX_NE_CONVERT_BASE<bits<8> Opcode, string OpcodeStr, + X86MemOperand MemOp128, X86MemOperand MemOp256> { + def rm : I<Opcode, MRMSrcMem, (outs VR128:$dst), (ins MemOp128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, + (!cast<Intrinsic>("int_x86_"#OpcodeStr#"128") addr:$src))]>, + Sched<[WriteCvtPH2PS]>, VEX; + def Yrm : I<Opcode, MRMSrcMem, (outs VR256:$dst), (ins MemOp256:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, + (!cast<Intrinsic>("int_x86_"#OpcodeStr#"256") addr:$src))]>, + Sched<[WriteCvtPH2PSY]>, VEX, VEX_L; +} + +multiclass VCVTNEPS2BF16_BASE { + def rr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtneps2bf16\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_vcvtneps2bf16128 VR128:$src))]>, + Sched<[WriteCvtPH2PS]>; + def rm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtneps2bf16{x}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src)))]>, + Sched<[WriteCvtPH2PS]>; + def 
Yrr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtneps2bf16\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_vcvtneps2bf16256 VR256:$src))]>, + Sched<[WriteCvtPH2PSY]>, VEX_L; + def Yrm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "vcvtneps2bf16{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src)))]>, + Sched<[WriteCvtPH2PSY]>, VEX_L; +} + +let Predicates = [HasAVXNECONVERT] in { + defm VBCSTNEBF162PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnebf162ps", f16mem, + f16mem>, T8XS; + defm VBCSTNESH2PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnesh2ps", f16mem, f16mem>, + T8PD; + defm VCVTNEEBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneebf162ps", f128mem, + f256mem>, T8XS; + defm VCVTNEEPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneeph2ps", f128mem, + f256mem>, T8PD; + defm VCVTNEOBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneobf162ps", f128mem, + f256mem>, T8XD; + defm VCVTNEOPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneoph2ps", f128mem, + f256mem>, T8PS; + let checkVEXPredicate = 1 in + defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix; +} + +def : InstAlias<"vcvtneps2bf16x\t{$src, $dst|$dst, $src}", + (VCVTNEPS2BF16rr VR128:$dst, VR128:$src), 0, "att">; +def : InstAlias<"vcvtneps2bf16y\t{$src, $dst|$dst, $src}", + (VCVTNEPS2BF16Yrr VR128:$dst, VR256:$src), 0, "att">; diff --git a/llvm/test/CodeGen/X86/avxneconvert-intrinsics-shared.ll b/llvm/test/CodeGen/X86/avxneconvert-intrinsics-shared.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/avxneconvert-intrinsics-shared.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avxneconvert,+avx512bf16,+avx512vl | FileCheck %s --check-prefix=AVX512BF16-COMMON +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding 
-mattr=+avxneconvert,+avx512bf16,+avx512vl | FileCheck %s --check-prefix=AVX512BF16-COMMON +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512bf16,+avx512vl | FileCheck %s --check-prefix=AVX512BF16 +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx512bf16,+avx512vl | FileCheck %s --check-prefix=AVX512BF16 + +define <8 x bfloat> @test_int_x86_vcvtneps2bf16128(<4 x float> %A) { +; AVX512BF16-COMMON-LABEL: test_int_x86_vcvtneps2bf16128: +; AVX512BF16-COMMON: # %bb.0: +; AVX512BF16-COMMON-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0x72,0xc0] +; AVX512BF16-COMMON-NEXT: # kill: def $xmm1 killed $xmm0 +; AVX512BF16-COMMON-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512BF16-LABEL: test_int_x86_vcvtneps2bf16128: +; AVX512BF16: # %bb.0: +; AVX512BF16-NEXT: vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x72,0xc0] +; AVX512BF16-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %A) + ret <8 x bfloat> %ret +} +declare <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %A) + +define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) { +; AVX512BF16-COMMON-LABEL: test_int_x86_vcvtneps2bf16256: +; AVX512BF16-COMMON: # %bb.0: +; AVX512BF16-COMMON-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0xc4,0xe2,0x7e,0x72,0xc0] +; AVX512BF16-COMMON-NEXT: # kill: def $xmm1 killed $xmm0 +; AVX512BF16-COMMON-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; AVX512BF16-COMMON-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512BF16-LABEL: test_int_x86_vcvtneps2bf16256: +; AVX512BF16: # %bb.0: +; AVX512BF16-NEXT: vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0xc0] +; AVX512BF16-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; AVX512BF16-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A) + ret <8 x bfloat> %ret 
+} +declare <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A) + diff --git a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll @@ -0,0 +1,219 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avxneconvert | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avxneconvert | FileCheck %s --check-prefixes=CHECK,X86 + +define <4 x float> @test_int_x86_vbcstnebf162ps128(i8* %A) { +; X64-LABEL: test_int_x86_vbcstnebf162ps128: +; X64: # %bb.0: +; X64-NEXT: vbcstnebf162ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb1,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vbcstnebf162ps128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vbcstnebf162ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb1,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vbcstnebf162ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vbcstnebf162ps128(i8* %A) + +define <8 x float> @test_int_x86_vbcstnebf162ps256(i8* %A) { +; X64-LABEL: test_int_x86_vbcstnebf162ps256: +; X64: # %bb.0: +; X64-NEXT: vbcstnebf162ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb1,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vbcstnebf162ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vbcstnebf162ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb1,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vbcstnebf162ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vbcstnebf162ps256(i8* %A) + +define <4 x 
float> @test_int_x86_vbcstnesh2ps128(i8* %A) { +; X64-LABEL: test_int_x86_vbcstnesh2ps128: +; X64: # %bb.0: +; X64-NEXT: vbcstnesh2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb1,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vbcstnesh2ps128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vbcstnesh2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb1,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vbcstnesh2ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vbcstnesh2ps128(i8* %A) + +define <8 x float> @test_int_x86_vbcstnesh2ps256(i8* %A) { +; X64-LABEL: test_int_x86_vbcstnesh2ps256: +; X64: # %bb.0: +; X64-NEXT: vbcstnesh2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb1,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vbcstnesh2ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vbcstnesh2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb1,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vbcstnesh2ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vbcstnesh2ps256(i8* %A) + +define <4 x float> @test_int_x86_vcvtneebf162ps128(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneebf162ps128: +; X64: # %bb.0: +; X64-NEXT: vcvtneebf162ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneebf162ps128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneebf162ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vcvtneebf162ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vcvtneebf162ps128(i8* %A) + +define <8 x float> @test_int_x86_vcvtneebf162ps256(i8* %A) { +; X64-LABEL: 
test_int_x86_vcvtneebf162ps256: +; X64: # %bb.0: +; X64-NEXT: vcvtneebf162ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneebf162ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneebf162ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vcvtneebf162ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vcvtneebf162ps256(i8* %A) + +define <4 x float> @test_int_x86_vcvtneeph2ps128(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneeph2ps128: +; X64: # %bb.0: +; X64-NEXT: vcvtneeph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneeph2ps128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneeph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vcvtneeph2ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vcvtneeph2ps128(i8* %A) + +define <8 x float> @test_int_x86_vcvtneeph2ps256(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneeph2ps256: +; X64: # %bb.0: +; X64-NEXT: vcvtneeph2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneeph2ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneeph2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vcvtneeph2ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vcvtneeph2ps256(i8* %A) + +define <4 x float> @test_int_x86_vcvtneobf162ps128(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneobf162ps128: +; X64: # %bb.0: +; X64-NEXT: vcvtneobf162ps 
(%rdi), %xmm0 # encoding: [0xc4,0xe2,0x7b,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneobf162ps128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneobf162ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x7b,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vcvtneobf162ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vcvtneobf162ps128(i8* %A) + +define <8 x float> @test_int_x86_vcvtneobf162ps256(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneobf162ps256: +; X64: # %bb.0: +; X64-NEXT: vcvtneobf162ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7f,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneobf162ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneobf162ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7f,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vcvtneobf162ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vcvtneobf162ps256(i8* %A) + +define <4 x float> @test_int_x86_vcvtneoph2ps128(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneoph2ps128: +; X64: # %bb.0: +; X64-NEXT: vcvtneoph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x78,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneoph2ps128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneoph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x78,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vcvtneoph2ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vcvtneoph2ps128(i8* %A) + +define <8 x float> @test_int_x86_vcvtneoph2ps256(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneoph2ps256: +; X64: # %bb.0: +; X64-NEXT: vcvtneoph2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7c,0xb0,0x07] +; X64-NEXT: retq # 
encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneoph2ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneoph2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7c,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vcvtneoph2ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vcvtneoph2ps256(i8* %A) + +define <8 x bfloat> @test_int_x86_vcvtneps2bf16128(<4 x float> %A) { +; CHECK-LABEL: test_int_x86_vcvtneps2bf16128: +; CHECK: # %bb.0: +; CHECK-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0x72,0xc0] +; CHECK-NEXT: # kill: def $xmm1 killed $xmm0 +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %A) + ret <8 x bfloat> %ret +} +declare <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %A) + +define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) { +; CHECK-LABEL: test_int_x86_vcvtneps2bf16256: +; CHECK: # %bb.0: +; CHECK-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0xc4,0xe2,0x7e,0x72,0xc0] +; CHECK-NEXT: # kill: def $xmm1 killed $xmm0 +; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A) + ret <8 x bfloat> %ret +} +declare <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A) + diff --git a/llvm/test/MC/Disassembler/X86/avx_ne_convert-32.txt b/llvm/test/MC/Disassembler/X86/avx_ne_convert-32.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx_ne_convert-32.txt @@ -0,0 +1,335 @@ +# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vbcstnebf162ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr 
[esp + 8*esi + 268435456] +0xc4,0xe2,0x7a,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vbcstnebf162ps 291(%edi,%eax,4), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7a,0xb1,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vbcstnebf162ps (%eax), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [eax] +0xc4,0xe2,0x7a,0xb1,0x10 + +# ATT: vbcstnebf162ps -64(,%ebp,2), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [2*ebp - 64] +0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 254(%ecx), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [ecx + 254] +0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -256(%edx), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [edx - 256] +0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7e,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vbcstnebf162ps 291(%edi,%eax,4), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7e,0xb1,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vbcstnebf162ps (%eax), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [eax] +0xc4,0xe2,0x7e,0xb1,0x10 + +# ATT: vbcstnebf162ps -64(,%ebp,2), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [2*ebp - 64] +0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 254(%ecx), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [ecx + 254] +0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -256(%edx), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [edx - 256] +0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x79,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vbcstnesh2ps 291(%edi,%eax,4), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [edi + 4*eax + 291] +0xc4,0xe2,0x79,0xb1,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vbcstnesh2ps 
(%eax), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [eax] +0xc4,0xe2,0x79,0xb1,0x10 + +# ATT: vbcstnesh2ps -64(,%ebp,2), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [2*ebp - 64] +0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 254(%ecx), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [ecx + 254] +0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps -256(%edx), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [edx - 256] +0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7d,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vbcstnesh2ps 291(%edi,%eax,4), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7d,0xb1,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vbcstnesh2ps (%eax), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [eax] +0xc4,0xe2,0x7d,0xb1,0x10 + +# ATT: vbcstnesh2ps -64(,%ebp,2), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [2*ebp - 64] +0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 254(%ecx), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [ecx + 254] +0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps -256(%edx), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [edx - 256] +0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vcvtneebf162ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7a,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneebf162ps 291(%edi,%eax,4), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7a,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneebf162ps (%eax), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [eax] +0xc4,0xe2,0x7a,0xb0,0x10 + +# ATT: vcvtneebf162ps -512(,%ebp,2), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneebf162ps 2032(%ecx), %xmm2 
+# INTEL: vcvtneebf162ps xmm2, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneebf162ps -2048(%edx), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [edx - 2048] +0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneebf162ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7e,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneebf162ps 291(%edi,%eax,4), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7e,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneebf162ps (%eax), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [eax] +0xc4,0xe2,0x7e,0xb0,0x10 + +# ATT: vcvtneebf162ps -1024(,%ebp,2), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneebf162ps 4064(%ecx), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneebf162ps -4096(%edx), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [edx - 4096] +0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: vcvtneeph2ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x79,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneeph2ps 291(%edi,%eax,4), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x79,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneeph2ps (%eax), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [eax] +0xc4,0xe2,0x79,0xb0,0x10 + +# ATT: vcvtneeph2ps -512(,%ebp,2), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneeph2ps 2032(%ecx), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneeph2ps -2048(%edx), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [edx - 2048] 
+0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneeph2ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7d,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneeph2ps 291(%edi,%eax,4), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7d,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneeph2ps (%eax), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [eax] +0xc4,0xe2,0x7d,0xb0,0x10 + +# ATT: vcvtneeph2ps -1024(,%ebp,2), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneeph2ps 4064(%ecx), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneeph2ps -4096(%edx), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [edx - 4096] +0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: vcvtneobf162ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7b,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneobf162ps 291(%edi,%eax,4), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7b,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneobf162ps (%eax), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [eax] +0xc4,0xe2,0x7b,0xb0,0x10 + +# ATT: vcvtneobf162ps -512(,%ebp,2), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneobf162ps 2032(%ecx), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneobf162ps -2048(%edx), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [edx - 2048] +0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneobf162ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7f,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# 
ATT: vcvtneobf162ps 291(%edi,%eax,4), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7f,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneobf162ps (%eax), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [eax] +0xc4,0xe2,0x7f,0xb0,0x10 + +# ATT: vcvtneobf162ps -1024(,%ebp,2), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneobf162ps 4064(%ecx), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneobf162ps -4096(%edx), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [edx - 4096] +0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: vcvtneoph2ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x78,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneoph2ps 291(%edi,%eax,4), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x78,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneoph2ps (%eax), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [eax] +0xc4,0xe2,0x78,0xb0,0x10 + +# ATT: vcvtneoph2ps -512(,%ebp,2), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneoph2ps 2032(%ecx), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneoph2ps -2048(%edx), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [edx - 2048] +0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneoph2ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7c,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneoph2ps 291(%edi,%eax,4), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7c,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneoph2ps (%eax), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr 
[eax] +0xc4,0xe2,0x7c,0xb0,0x10 + +# ATT: vcvtneoph2ps -1024(,%ebp,2), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneoph2ps 4064(%ecx), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneoph2ps -4096(%edx), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [edx - 4096] +0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: {vex} vcvtneps2bf16 %xmm3, %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmm3 +0xc4,0xe2,0x7a,0x72,0xd3 + +# ATT: {vex} vcvtneps2bf16 %ymm3, %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymm3 +0xc4,0xe2,0x7e,0x72,0xd3 + +# ATT: {vex} vcvtneps2bf16x 268435456(%esp,%esi,8), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7a,0x72,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: {vex} vcvtneps2bf16x 291(%edi,%eax,4), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7a,0x72,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16x (%eax), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [eax] +0xc4,0xe2,0x7a,0x72,0x10 + +# ATT: {vex} vcvtneps2bf16x -512(,%ebp,2), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: {vex} vcvtneps2bf16x 2032(%ecx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16x -2048(%edx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [edx - 2048] +0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff + +# ATT: {vex} vcvtneps2bf16y -1024(,%ebp,2), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: {vex} vcvtneps2bf16y 4064(%ecx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: {vex} 
vcvtneps2bf16y -4096(%edx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [edx - 4096] +0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff + diff --git a/llvm/test/MC/Disassembler/X86/avx_ne_convert-64.txt b/llvm/test/MC/Disassembler/X86/avx_ne_convert-64.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx_ne_convert-64.txt @@ -0,0 +1,335 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vbcstnebf162ps 268435456(%rbp,%r14,8), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7a,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vbcstnebf162ps 291(%r8,%rax,4), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7a,0xb1,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vbcstnebf162ps (%rip), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [rip] +0xc4,0xe2,0x7a,0xb1,0x15,0x00,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -64(,%rbp,2), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [2*rbp - 64] +0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 254(%rcx), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [rcx + 254] +0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -256(%rdx), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [rdx - 256] +0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7e,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vbcstnebf162ps 291(%r8,%rax,4), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7e,0xb1,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vbcstnebf162ps (%rip), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [rip] +0xc4,0xe2,0x7e,0xb1,0x15,0x00,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -64(,%rbp,2), %ymm2 +# INTEL: 
vbcstnebf162ps ymm2, word ptr [2*rbp - 64] +0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 254(%rcx), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [rcx + 254] +0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -256(%rdx), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [rdx - 256] +0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 268435456(%rbp,%r14,8), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x79,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vbcstnesh2ps 291(%r8,%rax,4), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x79,0xb1,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vbcstnesh2ps (%rip), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [rip] +0xc4,0xe2,0x79,0xb1,0x15,0x00,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps -64(,%rbp,2), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [2*rbp - 64] +0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 254(%rcx), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [rcx + 254] +0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps -256(%rdx), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [rdx - 256] +0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7d,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vbcstnesh2ps 291(%r8,%rax,4), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7d,0xb1,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vbcstnesh2ps (%rip), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [rip] +0xc4,0xe2,0x7d,0xb1,0x15,0x00,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps -64(,%rbp,2), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [2*rbp - 64] +0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 254(%rcx), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [rcx + 254] +0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps 
-256(%rdx), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [rdx - 256] +0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vcvtneebf162ps 268435456(%rbp,%r14,8), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7a,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneebf162ps 291(%r8,%rax,4), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7a,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneebf162ps (%rip), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [rip] +0xc4,0xe2,0x7a,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneebf162ps -512(,%rbp,2), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneebf162ps 2032(%rcx), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneebf162ps -2048(%rdx), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneebf162ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7e,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneebf162ps 291(%r8,%rax,4), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7e,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneebf162ps (%rip), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [rip] +0xc4,0xe2,0x7e,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneebf162ps -1024(,%rbp,2), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneebf162ps 4064(%rcx), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneebf162ps -4096(%rdx), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: vcvtneeph2ps 268435456(%rbp,%r14,8), %xmm2 
+# INTEL: vcvtneeph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x79,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneeph2ps 291(%r8,%rax,4), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x79,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneeph2ps (%rip), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [rip] +0xc4,0xe2,0x79,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneeph2ps -512(,%rbp,2), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneeph2ps 2032(%rcx), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneeph2ps -2048(%rdx), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneeph2ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7d,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneeph2ps 291(%r8,%rax,4), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7d,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneeph2ps (%rip), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [rip] +0xc4,0xe2,0x7d,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneeph2ps -1024(,%rbp,2), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneeph2ps 4064(%rcx), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneeph2ps -4096(%rdx), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: vcvtneobf162ps 268435456(%rbp,%r14,8), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7b,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneobf162ps 291(%r8,%rax,4), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr 
[r8 + 4*rax + 291] +0xc4,0xc2,0x7b,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneobf162ps (%rip), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [rip] +0xc4,0xe2,0x7b,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneobf162ps -512(,%rbp,2), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneobf162ps 2032(%rcx), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneobf162ps -2048(%rdx), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneobf162ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7f,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneobf162ps 291(%r8,%rax,4), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7f,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneobf162ps (%rip), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [rip] +0xc4,0xe2,0x7f,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneobf162ps -1024(,%rbp,2), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneobf162ps 4064(%rcx), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneobf162ps -4096(%rdx), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: vcvtneoph2ps 268435456(%rbp,%r14,8), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x78,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneoph2ps 291(%r8,%rax,4), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x78,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneoph2ps (%rip), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [rip] 
+0xc4,0xe2,0x78,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneoph2ps -512(,%rbp,2), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneoph2ps 2032(%rcx), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneoph2ps -2048(%rdx), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneoph2ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7c,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneoph2ps 291(%r8,%rax,4), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7c,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneoph2ps (%rip), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [rip] +0xc4,0xe2,0x7c,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneoph2ps -1024(,%rbp,2), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneoph2ps 4064(%rcx), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneoph2ps -4096(%rdx), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: {vex} vcvtneps2bf16 %xmm3, %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmm3 +0xc4,0xe2,0x7a,0x72,0xd3 + +# ATT: {vex} vcvtneps2bf16 %ymm3, %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymm3 +0xc4,0xe2,0x7e,0x72,0xd3 + +# ATT: {vex} vcvtneps2bf16x 268435456(%rbp,%r14,8), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7a,0x72,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: {vex} vcvtneps2bf16x 291(%r8,%rax,4), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7a,0x72,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16x (%rip), %xmm2 +# 
INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rip] +0xc4,0xe2,0x7a,0x72,0x15,0x00,0x00,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16x -512(,%rbp,2), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: {vex} vcvtneps2bf16x 2032(%rcx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16x -2048(%rdx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff + +# ATT: {vex} vcvtneps2bf16y -1024(,%rbp,2), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: {vex} vcvtneps2bf16y 4064(%rcx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16y -4096(%rdx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff + diff --git a/llvm/test/MC/X86/avx_ne_convert-32-att.s b/llvm/test/MC/X86/avx_ne_convert-32-att.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/avx_ne_convert-32-att.s @@ -0,0 +1,334 @@ +// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vbcstnebf162ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnebf162ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vbcstnebf162ps 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnebf162ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vbcstnebf162ps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x10] + vbcstnebf162ps (%eax), %xmm2 + +// CHECK: vbcstnebf162ps -64(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps -64(,%ebp,2), %xmm2 + +// CHECK: vbcstnebf162ps 254(%ecx), 
%xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps 254(%ecx), %xmm2 + +// CHECK: vbcstnebf162ps -256(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps -256(%edx), %xmm2 + +// CHECK: vbcstnebf162ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnebf162ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vbcstnebf162ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnebf162ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vbcstnebf162ps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x10] + vbcstnebf162ps (%eax), %ymm2 + +// CHECK: vbcstnebf162ps -64(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps -64(,%ebp,2), %ymm2 + +// CHECK: vbcstnebf162ps 254(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps 254(%ecx), %ymm2 + +// CHECK: vbcstnebf162ps -256(%edx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps -256(%edx), %ymm2 + +// CHECK: vbcstnesh2ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnesh2ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vbcstnesh2ps 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnesh2ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vbcstnesh2ps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x10] + vbcstnesh2ps (%eax), %xmm2 + +// CHECK: vbcstnesh2ps -64(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps -64(,%ebp,2), %xmm2 + +// CHECK: vbcstnesh2ps 254(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps 254(%ecx), %xmm2 + +// CHECK: vbcstnesh2ps -256(%edx), %xmm2 +// CHECK: encoding: 
[0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps -256(%edx), %xmm2 + +// CHECK: vbcstnesh2ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnesh2ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vbcstnesh2ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnesh2ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vbcstnesh2ps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x10] + vbcstnesh2ps (%eax), %ymm2 + +// CHECK: vbcstnesh2ps -64(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps -64(,%ebp,2), %ymm2 + +// CHECK: vbcstnesh2ps 254(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps 254(%ecx), %ymm2 + +// CHECK: vbcstnesh2ps -256(%edx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps -256(%edx), %ymm2 + +// CHECK: vcvtneebf162ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneebf162ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcvtneebf162ps 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneebf162ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vcvtneebf162ps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x10] + vcvtneebf162ps (%eax), %xmm2 + +// CHECK: vcvtneebf162ps -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneebf162ps -512(,%ebp,2), %xmm2 + +// CHECK: vcvtneebf162ps 2032(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneebf162ps 2032(%ecx), %xmm2 + +// CHECK: vcvtneebf162ps -2048(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneebf162ps -2048(%edx), %xmm2 + +// CHECK: vcvtneebf162ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: 
[0xc4,0xe2,0x7e,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneebf162ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vcvtneebf162ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneebf162ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vcvtneebf162ps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x10] + vcvtneebf162ps (%eax), %ymm2 + +// CHECK: vcvtneebf162ps -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneebf162ps -1024(,%ebp,2), %ymm2 + +// CHECK: vcvtneebf162ps 4064(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneebf162ps 4064(%ecx), %ymm2 + +// CHECK: vcvtneebf162ps -4096(%edx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneebf162ps -4096(%edx), %ymm2 + +// CHECK: vcvtneeph2ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneeph2ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcvtneeph2ps 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneeph2ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vcvtneeph2ps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x10] + vcvtneeph2ps (%eax), %xmm2 + +// CHECK: vcvtneeph2ps -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneeph2ps -512(,%ebp,2), %xmm2 + +// CHECK: vcvtneeph2ps 2032(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneeph2ps 2032(%ecx), %xmm2 + +// CHECK: vcvtneeph2ps -2048(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneeph2ps -2048(%edx), %xmm2 + +// CHECK: vcvtneeph2ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneeph2ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vcvtneeph2ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: 
[0xc4,0xe2,0x7d,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneeph2ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vcvtneeph2ps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x10] + vcvtneeph2ps (%eax), %ymm2 + +// CHECK: vcvtneeph2ps -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneeph2ps -1024(,%ebp,2), %ymm2 + +// CHECK: vcvtneeph2ps 4064(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneeph2ps 4064(%ecx), %ymm2 + +// CHECK: vcvtneeph2ps -4096(%edx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneeph2ps -4096(%edx), %ymm2 + +// CHECK: vcvtneobf162ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneobf162ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcvtneobf162ps 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneobf162ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vcvtneobf162ps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x10] + vcvtneobf162ps (%eax), %xmm2 + +// CHECK: vcvtneobf162ps -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneobf162ps -512(,%ebp,2), %xmm2 + +// CHECK: vcvtneobf162ps 2032(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneobf162ps 2032(%ecx), %xmm2 + +// CHECK: vcvtneobf162ps -2048(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneobf162ps -2048(%edx), %xmm2 + +// CHECK: vcvtneobf162ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneobf162ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vcvtneobf162ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneobf162ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vcvtneobf162ps (%eax), %ymm2 +// CHECK: encoding: 
[0xc4,0xe2,0x7f,0xb0,0x10] + vcvtneobf162ps (%eax), %ymm2 + +// CHECK: vcvtneobf162ps -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneobf162ps -1024(,%ebp,2), %ymm2 + +// CHECK: vcvtneobf162ps 4064(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneobf162ps 4064(%ecx), %ymm2 + +// CHECK: vcvtneobf162ps -4096(%edx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneobf162ps -4096(%edx), %ymm2 + +// CHECK: vcvtneoph2ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneoph2ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcvtneoph2ps 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneoph2ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vcvtneoph2ps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x10] + vcvtneoph2ps (%eax), %xmm2 + +// CHECK: vcvtneoph2ps -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneoph2ps -512(,%ebp,2), %xmm2 + +// CHECK: vcvtneoph2ps 2032(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneoph2ps 2032(%ecx), %xmm2 + +// CHECK: vcvtneoph2ps -2048(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneoph2ps -2048(%edx), %xmm2 + +// CHECK: vcvtneoph2ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneoph2ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vcvtneoph2ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneoph2ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vcvtneoph2ps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x10] + vcvtneoph2ps (%eax), %ymm2 + +// CHECK: vcvtneoph2ps -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + 
vcvtneoph2ps -1024(,%ebp,2), %ymm2 + +// CHECK: vcvtneoph2ps 4064(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneoph2ps 4064(%ecx), %ymm2 + +// CHECK: vcvtneoph2ps -4096(%edx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneoph2ps -4096(%edx), %ymm2 + +// CHECK: {vex} vcvtneps2bf16 %xmm3, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0xd3] + {vex} vcvtneps2bf16 %xmm3, %xmm2 + +// CHECK: {vex} vcvtneps2bf16 %ymm3, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0xd3] + {vex} vcvtneps2bf16 %ymm3, %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0xf4,0x00,0x00,0x00,0x10] + {vex} vcvtneps2bf16x 268435456(%esp,%esi,8), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0x87,0x23,0x01,0x00,0x00] + {vex} vcvtneps2bf16x 291(%edi,%eax,4), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x10] + {vex} vcvtneps2bf16x (%eax), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff] + {vex} vcvtneps2bf16x -512(,%ebp,2), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 2032(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00] + {vex} vcvtneps2bf16x 2032(%ecx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x -2048(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff] + {vex} vcvtneps2bf16x -2048(%edx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y -1024(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff] + {vex} vcvtneps2bf16y -1024(,%ebp,2), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y 4064(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00] + {vex} vcvtneps2bf16y 4064(%ecx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y -4096(%edx), %xmm2 +// CHECK: encoding: 
[0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff] + {vex} vcvtneps2bf16y -4096(%edx), %xmm2 + diff --git a/llvm/test/MC/X86/avx_ne_convert-32-intel.s b/llvm/test/MC/X86/avx_ne_convert-32-intel.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/avx_ne_convert-32-intel.s @@ -0,0 +1,334 @@ +// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vbcstnebf162ps xmm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnebf162ps xmm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vbcstnebf162ps xmm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnebf162ps xmm2, word ptr [edi + 4*eax + 291] + +// CHECK: vbcstnebf162ps xmm2, word ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x10] + vbcstnebf162ps xmm2, word ptr [eax] + +// CHECK: vbcstnebf162ps xmm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps xmm2, word ptr [2*ebp - 64] + +// CHECK: vbcstnebf162ps xmm2, word ptr [ecx + 254] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps xmm2, word ptr [ecx + 254] + +// CHECK: vbcstnebf162ps xmm2, word ptr [edx - 256] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps xmm2, word ptr [edx - 256] + +// CHECK: vbcstnebf162ps ymm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnebf162ps ymm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vbcstnebf162ps ymm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnebf162ps ymm2, word ptr [edi + 4*eax + 291] + +// CHECK: vbcstnebf162ps ymm2, word ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x10] + vbcstnebf162ps ymm2, word ptr [eax] + +// CHECK: vbcstnebf162ps 
ymm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps ymm2, word ptr [2*ebp - 64] + +// CHECK: vbcstnebf162ps ymm2, word ptr [ecx + 254] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps ymm2, word ptr [ecx + 254] + +// CHECK: vbcstnebf162ps ymm2, word ptr [edx - 256] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps ymm2, word ptr [edx - 256] + +// CHECK: vbcstnesh2ps xmm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnesh2ps xmm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vbcstnesh2ps xmm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnesh2ps xmm2, word ptr [edi + 4*eax + 291] + +// CHECK: vbcstnesh2ps xmm2, word ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x10] + vbcstnesh2ps xmm2, word ptr [eax] + +// CHECK: vbcstnesh2ps xmm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps xmm2, word ptr [2*ebp - 64] + +// CHECK: vbcstnesh2ps xmm2, word ptr [ecx + 254] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps xmm2, word ptr [ecx + 254] + +// CHECK: vbcstnesh2ps xmm2, word ptr [edx - 256] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps xmm2, word ptr [edx - 256] + +// CHECK: vbcstnesh2ps ymm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnesh2ps ymm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vbcstnesh2ps ymm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnesh2ps ymm2, word ptr [edi + 4*eax + 291] + +// CHECK: vbcstnesh2ps ymm2, word ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x10] + vbcstnesh2ps ymm2, word ptr [eax] + 
+// CHECK: vbcstnesh2ps ymm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps ymm2, word ptr [2*ebp - 64] + +// CHECK: vbcstnesh2ps ymm2, word ptr [ecx + 254] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps ymm2, word ptr [ecx + 254] + +// CHECK: vbcstnesh2ps ymm2, word ptr [edx - 256] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps ymm2, word ptr [edx - 256] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneebf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneebf162ps xmm2, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x10] + vcvtneebf162ps xmm2, xmmword ptr [eax] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneebf162ps xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneebf162ps xmm2, xmmword ptr [ecx + 2032] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneebf162ps xmm2, xmmword ptr [edx - 2048] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneebf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneebf162ps ymm2, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneebf162ps 
ymm2, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x10] + vcvtneebf162ps ymm2, ymmword ptr [eax] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneebf162ps ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneebf162ps ymm2, ymmword ptr [ecx + 4064] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneebf162ps ymm2, ymmword ptr [edx - 4096] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneeph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneeph2ps xmm2, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x10] + vcvtneeph2ps xmm2, xmmword ptr [eax] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneeph2ps xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneeph2ps xmm2, xmmword ptr [ecx + 2032] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneeph2ps xmm2, xmmword ptr [edx - 2048] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneeph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: 
[0xc4,0xe2,0x7d,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneeph2ps ymm2, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x10] + vcvtneeph2ps ymm2, ymmword ptr [eax] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneeph2ps ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneeph2ps ymm2, ymmword ptr [ecx + 4064] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneeph2ps ymm2, ymmword ptr [edx - 4096] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneobf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneobf162ps xmm2, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x10] + vcvtneobf162ps xmm2, xmmword ptr [eax] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneobf162ps xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneobf162ps xmm2, xmmword ptr [ecx + 2032] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneobf162ps xmm2, xmmword ptr [edx - 2048] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneobf162ps 
ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneobf162ps ymm2, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x10] + vcvtneobf162ps ymm2, ymmword ptr [eax] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneobf162ps ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneobf162ps ymm2, ymmword ptr [ecx + 4064] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneobf162ps ymm2, ymmword ptr [edx - 4096] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneoph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneoph2ps xmm2, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x10] + vcvtneoph2ps xmm2, xmmword ptr [eax] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneoph2ps xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneoph2ps xmm2, xmmword ptr [ecx + 2032] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneoph2ps xmm2, xmmword ptr [edx - 2048] + +// CHECK: vcvtneoph2ps ymm2, 
ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneoph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneoph2ps ymm2, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x10] + vcvtneoph2ps ymm2, ymmword ptr [eax] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneoph2ps ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneoph2ps ymm2, ymmword ptr [ecx + 4064] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneoph2ps ymm2, ymmword ptr [edx - 4096] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmm3 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0xd3] + {vex} vcvtneps2bf16 xmm2, xmm3 + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymm3 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0xd3] + {vex} vcvtneps2bf16 xmm2, ymm3 + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0xf4,0x00,0x00,0x00,0x10] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0x87,0x23,0x01,0x00,0x00] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [edi + 4*eax + 291] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x10] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [eax] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff] + {vex} vcvtneps2bf16 
xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [ecx + 2032] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [edx - 2048] + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff] + {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*ebp - 1024] + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00] + {vex} vcvtneps2bf16 xmm2, ymmword ptr [ecx + 4064] + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff] + {vex} vcvtneps2bf16 xmm2, ymmword ptr [edx - 4096] + diff --git a/llvm/test/MC/X86/avx_ne_convert-64-att.s b/llvm/test/MC/X86/avx_ne_convert-64-att.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/avx_ne_convert-64-att.s @@ -0,0 +1,334 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vbcstnebf162ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x7a,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnebf162ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vbcstnebf162ps 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x7a,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnebf162ps 291(%r8,%rax,4), %xmm2 + +// CHECK: vbcstnebf162ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnebf162ps (%rip), %xmm2 + +// CHECK: vbcstnebf162ps -64(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps -64(,%rbp,2), %xmm2 + +// CHECK: vbcstnebf162ps 254(%rcx), %xmm2 +// CHECK: encoding: 
[0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps 254(%rcx), %xmm2 + +// CHECK: vbcstnebf162ps -256(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps -256(%rdx), %xmm2 + +// CHECK: vbcstnebf162ps 268435456(%rbp,%r14,8), %ymm2 +// CHECK: encoding: [0xc4,0xa2,0x7e,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnebf162ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: vbcstnebf162ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7e,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnebf162ps 291(%r8,%rax,4), %ymm2 + +// CHECK: vbcstnebf162ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnebf162ps (%rip), %ymm2 + +// CHECK: vbcstnebf162ps -64(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps -64(,%rbp,2), %ymm2 + +// CHECK: vbcstnebf162ps 254(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps 254(%rcx), %ymm2 + +// CHECK: vbcstnebf162ps -256(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps -256(%rdx), %ymm2 + +// CHECK: vbcstnesh2ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x79,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnesh2ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vbcstnesh2ps 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x79,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnesh2ps 291(%r8,%rax,4), %xmm2 + +// CHECK: vbcstnesh2ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnesh2ps (%rip), %xmm2 + +// CHECK: vbcstnesh2ps -64(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps -64(,%rbp,2), %xmm2 + +// CHECK: vbcstnesh2ps 254(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps 254(%rcx), %xmm2 + +// CHECK: vbcstnesh2ps -256(%rdx), %xmm2 +// CHECK: encoding: 
[0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps -256(%rdx), %xmm2 + +// CHECK: vbcstnesh2ps 268435456(%rbp,%r14,8), %ymm2 +// CHECK: encoding: [0xc4,0xa2,0x7d,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnesh2ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: vbcstnesh2ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7d,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnesh2ps 291(%r8,%rax,4), %ymm2 + +// CHECK: vbcstnesh2ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnesh2ps (%rip), %ymm2 + +// CHECK: vbcstnesh2ps -64(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps -64(,%rbp,2), %ymm2 + +// CHECK: vbcstnesh2ps 254(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps 254(%rcx), %ymm2 + +// CHECK: vbcstnesh2ps -256(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps -256(%rdx), %ymm2 + +// CHECK: vcvtneebf162ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x7a,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneebf162ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vcvtneebf162ps 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x7a,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneebf162ps 291(%r8,%rax,4), %xmm2 + +// CHECK: vcvtneebf162ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneebf162ps (%rip), %xmm2 + +// CHECK: vcvtneebf162ps -512(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneebf162ps -512(,%rbp,2), %xmm2 + +// CHECK: vcvtneebf162ps 2032(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneebf162ps 2032(%rcx), %xmm2 + +// CHECK: vcvtneebf162ps -2048(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneebf162ps -2048(%rdx), %xmm2 + +// CHECK: vcvtneebf162ps 268435456(%rbp,%r14,8), %ymm2 +// 
CHECK: encoding: [0xc4,0xa2,0x7e,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneebf162ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: vcvtneebf162ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7e,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneebf162ps 291(%r8,%rax,4), %ymm2 + +// CHECK: vcvtneebf162ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneebf162ps (%rip), %ymm2 + +// CHECK: vcvtneebf162ps -1024(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneebf162ps -1024(,%rbp,2), %ymm2 + +// CHECK: vcvtneebf162ps 4064(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneebf162ps 4064(%rcx), %ymm2 + +// CHECK: vcvtneebf162ps -4096(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneebf162ps -4096(%rdx), %ymm2 + +// CHECK: vcvtneeph2ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x79,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneeph2ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vcvtneeph2ps 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x79,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneeph2ps 291(%r8,%rax,4), %xmm2 + +// CHECK: vcvtneeph2ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneeph2ps (%rip), %xmm2 + +// CHECK: vcvtneeph2ps -512(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneeph2ps -512(,%rbp,2), %xmm2 + +// CHECK: vcvtneeph2ps 2032(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneeph2ps 2032(%rcx), %xmm2 + +// CHECK: vcvtneeph2ps -2048(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneeph2ps -2048(%rdx), %xmm2 + +// CHECK: vcvtneeph2ps 268435456(%rbp,%r14,8), %ymm2 +// CHECK: encoding: [0xc4,0xa2,0x7d,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneeph2ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: 
vcvtneeph2ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7d,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneeph2ps 291(%r8,%rax,4), %ymm2 + +// CHECK: vcvtneeph2ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneeph2ps (%rip), %ymm2 + +// CHECK: vcvtneeph2ps -1024(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneeph2ps -1024(,%rbp,2), %ymm2 + +// CHECK: vcvtneeph2ps 4064(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneeph2ps 4064(%rcx), %ymm2 + +// CHECK: vcvtneeph2ps -4096(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneeph2ps -4096(%rdx), %ymm2 + +// CHECK: vcvtneobf162ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x7b,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneobf162ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vcvtneobf162ps 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x7b,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneobf162ps 291(%r8,%rax,4), %xmm2 + +// CHECK: vcvtneobf162ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneobf162ps (%rip), %xmm2 + +// CHECK: vcvtneobf162ps -512(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneobf162ps -512(,%rbp,2), %xmm2 + +// CHECK: vcvtneobf162ps 2032(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneobf162ps 2032(%rcx), %xmm2 + +// CHECK: vcvtneobf162ps -2048(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneobf162ps -2048(%rdx), %xmm2 + +// CHECK: vcvtneobf162ps 268435456(%rbp,%r14,8), %ymm2 +// CHECK: encoding: [0xc4,0xa2,0x7f,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneobf162ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: vcvtneobf162ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7f,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneobf162ps 
291(%r8,%rax,4), %ymm2 + +// CHECK: vcvtneobf162ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneobf162ps (%rip), %ymm2 + +// CHECK: vcvtneobf162ps -1024(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneobf162ps -1024(,%rbp,2), %ymm2 + +// CHECK: vcvtneobf162ps 4064(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneobf162ps 4064(%rcx), %ymm2 + +// CHECK: vcvtneobf162ps -4096(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneobf162ps -4096(%rdx), %ymm2 + +// CHECK: vcvtneoph2ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x78,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneoph2ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vcvtneoph2ps 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x78,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneoph2ps 291(%r8,%rax,4), %xmm2 + +// CHECK: vcvtneoph2ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneoph2ps (%rip), %xmm2 + +// CHECK: vcvtneoph2ps -512(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneoph2ps -512(,%rbp,2), %xmm2 + +// CHECK: vcvtneoph2ps 2032(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneoph2ps 2032(%rcx), %xmm2 + +// CHECK: vcvtneoph2ps -2048(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneoph2ps -2048(%rdx), %xmm2 + +// CHECK: vcvtneoph2ps 268435456(%rbp,%r14,8), %ymm2 +// CHECK: encoding: [0xc4,0xa2,0x7c,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneoph2ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: vcvtneoph2ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7c,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneoph2ps 291(%r8,%rax,4), %ymm2 + +// CHECK: vcvtneoph2ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x15,0x00,0x00,0x00,0x00] + 
vcvtneoph2ps (%rip), %ymm2 + +// CHECK: vcvtneoph2ps -1024(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneoph2ps -1024(,%rbp,2), %ymm2 + +// CHECK: vcvtneoph2ps 4064(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneoph2ps 4064(%rcx), %ymm2 + +// CHECK: vcvtneoph2ps -4096(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneoph2ps -4096(%rdx), %ymm2 + +// CHECK: {vex} vcvtneps2bf16 %xmm3, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0xd3] + {vex} vcvtneps2bf16 %xmm3, %xmm2 + +// CHECK: {vex} vcvtneps2bf16 %ymm3, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0xd3] + {vex} vcvtneps2bf16 %ymm3, %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x7a,0x72,0x94,0xf5,0x00,0x00,0x00,0x10] + {vex} vcvtneps2bf16x 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x7a,0x72,0x94,0x80,0x23,0x01,0x00,0x00] + {vex} vcvtneps2bf16x 291(%r8,%rax,4), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x15,0x00,0x00,0x00,0x00] + {vex} vcvtneps2bf16x (%rip), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x -512(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff] + {vex} vcvtneps2bf16x -512(,%rbp,2), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 2032(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00] + {vex} vcvtneps2bf16x 2032(%rcx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x -2048(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff] + {vex} vcvtneps2bf16x -2048(%rdx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y -1024(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff] + {vex} vcvtneps2bf16y -1024(,%rbp,2), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y 4064(%rcx), %xmm2 +// CHECK: encoding: 
[0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00] + {vex} vcvtneps2bf16y 4064(%rcx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y -4096(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff] + {vex} vcvtneps2bf16y -4096(%rdx), %xmm2 + diff --git a/llvm/test/MC/X86/avx_ne_convert-64-intel.s b/llvm/test/MC/X86/avx_ne_convert-64-intel.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/avx_ne_convert-64-intel.s @@ -0,0 +1,334 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vbcstnebf162ps xmm2, word ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7a,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnebf162ps xmm2, word ptr [rbp + 8*r14 + 268435456] + +// CHECK: vbcstnebf162ps xmm2, word ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x7a,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnebf162ps xmm2, word ptr [r8 + 4*rax + 291] + +// CHECK: vbcstnebf162ps xmm2, word ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnebf162ps xmm2, word ptr [rip] + +// CHECK: vbcstnebf162ps xmm2, word ptr [2*rbp - 64] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps xmm2, word ptr [2*rbp - 64] + +// CHECK: vbcstnebf162ps xmm2, word ptr [rcx + 254] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps xmm2, word ptr [rcx + 254] + +// CHECK: vbcstnebf162ps xmm2, word ptr [rdx - 256] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps xmm2, word ptr [rdx - 256] + +// CHECK: vbcstnebf162ps ymm2, word ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7e,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnebf162ps ymm2, word ptr [rbp + 8*r14 + 268435456] + +// CHECK: vbcstnebf162ps ymm2, word ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x7e,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnebf162ps ymm2, word ptr 
[r8 + 4*rax + 291] + +// CHECK: vbcstnebf162ps ymm2, word ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnebf162ps ymm2, word ptr [rip] + +// CHECK: vbcstnebf162ps ymm2, word ptr [2*rbp - 64] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps ymm2, word ptr [2*rbp - 64] + +// CHECK: vbcstnebf162ps ymm2, word ptr [rcx + 254] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps ymm2, word ptr [rcx + 254] + +// CHECK: vbcstnebf162ps ymm2, word ptr [rdx - 256] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps ymm2, word ptr [rdx - 256] + +// CHECK: vbcstnesh2ps xmm2, word ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x79,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnesh2ps xmm2, word ptr [rbp + 8*r14 + 268435456] + +// CHECK: vbcstnesh2ps xmm2, word ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x79,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnesh2ps xmm2, word ptr [r8 + 4*rax + 291] + +// CHECK: vbcstnesh2ps xmm2, word ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnesh2ps xmm2, word ptr [rip] + +// CHECK: vbcstnesh2ps xmm2, word ptr [2*rbp - 64] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps xmm2, word ptr [2*rbp - 64] + +// CHECK: vbcstnesh2ps xmm2, word ptr [rcx + 254] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps xmm2, word ptr [rcx + 254] + +// CHECK: vbcstnesh2ps xmm2, word ptr [rdx - 256] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps xmm2, word ptr [rdx - 256] + +// CHECK: vbcstnesh2ps ymm2, word ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7d,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnesh2ps ymm2, word ptr [rbp + 8*r14 + 268435456] + +// CHECK: vbcstnesh2ps ymm2, word ptr [r8 + 4*rax + 291] +// CHECK: encoding: 
[0xc4,0xc2,0x7d,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnesh2ps ymm2, word ptr [r8 + 4*rax + 291] + +// CHECK: vbcstnesh2ps ymm2, word ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnesh2ps ymm2, word ptr [rip] + +// CHECK: vbcstnesh2ps ymm2, word ptr [2*rbp - 64] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps ymm2, word ptr [2*rbp - 64] + +// CHECK: vbcstnesh2ps ymm2, word ptr [rcx + 254] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps ymm2, word ptr [rcx + 254] + +// CHECK: vbcstnesh2ps ymm2, word ptr [rdx - 256] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps ymm2, word ptr [rdx - 256] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7a,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneebf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x7a,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneebf162ps xmm2, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneebf162ps xmm2, xmmword ptr [rip] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneebf162ps xmm2, xmmword ptr [2*rbp - 512] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneebf162ps xmm2, xmmword ptr [rcx + 2032] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneebf162ps xmm2, xmmword ptr [rdx - 2048] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7e,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + 
vcvtneebf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x7e,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneebf162ps ymm2, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneebf162ps ymm2, ymmword ptr [rip] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneebf162ps ymm2, ymmword ptr [2*rbp - 1024] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneebf162ps ymm2, ymmword ptr [rcx + 4064] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneebf162ps ymm2, ymmword ptr [rdx - 4096] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x79,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneeph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x79,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneeph2ps xmm2, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneeph2ps xmm2, xmmword ptr [rip] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneeph2ps xmm2, xmmword ptr [2*rbp - 512] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneeph2ps xmm2, xmmword ptr [rcx + 2032] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneeph2ps xmm2, xmmword ptr 
[rdx - 2048] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7d,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneeph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x7d,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneeph2ps ymm2, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneeph2ps ymm2, ymmword ptr [rip] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneeph2ps ymm2, ymmword ptr [2*rbp - 1024] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneeph2ps ymm2, ymmword ptr [rcx + 4064] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneeph2ps ymm2, ymmword ptr [rdx - 4096] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7b,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneobf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x7b,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneobf162ps xmm2, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneobf162ps xmm2, xmmword ptr [rip] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneobf162ps xmm2, xmmword ptr [2*rbp - 512] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneobf162ps xmm2, xmmword ptr [rcx + 2032] + 
+// CHECK: vcvtneobf162ps xmm2, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneobf162ps xmm2, xmmword ptr [rdx - 2048] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7f,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneobf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x7f,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneobf162ps ymm2, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneobf162ps ymm2, ymmword ptr [rip] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneobf162ps ymm2, ymmword ptr [2*rbp - 1024] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneobf162ps ymm2, ymmword ptr [rcx + 4064] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneobf162ps ymm2, ymmword ptr [rdx - 4096] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x78,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneoph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x78,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneoph2ps xmm2, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneoph2ps xmm2, xmmword ptr [rip] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneoph2ps xmm2, xmmword ptr [2*rbp - 512] + +// 
CHECK: vcvtneoph2ps xmm2, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneoph2ps xmm2, xmmword ptr [rcx + 2032] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneoph2ps xmm2, xmmword ptr [rdx - 2048] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7c,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneoph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x7c,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneoph2ps ymm2, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneoph2ps ymm2, ymmword ptr [rip] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneoph2ps ymm2, ymmword ptr [2*rbp - 1024] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneoph2ps ymm2, ymmword ptr [rcx + 4064] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneoph2ps ymm2, ymmword ptr [rdx - 4096] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmm3 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0xd3] + {vex} vcvtneps2bf16 xmm2, xmm3 + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymm3 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0xd3] + {vex} vcvtneps2bf16 xmm2, ymm3 + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7a,0x72,0x94,0xf5,0x00,0x00,0x00,0x10] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: 
[0xc4,0xc2,0x7a,0x72,0x94,0x80,0x23,0x01,0x00,0x00] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rip] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x15,0x00,0x00,0x00,0x00] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [rip] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*rbp - 512] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [rcx + 2032] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [rdx - 2048] + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff] + {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*rbp - 1024] + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00] + {vex} vcvtneps2bf16 xmm2, ymmword ptr [rcx + 4064] + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff] + {vex} vcvtneps2bf16 xmm2, ymmword ptr [rdx - 4096] +