diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -550,6 +550,7 @@ -------------------- - Support ``-mindirect-branch-cs-prefix`` for call and jmp to indirect thunk. - Fix 32-bit ``__fastcall`` and ``__vectorcall`` ABI mismatch with MSVC. +- Support ISA of ``AVX-NE-CONVERT``. DWARF Support in Clang ---------------------- diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -2091,6 +2091,20 @@ TARGET_HEADER_BUILTIN(__readgsdword, "UNiUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__readgsqword, "ULLiUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps128, "V4fUsC*", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vbcstnebf162ps256, "V8fUsC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vbcstnesh2ps128, "V4fxC*", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vbcstnesh2ps256, "V8fxC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneebf162ps128, "V4fV8sC*", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneebf162ps256, "V8fV16sC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneeph2ps128, "V4fV8xC*", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneeph2ps256, "V8fV16xC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneobf162ps128, "V4fV8sC*", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneobf162ps256, "V8fV16sC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneoph2ps128, "V4fV8xC*", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneoph2ps256, "V8fV16xC*", "nV:256:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16128, "V8sV4f", "nV:128:", "avxneconvert") +TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16256, 
"V8sV8f", "nV:256:", "avxneconvert") TARGET_HEADER_BUILTIN(_InterlockedAnd64, "WiWiD*Wi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedDecrement64, "WiWiD*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_InterlockedExchange64, "WiWiD*Wi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4582,6 +4582,8 @@ def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group; def mavx512vp2intersect : Flag<["-"], "mavx512vp2intersect">, Group; def mno_avx512vp2intersect : Flag<["-"], "mno-avx512vp2intersect">, Group; +def mavxneconvert : Flag<["-"], "mavxneconvert">, Group; +def mno_avxneconvert : Flag<["-"], "mno-avxneconvert">, Group; def mavxvnni : Flag<["-"], "mavxvnni">, Group; def mno_avxvnni : Flag<["-"], "mno-avxvnni">, Group; def madx : Flag<["-"], "madx">, Group; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -135,6 +135,7 @@ bool HasPTWRITE = false; bool HasINVPCID = false; bool HasENQCMD = false; + bool HasAVXNECONVERT = false; bool HasKL = false; // For key locker bool HasWIDEKL = false; // For wide key locker bool HasHRESET = false; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -330,6 +330,8 @@ HasAMXINT8 = true; } else if (Feature == "+amx-tile") { HasAMXTILE = true; + } else if (Feature == "+avxneconvert") { + HasAVXNECONVERT= true; } else if (Feature == "+avxvnni") { HasAVXVNNI = true; } else if (Feature == "+serialize") { @@ -774,6 +776,9 @@ Builder.defineMacro("__AMXINT8__"); if (HasAMXBF16) Builder.defineMacro("__AMXBF16__"); + if (HasAVXNECONVERT) + Builder.defineMacro("__AVXNECONVERT__"); + 
Builder.defineMacro("__AVXNECONVERT_SUPPORTED__"); if (HasAVXVNNI) Builder.defineMacro("__AVXVNNI__"); if (HasSERIALIZE) @@ -963,6 +968,7 @@ .Case("xsavec", true) .Case("xsaves", true) .Case("xsaveopt", true) + .Case("avxneconvert", true) .Default(false); } @@ -992,6 +998,7 @@ .Case("avx512vbmi2", HasAVX512VBMI2) .Case("avx512ifma", HasAVX512IFMA) .Case("avx512vp2intersect", HasAVX512VP2INTERSECT) + .Case("avxneconvert", HasAVXNECONVERT) .Case("bmi", HasBMI) .Case("bmi2", HasBMI2) .Case("cldemote", HasCLDEMOTE) diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -142,6 +142,7 @@ avx512vpopcntdqintrin.h avx512vpopcntdqvlintrin.h avxintrin.h + avxneconvertintrin.h avxvnniintrin.h bmi2intrin.h bmiintrin.h diff --git a/clang/lib/Headers/avxneconvertintrin.h b/clang/lib/Headers/avxneconvertintrin.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/avxneconvertintrin.h @@ -0,0 +1,99 @@ +/*===-------------- avxneconvertintrin.h - AVXNECONVERT --------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error \ + "Never use <avxneconvertintrin.h> directly; include <immintrin.h> instead." +#endif // __IMMINTRIN_H + +#ifndef __AVXNECONVERTINTRIN_H +#define __AVXNECONVERTINTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avxneconvert"), \ + __min_vector_width__(256))) + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_bcstnebf16_ps(const void *__A) { + return (__m128)__builtin_ia32_vbcstnebf162ps128((const void *)__A); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_bcstnebf16_ps(const void *__A) { + return (__m256)__builtin_ia32_vbcstnebf162ps256((const void *)__A); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_bcstnesh_ps(const void *__A) { + return (__m128)__builtin_ia32_vbcstnesh2ps128((const void *)__A); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_bcstnesh_ps(const void *__A) { + return (__m256)__builtin_ia32_vbcstnesh2ps256((const void *)__A); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_cvtneebf16_ps(const __m128bh *__A) { + return (__m128)__builtin_ia32_vcvtneebf162ps128((const __v8hi *)__A); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_cvtneebf16_ps(const __m256bh *__A) { + return (__m256)__builtin_ia32_vcvtneebf162ps256((const __v16hi *)__A); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_cvtneeph_ps(const __m128h *__A) { + return (__m128)__builtin_ia32_vcvtneeph2ps128((const __v8hf *)__A); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_cvtneeph_ps(const __m256h *__A) { + return (__m256)__builtin_ia32_vcvtneeph2ps256((const __v16hf *)__A); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_cvtneobf16_ps(const __m128bh *__A) { + return (__m128)__builtin_ia32_vcvtneobf162ps128((const __v8hi *)__A); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_cvtneobf16_ps(const __m256bh *__A) { + return (__m256)__builtin_ia32_vcvtneobf162ps256((const __v16hi *)__A); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 
+_mm_cvtneoph_ps(const __m128h *__A) { + return (__m128)__builtin_ia32_vcvtneoph2ps128((const __v8hf *)__A); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_cvtneoph_ps(const __m256h *__A) { + return (__m256)__builtin_ia32_vcvtneoph2ps256((const __v16hf *)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_cvtneps_avx_pbh(__m128 __A) { + return (__m128bh)__builtin_ia32_vcvtneps2bf16128((__v4sf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS256 +_mm256_cvtneps_avx_pbh(__m256 __A) { + return (__m128bh)__builtin_ia32_vcvtneps2bf16256((__v8sf)__A); +} + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif // __AVXNECONVERTINTRIN_H diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -204,6 +204,9 @@ #define bit_AVX512BF16 0x00000020 #define bit_HRESET 0x00400000 +/* Features in %edx for leaf 7 sub-leaf 1 */ +#define bit_AVXNECONVERT 0x00000020 + /* Features in %eax for leaf 13 sub-leaf 1 */ #define bit_XSAVEOPT 0x00000001 #define bit_XSAVEC 0x00000002 diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -254,6 +254,11 @@ #include #endif +/* FIXME: Change these when _Float16 type is supported */ +#if defined(__AVXNECONVERT__) && defined(__AVX512FP16__) +#include <avxneconvertintrin.h> +#endif + #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__RDPID__) /// Returns the value of the IA32_TSC_AUX MSR (0xc0000103). 
diff --git a/clang/test/CodeGen/X86/avxneconvert-builtins.c b/clang/test/CodeGen/X86/avxneconvert-builtins.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/X86/avxneconvert-builtins.c @@ -0,0 +1,89 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx2 -target-feature +avxneconvert \ +// RUN: -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s + +#include +#include + +__m128 test_mm_bcstnebf16_ps(const void *__A) { + // CHECK-LABEL: @test_mm_bcstnebf16_ps( + // CHECK: call <4 x float> @llvm.x86.vbcstnebf162ps128(ptr %{{.*}}) + return _mm_bcstnebf16_ps(__A); +} + +__m256 test_mm256_bcstnebf16_ps(const void *__A) { + // CHECK-LABEL: @test_mm256_bcstnebf16_ps( + // CHECK: call <8 x float> @llvm.x86.vbcstnebf162ps256(ptr %{{.*}}) + return _mm256_bcstnebf16_ps(__A); +} + +__m128 test_mm_bcstnesh_ps(const void *__A) { + // CHECK-LABEL: @test_mm_bcstnesh_ps( + // CHECK: call <4 x float> @llvm.x86.vbcstnesh2ps128(ptr %{{.*}}) + return _mm_bcstnesh_ps(__A); +} + +__m256 test_mm256_bcstnesh_ps(const void *__A) { + // CHECK-LABEL: @test_mm256_bcstnesh_ps( + // CHECK: call <8 x float> @llvm.x86.vbcstnesh2ps256(ptr %{{.*}}) + return _mm256_bcstnesh_ps(__A); +} + +__m128 test_mm_cvtneebf16_ps(const __m128bh *__A) { + // CHECK-LABEL: @test_mm_cvtneebf16_ps( + // CHECK: call <4 x float> @llvm.x86.vcvtneebf162ps128(ptr %{{.*}}) + return _mm_cvtneebf16_ps(__A); +} + +__m256 test_mm256_cvtneebf16_ps(const __m256bh *__A) { + // CHECK-LABEL: @test_mm256_cvtneebf16_ps( + // CHECK: call <8 x float> @llvm.x86.vcvtneebf162ps256(ptr %{{.*}}) + return _mm256_cvtneebf16_ps(__A); +} + +__m128 test_mm_cvtneeph_ps(const __m128h *__A) { + // CHECK-LABEL: @test_mm_cvtneeph_ps( + // CHECK: call <4 x float> @llvm.x86.vcvtneeph2ps128(ptr %{{.*}}) + return _mm_cvtneeph_ps(__A); +} + +__m256 test_mm256_cvtneeph_ps(const __m256h *__A) { + // CHECK-LABEL: @test_mm256_cvtneeph_ps( + // CHECK: 
call <8 x float> @llvm.x86.vcvtneeph2ps256(ptr %{{.*}}) + return _mm256_cvtneeph_ps(__A); +} + +__m128 test_mm_cvtneobf16_ps(const __m128bh *__A) { + // CHECK-LABEL: @test_mm_cvtneobf16_ps( + // CHECK: call <4 x float> @llvm.x86.vcvtneobf162ps128(ptr %{{.*}}) + return _mm_cvtneobf16_ps(__A); +} + +__m256 test_mm256_cvtneobf16_ps(const __m256bh *__A) { + // CHECK-LABEL: @test_mm256_cvtneobf16_ps( + // CHECK: call <8 x float> @llvm.x86.vcvtneobf162ps256(ptr %{{.*}}) + return _mm256_cvtneobf16_ps(__A); +} + +__m128 test_mm_cvtneoph_ps(const __m128h *__A) { + // CHECK-LABEL: @test_mm_cvtneoph_ps( + // CHECK: call <4 x float> @llvm.x86.vcvtneoph2ps128(ptr %{{.*}}) + return _mm_cvtneoph_ps(__A); +} + +__m256 test_mm256_cvtneoph_ps(const __m256h *__A) { + // CHECK-LABEL: @test_mm256_cvtneoph_ps( + // CHECK: call <8 x float> @llvm.x86.vcvtneoph2ps256(ptr %{{.*}}) + return _mm256_cvtneoph_ps(__A); +} + +__m128bh test_mm_cvtneps_avx_pbh(__m128 __A) { + // CHECK-LABEL: @test_mm_cvtneps_avx_pbh( + // CHECK: call <8 x i16> @llvm.x86.vcvtneps2bf16128(<4 x float> %{{.*}}) + return _mm_cvtneps_avx_pbh(__A); +} + +__m128bh test_mm256_cvtneps_avx_pbh(__m256 __A) { + // CHECK-LABEL: @test_mm256_cvtneps_avx_pbh( + // CHECK: call <8 x i16> @llvm.x86.vcvtneps2bf16256(<8 x float> %{{.*}}) + return _mm256_cvtneps_avx_pbh(__A); +} diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -54,9 +54,9 @@ // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" // CHECK-NOT: tune-cpu -// CHECK: #2 = {{.*}}"target-cpu"="i686" 
"target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxneconvert,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxneconvert,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" // CHECK-NOT: tune-cpu // CHECK: #6 = {{.*}}"target-cpu"="i686" 
"target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx" diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -305,6 +305,13 @@ // AVX512FP16: "-target-feature" "+avx512fp16" // NO-AVX512FP16: "-target-feature" "-avx512fp16" +// RUN: %clang -target i686-unknown-linux-gnu -mavxneconvert %s \ +// RUN: -### -o %t.o 2>&1 | FileCheck -check-prefix=AVXNECONVERT %s +// RUN: %clang -target i686-unknown-linux-gnu -mno-avxneconvert %s \ +// RUN: -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVXNECONVERT %s +// AVXNECONVERT: "-target-feature" "+avxneconvert" +// NO-AVXNECONVERT: "-target-feature" "-avxneconvert" + // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s // CRC32: "-target-feature" "+crc32" diff --git a/clang/test/Preprocessor/predefined-arch-macros-x86.c b/clang/test/Preprocessor/predefined-arch-macros-x86.c --- a/clang/test/Preprocessor/predefined-arch-macros-x86.c +++ b/clang/test/Preprocessor/predefined-arch-macros-x86.c @@ -32,7 +32,7 @@ // RUN: FileCheck %s --check-prefix=X86_64_V3 < %t.txt // X86_64_V3: #define __AVX2__ 1 -// X86_64_V3-NEXT: #define __AVX__ 1 +// X86_64_V3: #define __AVX__ 1 // X86_64_V3: #define __BMI2__ 1 // X86_64_V3-NEXT: #define __BMI__ 1 // X86_64_V3: #define __F16C__ 1 diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -576,6 +576,15 @@ // AVX512FP16NOAVX512BW-NOT: #define __AVX512BW__ 1 // AVX512FP16NOAVX512BW-NOT: #define __AVX512FP16__ 1 +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavxneconvert -x c \ +// RUN: -E -dM -o - %s | FileCheck -check-prefix=AVXNECONVERT %s +// 
AVXNECONVERT: #define __AVXNECONVERT__ 1 +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mno-avxneconvert -x c \ +// RUN: -E -dM -o - %s | FileCheck -check-prefix=NO-AVXNECONVERT %s +// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mavxneconvert -mno-avx2 \ +// RUN: -x c -E -dM -o - %s | FileCheck -check-prefix=NO-AVXNECONVERT %s +// NO-AVXNECONVERT-NOT: #define __AVXNECONVERT__ 1 + // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512dq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512DQ %s // AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1 diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -130,6 +130,7 @@ Changes to the X86 Backend -------------------------- +* Support ISA of ``AVX-NE-CONVERT``. Changes to the OCaml bindings ----------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -5115,6 +5115,36 @@ Intrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>; } +let TargetPrefix = "x86" in { +def int_x86_vbcstnebf162ps128 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vbcstnebf162ps256 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vbcstnesh2ps128 : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vbcstnesh2ps256 : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneebf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneebf162ps256 : 
ClangBuiltin<"__builtin_ia32_vcvtneebf162ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneeph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneeph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneobf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneobf162ps256 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneoph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneoph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +def int_x86_vcvtneps2bf16128 : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty], []>; +def int_x86_vcvtneps2bf16256 : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16256">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty], []>; +} //===----------------------------------------------------------------------===// // UINTR - User Level Interrupt diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -201,6 +201,7 @@ X86_FEATURE (XSAVES, "xsaves") X86_FEATURE (HRESET, "hreset") X86_FEATURE (AVX512FP16, "avx512fp16") +X86_FEATURE (AVXNECONVERT, "avxneconvert") X86_FEATURE (AVXVNNI, "avxvnni") // These features aren't really CPU features, but the frontend can set them. 
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk") diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1809,6 +1809,8 @@ Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); + Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave; + bool HasLeafD = MaxLevel >= 0xd && !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -581,6 +581,7 @@ constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesHRESET = {}; +static constexpr FeatureBitset ImpliedFeaturesAVXNECONVERT = FeatureAVX2; static constexpr FeatureBitset ImpliedFeaturesAVX512FP16 = FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL; // Key Locker Features diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -254,6 +254,9 @@ def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true", "Support AMX-BF16 instructions", [FeatureAMXTILE]>; +def FeatureAVXNECONVERT : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true", + "Support AVX-NE-CONVERT instructions", + [FeatureAVX2]>; def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true", "Invalidate Process-Context Identifier">; def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true", diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -979,6 +979,7 @@ def HasCX16 : Predicate<"Subtarget->hasCX16()">; def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">; def HasENQCMD : 
Predicate<"Subtarget->hasENQCMD()">; +def HasAVXNECONVERT : Predicate<"Subtarget->hasAVXNECONVERT()">; def HasKL : Predicate<"Subtarget->hasKL()">; def HasWIDEKL : Predicate<"Subtarget->hasWIDEKL()">; def HasHRESET : Predicate<"Subtarget->hasHRESET()">; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8115,3 +8115,57 @@ X86GF2P8affineqb>, TAPD; } +multiclass AVX_NE_CONVERT_BASE Opcode, string OpcodeStr, + X86MemOperand MemOp128, X86MemOperand MemOp256> { + def rm : I("int_x86_"#OpcodeStr#"128") addr:$src))]>, + Sched<[WriteCvtPH2PS]>, VEX; + def Yrm : I("int_x86_"#OpcodeStr#"256") addr:$src))]>, + Sched<[WriteCvtPH2PSY]>, VEX, VEX_L; +} + +multiclass VCVTNEPS2BF16_BASE { + def rr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtneps2bf16\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_vcvtneps2bf16128 VR128:$src))]>, + Sched<[WriteCvtPH2PS]>; + def rm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtneps2bf16{x}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src)))]>, + Sched<[WriteCvtPH2PS]>; + def Yrr : I<0x72, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtneps2bf16\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_vcvtneps2bf16256 VR256:$src))]>, + Sched<[WriteCvtPH2PSY]>, VEX_L; + def Yrm : I<0x72, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "vcvtneps2bf16{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src)))]>, + Sched<[WriteCvtPH2PSY]>, VEX_L; +} + +let Predicates = [HasAVXNECONVERT] in { + defm VBCSTNEBF162PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnebf162ps", i16mem, + i16mem>, T8XS; + defm VBCSTNESH2PS : AVX_NE_CONVERT_BASE<0xb1, "vbcstnesh2ps", f16mem, f16mem>, + T8PD; + defm VCVTNEEBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneebf162ps", i128mem, + i256mem>, T8XS; + defm VCVTNEEPH2PS : 
AVX_NE_CONVERT_BASE<0xb0, "vcvtneeph2ps", f128mem, + f256mem>, T8PD; + defm VCVTNEOBF162PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneobf162ps", i128mem, + i256mem>, T8XD; + defm VCVTNEOPH2PS : AVX_NE_CONVERT_BASE<0xb0, "vcvtneoph2ps", f128mem, + f256mem>, T8PS; + let checkVEXPredicate = 1 in + defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix; +} + +def : InstAlias<"vcvtneps2bf16x\t{$src, $dst|$dst, $src}", + (VCVTNEPS2BF16rr VR128:$dst, VR128:$src), 0, "att">; +def : InstAlias<"vcvtneps2bf16y\t{$src, $dst|$dst, $src}", + (VCVTNEPS2BF16Yrr VR128:$dst, VR256:$src), 0, "att">; diff --git a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll @@ -0,0 +1,228 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512fp16,+avxneconvert | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+avx512fp16,+avxneconvert | FileCheck %s --check-prefixes=X86 + +define <4 x float> @test_int_x86_vbcstnebf162ps128(i8* %A) { +; X64-LABEL: test_int_x86_vbcstnebf162ps128: +; X64: # %bb.0: +; X64-NEXT: vbcstnebf162ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb1,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vbcstnebf162ps128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vbcstnebf162ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb1,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vbcstnebf162ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vbcstnebf162ps128(i8* %A) + +define <8 x float> @test_int_x86_vbcstnebf162ps256(i8* %A) { +; X64-LABEL: test_int_x86_vbcstnebf162ps256: +; X64: # %bb.0: +; 
X64-NEXT: vbcstnebf162ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb1,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vbcstnebf162ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vbcstnebf162ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb1,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vbcstnebf162ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vbcstnebf162ps256(i8* %A) + +define <4 x float> @test_int_x86_vbcstnesh2ps128(i8* %A) { +; X64-LABEL: test_int_x86_vbcstnesh2ps128: +; X64: # %bb.0: +; X64-NEXT: vbcstnesh2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb1,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vbcstnesh2ps128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vbcstnesh2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb1,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vbcstnesh2ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vbcstnesh2ps128(i8* %A) + +define <8 x float> @test_int_x86_vbcstnesh2ps256(i8* %A) { +; X64-LABEL: test_int_x86_vbcstnesh2ps256: +; X64: # %bb.0: +; X64-NEXT: vbcstnesh2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb1,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vbcstnesh2ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vbcstnesh2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb1,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vbcstnesh2ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vbcstnesh2ps256(i8* %A) + +define <4 x float> @test_int_x86_vcvtneebf162ps128(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneebf162ps128: +; X64: # %bb.0: +; X64-NEXT: vcvtneebf162ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb0,0x07] +; 
X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneebf162ps128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneebf162ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x7a,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vcvtneebf162ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vcvtneebf162ps128(i8* %A) + +define <8 x float> @test_int_x86_vcvtneebf162ps256(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneebf162ps256: +; X64: # %bb.0: +; X64-NEXT: vcvtneebf162ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneebf162ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneebf162ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7e,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vcvtneebf162ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vcvtneebf162ps256(i8* %A) + +define <4 x float> @test_int_x86_vcvtneeph2ps128(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneeph2ps128: +; X64: # %bb.0: +; X64-NEXT: vcvtneeph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneeph2ps128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneeph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vcvtneeph2ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vcvtneeph2ps128(i8* %A) + +define <8 x float> @test_int_x86_vcvtneeph2ps256(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneeph2ps256: +; X64: # %bb.0: +; X64-NEXT: vcvtneeph2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: 
test_int_x86_vcvtneeph2ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneeph2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vcvtneeph2ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vcvtneeph2ps256(i8* %A) + +define <4 x float> @test_int_x86_vcvtneobf162ps128(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneobf162ps128: +; X64: # %bb.0: +; X64-NEXT: vcvtneobf162ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x7b,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneobf162ps128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneobf162ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x7b,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vcvtneobf162ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vcvtneobf162ps128(i8* %A) + +define <8 x float> @test_int_x86_vcvtneobf162ps256(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneobf162ps256: +; X64: # %bb.0: +; X64-NEXT: vcvtneobf162ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7f,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneobf162ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneobf162ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7f,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vcvtneobf162ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vcvtneobf162ps256(i8* %A) + +define <4 x float> @test_int_x86_vcvtneoph2ps128(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneoph2ps128: +; X64: # %bb.0: +; X64-NEXT: vcvtneoph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x78,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneoph2ps128: +; X86: # %bb.0: +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneoph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x78,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <4 x float> @llvm.x86.vcvtneoph2ps128(i8* %A) + ret <4 x float> %ret +} +declare <4 x float> @llvm.x86.vcvtneoph2ps128(i8* %A) + +define <8 x float> @test_int_x86_vcvtneoph2ps256(i8* %A) { +; X64-LABEL: test_int_x86_vcvtneoph2ps256: +; X64: # %bb.0: +; X64-NEXT: vcvtneoph2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7c,0xb0,0x07] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneoph2ps256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vcvtneoph2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7c,0xb0,0x00] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x float> @llvm.x86.vcvtneoph2ps256(i8* %A) + ret <8 x float> %ret +} +declare <8 x float> @llvm.x86.vcvtneoph2ps256(i8* %A) + +define <8 x i16> @test_int_x86_vcvtneps2bf16128(<4 x float> %A) { +; X64-LABEL: test_int_x86_vcvtneps2bf16128: +; X64: # %bb.0: +; X64-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0x72,0xc0] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneps2bf16128: +; X86: # %bb.0: +; X86-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0x72,0xc0] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x i16> @llvm.x86.vcvtneps2bf16128(<4 x float> %A) + ret <8 x i16> %ret +} +declare <8 x i16> @llvm.x86.vcvtneps2bf16128(<4 x float> %A) + +define <8 x i16> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) { +; X64-LABEL: test_int_x86_vcvtneps2bf16256: +; X64: # %bb.0: +; X64-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0xc4,0xe2,0x7e,0x72,0xc0] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: test_int_x86_vcvtneps2bf16256: +; X86: # %bb.0: +; X86-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0xc4,0xe2,0x7e,0x72,0xc0] 
+; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] + %ret = call <8 x i16> @llvm.x86.vcvtneps2bf16256(<8 x float> %A) + ret <8 x i16> %ret +} +declare <8 x i16> @llvm.x86.vcvtneps2bf16256(<8 x float> %A) + diff --git a/llvm/test/MC/Disassembler/X86/avx-ne-convert.txt b/llvm/test/MC/Disassembler/X86/avx-ne-convert.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx-ne-convert.txt @@ -0,0 +1,335 @@ +# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vbcstnebf162ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7a,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vbcstnebf162ps 291(%edi,%eax,4), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7a,0xb1,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vbcstnebf162ps (%eax), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [eax] +0xc4,0xe2,0x7a,0xb1,0x10 + +# ATT: vbcstnebf162ps -64(,%ebp,2), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [2*ebp - 64] +0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 254(%ecx), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [ecx + 254] +0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -256(%edx), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [edx - 256] +0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7e,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vbcstnebf162ps 291(%edi,%eax,4), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7e,0xb1,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vbcstnebf162ps (%eax), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [eax] 
+0xc4,0xe2,0x7e,0xb1,0x10 + +# ATT: vbcstnebf162ps -64(,%ebp,2), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [2*ebp - 64] +0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 254(%ecx), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [ecx + 254] +0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -256(%edx), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [edx - 256] +0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x79,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vbcstnesh2ps 291(%edi,%eax,4), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [edi + 4*eax + 291] +0xc4,0xe2,0x79,0xb1,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vbcstnesh2ps (%eax), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [eax] +0xc4,0xe2,0x79,0xb1,0x10 + +# ATT: vbcstnesh2ps -64(,%ebp,2), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [2*ebp - 64] +0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 254(%ecx), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [ecx + 254] +0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps -256(%edx), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [edx - 256] +0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7d,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vbcstnesh2ps 291(%edi,%eax,4), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7d,0xb1,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vbcstnesh2ps (%eax), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [eax] +0xc4,0xe2,0x7d,0xb1,0x10 + +# ATT: vbcstnesh2ps -64(,%ebp,2), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [2*ebp - 64] +0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 254(%ecx), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [ecx + 254] 
+0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps -256(%edx), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [edx - 256] +0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vcvtneebf162ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7a,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneebf162ps 291(%edi,%eax,4), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7a,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneebf162ps (%eax), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [eax] +0xc4,0xe2,0x7a,0xb0,0x10 + +# ATT: vcvtneebf162ps -512(,%ebp,2), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneebf162ps 2032(%ecx), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneebf162ps -2048(%edx), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [edx - 2048] +0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneebf162ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7e,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneebf162ps 291(%edi,%eax,4), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7e,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneebf162ps (%eax), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [eax] +0xc4,0xe2,0x7e,0xb0,0x10 + +# ATT: vcvtneebf162ps -1024(,%ebp,2), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneebf162ps 4064(%ecx), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneebf162ps -4096(%edx), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [edx - 4096] +0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: 
vcvtneeph2ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x79,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneeph2ps 291(%edi,%eax,4), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x79,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneeph2ps (%eax), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [eax] +0xc4,0xe2,0x79,0xb0,0x10 + +# ATT: vcvtneeph2ps -512(,%ebp,2), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneeph2ps 2032(%ecx), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneeph2ps -2048(%edx), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [edx - 2048] +0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneeph2ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7d,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneeph2ps 291(%edi,%eax,4), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7d,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneeph2ps (%eax), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [eax] +0xc4,0xe2,0x7d,0xb0,0x10 + +# ATT: vcvtneeph2ps -1024(,%ebp,2), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneeph2ps 4064(%ecx), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneeph2ps -4096(%edx), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [edx - 4096] +0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: vcvtneobf162ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7b,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneobf162ps 291(%edi,%eax,4), %xmm2 +# INTEL: vcvtneobf162ps xmm2, 
xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7b,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneobf162ps (%eax), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [eax] +0xc4,0xe2,0x7b,0xb0,0x10 + +# ATT: vcvtneobf162ps -512(,%ebp,2), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneobf162ps 2032(%ecx), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneobf162ps -2048(%edx), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [edx - 2048] +0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneobf162ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7f,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneobf162ps 291(%edi,%eax,4), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7f,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneobf162ps (%eax), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [eax] +0xc4,0xe2,0x7f,0xb0,0x10 + +# ATT: vcvtneobf162ps -1024(,%ebp,2), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneobf162ps 4064(%ecx), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneobf162ps -4096(%edx), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [edx - 4096] +0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: vcvtneoph2ps 268435456(%esp,%esi,8), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x78,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneoph2ps 291(%edi,%eax,4), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x78,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneoph2ps (%eax), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [eax] +0xc4,0xe2,0x78,0xb0,0x10 + +# ATT: 
vcvtneoph2ps -512(,%ebp,2), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneoph2ps 2032(%ecx), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneoph2ps -2048(%edx), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [edx - 2048] +0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneoph2ps 268435456(%esp,%esi,8), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7c,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vcvtneoph2ps 291(%edi,%eax,4), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7c,0xb0,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vcvtneoph2ps (%eax), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [eax] +0xc4,0xe2,0x7c,0xb0,0x10 + +# ATT: vcvtneoph2ps -1024(,%ebp,2), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneoph2ps 4064(%ecx), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneoph2ps -4096(%edx), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [edx - 4096] +0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: {vex} vcvtneps2bf16 %xmm3, %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmm3 +0xc4,0xe2,0x7a,0x72,0xd3 + +# ATT: {vex} vcvtneps2bf16 %ymm3, %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymm3 +0xc4,0xe2,0x7e,0x72,0xd3 + +# ATT: {vex} vcvtneps2bf16x 268435456(%esp,%esi,8), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +0xc4,0xe2,0x7a,0x72,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: {vex} vcvtneps2bf16x 291(%edi,%eax,4), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [edi + 4*eax + 291] +0xc4,0xe2,0x7a,0x72,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16x (%eax), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [eax] 
+0xc4,0xe2,0x7a,0x72,0x10 + +# ATT: {vex} vcvtneps2bf16x -512(,%ebp,2), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*ebp - 512] +0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: {vex} vcvtneps2bf16x 2032(%ecx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [ecx + 2032] +0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16x -2048(%edx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [edx - 2048] +0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff + +# ATT: {vex} vcvtneps2bf16y -1024(,%ebp,2), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*ebp - 1024] +0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: {vex} vcvtneps2bf16y 4064(%ecx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [ecx + 4064] +0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16y -4096(%edx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [edx - 4096] +0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff + diff --git a/llvm/test/MC/Disassembler/X86/x86-64-avx-ne-convert.txt b/llvm/test/MC/Disassembler/X86/x86-64-avx-ne-convert.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/x86-64-avx-ne-convert.txt @@ -0,0 +1,335 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vbcstnebf162ps 268435456(%rbp,%r14,8), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7a,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vbcstnebf162ps 291(%r8,%rax,4), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7a,0xb1,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vbcstnebf162ps (%rip), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [rip] +0xc4,0xe2,0x7a,0xb1,0x15,0x00,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -64(,%rbp,2), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [2*rbp - 
64] +0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 254(%rcx), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [rcx + 254] +0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -256(%rdx), %xmm2 +# INTEL: vbcstnebf162ps xmm2, word ptr [rdx - 256] +0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7e,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vbcstnebf162ps 291(%r8,%rax,4), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7e,0xb1,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vbcstnebf162ps (%rip), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [rip] +0xc4,0xe2,0x7e,0xb1,0x15,0x00,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -64(,%rbp,2), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [2*rbp - 64] +0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnebf162ps 254(%rcx), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [rcx + 254] +0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnebf162ps -256(%rdx), %ymm2 +# INTEL: vbcstnebf162ps ymm2, word ptr [rdx - 256] +0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 268435456(%rbp,%r14,8), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x79,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vbcstnesh2ps 291(%r8,%rax,4), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x79,0xb1,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vbcstnesh2ps (%rip), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [rip] +0xc4,0xe2,0x79,0xb1,0x15,0x00,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps -64(,%rbp,2), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [2*rbp - 64] +0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 254(%rcx), %xmm2 +# INTEL: vbcstnesh2ps xmm2, word ptr [rcx + 254] +0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps -256(%rdx), %xmm2 
+# INTEL: vbcstnesh2ps xmm2, word ptr [rdx - 256] +0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7d,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vbcstnesh2ps 291(%r8,%rax,4), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7d,0xb1,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vbcstnesh2ps (%rip), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [rip] +0xc4,0xe2,0x7d,0xb1,0x15,0x00,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps -64(,%rbp,2), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [2*rbp - 64] +0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff + +# ATT: vbcstnesh2ps 254(%rcx), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [rcx + 254] +0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00 + +# ATT: vbcstnesh2ps -256(%rdx), %ymm2 +# INTEL: vbcstnesh2ps ymm2, word ptr [rdx - 256] +0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff + +# ATT: vcvtneebf162ps 268435456(%rbp,%r14,8), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7a,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneebf162ps 291(%r8,%rax,4), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7a,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneebf162ps (%rip), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [rip] +0xc4,0xe2,0x7a,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneebf162ps -512(,%rbp,2), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneebf162ps 2032(%rcx), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneebf162ps -2048(%rdx), %xmm2 +# INTEL: vcvtneebf162ps xmm2, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneebf162ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [rbp + 8*r14 + 
268435456] +0xc4,0xa2,0x7e,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneebf162ps 291(%r8,%rax,4), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7e,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneebf162ps (%rip), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [rip] +0xc4,0xe2,0x7e,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneebf162ps -1024(,%rbp,2), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneebf162ps 4064(%rcx), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneebf162ps -4096(%rdx), %ymm2 +# INTEL: vcvtneebf162ps ymm2, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: vcvtneeph2ps 268435456(%rbp,%r14,8), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x79,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneeph2ps 291(%r8,%rax,4), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x79,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneeph2ps (%rip), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [rip] +0xc4,0xe2,0x79,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneeph2ps -512(,%rbp,2), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneeph2ps 2032(%rcx), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneeph2ps -2048(%rdx), %xmm2 +# INTEL: vcvtneeph2ps xmm2, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneeph2ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7d,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneeph2ps 291(%r8,%rax,4), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [r8 + 4*rax + 291] 
+0xc4,0xc2,0x7d,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneeph2ps (%rip), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [rip] +0xc4,0xe2,0x7d,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneeph2ps -1024(,%rbp,2), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneeph2ps 4064(%rcx), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneeph2ps -4096(%rdx), %ymm2 +# INTEL: vcvtneeph2ps ymm2, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: vcvtneobf162ps 268435456(%rbp,%r14,8), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7b,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneobf162ps 291(%r8,%rax,4), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7b,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneobf162ps (%rip), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [rip] +0xc4,0xe2,0x7b,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneobf162ps -512(,%rbp,2), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneobf162ps 2032(%rcx), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneobf162ps -2048(%rdx), %xmm2 +# INTEL: vcvtneobf162ps xmm2, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneobf162ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7f,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneobf162ps 291(%r8,%rax,4), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7f,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneobf162ps (%rip), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [rip] 
+0xc4,0xe2,0x7f,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneobf162ps -1024(,%rbp,2), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vcvtneobf162ps 4064(%rcx), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneobf162ps -4096(%rdx), %ymm2 +# INTEL: vcvtneobf162ps ymm2, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: vcvtneoph2ps 268435456(%rbp,%r14,8), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x78,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneoph2ps 291(%r8,%rax,4), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x78,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneoph2ps (%rip), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [rip] +0xc4,0xe2,0x78,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneoph2ps -512(,%rbp,2), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vcvtneoph2ps 2032(%rcx), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00 + +# ATT: vcvtneoph2ps -2048(%rdx), %xmm2 +# INTEL: vcvtneoph2ps xmm2, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff + +# ATT: vcvtneoph2ps 268435456(%rbp,%r14,8), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7c,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vcvtneoph2ps 291(%r8,%rax,4), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7c,0xb0,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: vcvtneoph2ps (%rip), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [rip] +0xc4,0xe2,0x7c,0xb0,0x15,0x00,0x00,0x00,0x00 + +# ATT: vcvtneoph2ps -1024(,%rbp,2), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff + +# 
ATT: vcvtneoph2ps 4064(%rcx), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: vcvtneoph2ps -4096(%rdx), %ymm2 +# INTEL: vcvtneoph2ps ymm2, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff + +# ATT: {vex} vcvtneps2bf16 %xmm3, %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmm3 +0xc4,0xe2,0x7a,0x72,0xd3 + +# ATT: {vex} vcvtneps2bf16 %ymm3, %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymm3 +0xc4,0xe2,0x7e,0x72,0xd3 + +# ATT: {vex} vcvtneps2bf16x 268435456(%rbp,%r14,8), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7a,0x72,0x94,0xf5,0x00,0x00,0x00,0x10 + +# ATT: {vex} vcvtneps2bf16x 291(%r8,%rax,4), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [r8 + 4*rax + 291] +0xc4,0xc2,0x7a,0x72,0x94,0x80,0x23,0x01,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16x (%rip), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rip] +0xc4,0xe2,0x7a,0x72,0x15,0x00,0x00,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16x -512(,%rbp,2), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*rbp - 512] +0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: {vex} vcvtneps2bf16x 2032(%rcx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rcx + 2032] +0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16x -2048(%rdx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, xmmword ptr [rdx - 2048] +0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff + +# ATT: {vex} vcvtneps2bf16y -1024(,%rbp,2), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*rbp - 1024] +0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: {vex} vcvtneps2bf16y 4064(%rcx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [rcx + 4064] +0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00 + +# ATT: {vex} vcvtneps2bf16y -4096(%rdx), %xmm2 +# INTEL: {vex} vcvtneps2bf16 xmm2, ymmword ptr [rdx - 4096] +0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff + diff --git 
a/llvm/test/MC/X86/avx-ne-convert-att.s b/llvm/test/MC/X86/avx-ne-convert-att.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/avx-ne-convert-att.s @@ -0,0 +1,334 @@ +// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vbcstnebf162ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnebf162ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vbcstnebf162ps 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnebf162ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vbcstnebf162ps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x10] + vbcstnebf162ps (%eax), %xmm2 + +// CHECK: vbcstnebf162ps -64(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps -64(,%ebp,2), %xmm2 + +// CHECK: vbcstnebf162ps 254(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps 254(%ecx), %xmm2 + +// CHECK: vbcstnebf162ps -256(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps -256(%edx), %xmm2 + +// CHECK: vbcstnebf162ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnebf162ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vbcstnebf162ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnebf162ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vbcstnebf162ps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x10] + vbcstnebf162ps (%eax), %ymm2 + +// CHECK: vbcstnebf162ps -64(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps -64(,%ebp,2), %ymm2 + +// CHECK: vbcstnebf162ps 254(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps 254(%ecx), %ymm2 + +// CHECK: vbcstnebf162ps -256(%edx), %ymm2 +// 
CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps -256(%edx), %ymm2 + +// CHECK: vbcstnesh2ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnesh2ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vbcstnesh2ps 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnesh2ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vbcstnesh2ps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x10] + vbcstnesh2ps (%eax), %xmm2 + +// CHECK: vbcstnesh2ps -64(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps -64(,%ebp,2), %xmm2 + +// CHECK: vbcstnesh2ps 254(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps 254(%ecx), %xmm2 + +// CHECK: vbcstnesh2ps -256(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps -256(%edx), %xmm2 + +// CHECK: vbcstnesh2ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnesh2ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vbcstnesh2ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnesh2ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vbcstnesh2ps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x10] + vbcstnesh2ps (%eax), %ymm2 + +// CHECK: vbcstnesh2ps -64(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps -64(,%ebp,2), %ymm2 + +// CHECK: vbcstnesh2ps 254(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps 254(%ecx), %ymm2 + +// CHECK: vbcstnesh2ps -256(%edx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps -256(%edx), %ymm2 + +// CHECK: vcvtneebf162ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: 
[0xc4,0xe2,0x7a,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneebf162ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcvtneebf162ps 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneebf162ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vcvtneebf162ps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x10] + vcvtneebf162ps (%eax), %xmm2 + +// CHECK: vcvtneebf162ps -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneebf162ps -512(,%ebp,2), %xmm2 + +// CHECK: vcvtneebf162ps 2032(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneebf162ps 2032(%ecx), %xmm2 + +// CHECK: vcvtneebf162ps -2048(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneebf162ps -2048(%edx), %xmm2 + +// CHECK: vcvtneebf162ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneebf162ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vcvtneebf162ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneebf162ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vcvtneebf162ps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x10] + vcvtneebf162ps (%eax), %ymm2 + +// CHECK: vcvtneebf162ps -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneebf162ps -1024(,%ebp,2), %ymm2 + +// CHECK: vcvtneebf162ps 4064(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneebf162ps 4064(%ecx), %ymm2 + +// CHECK: vcvtneebf162ps -4096(%edx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneebf162ps -4096(%edx), %ymm2 + +// CHECK: vcvtneeph2ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneeph2ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcvtneeph2ps 291(%edi,%eax,4), %xmm2 
+// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneeph2ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vcvtneeph2ps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x10] + vcvtneeph2ps (%eax), %xmm2 + +// CHECK: vcvtneeph2ps -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneeph2ps -512(,%ebp,2), %xmm2 + +// CHECK: vcvtneeph2ps 2032(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneeph2ps 2032(%ecx), %xmm2 + +// CHECK: vcvtneeph2ps -2048(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneeph2ps -2048(%edx), %xmm2 + +// CHECK: vcvtneeph2ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneeph2ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vcvtneeph2ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneeph2ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vcvtneeph2ps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x10] + vcvtneeph2ps (%eax), %ymm2 + +// CHECK: vcvtneeph2ps -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneeph2ps -1024(,%ebp,2), %ymm2 + +// CHECK: vcvtneeph2ps 4064(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneeph2ps 4064(%ecx), %ymm2 + +// CHECK: vcvtneeph2ps -4096(%edx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneeph2ps -4096(%edx), %ymm2 + +// CHECK: vcvtneobf162ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneobf162ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcvtneobf162ps 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneobf162ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vcvtneobf162ps (%eax), %xmm2 +// CHECK: encoding: 
[0xc4,0xe2,0x7b,0xb0,0x10] + vcvtneobf162ps (%eax), %xmm2 + +// CHECK: vcvtneobf162ps -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneobf162ps -512(,%ebp,2), %xmm2 + +// CHECK: vcvtneobf162ps 2032(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneobf162ps 2032(%ecx), %xmm2 + +// CHECK: vcvtneobf162ps -2048(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneobf162ps -2048(%edx), %xmm2 + +// CHECK: vcvtneobf162ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneobf162ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vcvtneobf162ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneobf162ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vcvtneobf162ps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x10] + vcvtneobf162ps (%eax), %ymm2 + +// CHECK: vcvtneobf162ps -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneobf162ps -1024(,%ebp,2), %ymm2 + +// CHECK: vcvtneobf162ps 4064(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneobf162ps 4064(%ecx), %ymm2 + +// CHECK: vcvtneobf162ps -4096(%edx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneobf162ps -4096(%edx), %ymm2 + +// CHECK: vcvtneoph2ps 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneoph2ps 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vcvtneoph2ps 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneoph2ps 291(%edi,%eax,4), %xmm2 + +// CHECK: vcvtneoph2ps (%eax), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x10] + vcvtneoph2ps (%eax), %xmm2 + +// CHECK: vcvtneoph2ps -512(,%ebp,2), %xmm2 +// CHECK: encoding: 
[0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneoph2ps -512(,%ebp,2), %xmm2 + +// CHECK: vcvtneoph2ps 2032(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneoph2ps 2032(%ecx), %xmm2 + +// CHECK: vcvtneoph2ps -2048(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneoph2ps -2048(%edx), %xmm2 + +// CHECK: vcvtneoph2ps 268435456(%esp,%esi,8), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneoph2ps 268435456(%esp,%esi,8), %ymm2 + +// CHECK: vcvtneoph2ps 291(%edi,%eax,4), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneoph2ps 291(%edi,%eax,4), %ymm2 + +// CHECK: vcvtneoph2ps (%eax), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x10] + vcvtneoph2ps (%eax), %ymm2 + +// CHECK: vcvtneoph2ps -1024(,%ebp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneoph2ps -1024(,%ebp,2), %ymm2 + +// CHECK: vcvtneoph2ps 4064(%ecx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneoph2ps 4064(%ecx), %ymm2 + +// CHECK: vcvtneoph2ps -4096(%edx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneoph2ps -4096(%edx), %ymm2 + +// CHECK: {vex} vcvtneps2bf16 %xmm3, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0xd3] + {vex} vcvtneps2bf16 %xmm3, %xmm2 + +// CHECK: {vex} vcvtneps2bf16 %ymm3, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0xd3] + {vex} vcvtneps2bf16 %ymm3, %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0xf4,0x00,0x00,0x00,0x10] + {vex} vcvtneps2bf16x 268435456(%esp,%esi,8), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0x87,0x23,0x01,0x00,0x00] + {vex} vcvtneps2bf16x 291(%edi,%eax,4), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x (%eax), %xmm2 +// CHECK: encoding: 
[0xc4,0xe2,0x7a,0x72,0x10] + {vex} vcvtneps2bf16x (%eax), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x -512(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff] + {vex} vcvtneps2bf16x -512(,%ebp,2), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 2032(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00] + {vex} vcvtneps2bf16x 2032(%ecx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x -2048(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff] + {vex} vcvtneps2bf16x -2048(%edx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y -1024(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff] + {vex} vcvtneps2bf16y -1024(,%ebp,2), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y 4064(%ecx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00] + {vex} vcvtneps2bf16y 4064(%ecx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y -4096(%edx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff] + {vex} vcvtneps2bf16y -4096(%edx), %xmm2 + diff --git a/llvm/test/MC/X86/avx-ne-convert-intel.s b/llvm/test/MC/X86/avx-ne-convert-intel.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/avx-ne-convert-intel.s @@ -0,0 +1,334 @@ +// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vbcstnebf162ps xmm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnebf162ps xmm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vbcstnebf162ps xmm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnebf162ps xmm2, word ptr [edi + 4*eax + 291] + +// CHECK: vbcstnebf162ps xmm2, word ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x10] + vbcstnebf162ps xmm2, word ptr [eax] + +// CHECK: vbcstnebf162ps xmm2, word ptr [2*ebp - 64] +// CHECK: encoding: 
[0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps xmm2, word ptr [2*ebp - 64] + +// CHECK: vbcstnebf162ps xmm2, word ptr [ecx + 254] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps xmm2, word ptr [ecx + 254] + +// CHECK: vbcstnebf162ps xmm2, word ptr [edx - 256] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps xmm2, word ptr [edx - 256] + +// CHECK: vbcstnebf162ps ymm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnebf162ps ymm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vbcstnebf162ps ymm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnebf162ps ymm2, word ptr [edi + 4*eax + 291] + +// CHECK: vbcstnebf162ps ymm2, word ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x10] + vbcstnebf162ps ymm2, word ptr [eax] + +// CHECK: vbcstnebf162ps ymm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps ymm2, word ptr [2*ebp - 64] + +// CHECK: vbcstnebf162ps ymm2, word ptr [ecx + 254] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps ymm2, word ptr [ecx + 254] + +// CHECK: vbcstnebf162ps ymm2, word ptr [edx - 256] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps ymm2, word ptr [edx - 256] + +// CHECK: vbcstnesh2ps xmm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnesh2ps xmm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vbcstnesh2ps xmm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnesh2ps xmm2, word ptr [edi + 4*eax + 291] + +// CHECK: vbcstnesh2ps xmm2, word ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x10] + vbcstnesh2ps xmm2, word ptr [eax] + +// CHECK: vbcstnesh2ps 
xmm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps xmm2, word ptr [2*ebp - 64] + +// CHECK: vbcstnesh2ps xmm2, word ptr [ecx + 254] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps xmm2, word ptr [ecx + 254] + +// CHECK: vbcstnesh2ps xmm2, word ptr [edx - 256] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps xmm2, word ptr [edx - 256] + +// CHECK: vbcstnesh2ps ymm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0xf4,0x00,0x00,0x00,0x10] + vbcstnesh2ps ymm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vbcstnesh2ps ymm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x94,0x87,0x23,0x01,0x00,0x00] + vbcstnesh2ps ymm2, word ptr [edi + 4*eax + 291] + +// CHECK: vbcstnesh2ps ymm2, word ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x10] + vbcstnesh2ps ymm2, word ptr [eax] + +// CHECK: vbcstnesh2ps ymm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps ymm2, word ptr [2*ebp - 64] + +// CHECK: vbcstnesh2ps ymm2, word ptr [ecx + 254] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps ymm2, word ptr [ecx + 254] + +// CHECK: vbcstnesh2ps ymm2, word ptr [edx - 256] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps ymm2, word ptr [edx - 256] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneebf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneebf162ps xmm2, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x10] + vcvtneebf162ps xmm2, 
xmmword ptr [eax] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneebf162ps xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneebf162ps xmm2, xmmword ptr [ecx + 2032] + +// CHECK: vcvtneebf162ps xmm2, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneebf162ps xmm2, xmmword ptr [edx - 2048] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneebf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneebf162ps ymm2, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x10] + vcvtneebf162ps ymm2, ymmword ptr [eax] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneebf162ps ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneebf162ps ymm2, ymmword ptr [ecx + 4064] + +// CHECK: vcvtneebf162ps ymm2, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneebf162ps ymm2, ymmword ptr [edx - 4096] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneeph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneeph2ps xmm2, xmmword ptr 
[edi + 4*eax + 291] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x10] + vcvtneeph2ps xmm2, xmmword ptr [eax] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneeph2ps xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneeph2ps xmm2, xmmword ptr [ecx + 2032] + +// CHECK: vcvtneeph2ps xmm2, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneeph2ps xmm2, xmmword ptr [edx - 2048] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneeph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneeph2ps ymm2, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x10] + vcvtneeph2ps ymm2, ymmword ptr [eax] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneeph2ps ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneeph2ps ymm2, ymmword ptr [ecx + 4064] + +// CHECK: vcvtneeph2ps ymm2, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneeph2ps ymm2, ymmword ptr [edx - 4096] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneobf162ps xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [edi + 4*eax + 291] 
+// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneobf162ps xmm2, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x10] + vcvtneobf162ps xmm2, xmmword ptr [eax] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneobf162ps xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneobf162ps xmm2, xmmword ptr [ecx + 2032] + +// CHECK: vcvtneobf162ps xmm2, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneobf162ps xmm2, xmmword ptr [edx - 2048] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneobf162ps ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneobf162ps ymm2, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x10] + vcvtneobf162ps ymm2, ymmword ptr [eax] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneobf162ps ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneobf162ps ymm2, ymmword ptr [ecx + 4064] + +// CHECK: vcvtneobf162ps ymm2, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneobf162ps ymm2, ymmword ptr [edx - 4096] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: 
[0xc4,0xe2,0x78,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneoph2ps xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneoph2ps xmm2, xmmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x10] + vcvtneoph2ps xmm2, xmmword ptr [eax] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneoph2ps xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneoph2ps xmm2, xmmword ptr [ecx + 2032] + +// CHECK: vcvtneoph2ps xmm2, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneoph2ps xmm2, xmmword ptr [edx - 2048] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0xf4,0x00,0x00,0x00,0x10] + vcvtneoph2ps ymm2, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x94,0x87,0x23,0x01,0x00,0x00] + vcvtneoph2ps ymm2, ymmword ptr [edi + 4*eax + 291] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x10] + vcvtneoph2ps ymm2, ymmword ptr [eax] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneoph2ps ymm2, ymmword ptr [2*ebp - 1024] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneoph2ps ymm2, ymmword ptr [ecx + 4064] + +// CHECK: vcvtneoph2ps ymm2, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneoph2ps ymm2, ymmword ptr 
[edx - 4096] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmm3 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0xd3] + {vex} vcvtneps2bf16 xmm2, xmm3 + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymm3 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0xd3] + {vex} vcvtneps2bf16 xmm2, ymm3 + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0xf4,0x00,0x00,0x00,0x10] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x94,0x87,0x23,0x01,0x00,0x00] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [edi + 4*eax + 291] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [eax] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x10] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [eax] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [2*ebp - 512] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [ecx + 2032] + +// CHECK: {vex} vcvtneps2bf16 xmm2, xmmword ptr [edx - 2048] +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff] + {vex} vcvtneps2bf16 xmm2, xmmword ptr [edx - 2048] + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff] + {vex} vcvtneps2bf16 xmm2, ymmword ptr [2*ebp - 1024] + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00] + {vex} vcvtneps2bf16 xmm2, ymmword ptr [ecx + 4064] + +// CHECK: {vex} vcvtneps2bf16 xmm2, ymmword ptr [edx - 4096] +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff] + {vex} vcvtneps2bf16 xmm2, ymmword ptr [edx - 4096] + diff --git 
a/llvm/test/MC/X86/x86-64-avx-ne-convert-att.s b/llvm/test/MC/X86/x86-64-avx-ne-convert-att.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/x86-64-avx-ne-convert-att.s @@ -0,0 +1,334 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vbcstnebf162ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x7a,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnebf162ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vbcstnebf162ps 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x7a,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnebf162ps 291(%r8,%rax,4), %xmm2 + +// CHECK: vbcstnebf162ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnebf162ps (%rip), %xmm2 + +// CHECK: vbcstnebf162ps -64(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps -64(,%rbp,2), %xmm2 + +// CHECK: vbcstnebf162ps 254(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps 254(%rcx), %xmm2 + +// CHECK: vbcstnebf162ps -256(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps -256(%rdx), %xmm2 + +// CHECK: vbcstnebf162ps 268435456(%rbp,%r14,8), %ymm2 +// CHECK: encoding: [0xc4,0xa2,0x7e,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnebf162ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: vbcstnebf162ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7e,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnebf162ps 291(%r8,%rax,4), %ymm2 + +// CHECK: vbcstnebf162ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnebf162ps (%rip), %ymm2 + +// CHECK: vbcstnebf162ps -64(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnebf162ps -64(,%rbp,2), %ymm2 + +// CHECK: vbcstnebf162ps 254(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnebf162ps 254(%rcx), 
%ymm2 + +// CHECK: vbcstnebf162ps -256(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnebf162ps -256(%rdx), %ymm2 + +// CHECK: vbcstnesh2ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x79,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnesh2ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vbcstnesh2ps 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x79,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnesh2ps 291(%r8,%rax,4), %xmm2 + +// CHECK: vbcstnesh2ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnesh2ps (%rip), %xmm2 + +// CHECK: vbcstnesh2ps -64(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps -64(,%rbp,2), %xmm2 + +// CHECK: vbcstnesh2ps 254(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps 254(%rcx), %xmm2 + +// CHECK: vbcstnesh2ps -256(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps -256(%rdx), %xmm2 + +// CHECK: vbcstnesh2ps 268435456(%rbp,%r14,8), %ymm2 +// CHECK: encoding: [0xc4,0xa2,0x7d,0xb1,0x94,0xf5,0x00,0x00,0x00,0x10] + vbcstnesh2ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: vbcstnesh2ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7d,0xb1,0x94,0x80,0x23,0x01,0x00,0x00] + vbcstnesh2ps 291(%r8,%rax,4), %ymm2 + +// CHECK: vbcstnesh2ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x15,0x00,0x00,0x00,0x00] + vbcstnesh2ps (%rip), %ymm2 + +// CHECK: vbcstnesh2ps -64(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x14,0x6d,0xc0,0xff,0xff,0xff] + vbcstnesh2ps -64(,%rbp,2), %ymm2 + +// CHECK: vbcstnesh2ps 254(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x91,0xfe,0x00,0x00,0x00] + vbcstnesh2ps 254(%rcx), %ymm2 + +// CHECK: vbcstnesh2ps -256(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb1,0x92,0x00,0xff,0xff,0xff] + vbcstnesh2ps -256(%rdx), %ymm2 + +// CHECK: 
vcvtneebf162ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x7a,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneebf162ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vcvtneebf162ps 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x7a,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneebf162ps 291(%r8,%rax,4), %xmm2 + +// CHECK: vcvtneebf162ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneebf162ps (%rip), %xmm2 + +// CHECK: vcvtneebf162ps -512(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneebf162ps -512(,%rbp,2), %xmm2 + +// CHECK: vcvtneebf162ps 2032(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneebf162ps 2032(%rcx), %xmm2 + +// CHECK: vcvtneebf162ps -2048(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneebf162ps -2048(%rdx), %xmm2 + +// CHECK: vcvtneebf162ps 268435456(%rbp,%r14,8), %ymm2 +// CHECK: encoding: [0xc4,0xa2,0x7e,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneebf162ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: vcvtneebf162ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7e,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneebf162ps 291(%r8,%rax,4), %ymm2 + +// CHECK: vcvtneebf162ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneebf162ps (%rip), %ymm2 + +// CHECK: vcvtneebf162ps -1024(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneebf162ps -1024(,%rbp,2), %ymm2 + +// CHECK: vcvtneebf162ps 4064(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneebf162ps 4064(%rcx), %ymm2 + +// CHECK: vcvtneebf162ps -4096(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneebf162ps -4096(%rdx), %ymm2 + +// CHECK: vcvtneeph2ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: 
[0xc4,0xa2,0x79,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneeph2ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vcvtneeph2ps 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x79,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneeph2ps 291(%r8,%rax,4), %xmm2 + +// CHECK: vcvtneeph2ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneeph2ps (%rip), %xmm2 + +// CHECK: vcvtneeph2ps -512(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneeph2ps -512(,%rbp,2), %xmm2 + +// CHECK: vcvtneeph2ps 2032(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneeph2ps 2032(%rcx), %xmm2 + +// CHECK: vcvtneeph2ps -2048(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x79,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneeph2ps -2048(%rdx), %xmm2 + +// CHECK: vcvtneeph2ps 268435456(%rbp,%r14,8), %ymm2 +// CHECK: encoding: [0xc4,0xa2,0x7d,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneeph2ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: vcvtneeph2ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7d,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneeph2ps 291(%r8,%rax,4), %ymm2 + +// CHECK: vcvtneeph2ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneeph2ps (%rip), %ymm2 + +// CHECK: vcvtneeph2ps -1024(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneeph2ps -1024(,%rbp,2), %ymm2 + +// CHECK: vcvtneeph2ps 4064(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneeph2ps 4064(%rcx), %ymm2 + +// CHECK: vcvtneeph2ps -4096(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7d,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneeph2ps -4096(%rdx), %ymm2 + +// CHECK: vcvtneobf162ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x7b,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneobf162ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vcvtneobf162ps 291(%r8,%rax,4), %xmm2 +// 
CHECK: encoding: [0xc4,0xc2,0x7b,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneobf162ps 291(%r8,%rax,4), %xmm2 + +// CHECK: vcvtneobf162ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneobf162ps (%rip), %xmm2 + +// CHECK: vcvtneobf162ps -512(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneobf162ps -512(,%rbp,2), %xmm2 + +// CHECK: vcvtneobf162ps 2032(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneobf162ps 2032(%rcx), %xmm2 + +// CHECK: vcvtneobf162ps -2048(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7b,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneobf162ps -2048(%rdx), %xmm2 + +// CHECK: vcvtneobf162ps 268435456(%rbp,%r14,8), %ymm2 +// CHECK: encoding: [0xc4,0xa2,0x7f,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneobf162ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: vcvtneobf162ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7f,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneobf162ps 291(%r8,%rax,4), %ymm2 + +// CHECK: vcvtneobf162ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneobf162ps (%rip), %ymm2 + +// CHECK: vcvtneobf162ps -1024(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneobf162ps -1024(,%rbp,2), %ymm2 + +// CHECK: vcvtneobf162ps 4064(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneobf162ps 4064(%rcx), %ymm2 + +// CHECK: vcvtneobf162ps -4096(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7f,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneobf162ps -4096(%rdx), %ymm2 + +// CHECK: vcvtneoph2ps 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x78,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneoph2ps 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: vcvtneoph2ps 291(%r8,%rax,4), %xmm2 +// CHECK: encoding: [0xc4,0xc2,0x78,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneoph2ps 291(%r8,%rax,4), %xmm2 + +// 
CHECK: vcvtneoph2ps (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneoph2ps (%rip), %xmm2 + +// CHECK: vcvtneoph2ps -512(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x14,0x6d,0x00,0xfe,0xff,0xff] + vcvtneoph2ps -512(,%rbp,2), %xmm2 + +// CHECK: vcvtneoph2ps 2032(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x91,0xf0,0x07,0x00,0x00] + vcvtneoph2ps 2032(%rcx), %xmm2 + +// CHECK: vcvtneoph2ps -2048(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x78,0xb0,0x92,0x00,0xf8,0xff,0xff] + vcvtneoph2ps -2048(%rdx), %xmm2 + +// CHECK: vcvtneoph2ps 268435456(%rbp,%r14,8), %ymm2 +// CHECK: encoding: [0xc4,0xa2,0x7c,0xb0,0x94,0xf5,0x00,0x00,0x00,0x10] + vcvtneoph2ps 268435456(%rbp,%r14,8), %ymm2 + +// CHECK: vcvtneoph2ps 291(%r8,%rax,4), %ymm2 +// CHECK: encoding: [0xc4,0xc2,0x7c,0xb0,0x94,0x80,0x23,0x01,0x00,0x00] + vcvtneoph2ps 291(%r8,%rax,4), %ymm2 + +// CHECK: vcvtneoph2ps (%rip), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x15,0x00,0x00,0x00,0x00] + vcvtneoph2ps (%rip), %ymm2 + +// CHECK: vcvtneoph2ps -1024(,%rbp,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x14,0x6d,0x00,0xfc,0xff,0xff] + vcvtneoph2ps -1024(,%rbp,2), %ymm2 + +// CHECK: vcvtneoph2ps 4064(%rcx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x91,0xe0,0x0f,0x00,0x00] + vcvtneoph2ps 4064(%rcx), %ymm2 + +// CHECK: vcvtneoph2ps -4096(%rdx), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0x7c,0xb0,0x92,0x00,0xf0,0xff,0xff] + vcvtneoph2ps -4096(%rdx), %ymm2 + +// CHECK: {vex} vcvtneps2bf16 %xmm3, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0xd3] + {vex} vcvtneps2bf16 %xmm3, %xmm2 + +// CHECK: {vex} vcvtneps2bf16 %ymm3, %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0xd3] + {vex} vcvtneps2bf16 %ymm3, %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 268435456(%rbp,%r14,8), %xmm2 +// CHECK: encoding: [0xc4,0xa2,0x7a,0x72,0x94,0xf5,0x00,0x00,0x00,0x10] + {vex} vcvtneps2bf16x 268435456(%rbp,%r14,8), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 291(%r8,%rax,4), 
%xmm2 +// CHECK: encoding: [0xc4,0xc2,0x7a,0x72,0x94,0x80,0x23,0x01,0x00,0x00] + {vex} vcvtneps2bf16x 291(%r8,%rax,4), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x (%rip), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x15,0x00,0x00,0x00,0x00] + {vex} vcvtneps2bf16x (%rip), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x -512(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff] + {vex} vcvtneps2bf16x -512(,%rbp,2), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x 2032(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x91,0xf0,0x07,0x00,0x00] + {vex} vcvtneps2bf16x 2032(%rcx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16x -2048(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x72,0x92,0x00,0xf8,0xff,0xff] + {vex} vcvtneps2bf16x -2048(%rdx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y -1024(,%rbp,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff] + {vex} vcvtneps2bf16y -1024(,%rbp,2), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y 4064(%rcx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x91,0xe0,0x0f,0x00,0x00] + {vex} vcvtneps2bf16y 4064(%rcx), %xmm2 + +// CHECK: {vex} vcvtneps2bf16y -4096(%rdx), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0x7e,0x72,0x92,0x00,0xf0,0xff,0xff] + {vex} vcvtneps2bf16y -4096(%rdx), %xmm2 +