diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.h b/llvm/include/llvm/Support/X86FoldTablesUtils.h
copy from llvm/lib/Target/X86/X86InstrFoldTables.h
copy to llvm/include/llvm/Support/X86FoldTablesUtils.h
--- a/llvm/lib/Target/X86/X86InstrFoldTables.h
+++ b/llvm/include/llvm/Support/X86FoldTablesUtils.h
@@ -1,22 +1,15 @@
-//===-- X86InstrFoldTables.h - X86 Instruction Folding Tables ---*- C++ -*-===//
+//===-- X86FoldTablesUtils.h ------------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
-// This file contains the interface to query the X86 memory folding tables.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_X86_X86INSTRFOLDTABLES_H
-#define LLVM_LIB_TARGET_X86_X86INSTRFOLDTABLES_H
-#include <cstdint>
-
-namespace llvm {
+#ifndef LLVM_SUPPORT_X86FOLDTABLESUTILS_H
+#define LLVM_SUPPORT_X86FOLDTABLESUTILS_H
+namespace llvm {
 enum {
   // Select which memory operand is being unfolded.
   // (stored in bits 0 - 2)
@@ -62,36 +55,5 @@
   // Unused bits 14-15
 };
-
-// This struct is used for both the folding and unfold tables. They KeyOp
-// is used to determine the sorting order.
-struct X86MemoryFoldTableEntry {
-  uint16_t KeyOp;
-  uint16_t DstOp;
-  uint16_t Flags;
-
-  bool operator<(const X86MemoryFoldTableEntry &RHS) const {
-    return KeyOp < RHS.KeyOp;
-  }
-  bool operator==(const X86MemoryFoldTableEntry &RHS) const {
-    return KeyOp == RHS.KeyOp;
-  }
-  friend bool operator<(const X86MemoryFoldTableEntry &TE, unsigned Opcode) {
-    return TE.KeyOp < Opcode;
-  }
-};
-
-// Look up the memory folding table entry for folding a load and a store into
-// operand 0.
-const X86MemoryFoldTableEntry *lookupTwoAddrFoldTable(unsigned RegOp);
-
-// Look up the memory folding table entry for folding a load or store with
-// operand OpNum.
-const X86MemoryFoldTableEntry *lookupFoldTable(unsigned RegOp, unsigned OpNum);
-
-// Look up the memory unfolding table entry for this instruction.
-const X86MemoryFoldTableEntry *lookupUnfoldTable(unsigned MemOp);
-
-} // namespace llvm
-
-#endif
+} // namespace llvm
+#endif // LLVM_SUPPORT_X86FOLDTABLESUTILS_H
\ No newline at end of file
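For orientation: the enum kept in this new Support header packs everything a fold-table consumer needs into the 16-bit Flags word of a table entry. A minimal decoding sketch, assuming only what the comments in the header state (illustrative, not part of the patch):

```cpp
#include "llvm/Support/X86FoldTablesUtils.h"
#include <cstdint>

// Bits 0-2: which operand the memory reference corresponds to.
unsigned getFoldIndex(uint16_t Flags) { return Flags & llvm::TB_INDEX_MASK; }

// Bits 8-11: minimum alignment, stored as Log2(Align) + 1 so that 0 can mean
// "no alignment required". TB_ALIGN_16 stores 5, which decodes to 1 << 4 = 16.
unsigned getMinAlign(uint16_t Flags) {
  unsigned Encoded = (Flags & llvm::TB_ALIGN_MASK) >> llvm::TB_ALIGN_SHIFT;
  return Encoded ? 1u << (Encoded - 1) : 0;
}
```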
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.h b/llvm/lib/Target/X86/X86InstrFoldTables.h
--- a/llvm/lib/Target/X86/X86InstrFoldTables.h
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.h
@@ -14,55 +14,10 @@
 #define LLVM_LIB_TARGET_X86_X86INSTRFOLDTABLES_H
 #include <cstdint>
+#include "llvm/Support/X86FoldTablesUtils.h"
 
 namespace llvm {
 
-enum {
-  // Select which memory operand is being unfolded.
-  // (stored in bits 0 - 2)
-  TB_INDEX_0 = 0,
-  TB_INDEX_1 = 1,
-  TB_INDEX_2 = 2,
-  TB_INDEX_3 = 3,
-  TB_INDEX_4 = 4,
-  TB_INDEX_MASK = 0x7,
-
-  // Do not insert the reverse map (MemOp -> RegOp) into the table.
-  // This may be needed because there is a many -> one mapping.
-  TB_NO_REVERSE = 1 << 3,
-
-  // Do not insert the forward map (RegOp -> MemOp) into the table.
-  // This is needed for Native Client, which prohibits branch
-  // instructions from using a memory operand.
-  TB_NO_FORWARD = 1 << 4,
-
-  TB_FOLDED_LOAD = 1 << 5,
-  TB_FOLDED_STORE = 1 << 6,
-  TB_FOLDED_BCAST = 1 << 7,
-
-  // Minimum alignment required for load/store.
-  // Used for RegOp->MemOp conversion. Encoded as Log2(Align) + 1 to allow 0
-  // to mean align of 0.
-  // (stored in bits 8 - 11)
-  TB_ALIGN_SHIFT = 8,
-  TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
-  TB_ALIGN_16 = 5 << TB_ALIGN_SHIFT,
-  TB_ALIGN_32 = 6 << TB_ALIGN_SHIFT,
-  TB_ALIGN_64 = 7 << TB_ALIGN_SHIFT,
-  TB_ALIGN_MASK = 0xf << TB_ALIGN_SHIFT,
-
-  // Broadcast type.
-  // (stored in bits 12 - 13)
-  TB_BCAST_TYPE_SHIFT = 12,
-  TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT,
-  TB_BCAST_MASK = 0x3 << TB_BCAST_TYPE_SHIFT,
-
-  // Unused bits 14-15
-};
-
 // This struct is used for both the folding and unfold tables. They KeyOp
 // is used to determine the sorting order.
 struct X86MemoryFoldTableEntry {
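The struct and the three lookup declarations stay behind in this target-private header. Since every generated table is sorted by KeyOp, and the struct provides a friend operator< against a bare opcode, the natural implementation of these lookups is a binary search. A sketch of that pattern; `lookupFoldTableImpl` is an illustrative helper name, not code from this patch:

```cpp
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

static const X86MemoryFoldTableEntry *
lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
  // lower_bound compares entries against the opcode via the friend operator<.
  const X86MemoryFoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
  if (Data != Table.end() && Data->KeyOp == RegOp)
    return Data;
  return nullptr;
}
```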
diff --git a/llvm/lib/Target/X86/X86MemFoldTables.inc b/llvm/lib/Target/X86/X86MemFoldTables.inc
--- a/llvm/lib/Target/X86/X86MemFoldTables.inc
+++ b/llvm/lib/Target/X86/X86MemFoldTables.inc
@@ -258,20 +258,22 @@
 {X86::JMP32r_NT, X86::JMP32m_NT, TB_FOLDED_LOAD},
 {X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD},
 {X86::JMP64r_NT, X86::JMP64m_NT, TB_FOLDED_LOAD},
+{X86::JMP64r_REX, X86::JMP64m_REX, TB_FOLDED_LOAD},
 {X86::MMX_MOVD64from64rr, X86::MMX_MOVQ64mr, TB_FOLDED_STORE},
 {X86::MMX_MOVD64grr, X86::MMX_MOVD64mr, TB_FOLDED_STORE},
+{X86::MMX_MOVQ64rr, X86::MMX_MOVQ64mr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE},
-{X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE},
+{X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE},
-{X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE},
+{X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE},
-{X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE},
+{X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::MOV64toSDrr, X86::MOV64mr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE},
-{X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE},
-{X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE},
-{X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE|TB_ALIGN_16},
-{X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE|TB_ALIGN_16},
+{X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
+{X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
 {X86::MOVDI2SSrr, X86::MOV32mr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE|TB_ALIGN_16},
 {X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE},
@@ -279,8 +281,8 @@
 {X86::MOVPQIto64rr, X86::MOVPQI2QImr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::MOVSDto64rr, X86::MOVSDmr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::MOVSS2DIrr, X86::MOVSSmr, TB_FOLDED_STORE},
-{X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE},
-{X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE},
+{X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD},
 {X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD},
 {X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD},
@@ -327,38 +329,38 @@
 {X86::VEXTRACTPSrr, X86::VEXTRACTPSmr, TB_FOLDED_STORE},
 {X86::VMOV64toSDZrr, X86::MOV64mr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::VMOV64toSDrr, X86::MOV64mr, TB_FOLDED_STORE|TB_NO_REVERSE},
-{X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE|TB_ALIGN_32},
-{X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE|TB_ALIGN_16},
-{X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE|TB_ALIGN_32},
-{X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE|TB_ALIGN_64},
-{X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE|TB_ALIGN_16},
-{X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE|TB_ALIGN_32},
-{X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE|TB_ALIGN_16},
-{X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE|TB_ALIGN_32},
-{X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE|TB_ALIGN_64},
-{X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE|TB_ALIGN_16},
+{X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_32},
+{X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
+{X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_32},
+{X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_64},
+{X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
+{X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_32},
+{X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
+{X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_32},
+{X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_64},
+{X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
 {X86::VMOVDI2SSZrr, X86::MOV32mr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::VMOVDI2SSrr, X86::MOV32mr, TB_FOLDED_STORE|TB_NO_REVERSE},
-{X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE|TB_ALIGN_16},
-{X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE|TB_ALIGN_32},
-{X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE|TB_ALIGN_64},
-{X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE|TB_ALIGN_16},
-{X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE|TB_ALIGN_32},
-{X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE|TB_ALIGN_64},
+{X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
+{X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_32},
+{X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_64},
+{X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_16},
+{X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_32},
+{X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE|TB_NO_REVERSE|TB_ALIGN_64},
 {X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE|TB_ALIGN_32},
 {X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE|TB_ALIGN_16},
-{X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE},
-{X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE},
-{X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE},
-{X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE},
-{X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE},
-{X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE},
-{X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE},
-{X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE},
-{X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE},
-{X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE},
-{X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE},
-{X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE},
+{X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::VMOVDQUYrr, X86::VMOVDQUYmr, TB_FOLDED_STORE},
 {X86::VMOVDQUrr, X86::VMOVDQUmr, TB_FOLDED_STORE},
 {X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE},
@@ -369,16 +371,16 @@
 {X86::VMOVSDto64rr, X86::VMOVSDmr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::VMOVSS2DIZrr, X86::VMOVSSZmr, TB_FOLDED_STORE},
 {X86::VMOVSS2DIrr, X86::VMOVSSmr, TB_FOLDED_STORE},
-{X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE},
-{X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE},
-{X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE},
-{X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE},
-{X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE},
-{X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE},
-{X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE},
-{X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE},
-{X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE},
-{X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE},
+{X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE|TB_NO_REVERSE},
+{X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE|TB_NO_REVERSE},
 {X86::VPEXTRDZrr, X86::VPEXTRDZmr, TB_FOLDED_STORE},
 {X86::VPEXTRDrr, X86::VPEXTRDmr, TB_FOLDED_STORE},
 {X86::VPEXTRQZrr, X86::VPEXTRQZmr, TB_FOLDED_STORE},
@@ -488,6 +490,10 @@
 {X86::IMUL32rri8, X86::IMUL32rmi8, 0},
 {X86::IMUL64rri32, X86::IMUL64rmi32, 0},
 {X86::IMUL64rri8, X86::IMUL64rmi8, 0},
+{X86::KMOVBkk, X86::KMOVBkm, TB_NO_REVERSE},
+{X86::KMOVDkk, X86::KMOVDkm, 0},
+{X86::KMOVQkk, X86::KMOVQkm, 0},
+{X86::KMOVWkk, X86::KMOVWkm, 0},
 {X86::LWPINS32rri, X86::LWPINS32rmi, 0},
 {X86::LWPINS64rri, X86::LWPINS64rmi, 0},
 {X86::LWPVAL32rri, X86::LWPVAL32rmi, 0},
@@ -500,7 +506,9 @@
 {X86::MMX_CVTPS2PIrr, X86::MMX_CVTPS2PIrm, TB_NO_REVERSE},
 {X86::MMX_CVTTPD2PIrr, X86::MMX_CVTTPD2PIrm, TB_ALIGN_16},
 {X86::MMX_CVTTPS2PIrr, X86::MMX_CVTTPS2PIrm, TB_NO_REVERSE},
+{X86::MMX_MOVD64rr, X86::MMX_MOVD64rm, 0},
 {X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0},
+{X86::MMX_MOVQ64rr, X86::MMX_MOVQ64rm, 0},
 {X86::MMX_PABSBrr, X86::MMX_PABSBrm, 0},
 {X86::MMX_PABSDrr, X86::MMX_PABSDrm, 0},
 {X86::MMX_PABSWrr, X86::MMX_PABSWrm, 0},
@@ -520,8 +528,10 @@
 {X86::MOVDQUrr, X86::MOVDQUrm, 0},
 {X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16},
 {X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16},
+{X86::MOVSX16rr32, X86::MOVSX16rm32, 0},
 {X86::MOVSX16rr8, X86::MOVSX16rm8, 0},
 {X86::MOVSX32rr16, X86::MOVSX32rm16, 0},
+{X86::MOVSX32rr32, X86::MOVSX32rm32, 0},
 {X86::MOVSX32rr8, X86::MOVSX32rm8, 0},
 {X86::MOVSX32rr8_NOREX, X86::MOVSX32rm8_NOREX, 0},
 {X86::MOVSX64rr16, X86::MOVSX64rm16, 0},
@@ -620,6 +630,8 @@
 {X86::VCOMISDZrr_Int, X86::VCOMISDZrm_Int, TB_NO_REVERSE},
 {X86::VCOMISDrr, X86::VCOMISDrm, 0},
 {X86::VCOMISDrr_Int, X86::VCOMISDrm_Int, TB_NO_REVERSE},
+{X86::VCOMISHZrr, X86::VCOMISHZrm, 0},
+{X86::VCOMISHZrr_Int, X86::VCOMISHZrm_Int, TB_NO_REVERSE},
 {X86::VCOMISSZrr, X86::VCOMISSZrm, 0},
 {X86::VCOMISSZrr_Int, X86::VCOMISSZrm_Int, TB_NO_REVERSE},
 {X86::VCOMISSrr, X86::VCOMISSrm, 0},
@@ -629,19 +641,27 @@
 {X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0},
 {X86::VCVTDQ2PDZrr, X86::VCVTDQ2PDZrm, 0},
 {X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, TB_NO_REVERSE},
+{X86::VCVTDQ2PHZ128rr, X86::VCVTDQ2PHZ128rm, 0},
+{X86::VCVTDQ2PHZ256rr, X86::VCVTDQ2PHZ256rm, 0},
+{X86::VCVTDQ2PHZrr, X86::VCVTDQ2PHZrm, 0},
 {X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0},
 {X86::VCVTDQ2PSZ128rr, X86::VCVTDQ2PSZ128rm, 0},
 {X86::VCVTDQ2PSZ256rr, X86::VCVTDQ2PSZ256rm, 0},
 {X86::VCVTDQ2PSZrr, X86::VCVTDQ2PSZrm, 0},
 {X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0},
+{X86::VCVTNEPS2BF16Yrr, X86::VCVTNEPS2BF16Yrm, 0},
 {X86::VCVTNEPS2BF16Z128rr, X86::VCVTNEPS2BF16Z128rm, 0},
 {X86::VCVTNEPS2BF16Z256rr, X86::VCVTNEPS2BF16Z256rm, 0},
 {X86::VCVTNEPS2BF16Zrr, X86::VCVTNEPS2BF16Zrm, 0},
+{X86::VCVTNEPS2BF16rr, X86::VCVTNEPS2BF16rm, 0},
 {X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0},
 {X86::VCVTPD2DQZ128rr, X86::VCVTPD2DQZ128rm, 0},
 {X86::VCVTPD2DQZ256rr, X86::VCVTPD2DQZ256rm, 0},
 {X86::VCVTPD2DQZrr, X86::VCVTPD2DQZrm, 0},
 {X86::VCVTPD2DQrr, X86::VCVTPD2DQrm, 0},
+{X86::VCVTPD2PHZ128rr, X86::VCVTPD2PHZ128rm, 0},
+{X86::VCVTPD2PHZ256rr, X86::VCVTPD2PHZ256rm, 0},
+{X86::VCVTPD2PHZrr, X86::VCVTPD2PHZrm, 0},
 {X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0},
 {X86::VCVTPD2PSZ128rr, X86::VCVTPD2PSZ128rm, 0},
 {X86::VCVTPD2PSZ256rr, X86::VCVTPD2PSZ256rm, 0},
@@ -656,11 +676,35 @@
 {X86::VCVTPD2UQQZ128rr, X86::VCVTPD2UQQZ128rm, 0},
 {X86::VCVTPD2UQQZ256rr, X86::VCVTPD2UQQZ256rm, 0},
 {X86::VCVTPD2UQQZrr, X86::VCVTPD2UQQZrm, 0},
+{X86::VCVTPH2DQZ128rr, X86::VCVTPH2DQZ128rm, TB_NO_REVERSE},
+{X86::VCVTPH2DQZ256rr, X86::VCVTPH2DQZ256rm, 0},
+{X86::VCVTPH2DQZrr, X86::VCVTPH2DQZrm, 0},
+{X86::VCVTPH2PDZ128rr, X86::VCVTPH2PDZ128rm, TB_NO_REVERSE},
+{X86::VCVTPH2PDZ256rr, X86::VCVTPH2PDZ256rm, TB_NO_REVERSE},
+{X86::VCVTPH2PDZrr, X86::VCVTPH2PDZrm, 0},
+{X86::VCVTPH2PSXZ128rr, X86::VCVTPH2PSXZ128rm, TB_NO_REVERSE},
+{X86::VCVTPH2PSXZ256rr, X86::VCVTPH2PSXZ256rm, 0},
+{X86::VCVTPH2PSXZrr, X86::VCVTPH2PSXZrm, 0},
 {X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0},
 {X86::VCVTPH2PSZ128rr, X86::VCVTPH2PSZ128rm, TB_NO_REVERSE},
 {X86::VCVTPH2PSZ256rr, X86::VCVTPH2PSZ256rm, 0},
 {X86::VCVTPH2PSZrr, X86::VCVTPH2PSZrm, 0},
 {X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, TB_NO_REVERSE},
+{X86::VCVTPH2QQZ128rr, X86::VCVTPH2QQZ128rm, TB_NO_REVERSE},
+{X86::VCVTPH2QQZ256rr, X86::VCVTPH2QQZ256rm, TB_NO_REVERSE},
+{X86::VCVTPH2QQZrr, X86::VCVTPH2QQZrm, 0},
+{X86::VCVTPH2UDQZ128rr, X86::VCVTPH2UDQZ128rm, TB_NO_REVERSE},
+{X86::VCVTPH2UDQZ256rr, X86::VCVTPH2UDQZ256rm, 0},
+{X86::VCVTPH2UDQZrr, X86::VCVTPH2UDQZrm, 0},
+{X86::VCVTPH2UQQZ128rr, X86::VCVTPH2UQQZ128rm, TB_NO_REVERSE},
+{X86::VCVTPH2UQQZ256rr, X86::VCVTPH2UQQZ256rm, TB_NO_REVERSE},
+{X86::VCVTPH2UQQZrr, X86::VCVTPH2UQQZrm, 0},
+{X86::VCVTPH2UWZ128rr, X86::VCVTPH2UWZ128rm, 0},
+{X86::VCVTPH2UWZ256rr, X86::VCVTPH2UWZ256rm, 0},
+{X86::VCVTPH2UWZrr, X86::VCVTPH2UWZrm, 0},
+{X86::VCVTPH2WZ128rr, X86::VCVTPH2WZ128rm, 0},
+{X86::VCVTPH2WZ256rr, X86::VCVTPH2WZ256rm, 0},
+{X86::VCVTPH2WZrr, X86::VCVTPH2WZrm, 0},
 {X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0},
 {X86::VCVTPS2DQZ128rr, X86::VCVTPS2DQZ128rm, 0},
 {X86::VCVTPS2DQZ256rr, X86::VCVTPS2DQZ256rm, 0},
@@ -671,6 +715,9 @@
 {X86::VCVTPS2PDZ256rr, X86::VCVTPS2PDZ256rm, 0},
 {X86::VCVTPS2PDZrr, X86::VCVTPS2PDZrm, 0},
 {X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, TB_NO_REVERSE},
+{X86::VCVTPS2PHXZ128rr, X86::VCVTPS2PHXZ128rm, 0},
+{X86::VCVTPS2PHXZ256rr, X86::VCVTPS2PHXZ256rm, 0},
+{X86::VCVTPS2PHXZrr, X86::VCVTPS2PHXZrm, 0},
 {X86::VCVTPS2QQZ128rr, X86::VCVTPS2QQZ128rm, TB_NO_REVERSE},
 {X86::VCVTPS2QQZ256rr, X86::VCVTPS2QQZ256rm, 0},
 {X86::VCVTPS2QQZrr, X86::VCVTPS2QQZrm, 0},
@@ -683,6 +730,9 @@
 {X86::VCVTQQ2PDZ128rr, X86::VCVTQQ2PDZ128rm, 0},
 {X86::VCVTQQ2PDZ256rr, X86::VCVTQQ2PDZ256rm, 0},
 {X86::VCVTQQ2PDZrr, X86::VCVTQQ2PDZrm, 0},
+{X86::VCVTQQ2PHZ128rr, X86::VCVTQQ2PHZ128rm, 0},
+{X86::VCVTQQ2PHZ256rr, X86::VCVTQQ2PHZ256rm, 0},
+{X86::VCVTQQ2PHZrr, X86::VCVTQQ2PHZrm, 0},
 {X86::VCVTQQ2PSZ128rr, X86::VCVTQQ2PSZ128rm, 0},
 {X86::VCVTQQ2PSZ256rr, X86::VCVTQQ2PSZ256rm, 0},
 {X86::VCVTQQ2PSZrr, X86::VCVTQQ2PSZrm, 0},
@@ -696,6 +746,10 @@
 {X86::VCVTSD2SIrr_Int, X86::VCVTSD2SIrm_Int, TB_NO_REVERSE},
 {X86::VCVTSD2USI64Zrr_Int, X86::VCVTSD2USI64Zrm_Int, TB_NO_REVERSE},
 {X86::VCVTSD2USIZrr_Int, X86::VCVTSD2USIZrm_Int, TB_NO_REVERSE},
+{X86::VCVTSH2SI64Zrr_Int, X86::VCVTSH2SI64Zrm_Int, TB_NO_REVERSE},
+{X86::VCVTSH2SIZrr_Int, X86::VCVTSH2SIZrm_Int, TB_NO_REVERSE},
+{X86::VCVTSH2USI64Zrr_Int, X86::VCVTSH2USI64Zrm_Int, TB_NO_REVERSE},
+{X86::VCVTSH2USIZrr_Int, X86::VCVTSH2USIZrm_Int, TB_NO_REVERSE},
 {X86::VCVTSS2SI64Zrr, X86::VCVTSS2SI64Zrm, 0},
 {X86::VCVTSS2SI64Zrr_Int, X86::VCVTSS2SI64Zrm_Int, TB_NO_REVERSE},
 {X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0},
@@ -720,6 +774,24 @@
 {X86::VCVTTPD2UQQZ128rr, X86::VCVTTPD2UQQZ128rm, 0},
 {X86::VCVTTPD2UQQZ256rr, X86::VCVTTPD2UQQZ256rm, 0},
 {X86::VCVTTPD2UQQZrr, X86::VCVTTPD2UQQZrm, 0},
+{X86::VCVTTPH2DQZ128rr, X86::VCVTTPH2DQZ128rm, TB_NO_REVERSE},
+{X86::VCVTTPH2DQZ256rr, X86::VCVTTPH2DQZ256rm, 0},
+{X86::VCVTTPH2DQZrr, X86::VCVTTPH2DQZrm, 0},
+{X86::VCVTTPH2QQZ128rr, X86::VCVTTPH2QQZ128rm, TB_NO_REVERSE},
+{X86::VCVTTPH2QQZ256rr, X86::VCVTTPH2QQZ256rm, TB_NO_REVERSE},
+{X86::VCVTTPH2QQZrr, X86::VCVTTPH2QQZrm, 0},
+{X86::VCVTTPH2UDQZ128rr, X86::VCVTTPH2UDQZ128rm, TB_NO_REVERSE},
+{X86::VCVTTPH2UDQZ256rr, X86::VCVTTPH2UDQZ256rm, 0},
+{X86::VCVTTPH2UDQZrr, X86::VCVTTPH2UDQZrm, 0},
+{X86::VCVTTPH2UQQZ128rr, X86::VCVTTPH2UQQZ128rm, TB_NO_REVERSE},
+{X86::VCVTTPH2UQQZ256rr, X86::VCVTTPH2UQQZ256rm, TB_NO_REVERSE},
+{X86::VCVTTPH2UQQZrr, X86::VCVTTPH2UQQZrm, 0},
+{X86::VCVTTPH2UWZ128rr, X86::VCVTTPH2UWZ128rm, 0},
+{X86::VCVTTPH2UWZ256rr, X86::VCVTTPH2UWZ256rm, 0},
+{X86::VCVTTPH2UWZrr, X86::VCVTTPH2UWZrm, 0},
+{X86::VCVTTPH2WZ128rr, X86::VCVTTPH2WZ128rm, 0},
+{X86::VCVTTPH2WZ256rr, X86::VCVTTPH2WZ256rm, 0},
+{X86::VCVTTPH2WZrr, X86::VCVTTPH2WZrm, 0},
 {X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0},
 {X86::VCVTTPS2DQZ128rr, X86::VCVTTPS2DQZ128rm, 0},
 {X86::VCVTTPS2DQZ256rr, X86::VCVTTPS2DQZ256rm, 0},
@@ -746,6 +818,14 @@
 {X86::VCVTTSD2USI64Zrr_Int, X86::VCVTTSD2USI64Zrm_Int, TB_NO_REVERSE},
 {X86::VCVTTSD2USIZrr, X86::VCVTTSD2USIZrm, 0},
 {X86::VCVTTSD2USIZrr_Int, X86::VCVTTSD2USIZrm_Int, TB_NO_REVERSE},
+{X86::VCVTTSH2SI64Zrr, X86::VCVTTSH2SI64Zrm, 0},
+{X86::VCVTTSH2SI64Zrr_Int, X86::VCVTTSH2SI64Zrm_Int, TB_NO_REVERSE},
+{X86::VCVTTSH2SIZrr, X86::VCVTTSH2SIZrm, 0},
+{X86::VCVTTSH2SIZrr_Int, X86::VCVTTSH2SIZrm_Int, TB_NO_REVERSE},
+{X86::VCVTTSH2USI64Zrr, X86::VCVTTSH2USI64Zrm, 0},
+{X86::VCVTTSH2USI64Zrr_Int, X86::VCVTTSH2USI64Zrm_Int, TB_NO_REVERSE},
+{X86::VCVTTSH2USIZrr, X86::VCVTTSH2USIZrm, 0},
+{X86::VCVTTSH2USIZrr_Int, X86::VCVTTSH2USIZrm_Int, TB_NO_REVERSE},
 {X86::VCVTTSS2SI64Zrr, X86::VCVTTSS2SI64Zrm, 0},
 {X86::VCVTTSS2SI64Zrr_Int, X86::VCVTTSS2SI64Zrm_Int, TB_NO_REVERSE},
 {X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0},
@@ -761,15 +841,27 @@
 {X86::VCVTUDQ2PDZ128rr, X86::VCVTUDQ2PDZ128rm, TB_NO_REVERSE},
 {X86::VCVTUDQ2PDZ256rr, X86::VCVTUDQ2PDZ256rm, 0},
 {X86::VCVTUDQ2PDZrr, X86::VCVTUDQ2PDZrm, 0},
+{X86::VCVTUDQ2PHZ128rr, X86::VCVTUDQ2PHZ128rm, 0},
+{X86::VCVTUDQ2PHZ256rr, X86::VCVTUDQ2PHZ256rm, 0},
+{X86::VCVTUDQ2PHZrr, X86::VCVTUDQ2PHZrm, 0},
 {X86::VCVTUDQ2PSZ128rr, X86::VCVTUDQ2PSZ128rm, 0},
 {X86::VCVTUDQ2PSZ256rr, X86::VCVTUDQ2PSZ256rm, 0},
 {X86::VCVTUDQ2PSZrr, X86::VCVTUDQ2PSZrm, 0},
 {X86::VCVTUQQ2PDZ128rr, X86::VCVTUQQ2PDZ128rm, 0},
 {X86::VCVTUQQ2PDZ256rr, X86::VCVTUQQ2PDZ256rm, 0},
 {X86::VCVTUQQ2PDZrr, X86::VCVTUQQ2PDZrm, 0},
+{X86::VCVTUQQ2PHZ128rr, X86::VCVTUQQ2PHZ128rm, 0},
+{X86::VCVTUQQ2PHZ256rr, X86::VCVTUQQ2PHZ256rm, 0},
+{X86::VCVTUQQ2PHZrr, X86::VCVTUQQ2PHZrm, 0},
 {X86::VCVTUQQ2PSZ128rr, X86::VCVTUQQ2PSZ128rm, 0},
 {X86::VCVTUQQ2PSZ256rr, X86::VCVTUQQ2PSZ256rm, 0},
 {X86::VCVTUQQ2PSZrr, X86::VCVTUQQ2PSZrm, 0},
+{X86::VCVTUW2PHZ128rr, X86::VCVTUW2PHZ128rm, 0},
+{X86::VCVTUW2PHZ256rr, X86::VCVTUW2PHZ256rm, 0},
+{X86::VCVTUW2PHZrr, X86::VCVTUW2PHZrm, 0},
+{X86::VCVTW2PHZ128rr, X86::VCVTW2PHZ128rm, 0},
+{X86::VCVTW2PHZ256rr, X86::VCVTW2PHZ256rm, 0},
+{X86::VCVTW2PHZrr, X86::VCVTW2PHZrm, 0},
 {X86::VEXP2PDZr, X86::VEXP2PDZm, 0},
 {X86::VEXP2PSZr, X86::VEXP2PSZm, 0},
 {X86::VEXPANDPDZ128rr, X86::VEXPANDPDZ128rm, TB_NO_REVERSE},
@@ -879,6 +971,7 @@
 {X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0},
 {X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0},
 {X86::VMOVUPSrr, X86::VMOVUPSrm, 0},
+{X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE},
 {X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE},
 {X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE},
 {X86::VPABSBYrr, X86::VPABSBYrm, 0},
@@ -1198,6 +1291,8 @@
 {X86::VUCOMISDZrr_Int, X86::VUCOMISDZrm_Int, TB_NO_REVERSE},
 {X86::VUCOMISDrr, X86::VUCOMISDrm, 0},
 {X86::VUCOMISDrr_Int, X86::VUCOMISDrm_Int, TB_NO_REVERSE},
+{X86::VUCOMISHZrr, X86::VUCOMISHZrm, 0},
+{X86::VUCOMISHZrr_Int, X86::VUCOMISHZrm_Int, TB_NO_REVERSE},
 {X86::VUCOMISSZrr, X86::VUCOMISSZrm, 0},
 {X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE},
 {X86::VUCOMISSrr, X86::VUCOMISSrm, 0},
@@ -1659,6 +1754,9 @@
 {X86::VCVTDQ2PDZ128rrkz, X86::VCVTDQ2PDZ128rmkz, TB_NO_REVERSE},
 {X86::VCVTDQ2PDZ256rrkz, X86::VCVTDQ2PDZ256rmkz, 0},
 {X86::VCVTDQ2PDZrrkz, X86::VCVTDQ2PDZrmkz, 0},
+{X86::VCVTDQ2PHZ128rrkz, X86::VCVTDQ2PHZ128rmkz, 0},
+{X86::VCVTDQ2PHZ256rrkz, X86::VCVTDQ2PHZ256rmkz, 0},
+{X86::VCVTDQ2PHZrrkz, X86::VCVTDQ2PHZrmkz, 0},
 {X86::VCVTDQ2PSZ128rrkz, X86::VCVTDQ2PSZ128rmkz, 0},
 {X86::VCVTDQ2PSZ256rrkz, X86::VCVTDQ2PSZ256rmkz, 0},
 {X86::VCVTDQ2PSZrrkz, X86::VCVTDQ2PSZrmkz, 0},
@@ -1671,6 +1769,9 @@
 {X86::VCVTPD2DQZ128rrkz, X86::VCVTPD2DQZ128rmkz, 0},
 {X86::VCVTPD2DQZ256rrkz, X86::VCVTPD2DQZ256rmkz, 0},
 {X86::VCVTPD2DQZrrkz, X86::VCVTPD2DQZrmkz, 0},
+{X86::VCVTPD2PHZ128rrkz, X86::VCVTPD2PHZ128rmkz, 0},
+{X86::VCVTPD2PHZ256rrkz, X86::VCVTPD2PHZ256rmkz, 0},
+{X86::VCVTPD2PHZrrkz, X86::VCVTPD2PHZrmkz, 0},
 {X86::VCVTPD2PSZ128rrkz, X86::VCVTPD2PSZ128rmkz, 0},
 {X86::VCVTPD2PSZ256rrkz, X86::VCVTPD2PSZ256rmkz, 0},
 {X86::VCVTPD2PSZrrkz, X86::VCVTPD2PSZrmkz, 0},
@@ -1683,15 +1784,42 @@
 {X86::VCVTPD2UQQZ128rrkz, X86::VCVTPD2UQQZ128rmkz, 0},
 {X86::VCVTPD2UQQZ256rrkz, X86::VCVTPD2UQQZ256rmkz, 0},
 {X86::VCVTPD2UQQZrrkz, X86::VCVTPD2UQQZrmkz, 0},
+{X86::VCVTPH2DQZ128rrkz, X86::VCVTPH2DQZ128rmkz, TB_NO_REVERSE},
+{X86::VCVTPH2DQZ256rrkz, X86::VCVTPH2DQZ256rmkz, 0},
+{X86::VCVTPH2DQZrrkz, X86::VCVTPH2DQZrmkz, 0},
+{X86::VCVTPH2PDZ128rrkz, X86::VCVTPH2PDZ128rmkz, TB_NO_REVERSE},
+{X86::VCVTPH2PDZ256rrkz, X86::VCVTPH2PDZ256rmkz, TB_NO_REVERSE},
+{X86::VCVTPH2PDZrrkz, X86::VCVTPH2PDZrmkz, 0},
+{X86::VCVTPH2PSXZ128rrkz, X86::VCVTPH2PSXZ128rmkz, TB_NO_REVERSE},
+{X86::VCVTPH2PSXZ256rrkz, X86::VCVTPH2PSXZ256rmkz, 0},
+{X86::VCVTPH2PSXZrrkz, X86::VCVTPH2PSXZrmkz, 0},
 {X86::VCVTPH2PSZ128rrkz, X86::VCVTPH2PSZ128rmkz, TB_NO_REVERSE},
 {X86::VCVTPH2PSZ256rrkz, X86::VCVTPH2PSZ256rmkz, 0},
 {X86::VCVTPH2PSZrrkz, X86::VCVTPH2PSZrmkz, 0},
+{X86::VCVTPH2QQZ128rrkz, X86::VCVTPH2QQZ128rmkz, TB_NO_REVERSE},
+{X86::VCVTPH2QQZ256rrkz, X86::VCVTPH2QQZ256rmkz, TB_NO_REVERSE},
+{X86::VCVTPH2QQZrrkz, X86::VCVTPH2QQZrmkz, 0},
+{X86::VCVTPH2UDQZ128rrkz, X86::VCVTPH2UDQZ128rmkz, TB_NO_REVERSE},
+{X86::VCVTPH2UDQZ256rrkz, X86::VCVTPH2UDQZ256rmkz, 0},
+{X86::VCVTPH2UDQZrrkz, X86::VCVTPH2UDQZrmkz, 0},
+{X86::VCVTPH2UQQZ128rrkz, X86::VCVTPH2UQQZ128rmkz, TB_NO_REVERSE},
+{X86::VCVTPH2UQQZ256rrkz, X86::VCVTPH2UQQZ256rmkz, TB_NO_REVERSE},
+{X86::VCVTPH2UQQZrrkz, X86::VCVTPH2UQQZrmkz, 0},
+{X86::VCVTPH2UWZ128rrkz, X86::VCVTPH2UWZ128rmkz, 0},
+{X86::VCVTPH2UWZ256rrkz, X86::VCVTPH2UWZ256rmkz, 0},
+{X86::VCVTPH2UWZrrkz, X86::VCVTPH2UWZrmkz, 0},
+{X86::VCVTPH2WZ128rrkz, X86::VCVTPH2WZ128rmkz, 0},
+{X86::VCVTPH2WZ256rrkz, X86::VCVTPH2WZ256rmkz, 0},
+{X86::VCVTPH2WZrrkz, X86::VCVTPH2WZrmkz, 0},
 {X86::VCVTPS2DQZ128rrkz, X86::VCVTPS2DQZ128rmkz, 0},
 {X86::VCVTPS2DQZ256rrkz, X86::VCVTPS2DQZ256rmkz, 0},
 {X86::VCVTPS2DQZrrkz, X86::VCVTPS2DQZrmkz, 0},
 {X86::VCVTPS2PDZ128rrkz, X86::VCVTPS2PDZ128rmkz, TB_NO_REVERSE},
 {X86::VCVTPS2PDZ256rrkz, X86::VCVTPS2PDZ256rmkz, 0},
 {X86::VCVTPS2PDZrrkz, X86::VCVTPS2PDZrmkz, 0},
+{X86::VCVTPS2PHXZ128rrkz, X86::VCVTPS2PHXZ128rmkz, 0},
+{X86::VCVTPS2PHXZ256rrkz, X86::VCVTPS2PHXZ256rmkz, 0},
+{X86::VCVTPS2PHXZrrkz, X86::VCVTPS2PHXZrmkz, 0},
 {X86::VCVTPS2QQZ128rrkz, X86::VCVTPS2QQZ128rmkz, TB_NO_REVERSE},
 {X86::VCVTPS2QQZ256rrkz, X86::VCVTPS2QQZ256rmkz, 0},
 {X86::VCVTPS2QQZrrkz, X86::VCVTPS2QQZrmkz, 0},
@@ -1704,17 +1832,28 @@
 {X86::VCVTQQ2PDZ128rrkz, X86::VCVTQQ2PDZ128rmkz, 0},
 {X86::VCVTQQ2PDZ256rrkz, X86::VCVTQQ2PDZ256rmkz, 0},
 {X86::VCVTQQ2PDZrrkz, X86::VCVTQQ2PDZrmkz, 0},
+{X86::VCVTQQ2PHZ128rrkz, X86::VCVTQQ2PHZ128rmkz, 0},
+{X86::VCVTQQ2PHZ256rrkz, X86::VCVTQQ2PHZ256rmkz, 0},
+{X86::VCVTQQ2PHZrrkz, X86::VCVTQQ2PHZrmkz, 0},
 {X86::VCVTQQ2PSZ128rrkz, X86::VCVTQQ2PSZ128rmkz, 0},
 {X86::VCVTQQ2PSZ256rrkz, X86::VCVTQQ2PSZ256rmkz, 0},
 {X86::VCVTQQ2PSZrrkz, X86::VCVTQQ2PSZrmkz, 0},
+{X86::VCVTSD2SHZrr, X86::VCVTSD2SHZrm, 0},
+{X86::VCVTSD2SHZrr_Int, X86::VCVTSD2SHZrm_Int, TB_NO_REVERSE},
 {X86::VCVTSD2SSZrr, X86::VCVTSD2SSZrm, 0},
 {X86::VCVTSD2SSZrr_Int, X86::VCVTSD2SSZrm_Int, TB_NO_REVERSE},
 {X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0},
 {X86::VCVTSD2SSrr_Int, X86::VCVTSD2SSrm_Int, TB_NO_REVERSE},
+{X86::VCVTSH2SDZrr, X86::VCVTSH2SDZrm, 0},
+{X86::VCVTSH2SDZrr_Int, X86::VCVTSH2SDZrm_Int, TB_NO_REVERSE},
+{X86::VCVTSH2SSZrr, X86::VCVTSH2SSZrm, 0},
+{X86::VCVTSH2SSZrr_Int, X86::VCVTSH2SSZrm_Int, TB_NO_REVERSE},
 {X86::VCVTSI2SDZrr, X86::VCVTSI2SDZrm, 0},
 {X86::VCVTSI2SDZrr_Int, X86::VCVTSI2SDZrm_Int, 0},
 {X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0},
 {X86::VCVTSI2SDrr_Int, X86::VCVTSI2SDrm_Int, 0},
+{X86::VCVTSI2SHZrr, X86::VCVTSI2SHZrm, 0},
+{X86::VCVTSI2SHZrr_Int, X86::VCVTSI2SHZrm_Int, 0},
 {X86::VCVTSI2SSZrr, X86::VCVTSI2SSZrm, 0},
 {X86::VCVTSI2SSZrr_Int, X86::VCVTSI2SSZrm_Int, 0},
 {X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0},
@@ -1723,6 +1862,8 @@
 {X86::VCVTSI642SDZrr_Int, X86::VCVTSI642SDZrm_Int, 0},
 {X86::VCVTSI642SDrr, X86::VCVTSI642SDrm, 0},
 {X86::VCVTSI642SDrr_Int, X86::VCVTSI642SDrm_Int, 0},
+{X86::VCVTSI642SHZrr, X86::VCVTSI642SHZrm, 0},
+{X86::VCVTSI642SHZrr_Int, X86::VCVTSI642SHZrm_Int, 0},
 {X86::VCVTSI642SSZrr, X86::VCVTSI642SSZrm, 0},
 {X86::VCVTSI642SSZrr_Int, X86::VCVTSI642SSZrm_Int, 0},
 {X86::VCVTSI642SSrr, X86::VCVTSI642SSrm, 0},
@@ -1731,6 +1872,8 @@
 {X86::VCVTSS2SDZrr_Int, X86::VCVTSS2SDZrm_Int, TB_NO_REVERSE},
 {X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0},
 {X86::VCVTSS2SDrr_Int, X86::VCVTSS2SDrm_Int, TB_NO_REVERSE},
+{X86::VCVTSS2SHZrr, X86::VCVTSS2SHZrm, 0},
+{X86::VCVTSS2SHZrr_Int, X86::VCVTSS2SHZrm_Int, TB_NO_REVERSE},
 {X86::VCVTTPD2DQZ128rrkz, X86::VCVTTPD2DQZ128rmkz, 0},
 {X86::VCVTTPD2DQZ256rrkz, X86::VCVTTPD2DQZ256rmkz, 0},
 {X86::VCVTTPD2DQZrrkz, X86::VCVTTPD2DQZrmkz, 0},
@@ -1743,6 +1886,24 @@
 {X86::VCVTTPD2UQQZ128rrkz, X86::VCVTTPD2UQQZ128rmkz, 0},
 {X86::VCVTTPD2UQQZ256rrkz, X86::VCVTTPD2UQQZ256rmkz, 0},
 {X86::VCVTTPD2UQQZrrkz, X86::VCVTTPD2UQQZrmkz, 0},
+{X86::VCVTTPH2DQZ128rrkz, X86::VCVTTPH2DQZ128rmkz, TB_NO_REVERSE},
+{X86::VCVTTPH2DQZ256rrkz, X86::VCVTTPH2DQZ256rmkz, 0},
+{X86::VCVTTPH2DQZrrkz, X86::VCVTTPH2DQZrmkz, 0},
+{X86::VCVTTPH2QQZ128rrkz, X86::VCVTTPH2QQZ128rmkz, TB_NO_REVERSE},
+{X86::VCVTTPH2QQZ256rrkz, X86::VCVTTPH2QQZ256rmkz, TB_NO_REVERSE},
+{X86::VCVTTPH2QQZrrkz, X86::VCVTTPH2QQZrmkz, 0},
+{X86::VCVTTPH2UDQZ128rrkz, X86::VCVTTPH2UDQZ128rmkz, TB_NO_REVERSE},
+{X86::VCVTTPH2UDQZ256rrkz, X86::VCVTTPH2UDQZ256rmkz, 0},
+{X86::VCVTTPH2UDQZrrkz, X86::VCVTTPH2UDQZrmkz, 0},
+{X86::VCVTTPH2UQQZ128rrkz, X86::VCVTTPH2UQQZ128rmkz, TB_NO_REVERSE},
+{X86::VCVTTPH2UQQZ256rrkz, X86::VCVTTPH2UQQZ256rmkz, TB_NO_REVERSE},
+{X86::VCVTTPH2UQQZrrkz, X86::VCVTTPH2UQQZrmkz, 0},
+{X86::VCVTTPH2UWZ128rrkz, X86::VCVTTPH2UWZ128rmkz, 0},
+{X86::VCVTTPH2UWZ256rrkz, X86::VCVTTPH2UWZ256rmkz, 0},
+{X86::VCVTTPH2UWZrrkz, X86::VCVTTPH2UWZrmkz, 0},
+{X86::VCVTTPH2WZ128rrkz, X86::VCVTTPH2WZ128rmkz, 0},
+{X86::VCVTTPH2WZ256rrkz, X86::VCVTTPH2WZ256rmkz, 0},
+{X86::VCVTTPH2WZrrkz, X86::VCVTTPH2WZrmkz, 0},
 {X86::VCVTTPS2DQZ128rrkz, X86::VCVTTPS2DQZ128rmkz, 0},
 {X86::VCVTTPS2DQZ256rrkz, X86::VCVTTPS2DQZ256rmkz, 0},
 {X86::VCVTTPS2DQZrrkz, X86::VCVTTPS2DQZrmkz, 0},
@@ -1758,23 +1919,39 @@
 {X86::VCVTUDQ2PDZ128rrkz, X86::VCVTUDQ2PDZ128rmkz, TB_NO_REVERSE},
 {X86::VCVTUDQ2PDZ256rrkz, X86::VCVTUDQ2PDZ256rmkz, 0},
 {X86::VCVTUDQ2PDZrrkz, X86::VCVTUDQ2PDZrmkz, 0},
+{X86::VCVTUDQ2PHZ128rrkz, X86::VCVTUDQ2PHZ128rmkz, 0},
+{X86::VCVTUDQ2PHZ256rrkz, X86::VCVTUDQ2PHZ256rmkz, 0},
+{X86::VCVTUDQ2PHZrrkz, X86::VCVTUDQ2PHZrmkz, 0},
 {X86::VCVTUDQ2PSZ128rrkz, X86::VCVTUDQ2PSZ128rmkz, 0},
 {X86::VCVTUDQ2PSZ256rrkz, X86::VCVTUDQ2PSZ256rmkz, 0},
 {X86::VCVTUDQ2PSZrrkz, X86::VCVTUDQ2PSZrmkz, 0},
 {X86::VCVTUQQ2PDZ128rrkz, X86::VCVTUQQ2PDZ128rmkz, 0},
 {X86::VCVTUQQ2PDZ256rrkz, X86::VCVTUQQ2PDZ256rmkz, 0},
 {X86::VCVTUQQ2PDZrrkz, X86::VCVTUQQ2PDZrmkz, 0},
+{X86::VCVTUQQ2PHZ128rrkz, X86::VCVTUQQ2PHZ128rmkz, 0},
+{X86::VCVTUQQ2PHZ256rrkz, X86::VCVTUQQ2PHZ256rmkz, 0},
+{X86::VCVTUQQ2PHZrrkz, X86::VCVTUQQ2PHZrmkz, 0},
 {X86::VCVTUQQ2PSZ128rrkz, X86::VCVTUQQ2PSZ128rmkz, 0},
 {X86::VCVTUQQ2PSZ256rrkz, X86::VCVTUQQ2PSZ256rmkz, 0},
 {X86::VCVTUQQ2PSZrrkz, X86::VCVTUQQ2PSZrmkz, 0},
 {X86::VCVTUSI2SDZrr, X86::VCVTUSI2SDZrm, 0},
 {X86::VCVTUSI2SDZrr_Int, X86::VCVTUSI2SDZrm_Int, 0},
+{X86::VCVTUSI2SHZrr, X86::VCVTUSI2SHZrm, 0},
+{X86::VCVTUSI2SHZrr_Int, X86::VCVTUSI2SHZrm_Int, 0},
 {X86::VCVTUSI2SSZrr, X86::VCVTUSI2SSZrm, 0},
 {X86::VCVTUSI2SSZrr_Int, X86::VCVTUSI2SSZrm_Int, 0},
 {X86::VCVTUSI642SDZrr, X86::VCVTUSI642SDZrm, 0},
 {X86::VCVTUSI642SDZrr_Int, X86::VCVTUSI642SDZrm_Int, 0},
+{X86::VCVTUSI642SHZrr, X86::VCVTUSI642SHZrm, 0},
+{X86::VCVTUSI642SHZrr_Int, X86::VCVTUSI642SHZrm_Int, 0},
 {X86::VCVTUSI642SSZrr, X86::VCVTUSI642SSZrm, 0},
 {X86::VCVTUSI642SSZrr_Int, X86::VCVTUSI642SSZrm_Int, 0},
+{X86::VCVTUW2PHZ128rrkz, X86::VCVTUW2PHZ128rmkz, 0},
+{X86::VCVTUW2PHZ256rrkz, X86::VCVTUW2PHZ256rmkz, 0},
+{X86::VCVTUW2PHZrrkz, X86::VCVTUW2PHZrmkz, 0},
+{X86::VCVTW2PHZ128rrkz, X86::VCVTW2PHZ128rmkz, 0},
+{X86::VCVTW2PHZ256rrkz, X86::VCVTW2PHZ256rmkz, 0},
+{X86::VCVTW2PHZrrkz, X86::VCVTW2PHZrmkz, 0},
 {X86::VDBPSADBWZ128rri, X86::VDBPSADBWZ128rmi, 0},
 {X86::VDBPSADBWZ256rri, X86::VDBPSADBWZ256rmi, 0},
 {X86::VDBPSADBWZrri, X86::VDBPSADBWZrmi, 0},
@@ -1933,6 +2110,8 @@
 {X86::VINSERTI64x2Z256rr, X86::VINSERTI64x2Z256rm, 0},
 {X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0},
 {X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0},
+{X86::VINSERTPSZrr, X86::VINSERTPSZrm, TB_NO_REVERSE},
+{X86::VINSERTPSrr, X86::VINSERTPSrm, TB_NO_REVERSE},
 {X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0},
 {X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0},
 {X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0},
@@ -3137,6 +3316,9 @@
 {X86::VCVTDQ2PDZ128rrk, X86::VCVTDQ2PDZ128rmk, TB_NO_REVERSE},
 {X86::VCVTDQ2PDZ256rrk, X86::VCVTDQ2PDZ256rmk, 0},
 {X86::VCVTDQ2PDZrrk, X86::VCVTDQ2PDZrmk, 0},
+{X86::VCVTDQ2PHZ128rrk, X86::VCVTDQ2PHZ128rmk, 0},
+{X86::VCVTDQ2PHZ256rrk, X86::VCVTDQ2PHZ256rmk, 0},
+{X86::VCVTDQ2PHZrrk, X86::VCVTDQ2PHZrmk, 0},
 {X86::VCVTDQ2PSZ128rrk, X86::VCVTDQ2PSZ128rmk, 0},
 {X86::VCVTDQ2PSZ256rrk, X86::VCVTDQ2PSZ256rmk, 0},
 {X86::VCVTDQ2PSZrrk, X86::VCVTDQ2PSZrmk, 0},
@@ -3149,6 +3331,9 @@
 {X86::VCVTPD2DQZ128rrk, X86::VCVTPD2DQZ128rmk, 0},
 {X86::VCVTPD2DQZ256rrk, X86::VCVTPD2DQZ256rmk, 0},
 {X86::VCVTPD2DQZrrk, X86::VCVTPD2DQZrmk, 0},
+{X86::VCVTPD2PHZ128rrk, X86::VCVTPD2PHZ128rmk, 0},
+{X86::VCVTPD2PHZ256rrk, X86::VCVTPD2PHZ256rmk, 0},
+{X86::VCVTPD2PHZrrk, X86::VCVTPD2PHZrmk, 0},
 {X86::VCVTPD2PSZ128rrk, X86::VCVTPD2PSZ128rmk, 0},
 {X86::VCVTPD2PSZ256rrk, X86::VCVTPD2PSZ256rmk, 0},
 {X86::VCVTPD2PSZrrk, X86::VCVTPD2PSZrmk, 0},
@@ -3161,15 +3346,42 @@
 {X86::VCVTPD2UQQZ128rrk, X86::VCVTPD2UQQZ128rmk, 0},
 {X86::VCVTPD2UQQZ256rrk, X86::VCVTPD2UQQZ256rmk, 0},
 {X86::VCVTPD2UQQZrrk, X86::VCVTPD2UQQZrmk, 0},
+{X86::VCVTPH2DQZ128rrk, X86::VCVTPH2DQZ128rmk, TB_NO_REVERSE},
+{X86::VCVTPH2DQZ256rrk, X86::VCVTPH2DQZ256rmk, 0},
+{X86::VCVTPH2DQZrrk, X86::VCVTPH2DQZrmk, 0},
+{X86::VCVTPH2PDZ128rrk, X86::VCVTPH2PDZ128rmk, TB_NO_REVERSE},
+{X86::VCVTPH2PDZ256rrk, X86::VCVTPH2PDZ256rmk, TB_NO_REVERSE},
+{X86::VCVTPH2PDZrrk, X86::VCVTPH2PDZrmk, 0},
+{X86::VCVTPH2PSXZ128rrk, X86::VCVTPH2PSXZ128rmk, TB_NO_REVERSE},
+{X86::VCVTPH2PSXZ256rrk, X86::VCVTPH2PSXZ256rmk, 0},
+{X86::VCVTPH2PSXZrrk, X86::VCVTPH2PSXZrmk, 0},
 {X86::VCVTPH2PSZ128rrk, X86::VCVTPH2PSZ128rmk, TB_NO_REVERSE},
 {X86::VCVTPH2PSZ256rrk, X86::VCVTPH2PSZ256rmk, 0},
 {X86::VCVTPH2PSZrrk, X86::VCVTPH2PSZrmk, 0},
+{X86::VCVTPH2QQZ128rrk, X86::VCVTPH2QQZ128rmk, TB_NO_REVERSE},
+{X86::VCVTPH2QQZ256rrk, X86::VCVTPH2QQZ256rmk, TB_NO_REVERSE},
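Many entries in this file only gain TB_NO_REVERSE. Per the flag's definition, this keeps the MemOp -> RegOp direction out of the unfold table; the emitter change further down now sets CannotUnfold for store folds of reg-reg moves (isMoveReg), which is where flags like the one on MOV32rr/MOV32mr come from. A sketch of the observable effect, using a hypothetical helper rather than code from the patch:

```cpp
#include "X86InstrFoldTables.h"

// After this change, canUnfold(X86::MOV32mr) is expected to be false, because
// the MOV32rr -> MOV32mr entry carries TB_NO_REVERSE and therefore never
// lands in the unfold table at all.
static bool canUnfold(unsigned MemOpcode) {
  return llvm::lookupUnfoldTable(MemOpcode) != nullptr;
}
```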
diff --git a/llvm/test/TableGen/x86-auto-memfold.td b/llvm/test/TableGen/x86-auto-memfold.td
new file mode 100644
--- /dev/null
+++ b/llvm/test/TableGen/x86-auto-memfold.td
@@ -0,0 +1,2 @@
+// RUN: llvm-tblgen -gen-x86-fold-tables -asmwriternum=1 %p/../../lib/Target/X86/X86.td -I %p/../../include -I %p/../../lib/Target/X86/ -I %p/../../include/ -I %p/../../lib/Target/ --write-if-changed -o %t1
+// RUN: cmp %p/../../lib/Target/X86/X86MemFoldTables.inc %t1 --ignore-initial=0:568
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -13,7 +13,9 @@
 
 #include "CodeGenTarget.h"
 #include "X86RecognizableInstr.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/X86FoldTablesUtils.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/TableGenBackend.h"
 
@@ -21,23 +23,14 @@
 using namespace X86Disassembler;
 
 namespace {
-
-// 3 possible strategies for the unfolding flag (TB_NO_REVERSE) of the
-// manual added entries.
-enum UnfoldStrategy {
-  UNFOLD,     // Allow unfolding
-  NO_UNFOLD,  // Prevent unfolding
-  NO_STRATEGY // Make decision according to operands' sizes
-};
-
 // Represents an entry in the manual mapped instructions set.
 struct ManualMapEntry {
   const char *RegInstStr;
   const char *MemInstStr;
-  UnfoldStrategy Strategy;
+  uint16_t Strategy;
 
   ManualMapEntry(const char *RegInstStr, const char *MemInstStr,
-                 UnfoldStrategy Strategy = NO_STRATEGY)
+                 uint16_t Strategy = 0)
       : RegInstStr(RegInstStr), MemInstStr(MemInstStr), Strategy(Strategy) {}
 };
 
@@ -50,37 +43,7 @@
   "PCMPESTRM", "PCMPESTRI", "PCMPISTRM", "PCMPISTRI"
 };
 
-// For manually mapping instructions that do not match by their encoding.
-const ManualMapEntry ManualMapSet[] = {
-    { "ADD16ri_DB", "ADD16mi", NO_UNFOLD },
-    { "ADD16ri8_DB", "ADD16mi8", NO_UNFOLD },
-    { "ADD16rr_DB", "ADD16mr", NO_UNFOLD },
-    { "ADD32ri_DB", "ADD32mi", NO_UNFOLD },
-    { "ADD32ri8_DB", "ADD32mi8", NO_UNFOLD },
-    { "ADD32rr_DB", "ADD32mr", NO_UNFOLD },
-    { "ADD64ri32_DB", "ADD64mi32", NO_UNFOLD },
-    { "ADD64ri8_DB", "ADD64mi8", NO_UNFOLD },
-    { "ADD64rr_DB", "ADD64mr", NO_UNFOLD },
-    { "ADD8ri_DB", "ADD8mi", NO_UNFOLD },
-    { "ADD8rr_DB", "ADD8mr", NO_UNFOLD },
-    { "ADD16rr_DB", "ADD16rm", NO_UNFOLD },
-    { "ADD32rr_DB", "ADD32rm", NO_UNFOLD },
-    { "ADD64rr_DB", "ADD64rm", NO_UNFOLD },
-    { "ADD8rr_DB", "ADD8rm", NO_UNFOLD },
-    { "MMX_MOVD64from64rr", "MMX_MOVQ64mr", UNFOLD },
-    { "MMX_MOVD64grr", "MMX_MOVD64mr", UNFOLD },
-    { "MOVLHPSrr", "MOVHPSrm", NO_UNFOLD },
-    { "PUSH16r", "PUSH16rmm", UNFOLD },
-    { "PUSH32r", "PUSH32rmm", UNFOLD },
-    { "PUSH64r", "PUSH64rmm", UNFOLD },
-    { "TAILJMPr", "TAILJMPm", UNFOLD },
-    { "TAILJMPr64", "TAILJMPm64", UNFOLD },
-    { "TAILJMPr64_REX", "TAILJMPm64_REX", UNFOLD },
-    { "VMOVLHPSZrr", "VMOVHPSZ128rm", NO_UNFOLD },
-    { "VMOVLHPSrr", "VMOVHPSrm", NO_UNFOLD },
-};
-
-
+#include "X86FoldTablesEmitterManualMapSet.inc"
 
 static bool isExplicitAlign(const CodeGenInstruction *Inst) {
   return any_of(ExplicitAlign, [Inst](const char *InstStr) {
     return Inst->TheDef->getName().contains(InstStr);
@@ -104,37 +67,44 @@
 
 public:
   bool CannotUnfold = false;
+  bool CannotFold = false;
   bool IsLoad = false;
   bool IsStore = false;
   bool IsAligned = false;
   unsigned int Alignment = 0;
 
+  X86FoldTableEntry() = default;
   X86FoldTableEntry(const CodeGenInstruction *RegInst,
                     const CodeGenInstruction *MemInst)
       : RegInst(RegInst), MemInst(MemInst) {}
 
   void print(formatted_raw_ostream &OS) const {
+    // Stop printing the record if it can neither fold nor unfold.
+    if (CannotUnfold && CannotFold)
+      return;
     OS.indent(2);
-    OS << "{ X86::" << RegInst->TheDef->getName() << ",";
-    OS.PadToColumn(40);
-    OS << "X86::" << MemInst->TheDef->getName() << ",";
-    OS.PadToColumn(75);
+    OS << "{X86::" << RegInst->TheDef->getName() << ", ";
+    // OS.PadToColumn(40);
+    OS << "X86::" << MemInst->TheDef->getName() << ", ";
+    // OS.PadToColumn(75);
 
     std::string Attrs;
     if (IsLoad)
-      Attrs += "TB_FOLDED_LOAD | ";
+      Attrs += "TB_FOLDED_LOAD|";
     if (IsStore)
-      Attrs += "TB_FOLDED_STORE | ";
+      Attrs += "TB_FOLDED_STORE|";
     if (CannotUnfold)
-      Attrs += "TB_NO_REVERSE | ";
+      Attrs += "TB_NO_REVERSE|";
+    if (CannotFold)
+      Attrs += "TB_NO_FORWARD|";
     if (IsAligned)
-      Attrs += "TB_ALIGN_" + std::to_string(Alignment) + " | ";
+      Attrs += "TB_ALIGN_" + std::to_string(Alignment) + "|";
 
-    StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("| ");
+    StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|");
     if (SimplifiedAttrs.empty())
       SimplifiedAttrs = "0";
 
-    OS << SimplifiedAttrs << " },\n";
+    OS << SimplifiedAttrs << "},\n";
   }
 
   bool operator<(const X86FoldTableEntry &RHS) const {
@@ -147,7 +117,23 @@
   }
 };
 
-typedef std::vector<X86FoldTableEntry> FoldTable;
+struct CodeGenInstructionComparator {
+  // Comparator function
+  bool operator()(const CodeGenInstruction *LHS,
+                  const CodeGenInstruction *RHS) const {
+    assert(LHS && RHS && "LHS and RHS shouldn't be nullptr");
+    bool LHSpseudo = LHS->TheDef->getValueAsBit("isPseudo");
+    bool RHSpseudo = RHS->TheDef->getValueAsBit("isPseudo");
+    if (LHSpseudo != RHSpseudo)
+      return LHSpseudo;
+
+    return LHS->TheDef->getName() < RHS->TheDef->getName();
+  }
+};
+
+typedef std::map<const CodeGenInstruction *, X86FoldTableEntry,
+                 CodeGenInstructionComparator>
+    FoldTable;
 // std::vector for each folding table.
 // Table2Addr - Holds instructions which their memory form performs load+store
 // Table#i - Holds instructions which the their memory form perform a load OR
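Replacing the sorted std::vector with a std::map keyed by the register-form instruction does two things. First, insertion happens through Table[RegInstr] = Result, so when the manual entries are applied after the automatic pass, a manual entry simply overwrites the auto-generated one for the same instruction. Second, the comparator gives a deterministic emission order without the llvm::sort calls deleted further down. A standalone sketch of the ordering it imposes, with std::pair standing in for CodeGenInstruction:

```cpp
#include <map>
#include <string>
#include <utility>

// Mimics (isPseudo, name): pseudo instructions sort first, then by name,
// the same decision rule as CodeGenInstructionComparator above.
using FakeInst = std::pair<bool, std::string>;
struct FakeComparator {
  bool operator()(const FakeInst &L, const FakeInst &R) const {
    if (L.first != R.first)
      return L.first; // a pseudo (true) orders before a real instruction
    return L.second < R.second;
  }
};
using FakeFoldTable = std::map<FakeInst, int, FakeComparator>;
```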
@@ -169,14 +155,14 @@
   // Decides to which table to add the entry with the given instructions.
   // S sets the strategy of adding the TB_NO_REVERSE flag.
   void updateTables(const CodeGenInstruction *RegInstr,
-                    const CodeGenInstruction *MemInstr,
-                    const UnfoldStrategy S = NO_STRATEGY);
+                    const CodeGenInstruction *MemInstr, const uint16_t S = 0,
+                    bool IsManual = false);
 
   // Generates X86FoldTableEntry with the given instructions and fill it with
   // the appropriate flags - then adds it to Table.
   void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInstr,
-                         const CodeGenInstruction *MemInstr,
-                         const UnfoldStrategy S, const unsigned int FoldedInd);
+                         const CodeGenInstruction *MemInstr, const uint16_t S,
+                         const unsigned int FoldedInd, bool isManual);
 
   // Print the given table as a static const C++ array of type
   // X86MemoryFoldTableEntry.
@@ -185,8 +171,8 @@
     OS << "static const X86MemoryFoldTableEntry MemoryFold" << TableName
        << "[] = {\n";
 
-    for (const X86FoldTableEntry &E : Table)
-      E.print(OS);
+    for (auto &E : Table)
+      E.second.print(OS);
 
     OS << "};\n\n";
   }
@@ -391,13 +377,25 @@
 void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
                                              const CodeGenInstruction *RegInstr,
                                              const CodeGenInstruction *MemInstr,
-                                             const UnfoldStrategy S,
-                                             const unsigned int FoldedInd) {
+                                             const uint16_t S,
+                                             const unsigned int FoldedInd,
+                                             bool isManual) {
 
   X86FoldTableEntry Result = X86FoldTableEntry(RegInstr, MemInstr);
   Record *RegRec = RegInstr->TheDef;
   Record *MemRec = MemInstr->TheDef;
 
+  if (isManual) {
+    Result.CannotUnfold = (S & TB_NO_REVERSE) != 0;
+    Result.CannotFold = (S & TB_NO_FORWARD) != 0;
+    Result.IsLoad = (S & TB_FOLDED_LOAD) != 0;
+    Result.IsStore = (S & TB_FOLDED_STORE) != 0;
+    Result.IsAligned = (S & TB_ALIGN_MASK) != 0;
+    Result.Alignment =
+        Result.IsAligned ? 1 << (((S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT) - 1) : 0;
+    Table[RegInstr] = Result;
+    return;
+  }
+
   // Only table0 entries should explicitly specify a load or store flag.
   if (&Table == &Table0) {
     unsigned MemInOpsNum = MemRec->getValueAsDag("InOperandList")->getNumArgs();
@@ -420,12 +418,26 @@
   // the register in the register form instruction.
   // If the register's size is greater than the memory's operand size, do not
   // allow unfolding.
-  if (S == UNFOLD)
-    Result.CannotUnfold = false;
-  else if (S == NO_UNFOLD)
+
+  // The unfolded load size will be based on the register size. If that's
+  // bigger than the memory operand size, the unfolded load will read more
+  // memory and potentially cause a memory fault.
+  if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
+    Result.CannotUnfold = true;
+
+  // Check the unmasked (no-k/kz) version's isMoveReg.
+  if (RegRec->getName().ends_with("rkz")) {
+    const CodeGenInstruction *BaseRegInst =
+        &Target.getInstruction(Records.getDef(
+            RegRec->getName().substr(0, RegRec->getName().size() - 2)));
+    Result.CannotUnfold = BaseRegInst->isMoveReg ? true : Result.CannotUnfold;
+  } else if (RegRec->getName().ends_with("rk")) {
+    const CodeGenInstruction *BaseRegInst =
+        &Target.getInstruction(Records.getDef(
+            RegRec->getName().substr(0, RegRec->getName().size() - 1)));
+    Result.CannotUnfold = BaseRegInst->isMoveReg ? true : Result.CannotUnfold;
+  } else if (RegInstr->isMoveReg && Result.IsStore)
     Result.CannotUnfold = true;
-  else if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
-    Result.CannotUnfold = true; // S == NO_STRATEGY
 
   uint64_t Enc = getValueFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits"));
   if (isExplicitAlign(RegInstr)) {
@@ -443,13 +455,19 @@
       Result.Alignment = 16;
     }
   }
+  // Expand is only ever created as a masked instruction. It is not safe to
+  // unfold a masked expand because we don't know if it came from an expand
+  // load intrinsic or from folding a plain load. If it came from an expand
+  // load intrinsic, unfolding to a plain load would read more elements and
+  // could trigger a fault.
+  if (RegRec->getName().contains("EXPAND"))
+    Result.CannotUnfold = true;
 
-  Table.push_back(Result);
+  Table[RegInstr] = Result;
 }
 
 void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
                                         const CodeGenInstruction *MemInstr,
-                                        const UnfoldStrategy S) {
+                                        const uint16_t S, bool IsManual) {
 
   Record *RegRec = RegInstr->TheDef;
   Record *MemRec = MemInstr->TheDef;
 
@@ -460,7 +478,7 @@
   // Instructions which Read-Modify-Write should be added to Table2Addr.
   if (MemOutSize != RegOutSize && MemInSize == RegInSize) {
-    addEntryWithFlags(Table2Addr, RegInstr, MemInstr, S, 0);
+    addEntryWithFlags(Table2Addr, RegInstr, MemInstr, S, 0, IsManual);
     return;
   }
 
@@ -477,19 +495,19 @@
       isMemoryOperand(MemOpRec)) {
     switch (i) {
     case 0:
-      addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0);
+      addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0, IsManual);
      return;
     case 1:
-      addEntryWithFlags(Table1, RegInstr, MemInstr, S, 1);
+      addEntryWithFlags(Table1, RegInstr, MemInstr, S, 1, IsManual);
      return;
     case 2:
-      addEntryWithFlags(Table2, RegInstr, MemInstr, S, 2);
+      addEntryWithFlags(Table2, RegInstr, MemInstr, S, 2, IsManual);
      return;
     case 3:
-      addEntryWithFlags(Table3, RegInstr, MemInstr, S, 3);
+      addEntryWithFlags(Table3, RegInstr, MemInstr, S, 3, IsManual);
      return;
     case 4:
-      addEntryWithFlags(Table4, RegInstr, MemInstr, S, 4);
+      addEntryWithFlags(Table4, RegInstr, MemInstr, S, 4, IsManual);
      return;
     }
   }
@@ -506,7 +524,7 @@
     Record *MemOpRec = MemInstr->Operands[RegOutSize - 1].Rec;
     if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) &&
         getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec))
-      addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0);
+      addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0, IsManual);
   }
 }
 
@@ -589,17 +607,9 @@
     Record *MemInstIter = Records.getDef(Entry.MemInstStr);
 
     updateTables(&(Target.getInstruction(RegInstIter)),
-                 &(Target.getInstruction(MemInstIter)), Entry.Strategy);
+                 &(Target.getInstruction(MemInstIter)), Entry.Strategy, true);
   }
 
-  // Sort the tables before printing.
-  llvm::sort(Table2Addr);
-  llvm::sort(Table0);
-  llvm::sort(Table1);
-  llvm::sort(Table2);
-  llvm::sort(Table3);
-  llvm::sort(Table4);
-
   // Print all tables.
   printTable(Table2Addr, "Table2Addr", OS);
   printTable(Table0, "Table0", OS);
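One non-obvious mechanism in the manual set that follows: ManualMapEntry::Strategy now carries raw TB_* flags, and print() above returns early when both CannotUnfold and CannotFold are set. An entry flagged TB_NO_REVERSE | TB_NO_FORWARD is therefore recorded, overriding whatever the generator would otherwise auto-create for that opcode pair, yet emitted into no table at all; that is how the Part2 entries below suppress unwanted auto-generated folds. For instance:

```cpp
// Claims the opcode pair so the generator's automatic match is replaced,
// but print() skips it, so neither direction ever reaches the output.
const ManualMapEntry Suppressed = {"UD1Lr", "UD1Lm",
                                   TB_NO_REVERSE | TB_NO_FORWARD};
```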
+ { "ADD16ri_DB", "ADD16mi", TB_NO_REVERSE }, + { "ADD16ri8_DB", "ADD16mi8", TB_NO_REVERSE }, + { "ADD16rr_DB", "ADD16mr", TB_NO_REVERSE }, + { "ADD32ri_DB", "ADD32mi", TB_NO_REVERSE }, + { "ADD32ri8_DB", "ADD32mi8", TB_NO_REVERSE }, + { "ADD32rr_DB", "ADD32mr", TB_NO_REVERSE }, + { "ADD64ri32_DB", "ADD64mi32", TB_NO_REVERSE }, + { "ADD64ri8_DB", "ADD64mi8", TB_NO_REVERSE }, + { "ADD64rr_DB", "ADD64mr", TB_NO_REVERSE }, + { "ADD8ri_DB", "ADD8mi", TB_NO_REVERSE }, + { "ADD8rr_DB", "ADD8mr", TB_NO_REVERSE }, + { "ADD16rr_DB", "ADD16rm", TB_NO_REVERSE }, + { "ADD32rr_DB", "ADD32rm", TB_NO_REVERSE }, + { "ADD64rr_DB", "ADD64rm", TB_NO_REVERSE }, + { "ADD8rr_DB", "ADD8rm", TB_NO_REVERSE }, + { "MMX_MOVD64from64rr", "MMX_MOVQ64mr", TB_FOLDED_STORE }, + { "MMX_MOVD64grr", "MMX_MOVD64mr", TB_FOLDED_STORE }, + { "MOV64toSDrr", "MOV64mr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "MOVDI2SSrr", "MOV32mr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "MOVPQIto64rr", "MOVPQI2QImr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "MOVSDto64rr", "MOVSDmr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "MOVSS2DIrr", "MOVSSmr", TB_FOLDED_STORE }, + { "MOVLHPSrr", "MOVHPSrm", TB_NO_REVERSE }, + { "PUSH16r", "PUSH16rmm", TB_FOLDED_LOAD }, + { "PUSH32r", "PUSH32rmm", TB_FOLDED_LOAD }, + { "PUSH64r", "PUSH64rmm", TB_FOLDED_LOAD }, + { "TAILJMPr", "TAILJMPm", TB_FOLDED_LOAD }, + { "TAILJMPr64", "TAILJMPm64", TB_FOLDED_LOAD }, + { "TAILJMPr64_REX", "TAILJMPm64_REX", TB_FOLDED_LOAD }, + { "TCRETURNri", "TCRETURNmi", TB_FOLDED_LOAD | TB_NO_FORWARD }, + { "TCRETURNri64", "TCRETURNmi64", TB_FOLDED_LOAD | TB_NO_FORWARD }, + { "VMOVLHPSZrr", "VMOVHPSZ128rm", TB_NO_REVERSE }, + { "VMOVLHPSrr", "VMOVHPSrm", TB_NO_REVERSE }, + { "VMOV64toSDZrr", "MOV64mr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "VMOV64toSDrr", "MOV64mr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "VMOVDI2SSZrr", "MOV32mr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "VMOVDI2SSrr", "MOV32mr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "VMOVPQIto64Zrr", "VMOVPQI2QIZmr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "VMOVPQIto64rr", "VMOVPQI2QImr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "VMOVSDto64Zrr", "VMOVSDZmr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "VMOVSDto64rr", "VMOVSDmr", TB_FOLDED_STORE | TB_NO_REVERSE }, + { "VMOVSS2DIZrr", "VMOVSSZmr", TB_FOLDED_STORE }, + { "VMOVSS2DIrr", "VMOVSSmr", TB_FOLDED_STORE }, + { "MMX_MOVD64to64rr", "MMX_MOVQ64rm", 0 }, + { "MOV64toPQIrr", "MOVQI2PQIrm", TB_NO_REVERSE }, + { "MOV64toSDrr", "MOVSDrm_alt", TB_NO_REVERSE }, + { "MOVDI2SSrr", "MOVSSrm_alt", 0 }, + { "VMOV64toPQIZrr", "VMOVQI2PQIZrm", TB_NO_REVERSE }, + { "VMOV64toPQIrr", "VMOVQI2PQIrm", TB_NO_REVERSE }, + { "VMOV64toSDZrr", "VMOVSDZrm_alt", TB_NO_REVERSE }, + { "VMOV64toSDrr", "VMOVSDrm_alt", TB_NO_REVERSE }, + { "VMOVDI2SSZrr", "VMOVSSZrm_alt", 0 }, + { "VMOVDI2SSrr", "VMOVSSrm_alt", 0 }, + { "MOVSDrr", "MOVLPDrm", TB_NO_REVERSE }, + { "VMOVSDZrr", "VMOVLPDZ128rm", TB_NO_REVERSE }, + { "VMOVSDrr", "VMOVLPDrm", TB_NO_REVERSE }, + + // Part2: These following records are for manually mapping instructions that + // have same opcode. + // INSERTPSrm has no count_s while INSERTPSrr has count_s. + // count_s is to indicate which element in dst vector is inserted. 
+ // if count_s!=0, we can't fold INSERTPSrr into INSERTPSrm + // + // the following folding can happen when count_s==0 + // load xmm0, m32 + // insertpsrr xmm1, xmm0, imm + // => + // insertpsrm xmm1, m32, imm + { "INSERTPSrr", "INSERTPSrm", TB_NO_REVERSE | TB_NO_FORWARD }, + { "UD1Lr", "UD1Lm", TB_NO_REVERSE | TB_NO_FORWARD }, + { "UD1Qr", "UD1Qm", TB_NO_REVERSE | TB_NO_FORWARD }, + { "UD1Wr", "UD1Wm", TB_NO_REVERSE | TB_NO_FORWARD } +}; +
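The count_s constraint described above is exactly the kind of check that has to stay in hand-written folding code, which is why the INSERTPS pair appears here only in suppressed form. A hedged sketch of the check itself, modeled on the comment rather than on any code in this patch (Intel's INSERTPS immediate keeps count_s in bits 7:6):

```cpp
#include <cstdint>

// The memory form always reads its 32-bit operand as source element 0, so a
// register INSERTPS can only be replaced by it when count_s is zero.
// Illustrative only; the real folding logic lives in X86InstrInfo.
static bool canFoldInsertPSLoad(uint8_t Imm) {
  unsigned CountS = (Imm >> 6) & 0x3;
  return CountS == 0;
}
```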