Index: include/llvm/CodeGen/MachineInstr.h =================================================================== --- include/llvm/CodeGen/MachineInstr.h +++ include/llvm/CodeGen/MachineInstr.h @@ -60,7 +60,7 @@ /// otherwise easily derivable from the IR text. /// enum CommentFlag { - ReloadReuse = 0x1 + ReloadReuse = 0x1 // higher bits are reserved for target dep comments. }; enum MIFlag { @@ -143,8 +143,8 @@ } /// Set a flag for the AsmPrinter. - void setAsmPrinterFlag(CommentFlag Flag) { - AsmPrinterFlags |= (uint8_t)Flag; + void setAsmPrinterFlag(uint8_t Flag) { + AsmPrinterFlags |= Flag; } /// Clear specific AsmPrinter flags. Index: include/llvm/MC/MCStreamer.h =================================================================== --- include/llvm/MC/MCStreamer.h +++ include/llvm/MC/MCStreamer.h @@ -262,7 +262,11 @@ /// /// If the comment includes embedded \n's, they will each get the comment /// prefix as appropriate. The added comment should not end with a \n. - virtual void AddComment(const Twine &T) {} + /// By default, each comment is terminated with an end of line, i.e. the + /// EOL param is set to true by default. If one prefers not to end the + /// comment with a new line then the EOL param should be passed + /// with a false value. + virtual void AddComment(const Twine &T, bool EOL = true) {} /// \brief Return a raw_ostream that comments can be written to. Unlike /// AddComment, you are required to terminate comments with \n if you use this Index: lib/MC/MCAsmStreamer.cpp =================================================================== --- lib/MC/MCAsmStreamer.cpp +++ lib/MC/MCAsmStreamer.cpp @@ -100,7 +100,7 @@ /// file if applicable as a QoI issue to make the output of the compiler /// more readable. This only affects the MCAsmStreamer, and only when /// verbose assembly output is enabled. 
- void AddComment(const Twine &T) override; + void AddComment(const Twine &T, bool EOL = true) override; /// AddEncodingComment - Add a comment showing the encoding of an instruction. void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &); @@ -301,12 +301,14 @@ /// file if applicable as a QoI issue to make the output of the compiler /// more readable. This only affects the MCAsmStreamer, and only when /// verbose assembly output is enabled. -void MCAsmStreamer::AddComment(const Twine &T) { +/// By default EOL is set to true so that each comment goes on its own line. +void MCAsmStreamer::AddComment(const Twine &T, bool EOL) { if (!IsVerboseAsm) return; T.toVector(CommentToEmit); - // Each comment goes on its own line. - CommentToEmit.push_back('\n'); + + if (EOL) + CommentToEmit.push_back('\n'); // Place comment in a new line. } void MCAsmStreamer::EmitCommentsAndEOL() { Index: lib/Target/X86/CMakeLists.txt =================================================================== --- lib/Target/X86/CMakeLists.txt +++ lib/Target/X86/CMakeLists.txt @@ -40,6 +40,7 @@ X86InterleavedAccess.cpp X86InstrFMA3Info.cpp X86InstrInfo.cpp + X86EvexToVex.cpp X86MCInstLower.cpp X86MachineFunctionInfo.cpp X86OptimizeLEAs.cpp Index: lib/Target/X86/InstPrinter/X86InstComments.h =================================================================== --- lib/Target/X86/InstPrinter/X86InstComments.h +++ lib/Target/X86/InstPrinter/X86InstComments.h @@ -16,6 +16,11 @@ #define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H namespace llvm { + + enum AsmComments { + AC_EVEX_2_VEX = 0x2 // For instr that was compressed from EVEX to VEX. 
+ }; + class MCInst; class raw_ostream; bool EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Index: lib/Target/X86/X86.h =================================================================== --- lib/Target/X86/X86.h +++ lib/Target/X86/X86.h @@ -87,6 +87,13 @@ FunctionPass *createX86FixupBWInsts(); void initializeFixupBWInstPassPass(PassRegistry &); + +/// This pass replaces EVEX encoded AVX-512 instructions by VEX +/// encoding when possible in order to reduce code size. +FunctionPass *createX86EvexToVexInsts(); + +void initializeEvexToVexInstPassPass(PassRegistry &); + } // End llvm namespace #endif Index: lib/Target/X86/X86EvexToVex.cpp =================================================================== --- lib/Target/X86/X86EvexToVex.cpp +++ lib/Target/X86/X86EvexToVex.cpp @@ -0,0 +1,213 @@ +//===----------------------- X86EvexToVex.cpp ----------------------------===// +// Compress EVEX instructions to VEX encoding when possible to reduce code size +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +/// \file +/// This file defines the pass that goes over all AVX-512 instructions which +/// are encoded using the EVEX prefix and if possible replaces them by their +/// corresponding VEX encoding which is usually shorter by 2 bytes. +/// EVEX instructions may be encoded via the VEX prefix when the AVX-512 +/// instruction has a corresponding AVX/AVX2 opcode and when it does not +/// use the zmm or the mask registers or xmm/ymm registers with indexes +/// higher than 15. +/// The pass applies code reduction on the generated code for AVX-512 instrs. 
+/// +//===---------------------------------------------------------------------===// + +#include "InstPrinter/X86InstComments.h" +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86InstrInfo.h" +#include "X86InstrTablesInfo.h" +#include "X86MachineFunctionInfo.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" + +using namespace llvm; + +#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible" +#define EVEX2VEX_NAME "x86-evex-to-vex-compress" + +#define DEBUG_TYPE EVEX2VEX_NAME + +namespace { + +class EvexToVexInstPass : public MachineFunctionPass { + + /// X86EvexToVexCompressTable - Evex to Vex encoding opcode map. + typedef DenseMap<unsigned, uint16_t> EvexToVexTableType; + EvexToVexTableType EvexToVex128Table; + EvexToVexTableType EvexToVex256Table; + + /// For EVEX instructions that can be encoded using VEX encoding, replace + /// them by the VEX encoding in order to reduce size. + bool CompressEvexToVexImpl(MachineInstr &MI) const; + + /// For initializing the hash map tables of all AVX-512 EVEX + /// corresponding to AVX/AVX2 opcodes. + void AddTableEntry(EvexToVexTableType &EvexToVexTable, uint16_t EvexOp, + uint16_t VexOp); + +public: + static char ID; + + StringRef getPassName() const override { return EVEX2VEX_DESC; } + + EvexToVexInstPass() : MachineFunctionPass(ID) { + initializeEvexToVexInstPassPass(*PassRegistry::getPassRegistry()); + + // Initialize the EVEX to VEX 128 table map. + for (const auto &Entry : X86EvexToVex128CompressTable) { + AddTableEntry(EvexToVex128Table, Entry.EvexOpcode, Entry.VexOpcode); + } + + // Initialize the EVEX to VEX 256 table map. + for (const auto &Entry : X86EvexToVex256CompressTable) { + AddTableEntry(EvexToVex256Table, Entry.EvexOpcode, Entry.VexOpcode); + } + } + + /// Loop over all of the basic blocks, replacing EVEX instructions + /// by equivalent VEX instructions when possible for reducing code size. 
+ bool runOnMachineFunction(MachineFunction &MF) override; + + // This pass runs after regalloc and doesn't support VReg operands. + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + /// Machine instruction info used throughout the class. + const X86InstrInfo *TII; +}; + +char EvexToVexInstPass::ID = 0; +} + +INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false) + +FunctionPass *llvm::createX86EvexToVexInsts() { + return new EvexToVexInstPass(); +} + +bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); + + const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); + if (!ST.hasAVX512()) + return false; + + bool Changed = false; + + /// Go over all basic blocks in function and replace + /// EVEX encoded instrs by VEX encoding when possible. + for (MachineBasicBlock &MBB : MF) { + + // Traverse the basic block. + for (MachineInstr &MI : MBB) + Changed |= CompressEvexToVexImpl(MI); + } + + return Changed; +} + +void EvexToVexInstPass::AddTableEntry(EvexToVexTableType &EvexToVexTable, + uint16_t EvexOp, uint16_t VexOp) { + EvexToVexTable[EvexOp] = VexOp; +} + +// For EVEX instructions that can be encoded using VEX encoding +// replace them by the VEX encoding in order to reduce size. +bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const { + + // VEX format. + // # of bytes: 0,2,3 1 1 0,1 0,1,2,4 0,1 + // [Prefixes] [VEX] OPCODE ModR/M [SIB] [DISP] [IMM] + // + // EVEX format. + // # of bytes: 4 1 1 1 4 / 1 1 + // [Prefixes] EVEX Opcode ModR/M [SIB] [Disp32] / [Disp8*N] [Immediate] + + const MCInstrDesc &Desc = MI.getDesc(); + + // Check for EVEX instructions only. 
+ if ((Desc.TSFlags & X86II::EncodingMask) != X86II::EVEX) + return false; + + // Check for EVEX instructions with mask or broadcast as in these cases + // the EVEX prefix is needed in order to carry this information + // thus preventing the transformation to VEX encoding. + if (Desc.TSFlags & (X86II::EVEX_K | X86II::EVEX_B)) + return false; + + // Check for non EVEX_V512 instrs only. + // EVEX_V512 instr: bit EVEX_L2 = 1; bit VEX_L = 0. + if ((Desc.TSFlags & X86II::EVEX_L2) && !(Desc.TSFlags & X86II::VEX_L)) + return false; + + // EVEX_V128 instr: bit EVEX_L2 = 0, bit VEX_L = 0. + bool IsEVEX_V128 = + (!(Desc.TSFlags & X86II::EVEX_L2) && !(Desc.TSFlags & X86II::VEX_L)); + + // EVEX_V256 instr: bit EVEX_L2 = 0, bit VEX_L = 1. + bool IsEVEX_V256 = + (!(Desc.TSFlags & X86II::EVEX_L2) && (Desc.TSFlags & X86II::VEX_L)); + + unsigned NewOpc = 0; + + // Check for EVEX_V256 instructions. + if (IsEVEX_V256) { + // Search for opcode in the EvexToVex256 table. + auto It = EvexToVex256Table.find(MI.getOpcode()); + if (It != EvexToVex256Table.end()) + NewOpc = It->second; + } + + // Check for EVEX_V128 or Scalar instructions. + else if (IsEVEX_V128) { + // Search for opcode in the EvexToVex128 table. + auto It = EvexToVex128Table.find(MI.getOpcode()); + if (It != EvexToVex128Table.end()) + NewOpc = It->second; + } + + if (!NewOpc) + return false; + + auto isHiRegIdx = [](unsigned Reg) { + // Check for XMM register with indexes between 16 - 31. + if (Reg >= X86::XMM16 && Reg <= X86::XMM31) + return true; + + // Check for YMM register with indexes between 16 - 31. + if (Reg >= X86::YMM16 && Reg <= X86::YMM31) + return true; + + return false; + }; + + // Check that operands are not ZMM regs or + // XMM/YMM regs with hi indexes between 16 - 31. 
+ for (const MachineOperand &MO : MI.explicit_operands()) { + if (!MO.isReg()) + continue; + + unsigned Reg = MO.getReg(); + + assert(!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31) && "Unexpected ZMM reg"); + + if (isHiRegIdx(Reg)) + return false; + } + + const MCInstrDesc &MCID = TII->get(NewOpc); + MI.setDesc(MCID); + MI.setAsmPrinterFlag(AC_EVEX_2_VEX); + return true; +} Index: lib/Target/X86/X86InstrTablesInfo.h =================================================================== --- lib/Target/X86/X86InstrTablesInfo.h +++ lib/Target/X86/X86InstrTablesInfo.h @@ -0,0 +1,1148 @@ +//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains related X86 Instruction Information Tables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_X86_X86INSTRTABLESINFO_H +#define LLVM_LIB_TARGET_X86_X86INSTRTABLESINFO_H + +using namespace llvm; + +struct X86EvexToVexCompressTableEntry { + uint16_t EvexOpcode; + uint16_t VexOpcode; +}; + + + +// X86 EVEX encoded instructions that have a VEX 128 encoding +// (table format: <EVEX opcode, VEX-128 opcode>). +static const X86EvexToVexCompressTableEntry + X86EvexToVex128CompressTable[] = { + // EVEX scalar with corresponding VEX. 
+ { X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm }, + { X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr }, + { X86::Int_VCOMISSZrm , X86::Int_VCOMISSrm }, + { X86::Int_VCOMISSZrr , X86::Int_VCOMISSrr }, + { X86::Int_VUCOMISDZrm , X86::Int_VUCOMISDrm }, + { X86::Int_VUCOMISDZrr , X86::Int_VUCOMISDrr }, + { X86::Int_VUCOMISSZrm , X86::Int_VUCOMISSrm }, + { X86::Int_VUCOMISSZrr , X86::Int_VUCOMISSrr }, + { X86::VADDSDZrm , X86::VADDSDrm }, + { X86::VADDSDZrm_Int , X86::VADDSDrm_Int }, + { X86::VADDSDZrr , X86::VADDSDrr }, + { X86::VADDSDZrr_Int , X86::VADDSDrr_Int }, + { X86::VADDSSZrm , X86::VADDSSrm }, + { X86::VADDSSZrm_Int , X86::VADDSSrm_Int }, + { X86::VADDSSZrr , X86::VADDSSrr }, + { X86::VADDSSZrr_Int , X86::VADDSSrr_Int }, + { X86::VCOMISDZrm , X86::VCOMISDrm }, + { X86::VCOMISDZrr , X86::VCOMISDrr }, + { X86::VCOMISSZrm , X86::VCOMISSrm }, + { X86::VCOMISSZrr , X86::VCOMISSrr }, + { X86::VCVTSD2SI64Zrm , X86::VCVTSD2SI64rm }, + { X86::VCVTSD2SI64Zrr , X86::VCVTSD2SI64rr }, + { X86::VCVTSD2SIZrm , X86::VCVTSD2SIrm }, + { X86::VCVTSD2SIZrr , X86::VCVTSD2SIrr }, + { X86::VCVTSD2SSZrm , X86::VCVTSD2SSrm }, + { X86::VCVTSD2SSZrr , X86::VCVTSD2SSrr }, + { X86::VCVTSI2SDZrm , X86::VCVTSI2SDrm }, + { X86::VCVTSI2SDZrm_Int , X86::Int_VCVTSI2SDrm }, + { X86::VCVTSI2SDZrr , X86::VCVTSI2SDrr }, + { X86::VCVTSI2SDZrr_Int , X86::Int_VCVTSI2SDrr }, + { X86::VCVTSI2SSZrm , X86::VCVTSI2SSrm }, + { X86::VCVTSI2SSZrm_Int , X86::Int_VCVTSI2SSrm }, + { X86::VCVTSI2SSZrr , X86::VCVTSI2SSrr }, + { X86::VCVTSI2SSZrr_Int , X86::Int_VCVTSI2SSrr }, + { X86::VCVTSS2SDZrm , X86::VCVTSS2SDrm }, + { X86::VCVTSS2SDZrr , X86::VCVTSS2SDrr }, + { X86::VCVTSS2SI64Zrm , X86::VCVTSS2SI64rm }, + { X86::VCVTSS2SI64Zrr , X86::VCVTSS2SI64rr }, + { X86::VCVTSS2SIZrm , X86::VCVTSS2SIrm }, + { X86::VCVTSS2SIZrr , X86::VCVTSS2SIrr }, + { X86::VCVTTSD2SI64Zrm , X86::VCVTTSD2SI64rm }, + { X86::VCVTTSD2SI64Zrm_Int , X86::Int_VCVTTSD2SI64rm }, + { X86::VCVTTSD2SI64Zrr , X86::VCVTTSD2SI64rr }, + { 
X86::VCVTTSD2SI64Zrr_Int , X86::Int_VCVTTSD2SI64rr }, + { X86::VCVTTSD2SIZrm , X86::VCVTTSD2SIrm }, + { X86::VCVTTSD2SIZrm_Int , X86::Int_VCVTTSD2SIrm }, + { X86::VCVTTSD2SIZrr , X86::VCVTTSD2SIrr }, + { X86::VCVTTSD2SIZrr_Int , X86::Int_VCVTTSD2SIrr }, + { X86::VCVTTSS2SI64Zrm , X86::VCVTTSS2SI64rm }, + { X86::VCVTTSS2SI64Zrm_Int , X86::Int_VCVTTSS2SI64rm }, + { X86::VCVTTSS2SI64Zrr , X86::VCVTTSS2SI64rr }, + { X86::VCVTTSS2SI64Zrr_Int , X86::Int_VCVTTSS2SI64rr }, + { X86::VCVTTSS2SIZrm , X86::VCVTTSS2SIrm }, + { X86::VCVTTSS2SIZrm_Int , X86::Int_VCVTTSS2SIrm }, + { X86::VCVTTSS2SIZrr , X86::VCVTTSS2SIrr }, + { X86::VCVTTSS2SIZrr_Int , X86::Int_VCVTTSS2SIrr }, + { X86::VDIVSDZrm , X86::VDIVSDrm }, + { X86::VDIVSDZrm_Int , X86::VDIVSDrm_Int }, + { X86::VDIVSDZrr , X86::VDIVSDrr }, + { X86::VDIVSDZrr_Int , X86::VDIVSDrr_Int }, + { X86::VDIVSSZrm , X86::VDIVSSrm }, + { X86::VDIVSSZrm_Int , X86::VDIVSSrm_Int }, + { X86::VDIVSSZrr , X86::VDIVSSrr }, + { X86::VDIVSSZrr_Int , X86::VDIVSSrr_Int }, + { X86::VFMADD132SDZm , X86::VFMADD132SDm }, + { X86::VFMADD132SDZm_Int , X86::VFMADD132SDm_Int }, + { X86::VFMADD132SDZr , X86::VFMADD132SDr }, + { X86::VFMADD132SDZr_Int , X86::VFMADD132SDr_Int }, + { X86::VFMADD132SSZm , X86::VFMADD132SSm }, + { X86::VFMADD132SSZm_Int , X86::VFMADD132SSm_Int }, + { X86::VFMADD132SSZr , X86::VFMADD132SSr }, + { X86::VFMADD132SSZr_Int , X86::VFMADD132SSr_Int }, + { X86::VFMADD213SDZm , X86::VFMADD213SDm }, + { X86::VFMADD213SDZm_Int , X86::VFMADD213SDm_Int }, + { X86::VFMADD213SDZr , X86::VFMADD213SDr }, + { X86::VFMADD213SDZr_Int , X86::VFMADD213SDr_Int }, + { X86::VFMADD213SSZm , X86::VFMADD213SSm }, + { X86::VFMADD213SSZm_Int , X86::VFMADD213SSm_Int }, + { X86::VFMADD213SSZr , X86::VFMADD213SSr }, + { X86::VFMADD213SSZr_Int , X86::VFMADD213SSr_Int }, + { X86::VFMADD231SDZm , X86::VFMADD231SDm }, + { X86::VFMADD231SDZm_Int , X86::VFMADD231SDm_Int }, + { X86::VFMADD231SDZr , X86::VFMADD231SDr }, + { X86::VFMADD231SDZr_Int , 
X86::VFMADD231SDr_Int }, + { X86::VFMADD231SSZm , X86::VFMADD231SSm }, + { X86::VFMADD231SSZm_Int , X86::VFMADD231SSm_Int }, + { X86::VFMADD231SSZr , X86::VFMADD231SSr }, + { X86::VFMADD231SSZr_Int , X86::VFMADD231SSr_Int }, + { X86::VFMSUB132SDZm , X86::VFMSUB132SDm }, + { X86::VFMSUB132SDZm_Int , X86::VFMSUB132SDm_Int }, + { X86::VFMSUB132SDZr , X86::VFMSUB132SDr }, + { X86::VFMSUB132SDZr_Int , X86::VFMSUB132SDr_Int }, + { X86::VFMSUB132SSZm , X86::VFMSUB132SSm }, + { X86::VFMSUB132SSZm_Int , X86::VFMSUB132SSm_Int }, + { X86::VFMSUB132SSZr , X86::VFMSUB132SSr }, + { X86::VFMSUB132SSZr_Int , X86::VFMSUB132SSr_Int }, + { X86::VFMSUB213SDZm , X86::VFMSUB213SDm }, + { X86::VFMSUB213SDZm_Int , X86::VFMSUB213SDm_Int }, + { X86::VFMSUB213SDZr , X86::VFMSUB213SDr }, + { X86::VFMSUB213SDZr_Int , X86::VFMSUB213SDr_Int }, + { X86::VFMSUB213SSZm , X86::VFMSUB213SSm }, + { X86::VFMSUB213SSZm_Int , X86::VFMSUB213SSm_Int }, + { X86::VFMSUB213SSZr , X86::VFMSUB213SSr }, + { X86::VFMSUB213SSZr_Int , X86::VFMSUB213SSr_Int }, + { X86::VFMSUB231SDZm , X86::VFMSUB231SDm }, + { X86::VFMSUB231SDZm_Int , X86::VFMSUB231SDm_Int }, + { X86::VFMSUB231SDZr , X86::VFMSUB231SDr }, + { X86::VFMSUB231SDZr_Int , X86::VFMSUB231SDr_Int }, + { X86::VFMSUB231SSZm , X86::VFMSUB231SSm }, + { X86::VFMSUB231SSZm_Int , X86::VFMSUB231SSm_Int }, + { X86::VFMSUB231SSZr , X86::VFMSUB231SSr }, + { X86::VFMSUB231SSZr_Int , X86::VFMSUB231SSr_Int }, + { X86::VFNMADD132SDZm , X86::VFNMADD132SDm }, + { X86::VFNMADD132SDZm_Int , X86::VFNMADD132SDm_Int }, + { X86::VFNMADD132SDZr , X86::VFNMADD132SDr }, + { X86::VFNMADD132SDZr_Int , X86::VFNMADD132SDr_Int }, + { X86::VFNMADD132SSZm , X86::VFNMADD132SSm }, + { X86::VFNMADD132SSZm_Int , X86::VFNMADD132SSm_Int }, + { X86::VFNMADD132SSZr , X86::VFNMADD132SSr }, + { X86::VFNMADD132SSZr_Int , X86::VFNMADD132SSr_Int }, + { X86::VFNMADD213SDZm , X86::VFNMADD213SDm }, + { X86::VFNMADD213SDZm_Int , X86::VFNMADD213SDm_Int }, + { X86::VFNMADD213SDZr , X86::VFNMADD213SDr }, + { 
X86::VFNMADD213SDZr_Int , X86::VFNMADD213SDr_Int }, + { X86::VFNMADD213SSZm , X86::VFNMADD213SSm }, + { X86::VFNMADD213SSZm_Int , X86::VFNMADD213SSm_Int }, + { X86::VFNMADD213SSZr , X86::VFNMADD213SSr }, + { X86::VFNMADD213SSZr_Int , X86::VFNMADD213SSr_Int }, + { X86::VFNMADD231SDZm , X86::VFNMADD231SDm }, + { X86::VFNMADD231SDZm_Int , X86::VFNMADD231SDm_Int }, + { X86::VFNMADD231SDZr , X86::VFNMADD231SDr }, + { X86::VFNMADD231SDZr_Int , X86::VFNMADD231SDr_Int }, + { X86::VFNMADD231SSZm , X86::VFNMADD231SSm }, + { X86::VFNMADD231SSZm_Int , X86::VFNMADD231SSm_Int }, + { X86::VFNMADD231SSZr , X86::VFNMADD231SSr }, + { X86::VFNMADD231SSZr_Int , X86::VFNMADD231SSr_Int }, + { X86::VFNMSUB132SDZm , X86::VFNMSUB132SDm }, + { X86::VFNMSUB132SDZm_Int , X86::VFNMSUB132SDm_Int }, + { X86::VFNMSUB132SDZr , X86::VFNMSUB132SDr }, + { X86::VFNMSUB132SDZr_Int , X86::VFNMSUB132SDr_Int }, + { X86::VFNMSUB132SSZm , X86::VFNMSUB132SSm }, + { X86::VFNMSUB132SSZm_Int , X86::VFNMSUB132SSm_Int }, + { X86::VFNMSUB132SSZr , X86::VFNMSUB132SSr }, + { X86::VFNMSUB132SSZr_Int , X86::VFNMSUB132SSr_Int }, + { X86::VFNMSUB213SDZm , X86::VFNMSUB213SDm }, + { X86::VFNMSUB213SDZm_Int , X86::VFNMSUB213SDm_Int }, + { X86::VFNMSUB213SDZr , X86::VFNMSUB213SDr }, + { X86::VFNMSUB213SDZr_Int , X86::VFNMSUB213SDr_Int }, + { X86::VFNMSUB213SSZm , X86::VFNMSUB213SSm }, + { X86::VFNMSUB213SSZm_Int , X86::VFNMSUB213SSm_Int }, + { X86::VFNMSUB213SSZr , X86::VFNMSUB213SSr }, + { X86::VFNMSUB213SSZr_Int , X86::VFNMSUB213SSr_Int }, + { X86::VFNMSUB231SDZm , X86::VFNMSUB231SDm }, + { X86::VFNMSUB231SDZm_Int , X86::VFNMSUB231SDm_Int }, + { X86::VFNMSUB231SDZr , X86::VFNMSUB231SDr }, + { X86::VFNMSUB231SDZr_Int , X86::VFNMSUB231SDr_Int }, + { X86::VFNMSUB231SSZm , X86::VFNMSUB231SSm }, + { X86::VFNMSUB231SSZm_Int , X86::VFNMSUB231SSm_Int }, + { X86::VFNMSUB231SSZr , X86::VFNMSUB231SSr }, + { X86::VFNMSUB231SSZr_Int , X86::VFNMSUB231SSr_Int }, + { X86::VMAXCSDZrm , X86::VMAXCSDrm }, + { X86::VMAXCSDZrr , 
X86::VMAXCSDrr }, + { X86::VMAXCSSZrm , X86::VMAXCSSrm }, + { X86::VMAXCSSZrr , X86::VMAXCSSrr }, + { X86::VMAXSDZrm , X86::VMAXSDrm }, + { X86::VMAXSDZrm_Int , X86::VMAXSDrm_Int }, + { X86::VMAXSDZrr , X86::VMAXSDrr }, + { X86::VMAXSDZrr_Int , X86::VMAXSDrr_Int }, + { X86::VMAXSSZrm , X86::VMAXSSrm }, + { X86::VMAXSSZrm_Int , X86::VMAXSSrm_Int }, + { X86::VMAXSSZrr , X86::VMAXSSrr }, + { X86::VMAXSSZrr_Int , X86::VMAXSSrr_Int }, + { X86::VMINCSDZrm , X86::VMINCSDrm }, + { X86::VMINCSDZrr , X86::VMINCSDrr }, + { X86::VMINCSSZrm , X86::VMINCSSrm }, + { X86::VMINCSSZrr , X86::VMINCSSrr }, + { X86::VMINSDZrm , X86::VMINSDrm }, + { X86::VMINSDZrm_Int , X86::VMINSDrm_Int }, + { X86::VMINSDZrr , X86::VMINSDrr }, + { X86::VMINSDZrr_Int , X86::VMINSDrr_Int }, + { X86::VMINSSZrm , X86::VMINSSrm }, + { X86::VMINSSZrm_Int , X86::VMINSSrm_Int }, + { X86::VMINSSZrr , X86::VMINSSrr }, + { X86::VMINSSZrr_Int , X86::VMINSSrr_Int }, + { X86::VMOV64toSDZrr , X86::VMOV64toSDrr }, + { X86::VMOVDI2SSZrm , X86::VMOVDI2SSrm }, + { X86::VMOVDI2SSZrr , X86::VMOVDI2SSrr }, + { X86::VMOVSDZmr , X86::VMOVSDmr }, + { X86::VMOVSDZrm , X86::VMOVSDrm }, + { X86::VMOVSDZrr , X86::VMOVSDrr }, + { X86::VMOVSSZmr , X86::VMOVSSmr }, + { X86::VMOVSSZrm , X86::VMOVSSrm }, + { X86::VMOVSSZrr , X86::VMOVSSrr }, + { X86::VMOVSSZrr_REV , X86::VMOVSSrr_REV }, + { X86::VMULSDZrm , X86::VMULSDrm }, + { X86::VMULSDZrm_Int , X86::VMULSDrm_Int }, + { X86::VMULSDZrr , X86::VMULSDrr }, + { X86::VMULSDZrr_Int , X86::VMULSDrr_Int }, + { X86::VMULSSZrm , X86::VMULSSrm }, + { X86::VMULSSZrm_Int , X86::VMULSSrm_Int }, + { X86::VMULSSZrr , X86::VMULSSrr }, + { X86::VMULSSZrr_Int , X86::VMULSSrr_Int }, + { X86::VSQRTSDZm , X86::VSQRTSDm }, + { X86::VSQRTSDZm_Int , X86::VSQRTSDm_Int }, + { X86::VSQRTSDZr , X86::VSQRTSDr }, + { X86::VSQRTSDZr_Int , X86::VSQRTSDr_Int }, + { X86::VSQRTSSZm , X86::VSQRTSSm }, + { X86::VSQRTSSZm_Int , X86::VSQRTSSm_Int }, + { X86::VSQRTSSZr , X86::VSQRTSSr }, + { X86::VSQRTSSZr_Int , 
X86::VSQRTSSr_Int }, + { X86::VSUBSDZrm , X86::VSUBSDrm }, + { X86::VSUBSDZrm_Int , X86::VSUBSDrm_Int }, + { X86::VSUBSDZrr , X86::VSUBSDrr }, + { X86::VSUBSDZrr_Int , X86::VSUBSDrr_Int }, + { X86::VSUBSSZrm , X86::VSUBSSrm }, + { X86::VSUBSSZrm_Int , X86::VSUBSSrm_Int }, + { X86::VSUBSSZrr , X86::VSUBSSrr }, + { X86::VSUBSSZrr_Int , X86::VSUBSSrr_Int }, + { X86::VUCOMISDZrm , X86::VUCOMISDrm }, + { X86::VUCOMISDZrr , X86::VUCOMISDrr }, + { X86::VUCOMISSZrm , X86::VUCOMISSrm }, + { X86::VUCOMISSZrr , X86::VUCOMISSrr }, + + { X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr }, + { X86::VMOV64toSDZrr , X86::VMOV64toSDrr }, + { X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm }, + { X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr }, + { X86::VMOVLHPSZrr , X86::VMOVLHPSrr }, + { X86::VMOVHLPSZrr , X86::VMOVHLPSrr }, + { X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr }, + { X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr }, + { X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr }, + { X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr }, + { X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm }, + { X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr }, + + { X86::VPEXTRBZmr , X86::VPEXTRBmr }, + { X86::VPEXTRBZrr , X86::VPEXTRBrr }, + { X86::VPEXTRDZmr , X86::VPEXTRDmr }, + { X86::VPEXTRDZrr , X86::VPEXTRDrr }, + { X86::VPEXTRQZmr , X86::VPEXTRQmr }, + { X86::VPEXTRQZrr , X86::VPEXTRQrr }, + { X86::VPEXTRWZmr , X86::VPEXTRWmr }, + { X86::VPEXTRWZrr , X86::VPEXTRWri }, + + { X86::VPINSRBZrm , X86::VPINSRBrm }, + { X86::VPINSRBZrr , X86::VPINSRBrr }, + { X86::VPINSRDZrm , X86::VPINSRDrm }, + { X86::VPINSRDZrr , X86::VPINSRDrr }, + { X86::VPINSRQZrm , X86::VPINSRQrm }, + { X86::VPINSRQZrr , X86::VPINSRQrr }, + { X86::VPINSRWZrm , X86::VPINSRWrmi }, + { X86::VPINSRWZrr , X86::VPINSRWrri }, + + // EVEX 128 with corresponding VEX. 
+ { X86::VADDPDZ128rm , X86::VADDPDrm }, + { X86::VADDPDZ128rr , X86::VADDPDrr }, + { X86::VADDPSZ128rm , X86::VADDPSrm }, + { X86::VADDPSZ128rr , X86::VADDPSrr }, + { X86::VANDNPDZ128rm , X86::VANDNPDrm }, + { X86::VANDNPDZ128rr , X86::VANDNPDrr }, + { X86::VANDNPSZ128rm , X86::VANDNPSrm }, + { X86::VANDNPSZ128rr , X86::VANDNPSrr }, + { X86::VANDPDZ128rm , X86::VANDPDrm }, + { X86::VANDPDZ128rr , X86::VANDPDrr }, + { X86::VANDPSZ128rm , X86::VANDPSrm }, + { X86::VANDPSZ128rr , X86::VANDPSrr }, + { X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm }, + { X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr }, + { X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr }, + { X86::VCVTDQ2PDZ128rm , X86::VCVTDQ2PDrm }, + { X86::VCVTDQ2PDZ128rr , X86::VCVTDQ2PDrr }, + { X86::VCVTDQ2PSZ128rm , X86::VCVTDQ2PSrm }, + { X86::VCVTDQ2PSZ128rr , X86::VCVTDQ2PSrr }, + { X86::VCVTPD2DQZ128rm , X86::VCVTPD2DQrm }, + { X86::VCVTPD2DQZ128rr , X86::VCVTPD2DQrr }, + { X86::VCVTPD2PSZ128rm , X86::VCVTPD2PSrm }, + { X86::VCVTPD2PSZ128rr , X86::VCVTPD2PSrr }, + { X86::VCVTPH2PSZ128rm , X86::VCVTPH2PSrm }, + { X86::VCVTPH2PSZ128rr , X86::VCVTPH2PSrr }, + { X86::VCVTPS2DQZ128rm , X86::VCVTPS2DQrm }, + { X86::VCVTPS2DQZ128rr , X86::VCVTPS2DQrr }, + { X86::VCVTPS2PDZ128rm , X86::VCVTPS2PDrm }, + { X86::VCVTPS2PDZ128rr , X86::VCVTPS2PDrr }, + { X86::VCVTPS2PHZ128mr , X86::VCVTPS2PHmr }, + { X86::VCVTPS2PHZ128rr , X86::VCVTPS2PHrr }, + { X86::VCVTTPD2DQZ128rm , X86::VCVTTPD2DQrm }, + { X86::VCVTTPD2DQZ128rr , X86::VCVTTPD2DQrr }, + { X86::VCVTTPS2DQZ128rm , X86::VCVTTPS2DQrm }, + { X86::VCVTTPS2DQZ128rr , X86::VCVTTPS2DQrr }, + { X86::VDIVPDZ128rm , X86::VDIVPDrm }, + { X86::VDIVPDZ128rr , X86::VDIVPDrr }, + { X86::VDIVPSZ128rm , X86::VDIVPSrm }, + { X86::VDIVPSZ128rr , X86::VDIVPSrr }, + { X86::VFMADD132PDZ128m , X86::VFMADD132PDm }, + { X86::VFMADD132PDZ128r , X86::VFMADD132PDr }, + { X86::VFMADD132PSZ128m , X86::VFMADD132PSm }, + { X86::VFMADD132PSZ128r , X86::VFMADD132PSr }, + { X86::VFMADD213PDZ128m , 
X86::VFMADD213PDm }, + { X86::VFMADD213PDZ128r , X86::VFMADD213PDr }, + { X86::VFMADD213PSZ128m , X86::VFMADD213PSm }, + { X86::VFMADD213PSZ128r , X86::VFMADD213PSr }, + { X86::VFMADD231PDZ128m , X86::VFMADD231PDm }, + { X86::VFMADD231PDZ128r , X86::VFMADD231PDr }, + { X86::VFMADD231PSZ128m , X86::VFMADD231PSm }, + { X86::VFMADD231PSZ128r , X86::VFMADD231PSr }, + { X86::VFMADDSUB132PDZ128m , X86::VFMADDSUB132PDm }, + { X86::VFMADDSUB132PDZ128r , X86::VFMADDSUB132PDr }, + { X86::VFMADDSUB132PSZ128m , X86::VFMADDSUB132PSm }, + { X86::VFMADDSUB132PSZ128r , X86::VFMADDSUB132PSr }, + { X86::VFMADDSUB213PDZ128m , X86::VFMADDSUB213PDm }, + { X86::VFMADDSUB213PDZ128r , X86::VFMADDSUB213PDr }, + { X86::VFMADDSUB213PSZ128m , X86::VFMADDSUB213PSm }, + { X86::VFMADDSUB213PSZ128r , X86::VFMADDSUB213PSr }, + { X86::VFMADDSUB231PDZ128m , X86::VFMADDSUB231PDm }, + { X86::VFMADDSUB231PDZ128r , X86::VFMADDSUB231PDr }, + { X86::VFMADDSUB231PSZ128m , X86::VFMADDSUB231PSm }, + { X86::VFMADDSUB231PSZ128r , X86::VFMADDSUB231PSr }, + { X86::VFMSUB132PDZ128m , X86::VFMSUB132PDm }, + { X86::VFMSUB132PDZ128r , X86::VFMSUB132PDr }, + { X86::VFMSUB132PSZ128m , X86::VFMSUB132PSm }, + { X86::VFMSUB132PSZ128r , X86::VFMSUB132PSr }, + { X86::VFMSUB213PDZ128m , X86::VFMSUB213PDm }, + { X86::VFMSUB213PDZ128r , X86::VFMSUB213PDr }, + { X86::VFMSUB213PSZ128m , X86::VFMSUB213PSm }, + { X86::VFMSUB213PSZ128r , X86::VFMSUB213PSr }, + { X86::VFMSUB231PDZ128m , X86::VFMSUB231PDm }, + { X86::VFMSUB231PDZ128r , X86::VFMSUB231PDr }, + { X86::VFMSUB231PSZ128m , X86::VFMSUB231PSm }, + { X86::VFMSUB231PSZ128r , X86::VFMSUB231PSr }, + { X86::VFMSUBADD132PDZ128m , X86::VFMSUBADD132PDm }, + { X86::VFMSUBADD132PDZ128r , X86::VFMSUBADD132PDr }, + { X86::VFMSUBADD132PSZ128m , X86::VFMSUBADD132PSm }, + { X86::VFMSUBADD132PSZ128r , X86::VFMSUBADD132PSr }, + { X86::VFMSUBADD213PDZ128m , X86::VFMSUBADD213PDm }, + { X86::VFMSUBADD213PDZ128r , X86::VFMSUBADD213PDr }, + { X86::VFMSUBADD213PSZ128m , X86::VFMSUBADD213PSm }, + 
{ X86::VFMSUBADD213PSZ128r , X86::VFMSUBADD213PSr }, + { X86::VFMSUBADD231PDZ128m , X86::VFMSUBADD231PDm }, + { X86::VFMSUBADD231PDZ128r , X86::VFMSUBADD231PDr }, + { X86::VFMSUBADD231PSZ128m , X86::VFMSUBADD231PSm }, + { X86::VFMSUBADD231PSZ128r , X86::VFMSUBADD231PSr }, + { X86::VFNMADD132PDZ128m , X86::VFNMADD132PDm }, + { X86::VFNMADD132PDZ128r , X86::VFNMADD132PDr }, + { X86::VFNMADD132PSZ128m , X86::VFNMADD132PSm }, + { X86::VFNMADD132PSZ128r , X86::VFNMADD132PSr }, + { X86::VFNMADD213PDZ128m , X86::VFNMADD213PDm }, + { X86::VFNMADD213PDZ128r , X86::VFNMADD213PDr }, + { X86::VFNMADD213PSZ128m , X86::VFNMADD213PSm }, + { X86::VFNMADD213PSZ128r , X86::VFNMADD213PSr }, + { X86::VFNMADD231PDZ128m , X86::VFNMADD231PDm }, + { X86::VFNMADD231PDZ128r , X86::VFNMADD231PDr }, + { X86::VFNMADD231PSZ128m , X86::VFNMADD231PSm }, + { X86::VFNMADD231PSZ128r , X86::VFNMADD231PSr }, + { X86::VFNMSUB132PDZ128m , X86::VFNMSUB132PDm }, + { X86::VFNMSUB132PDZ128r , X86::VFNMSUB132PDr }, + { X86::VFNMSUB132PSZ128m , X86::VFNMSUB132PSm }, + { X86::VFNMSUB132PSZ128r , X86::VFNMSUB132PSr }, + { X86::VFNMSUB213PDZ128m , X86::VFNMSUB213PDm }, + { X86::VFNMSUB213PDZ128r , X86::VFNMSUB213PDr }, + { X86::VFNMSUB213PSZ128m , X86::VFNMSUB213PSm }, + { X86::VFNMSUB213PSZ128r , X86::VFNMSUB213PSr }, + { X86::VFNMSUB231PDZ128m , X86::VFNMSUB231PDm }, + { X86::VFNMSUB231PDZ128r , X86::VFNMSUB231PDr }, + { X86::VFNMSUB231PSZ128m , X86::VFNMSUB231PSm }, + { X86::VFNMSUB231PSZ128r , X86::VFNMSUB231PSr }, + { X86::VMAXCPDZ128rm , X86::VMAXCPDrm }, + { X86::VMAXCPDZ128rr , X86::VMAXCPDrr }, + { X86::VMAXCPSZ128rm , X86::VMAXCPSrm }, + { X86::VMAXCPSZ128rr , X86::VMAXCPSrr }, + { X86::VMAXPDZ128rm , X86::VMAXPDrm }, + { X86::VMAXPDZ128rr , X86::VMAXPDrr }, + { X86::VMAXPSZ128rm , X86::VMAXPSrm }, + { X86::VMAXPSZ128rr , X86::VMAXPSrr }, + { X86::VMINCPDZ128rm , X86::VMINCPDrm }, + { X86::VMINCPDZ128rr , X86::VMINCPDrr }, + { X86::VMINCPSZ128rm , X86::VMINCPSrm }, + { X86::VMINCPSZ128rr , 
X86::VMINCPSrr }, + { X86::VMINPDZ128rm , X86::VMINPDrm }, + { X86::VMINPDZ128rr , X86::VMINPDrr }, + { X86::VMINPSZ128rm , X86::VMINPSrm }, + { X86::VMINPSZ128rr , X86::VMINPSrr }, + { X86::VMOVAPDZ128mr , X86::VMOVAPDmr }, + { X86::VMOVAPDZ128rm , X86::VMOVAPDrm }, + { X86::VMOVAPDZ128rr , X86::VMOVAPDrr }, + { X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV }, + { X86::VMOVAPSZ128mr , X86::VMOVAPSmr }, + { X86::VMOVAPSZ128rm , X86::VMOVAPSrm }, + { X86::VMOVAPSZ128rr , X86::VMOVAPSrr }, + { X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV }, + { X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm }, + { X86::VMOVDDUPZ128rr , X86::VMOVDDUPrr }, + { X86::VMOVDQA32Z128mr , X86::VMOVDQAmr }, + { X86::VMOVDQA32Z128rm , X86::VMOVDQArm }, + { X86::VMOVDQA32Z128rr , X86::VMOVDQArr }, + { X86::VMOVDQA32Z128rr_REV , X86::VMOVDQArr_REV }, + { X86::VMOVDQA64Z128mr , X86::VMOVDQAmr }, + { X86::VMOVDQA64Z128rm , X86::VMOVDQArm }, + { X86::VMOVDQA64Z128rr , X86::VMOVDQArr }, + { X86::VMOVDQA64Z128rr_REV , X86::VMOVDQArr_REV }, + { X86::VMOVDQU16Z128mr , X86::VMOVDQUmr }, + { X86::VMOVDQU16Z128rm , X86::VMOVDQUrm }, + { X86::VMOVDQU16Z128rr , X86::VMOVDQUrr }, + { X86::VMOVDQU16Z128rr_REV , X86::VMOVDQUrr_REV }, + { X86::VMOVDQU32Z128mr , X86::VMOVDQUmr }, + { X86::VMOVDQU32Z128rm , X86::VMOVDQUrm }, + { X86::VMOVDQU32Z128rr , X86::VMOVDQUrr }, + { X86::VMOVDQU32Z128rr_REV , X86::VMOVDQUrr_REV }, + { X86::VMOVDQU64Z128mr , X86::VMOVDQUmr }, + { X86::VMOVDQU64Z128rm , X86::VMOVDQUrm }, + { X86::VMOVDQU64Z128rr , X86::VMOVDQUrr }, + { X86::VMOVDQU64Z128rr_REV , X86::VMOVDQUrr_REV }, + { X86::VMOVDQU8Z128mr , X86::VMOVDQUmr }, + { X86::VMOVDQU8Z128rm , X86::VMOVDQUrm }, + { X86::VMOVDQU8Z128rr , X86::VMOVDQUrr }, + { X86::VMOVDQU8Z128rr_REV , X86::VMOVDQUrr_REV }, + { X86::VMOVHPDZ128mr , X86::VMOVHPDmr }, + { X86::VMOVHPDZ128rm , X86::VMOVHPDrm }, + { X86::VMOVHPSZ128mr , X86::VMOVHPSmr }, + { X86::VMOVHPSZ128rm , X86::VMOVHPSrm }, + { X86::VMOVLPDZ128mr , X86::VMOVLPDmr }, + { X86::VMOVLPDZ128rm , 
X86::VMOVLPDrm }, + { X86::VMOVLPSZ128mr , X86::VMOVLPSmr }, + { X86::VMOVLPSZ128rm , X86::VMOVLPSrm }, + { X86::VMOVNTDQAZ128rm , X86::VMOVNTDQArm }, + { X86::VMOVNTDQZ128mr , X86::VMOVNTDQmr }, + { X86::VMOVNTPDZ128mr , X86::VMOVNTPDmr }, + { X86::VMOVNTPSZ128mr , X86::VMOVNTPSmr }, + { X86::VMOVSHDUPZ128rm , X86::VMOVSHDUPrm }, + { X86::VMOVSHDUPZ128rr , X86::VMOVSHDUPrr }, + { X86::VMOVSLDUPZ128rm , X86::VMOVSLDUPrm }, + { X86::VMOVSLDUPZ128rr , X86::VMOVSLDUPrr }, + { X86::VMOVUPDZ128mr , X86::VMOVUPDmr }, + { X86::VMOVUPDZ128rm , X86::VMOVUPDrm }, + { X86::VMOVUPDZ128rr , X86::VMOVUPDrr }, + { X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV }, + { X86::VMOVUPSZ128mr , X86::VMOVUPSmr }, + { X86::VMOVUPSZ128rm , X86::VMOVUPSrm }, + { X86::VMOVUPSZ128rr , X86::VMOVUPSrr }, + { X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV }, + { X86::VMULPDZ128rm , X86::VMULPDrm }, + { X86::VMULPDZ128rr , X86::VMULPDrr }, + { X86::VMULPSZ128rm , X86::VMULPSrm }, + { X86::VMULPSZ128rr , X86::VMULPSrr }, + { X86::VORPDZ128rm , X86::VORPDrm }, + { X86::VORPDZ128rr , X86::VORPDrr }, + { X86::VORPSZ128rm , X86::VORPSrm }, + { X86::VORPSZ128rr , X86::VORPSrr }, + { X86::VPABSBZ128rm , X86::VPABSBrm }, + { X86::VPABSBZ128rr , X86::VPABSBrr }, + { X86::VPABSDZ128rm , X86::VPABSDrm }, + { X86::VPABSDZ128rr , X86::VPABSDrr }, + { X86::VPABSWZ128rm , X86::VPABSWrm }, + { X86::VPABSWZ128rr , X86::VPABSWrr }, + { X86::VPACKSSDWZ128rm , X86::VPACKSSDWrm }, + { X86::VPACKSSDWZ128rr , X86::VPACKSSDWrr }, + { X86::VPACKSSWBZ128rm , X86::VPACKSSWBrm }, + { X86::VPACKSSWBZ128rr , X86::VPACKSSWBrr }, + { X86::VPACKUSDWZ128rm , X86::VPACKUSDWrm }, + { X86::VPACKUSDWZ128rr , X86::VPACKUSDWrr }, + { X86::VPACKUSWBZ128rm , X86::VPACKUSWBrm }, + { X86::VPACKUSWBZ128rr , X86::VPACKUSWBrr }, + { X86::VPADDBZ128rm , X86::VPADDBrm }, + { X86::VPADDBZ128rr , X86::VPADDBrr }, + { X86::VPADDDZ128rm , X86::VPADDDrm }, + { X86::VPADDDZ128rr , X86::VPADDDrr }, + { X86::VPADDQZ128rm , X86::VPADDQrm }, + { 
X86::VPADDQZ128rr , X86::VPADDQrr }, + { X86::VPADDSBZ128rm , X86::VPADDSBrm }, + { X86::VPADDSBZ128rr , X86::VPADDSBrr }, + { X86::VPADDSWZ128rm , X86::VPADDSWrm }, + { X86::VPADDSWZ128rr , X86::VPADDSWrr }, + { X86::VPADDUSBZ128rm , X86::VPADDUSBrm }, + { X86::VPADDUSBZ128rr , X86::VPADDUSBrr }, + { X86::VPADDUSWZ128rm , X86::VPADDUSWrm }, + { X86::VPADDUSWZ128rr , X86::VPADDUSWrr }, + { X86::VPADDWZ128rm , X86::VPADDWrm }, + { X86::VPADDWZ128rr , X86::VPADDWrr }, + { X86::VPALIGNRZ128rmi , X86::VPALIGNRrmi }, + { X86::VPALIGNRZ128rri , X86::VPALIGNRrri }, + { X86::VPANDDZ128rm , X86::VPANDrm }, + { X86::VPANDDZ128rr , X86::VPANDrr }, + { X86::VPANDQZ128rm , X86::VPANDrm }, + { X86::VPANDQZ128rr , X86::VPANDrr }, + { X86::VPAVGBZ128rm , X86::VPAVGBrm }, + { X86::VPAVGBZ128rr , X86::VPAVGBrr }, + { X86::VPAVGWZ128rm , X86::VPAVGWrm }, + { X86::VPAVGWZ128rr , X86::VPAVGWrr }, + { X86::VPBROADCASTBZ128m , X86::VPBROADCASTBrm }, + { X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr }, + { X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm }, + { X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr }, + { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm }, + { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr }, + { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm }, + { X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr }, + { X86::VPERMILPDZ128mi , X86::VPERMILPDmi }, + { X86::VPERMILPDZ128ri , X86::VPERMILPDri }, + { X86::VPERMILPDZ128rm , X86::VPERMILPDrm }, + { X86::VPERMILPDZ128rr , X86::VPERMILPDrr }, + { X86::VPERMILPSZ128mi , X86::VPERMILPSmi }, + { X86::VPERMILPSZ128ri , X86::VPERMILPSri }, + { X86::VPERMILPSZ128rm , X86::VPERMILPSrm }, + { X86::VPERMILPSZ128rr , X86::VPERMILPSrr }, + { X86::VPMADDUBSWZ128rm , X86::VPMADDUBSWrm }, + { X86::VPMADDUBSWZ128rr , X86::VPMADDUBSWrr }, + { X86::VPMADDWDZ128rm , X86::VPMADDWDrm }, + { X86::VPMADDWDZ128rr , X86::VPMADDWDrr }, + { X86::VPMAXSBZ128rm , X86::VPMAXSBrm }, + { X86::VPMAXSBZ128rr , X86::VPMAXSBrr }, + { X86::VPMAXSDZ128rm , X86::VPMAXSDrm }, + 
{ X86::VPMAXSDZ128rr , X86::VPMAXSDrr }, + { X86::VPMAXSWZ128rm , X86::VPMAXSWrm }, + { X86::VPMAXSWZ128rr , X86::VPMAXSWrr }, + { X86::VPMAXUBZ128rm , X86::VPMAXUBrm }, + { X86::VPMAXUBZ128rr , X86::VPMAXUBrr }, + { X86::VPMAXUDZ128rm , X86::VPMAXUDrm }, + { X86::VPMAXUDZ128rr , X86::VPMAXUDrr }, + { X86::VPMAXUWZ128rm , X86::VPMAXUWrm }, + { X86::VPMAXUWZ128rr , X86::VPMAXUWrr }, + { X86::VPMINSBZ128rm , X86::VPMINSBrm }, + { X86::VPMINSBZ128rr , X86::VPMINSBrr }, + { X86::VPMINSDZ128rm , X86::VPMINSDrm }, + { X86::VPMINSDZ128rr , X86::VPMINSDrr }, + { X86::VPMINSWZ128rm , X86::VPMINSWrm }, + { X86::VPMINSWZ128rr , X86::VPMINSWrr }, + { X86::VPMINUBZ128rm , X86::VPMINUBrm }, + { X86::VPMINUBZ128rr , X86::VPMINUBrr }, + { X86::VPMINUDZ128rm , X86::VPMINUDrm }, + { X86::VPMINUDZ128rr , X86::VPMINUDrr }, + { X86::VPMINUWZ128rm , X86::VPMINUWrm }, + { X86::VPMINUWZ128rr , X86::VPMINUWrr }, + { X86::VPMOVSXBDZ128rm , X86::VPMOVSXBDrm }, + { X86::VPMOVSXBDZ128rr , X86::VPMOVSXBDrr }, + { X86::VPMOVSXBQZ128rm , X86::VPMOVSXBQrm }, + { X86::VPMOVSXBQZ128rr , X86::VPMOVSXBQrr }, + { X86::VPMOVSXBWZ128rm , X86::VPMOVSXBWrm }, + { X86::VPMOVSXBWZ128rr , X86::VPMOVSXBWrr }, + { X86::VPMOVSXDQZ128rm , X86::VPMOVSXDQrm }, + { X86::VPMOVSXDQZ128rr , X86::VPMOVSXDQrr }, + { X86::VPMOVSXWDZ128rm , X86::VPMOVSXWDrm }, + { X86::VPMOVSXWDZ128rr , X86::VPMOVSXWDrr }, + { X86::VPMOVSXWQZ128rm , X86::VPMOVSXWQrm }, + { X86::VPMOVSXWQZ128rr , X86::VPMOVSXWQrr }, + { X86::VPMOVZXBDZ128rm , X86::VPMOVZXBDrm }, + { X86::VPMOVZXBDZ128rr , X86::VPMOVZXBDrr }, + { X86::VPMOVZXBQZ128rm , X86::VPMOVZXBQrm }, + { X86::VPMOVZXBQZ128rr , X86::VPMOVZXBQrr }, + { X86::VPMOVZXBWZ128rm , X86::VPMOVZXBWrm }, + { X86::VPMOVZXBWZ128rr , X86::VPMOVZXBWrr }, + { X86::VPMOVZXDQZ128rm , X86::VPMOVZXDQrm }, + { X86::VPMOVZXDQZ128rr , X86::VPMOVZXDQrr }, + { X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm }, + { X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr }, + { X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm }, + { 
X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr }, + { X86::VPMULDQZ128rm , X86::VPMULDQrm }, + { X86::VPMULDQZ128rr , X86::VPMULDQrr }, + { X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm }, + { X86::VPMULHRSWZ128rr , X86::VPMULHRSWrr }, + { X86::VPMULHUWZ128rm , X86::VPMULHUWrm }, + { X86::VPMULHUWZ128rr , X86::VPMULHUWrr }, + { X86::VPMULHWZ128rm , X86::VPMULHWrm }, + { X86::VPMULHWZ128rr , X86::VPMULHWrr }, + { X86::VPMULLDZ128rm , X86::VPMULLDrm }, + { X86::VPMULLDZ128rr , X86::VPMULLDrr }, + { X86::VPMULLWZ128rm , X86::VPMULLWrm }, + { X86::VPMULLWZ128rr , X86::VPMULLWrr }, + { X86::VPMULUDQZ128rm , X86::VPMULUDQrm }, + { X86::VPMULUDQZ128rr , X86::VPMULUDQrr }, + { X86::VPORDZ128rm , X86::VPORrm }, + { X86::VPORDZ128rr , X86::VPORrr }, + { X86::VPORQZ128rm , X86::VPORrm }, + { X86::VPORQZ128rr , X86::VPORrr }, + { X86::VPSADBWZ128rm , X86::VPSADBWrm }, + { X86::VPSADBWZ128rr , X86::VPSADBWrr }, + { X86::VPSHUFBZ128rm , X86::VPSHUFBrm }, + { X86::VPSHUFBZ128rr , X86::VPSHUFBrr }, + { X86::VPSHUFDZ128mi , X86::VPSHUFDmi }, + { X86::VPSHUFDZ128ri , X86::VPSHUFDri }, + { X86::VPSHUFHWZ128mi , X86::VPSHUFHWmi }, + { X86::VPSHUFHWZ128ri , X86::VPSHUFHWri }, + { X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi }, + { X86::VPSHUFLWZ128ri , X86::VPSHUFLWri }, + { X86::VPSLLDQZ128rr , X86::VPSLLDQri }, + { X86::VPSLLDZ128ri , X86::VPSLLDri }, + { X86::VPSLLDZ128rm , X86::VPSLLDrm }, + { X86::VPSLLDZ128rr , X86::VPSLLDrr }, + { X86::VPSLLQZ128ri , X86::VPSLLQri }, + { X86::VPSLLQZ128rm , X86::VPSLLQrm }, + { X86::VPSLLQZ128rr , X86::VPSLLQrr }, + { X86::VPSLLVDZ128rm , X86::VPSLLVDrm }, + { X86::VPSLLVDZ128rr , X86::VPSLLVDrr }, + { X86::VPSLLVQZ128rm , X86::VPSLLVQrm }, + { X86::VPSLLVQZ128rr , X86::VPSLLVQrr }, + { X86::VPSLLWZ128ri , X86::VPSLLWri }, + { X86::VPSLLWZ128rm , X86::VPSLLWrm }, + { X86::VPSLLWZ128rr , X86::VPSLLWrr }, + { X86::VPSRADZ128ri , X86::VPSRADri }, + { X86::VPSRADZ128rm , X86::VPSRADrm }, + { X86::VPSRADZ128rr , X86::VPSRADrr }, + { X86::VPSRAVDZ128rm , X86::VPSRAVDrm 
}, + { X86::VPSRAVDZ128rr , X86::VPSRAVDrr }, + { X86::VPSRAWZ128ri , X86::VPSRAWri }, + { X86::VPSRAWZ128rm , X86::VPSRAWrm }, + { X86::VPSRAWZ128rr , X86::VPSRAWrr }, + { X86::VPSRLDQZ128rr , X86::VPSRLDQri }, + { X86::VPSRLDZ128ri , X86::VPSRLDri }, + { X86::VPSRLDZ128rm , X86::VPSRLDrm }, + { X86::VPSRLDZ128rr , X86::VPSRLDrr }, + { X86::VPSRLQZ128ri , X86::VPSRLQri }, + { X86::VPSRLQZ128rm , X86::VPSRLQrm }, + { X86::VPSRLQZ128rr , X86::VPSRLQrr }, + { X86::VPSRLVDZ128rm , X86::VPSRLVDrm }, + { X86::VPSRLVDZ128rr , X86::VPSRLVDrr }, + { X86::VPSRLVQZ128rm , X86::VPSRLVQrm }, + { X86::VPSRLVQZ128rr , X86::VPSRLVQrr }, + { X86::VPSRLWZ128ri , X86::VPSRLWri }, + { X86::VPSRLWZ128rm , X86::VPSRLWrm }, + { X86::VPSRLWZ128rr , X86::VPSRLWrr }, + { X86::VPSUBBZ128rm , X86::VPSUBBrm }, + { X86::VPSUBBZ128rr , X86::VPSUBBrr }, + { X86::VPSUBDZ128rm , X86::VPSUBDrm }, + { X86::VPSUBDZ128rr , X86::VPSUBDrr }, + { X86::VPSUBQZ128rm , X86::VPSUBQrm }, + { X86::VPSUBQZ128rr , X86::VPSUBQrr }, + { X86::VPSUBSBZ128rm , X86::VPSUBSBrm }, + { X86::VPSUBSBZ128rr , X86::VPSUBSBrr }, + { X86::VPSUBSWZ128rm , X86::VPSUBSWrm }, + { X86::VPSUBSWZ128rr , X86::VPSUBSWrr }, + { X86::VPSUBUSBZ128rm , X86::VPSUBUSBrm }, + { X86::VPSUBUSBZ128rr , X86::VPSUBUSBrr }, + { X86::VPSUBUSWZ128rm , X86::VPSUBUSWrm }, + { X86::VPSUBUSWZ128rr , X86::VPSUBUSWrr }, + { X86::VPSUBWZ128rm , X86::VPSUBWrm }, + { X86::VPSUBWZ128rr , X86::VPSUBWrr }, + { X86::VPUNPCKHBWZ128rm , X86::VPUNPCKHBWrm }, + { X86::VPUNPCKHBWZ128rr , X86::VPUNPCKHBWrr }, + { X86::VPUNPCKHDQZ128rm , X86::VPUNPCKHDQrm }, + { X86::VPUNPCKHDQZ128rr , X86::VPUNPCKHDQrr }, + { X86::VPUNPCKHQDQZ128rm , X86::VPUNPCKHQDQrm }, + { X86::VPUNPCKHQDQZ128rr , X86::VPUNPCKHQDQrr }, + { X86::VPUNPCKHWDZ128rm , X86::VPUNPCKHWDrm }, + { X86::VPUNPCKHWDZ128rr , X86::VPUNPCKHWDrr }, + { X86::VPUNPCKLBWZ128rm , X86::VPUNPCKLBWrm }, + { X86::VPUNPCKLBWZ128rr , X86::VPUNPCKLBWrr }, + { X86::VPUNPCKLDQZ128rm , X86::VPUNPCKLDQrm }, + { 
X86::VPUNPCKLDQZ128rr , X86::VPUNPCKLDQrr }, + { X86::VPUNPCKLQDQZ128rm , X86::VPUNPCKLQDQrm }, + { X86::VPUNPCKLQDQZ128rr , X86::VPUNPCKLQDQrr }, + { X86::VPUNPCKLWDZ128rm , X86::VPUNPCKLWDrm }, + { X86::VPUNPCKLWDZ128rr , X86::VPUNPCKLWDrr }, + { X86::VPXORDZ128rm , X86::VPXORrm }, + { X86::VPXORDZ128rr , X86::VPXORrr }, + { X86::VPXORQZ128rm , X86::VPXORrm }, + { X86::VPXORQZ128rr , X86::VPXORrr }, + { X86::VSHUFPDZ128rmi , X86::VSHUFPDrmi }, + { X86::VSHUFPDZ128rri , X86::VSHUFPDrri }, + { X86::VSHUFPSZ128rmi , X86::VSHUFPSrmi }, + { X86::VSHUFPSZ128rri , X86::VSHUFPSrri }, + { X86::VSQRTPDZ128m , X86::VSQRTPDm }, + { X86::VSQRTPDZ128r , X86::VSQRTPDr }, + { X86::VSQRTPSZ128m , X86::VSQRTPSm }, + { X86::VSQRTPSZ128r , X86::VSQRTPSr }, + { X86::VSUBPDZ128rm , X86::VSUBPDrm }, + { X86::VSUBPDZ128rr , X86::VSUBPDrr }, + { X86::VSUBPSZ128rm , X86::VSUBPSrm }, + { X86::VSUBPSZ128rr , X86::VSUBPSrr }, + { X86::VUNPCKHPDZ128rm , X86::VUNPCKHPDrm }, + { X86::VUNPCKHPDZ128rr , X86::VUNPCKHPDrr }, + { X86::VUNPCKHPSZ128rm , X86::VUNPCKHPSrm }, + { X86::VUNPCKHPSZ128rr , X86::VUNPCKHPSrr }, + { X86::VUNPCKLPDZ128rm , X86::VUNPCKLPDrm }, + { X86::VUNPCKLPDZ128rr , X86::VUNPCKLPDrr }, + { X86::VUNPCKLPSZ128rm , X86::VUNPCKLPSrm }, + { X86::VUNPCKLPSZ128rr , X86::VUNPCKLPSrr }, + { X86::VXORPDZ128rm , X86::VXORPDrm }, + { X86::VXORPDZ128rr , X86::VXORPDrr }, + { X86::VXORPSZ128rm , X86::VXORPSrm }, + { X86::VXORPSZ128rr , X86::VXORPSrr }, +}; + + +// X86 EVEX encoded instructions that have a VEX 256 encoding +// (table format: <EVEX opcode, VEX-256 opcode>).
+ static const X86EvexToVexCompressTableEntry + X86EvexToVex256CompressTable[] = { + { X86::VADDPDZ256rm , X86::VADDPDYrm }, + { X86::VADDPDZ256rr , X86::VADDPDYrr }, + { X86::VADDPSZ256rm , X86::VADDPSYrm }, + { X86::VADDPSZ256rr , X86::VADDPSYrr }, + { X86::VANDNPDZ256rm , X86::VANDNPDYrm }, + { X86::VANDNPDZ256rr , X86::VANDNPDYrr }, + { X86::VANDNPSZ256rm , X86::VANDNPSYrm }, + { X86::VANDNPSZ256rr , X86::VANDNPSYrr }, + { X86::VANDPDZ256rm , X86::VANDPDYrm }, + { X86::VANDPDZ256rr , X86::VANDPDYrr }, + { X86::VANDPSZ256rm , X86::VANDPSYrm }, + { X86::VANDPSZ256rr , X86::VANDPSYrr }, + { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm }, + { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr }, + { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr }, + { X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm }, + { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr }, + { X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr }, + { X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm }, + { X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr }, + { X86::VCVTDQ2PSZ256rm , X86::VCVTDQ2PSYrm }, + { X86::VCVTDQ2PSZ256rr , X86::VCVTDQ2PSYrr }, + { X86::VCVTPD2DQZ256rm , X86::VCVTPD2DQYrm }, + { X86::VCVTPD2DQZ256rr , X86::VCVTPD2DQYrr }, + { X86::VCVTPD2PSZ256rm , X86::VCVTPD2PSYrm }, + { X86::VCVTPD2PSZ256rr , X86::VCVTPD2PSYrr }, + { X86::VCVTPH2PSZ256rm , X86::VCVTPH2PSYrm }, + { X86::VCVTPH2PSZ256rr , X86::VCVTPH2PSYrr }, + { X86::VCVTPS2DQZ256rm , X86::VCVTPS2DQYrm }, + { X86::VCVTPS2DQZ256rr , X86::VCVTPS2DQYrr }, + { X86::VCVTPS2PDZ256rm , X86::VCVTPS2PDYrm }, + { X86::VCVTPS2PDZ256rr , X86::VCVTPS2PDYrr }, + { X86::VCVTPS2PHZ256mr , X86::VCVTPS2PHYmr }, + { X86::VCVTPS2PHZ256rr , X86::VCVTPS2PHYrr }, + { X86::VCVTTPD2DQZ256rm , X86::VCVTTPD2DQYrm }, + { X86::VCVTTPD2DQZ256rr , X86::VCVTTPD2DQYrr }, + { X86::VCVTTPS2DQZ256rm , X86::VCVTTPS2DQYrm }, + { X86::VCVTTPS2DQZ256rr , X86::VCVTTPS2DQYrr }, + { X86::VDIVPDZ256rm , X86::VDIVPDYrm }, + { X86::VDIVPDZ256rr , X86::VDIVPDYrr }, + { X86::VDIVPSZ256rm , 
X86::VDIVPSYrm }, + { X86::VDIVPSZ256rr , X86::VDIVPSYrr }, + { X86::VFMADD132PDZ256m , X86::VFMADD132PDYm }, + { X86::VFMADD132PDZ256r , X86::VFMADD132PDYr }, + { X86::VFMADD132PSZ256m , X86::VFMADD132PSYm }, + { X86::VFMADD132PSZ256r , X86::VFMADD132PSYr }, + { X86::VFMADD213PDZ256m , X86::VFMADD213PDYm }, + { X86::VFMADD213PDZ256r , X86::VFMADD213PDYr }, + { X86::VFMADD213PSZ256m , X86::VFMADD213PSYm }, + { X86::VFMADD213PSZ256r , X86::VFMADD213PSYr }, + { X86::VFMADD231PDZ256m , X86::VFMADD231PDYm }, + { X86::VFMADD231PDZ256r , X86::VFMADD231PDYr }, + { X86::VFMADD231PSZ256m , X86::VFMADD231PSYm }, + { X86::VFMADD231PSZ256r , X86::VFMADD231PSYr }, + { X86::VFMADDSUB132PDZ256m , X86::VFMADDSUB132PDYm }, + { X86::VFMADDSUB132PDZ256r , X86::VFMADDSUB132PDYr }, + { X86::VFMADDSUB132PSZ256m , X86::VFMADDSUB132PSYm }, + { X86::VFMADDSUB132PSZ256r , X86::VFMADDSUB132PSYr }, + { X86::VFMADDSUB213PDZ256m , X86::VFMADDSUB213PDYm }, + { X86::VFMADDSUB213PDZ256r , X86::VFMADDSUB213PDYr }, + { X86::VFMADDSUB213PSZ256m , X86::VFMADDSUB213PSYm }, + { X86::VFMADDSUB213PSZ256r , X86::VFMADDSUB213PSYr }, + { X86::VFMADDSUB231PDZ256m , X86::VFMADDSUB231PDYm }, + { X86::VFMADDSUB231PDZ256r , X86::VFMADDSUB231PDYr }, + { X86::VFMADDSUB231PSZ256m , X86::VFMADDSUB231PSYm }, + { X86::VFMADDSUB231PSZ256r , X86::VFMADDSUB231PSYr }, + { X86::VFMSUB132PDZ256m , X86::VFMSUB132PDYm }, + { X86::VFMSUB132PDZ256r , X86::VFMSUB132PDYr }, + { X86::VFMSUB132PSZ256m , X86::VFMSUB132PSYm }, + { X86::VFMSUB132PSZ256r , X86::VFMSUB132PSYr }, + { X86::VFMSUB213PDZ256m , X86::VFMSUB213PDYm }, + { X86::VFMSUB213PDZ256r , X86::VFMSUB213PDYr }, + { X86::VFMSUB213PSZ256m , X86::VFMSUB213PSYm }, + { X86::VFMSUB213PSZ256r , X86::VFMSUB213PSYr }, + { X86::VFMSUB231PDZ256m , X86::VFMSUB231PDYm }, + { X86::VFMSUB231PDZ256r , X86::VFMSUB231PDYr }, + { X86::VFMSUB231PSZ256m , X86::VFMSUB231PSYm }, + { X86::VFMSUB231PSZ256r , X86::VFMSUB231PSYr }, + { X86::VFMSUBADD132PDZ256m , X86::VFMSUBADD132PDYm }, + { 
X86::VFMSUBADD132PDZ256r , X86::VFMSUBADD132PDYr }, + { X86::VFMSUBADD132PSZ256m , X86::VFMSUBADD132PSYm }, + { X86::VFMSUBADD132PSZ256r , X86::VFMSUBADD132PSYr }, + { X86::VFMSUBADD213PDZ256m , X86::VFMSUBADD213PDYm }, + { X86::VFMSUBADD213PDZ256r , X86::VFMSUBADD213PDYr }, + { X86::VFMSUBADD213PSZ256m , X86::VFMSUBADD213PSYm }, + { X86::VFMSUBADD213PSZ256r , X86::VFMSUBADD213PSYr }, + { X86::VFMSUBADD231PDZ256m , X86::VFMSUBADD231PDYm }, + { X86::VFMSUBADD231PDZ256r , X86::VFMSUBADD231PDYr }, + { X86::VFMSUBADD231PSZ256m , X86::VFMSUBADD231PSYm }, + { X86::VFMSUBADD231PSZ256r , X86::VFMSUBADD231PSYr }, + { X86::VFNMADD132PDZ256m , X86::VFNMADD132PDYm }, + { X86::VFNMADD132PDZ256r , X86::VFNMADD132PDYr }, + { X86::VFNMADD132PSZ256m , X86::VFNMADD132PSYm }, + { X86::VFNMADD132PSZ256r , X86::VFNMADD132PSYr }, + { X86::VFNMADD213PDZ256m , X86::VFNMADD213PDYm }, + { X86::VFNMADD213PDZ256r , X86::VFNMADD213PDYr }, + { X86::VFNMADD213PSZ256m , X86::VFNMADD213PSYm }, + { X86::VFNMADD213PSZ256r , X86::VFNMADD213PSYr }, + { X86::VFNMADD231PDZ256m , X86::VFNMADD231PDYm }, + { X86::VFNMADD231PDZ256r , X86::VFNMADD231PDYr }, + { X86::VFNMADD231PSZ256m , X86::VFNMADD231PSYm }, + { X86::VFNMADD231PSZ256r , X86::VFNMADD231PSYr }, + { X86::VFNMSUB132PDZ256m , X86::VFNMSUB132PDYm }, + { X86::VFNMSUB132PDZ256r , X86::VFNMSUB132PDYr }, + { X86::VFNMSUB132PSZ256m , X86::VFNMSUB132PSYm }, + { X86::VFNMSUB132PSZ256r , X86::VFNMSUB132PSYr }, + { X86::VFNMSUB213PDZ256m , X86::VFNMSUB213PDYm }, + { X86::VFNMSUB213PDZ256r , X86::VFNMSUB213PDYr }, + { X86::VFNMSUB213PSZ256m , X86::VFNMSUB213PSYm }, + { X86::VFNMSUB213PSZ256r , X86::VFNMSUB213PSYr }, + { X86::VFNMSUB231PDZ256m , X86::VFNMSUB231PDYm }, + { X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr }, + { X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm }, + { X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr }, + { X86::VMAXCPDZ256rm , X86::VMAXCPDYrm }, + { X86::VMAXCPDZ256rr , X86::VMAXCPDYrr }, + { X86::VMAXCPSZ256rm , X86::VMAXCPSYrm }, + { 
X86::VMAXCPSZ256rr , X86::VMAXCPSYrr }, + { X86::VMAXPDZ256rm , X86::VMAXPDYrm }, + { X86::VMAXPDZ256rr , X86::VMAXPDYrr }, + { X86::VMAXPSZ256rm , X86::VMAXPSYrm }, + { X86::VMAXPSZ256rr , X86::VMAXPSYrr }, + { X86::VMINCPDZ256rm , X86::VMINCPDYrm }, + { X86::VMINCPDZ256rr , X86::VMINCPDYrr }, + { X86::VMINCPSZ256rm , X86::VMINCPSYrm }, + { X86::VMINCPSZ256rr , X86::VMINCPSYrr }, + { X86::VMINPDZ256rm , X86::VMINPDYrm }, + { X86::VMINPDZ256rr , X86::VMINPDYrr }, + { X86::VMINPSZ256rm , X86::VMINPSYrm }, + { X86::VMINPSZ256rr , X86::VMINPSYrr }, + { X86::VMOVAPDZ256mr , X86::VMOVAPDYmr }, + { X86::VMOVAPDZ256rm , X86::VMOVAPDYrm }, + { X86::VMOVAPDZ256rr , X86::VMOVAPDYrr }, + { X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV }, + { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr }, + { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm }, + { X86::VMOVAPSZ256rr , X86::VMOVAPSYrr }, + { X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV }, + { X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm }, + { X86::VMOVDDUPZ256rr , X86::VMOVDDUPYrr }, + { X86::VMOVDQA32Z256mr , X86::VMOVDQAYmr }, + { X86::VMOVDQA32Z256rm , X86::VMOVDQAYrm }, + { X86::VMOVDQA32Z256rr , X86::VMOVDQAYrr }, + { X86::VMOVDQA32Z256rr_REV , X86::VMOVDQAYrr_REV }, + { X86::VMOVDQA64Z256mr , X86::VMOVDQAYmr }, + { X86::VMOVDQA64Z256rm , X86::VMOVDQAYrm }, + { X86::VMOVDQA64Z256rr , X86::VMOVDQAYrr }, + { X86::VMOVDQA64Z256rr_REV , X86::VMOVDQAYrr_REV }, + { X86::VMOVDQU16Z256mr , X86::VMOVDQUYmr }, + { X86::VMOVDQU16Z256rm , X86::VMOVDQUYrm }, + { X86::VMOVDQU16Z256rr , X86::VMOVDQUYrr }, + { X86::VMOVDQU16Z256rr_REV , X86::VMOVDQUYrr_REV }, + { X86::VMOVDQU32Z256mr , X86::VMOVDQUYmr }, + { X86::VMOVDQU32Z256rm , X86::VMOVDQUYrm }, + { X86::VMOVDQU32Z256rr , X86::VMOVDQUYrr }, + { X86::VMOVDQU32Z256rr_REV , X86::VMOVDQUYrr_REV }, + { X86::VMOVDQU64Z256mr , X86::VMOVDQUYmr }, + { X86::VMOVDQU64Z256rm , X86::VMOVDQUYrm }, + { X86::VMOVDQU64Z256rr , X86::VMOVDQUYrr }, + { X86::VMOVDQU64Z256rr_REV , X86::VMOVDQUYrr_REV }, + { X86::VMOVDQU8Z256mr , 
X86::VMOVDQUYmr }, + { X86::VMOVDQU8Z256rm , X86::VMOVDQUYrm }, + { X86::VMOVDQU8Z256rr , X86::VMOVDQUYrr }, + { X86::VMOVDQU8Z256rr_REV , X86::VMOVDQUYrr_REV }, + { X86::VMOVNTDQAZ256rm , X86::VMOVNTDQAYrm }, + { X86::VMOVNTDQZ256mr , X86::VMOVNTDQYmr }, + { X86::VMOVNTPDZ256mr , X86::VMOVNTPDYmr }, + { X86::VMOVNTPSZ256mr , X86::VMOVNTPSYmr }, + { X86::VMOVSHDUPZ256rm , X86::VMOVSHDUPYrm }, + { X86::VMOVSHDUPZ256rr , X86::VMOVSHDUPYrr }, + { X86::VMOVSLDUPZ256rm , X86::VMOVSLDUPYrm }, + { X86::VMOVSLDUPZ256rr , X86::VMOVSLDUPYrr }, + { X86::VMOVUPDZ256mr , X86::VMOVUPDYmr }, + { X86::VMOVUPDZ256rm , X86::VMOVUPDYrm }, + { X86::VMOVUPDZ256rr , X86::VMOVUPDYrr }, + { X86::VMOVUPDZ256rr_REV , X86::VMOVUPDYrr_REV }, + { X86::VMOVUPSZ256mr , X86::VMOVUPSYmr }, + { X86::VMOVUPSZ256rm , X86::VMOVUPSYrm }, + { X86::VMOVUPSZ256rr , X86::VMOVUPSYrr }, + { X86::VMOVUPSZ256rr_REV , X86::VMOVUPSYrr_REV }, + { X86::VMULPDZ256rm , X86::VMULPDYrm }, + { X86::VMULPDZ256rr , X86::VMULPDYrr }, + { X86::VMULPSZ256rm , X86::VMULPSYrm }, + { X86::VMULPSZ256rr , X86::VMULPSYrr }, + { X86::VORPDZ256rm , X86::VORPDYrm }, + { X86::VORPDZ256rr , X86::VORPDYrr }, + { X86::VORPSZ256rm , X86::VORPSYrm }, + { X86::VORPSZ256rr , X86::VORPSYrr }, + { X86::VPABSBZ256rm , X86::VPABSBYrm }, + { X86::VPABSBZ256rr , X86::VPABSBYrr }, + { X86::VPABSDZ256rm , X86::VPABSDYrm }, + { X86::VPABSDZ256rr , X86::VPABSDYrr }, + { X86::VPABSWZ256rm , X86::VPABSWYrm }, + { X86::VPABSWZ256rr , X86::VPABSWYrr }, + { X86::VPACKSSDWZ256rm , X86::VPACKSSDWYrm }, + { X86::VPACKSSDWZ256rr , X86::VPACKSSDWYrr }, + { X86::VPACKSSWBZ256rm , X86::VPACKSSWBYrm }, + { X86::VPACKSSWBZ256rr , X86::VPACKSSWBYrr }, + { X86::VPACKUSDWZ256rm , X86::VPACKUSDWYrm }, + { X86::VPACKUSDWZ256rr , X86::VPACKUSDWYrr }, + { X86::VPACKUSWBZ256rm , X86::VPACKUSWBYrm }, + { X86::VPACKUSWBZ256rr , X86::VPACKUSWBYrr }, + { X86::VPADDBZ256rm , X86::VPADDBYrm }, + { X86::VPADDBZ256rr , X86::VPADDBYrr }, + { X86::VPADDDZ256rm , X86::VPADDDYrm }, + 
{ X86::VPADDDZ256rr , X86::VPADDDYrr }, + { X86::VPADDQZ256rm , X86::VPADDQYrm }, + { X86::VPADDQZ256rr , X86::VPADDQYrr }, + { X86::VPADDSBZ256rm , X86::VPADDSBYrm }, + { X86::VPADDSBZ256rr , X86::VPADDSBYrr }, + { X86::VPADDSWZ256rm , X86::VPADDSWYrm }, + { X86::VPADDSWZ256rr , X86::VPADDSWYrr }, + { X86::VPADDUSBZ256rm , X86::VPADDUSBYrm }, + { X86::VPADDUSBZ256rr , X86::VPADDUSBYrr }, + { X86::VPADDUSWZ256rm , X86::VPADDUSWYrm }, + { X86::VPADDUSWZ256rr , X86::VPADDUSWYrr }, + { X86::VPADDWZ256rm , X86::VPADDWYrm }, + { X86::VPADDWZ256rr , X86::VPADDWYrr }, + { X86::VPALIGNRZ256rmi , X86::VPALIGNRYrmi }, + { X86::VPALIGNRZ256rri , X86::VPALIGNRYrri }, + { X86::VPANDDZ256rm , X86::VPANDYrm }, + { X86::VPANDDZ256rr , X86::VPANDYrr }, + { X86::VPANDQZ256rm , X86::VPANDYrm }, + { X86::VPANDQZ256rr , X86::VPANDYrr }, + { X86::VPAVGBZ256rm , X86::VPAVGBYrm }, + { X86::VPAVGBZ256rr , X86::VPAVGBYrr }, + { X86::VPAVGWZ256rm , X86::VPAVGWYrm }, + { X86::VPAVGWZ256rr , X86::VPAVGWYrr }, + { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm }, + { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr }, + { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm }, + { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr }, + { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm }, + { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr }, + { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm }, + { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr }, + { X86::VPERMDZ256rm , X86::VPERMDYrm }, + { X86::VPERMDZ256rr , X86::VPERMDYrr }, + { X86::VPERMILPDZ256mi , X86::VPERMILPDYmi }, + { X86::VPERMILPDZ256ri , X86::VPERMILPDYri }, + { X86::VPERMILPDZ256rm , X86::VPERMILPDYrm }, + { X86::VPERMILPDZ256rr , X86::VPERMILPDYrr }, + { X86::VPERMILPSZ256mi , X86::VPERMILPSYmi }, + { X86::VPERMILPSZ256ri , X86::VPERMILPSYri }, + { X86::VPERMILPSZ256rm , X86::VPERMILPSYrm }, + { X86::VPERMILPSZ256rr , X86::VPERMILPSYrr }, + { X86::VPERMPDZ256mi , X86::VPERMPDYmi }, + { X86::VPERMPDZ256ri , X86::VPERMPDYri }, + { X86::VPERMPSZ256rm 
, X86::VPERMPSYrm }, + { X86::VPERMPSZ256rr , X86::VPERMPSYrr }, + { X86::VPERMQZ256mi , X86::VPERMQYmi }, + { X86::VPERMQZ256ri , X86::VPERMQYri }, + { X86::VPMADDUBSWZ256rm , X86::VPMADDUBSWYrm }, + { X86::VPMADDUBSWZ256rr , X86::VPMADDUBSWYrr }, + { X86::VPMADDWDZ256rm , X86::VPMADDWDYrm }, + { X86::VPMADDWDZ256rr , X86::VPMADDWDYrr }, + { X86::VPMAXSBZ256rm , X86::VPMAXSBYrm }, + { X86::VPMAXSBZ256rr , X86::VPMAXSBYrr }, + { X86::VPMAXSDZ256rm , X86::VPMAXSDYrm }, + { X86::VPMAXSDZ256rr , X86::VPMAXSDYrr }, + { X86::VPMAXSWZ256rm , X86::VPMAXSWYrm }, + { X86::VPMAXSWZ256rr , X86::VPMAXSWYrr }, + { X86::VPMAXUBZ256rm , X86::VPMAXUBYrm }, + { X86::VPMAXUBZ256rr , X86::VPMAXUBYrr }, + { X86::VPMAXUDZ256rm , X86::VPMAXUDYrm }, + { X86::VPMAXUDZ256rr , X86::VPMAXUDYrr }, + { X86::VPMAXUWZ256rm , X86::VPMAXUWYrm }, + { X86::VPMAXUWZ256rr , X86::VPMAXUWYrr }, + { X86::VPMINSBZ256rm , X86::VPMINSBYrm }, + { X86::VPMINSBZ256rr , X86::VPMINSBYrr }, + { X86::VPMINSDZ256rm , X86::VPMINSDYrm }, + { X86::VPMINSDZ256rr , X86::VPMINSDYrr }, + { X86::VPMINSWZ256rm , X86::VPMINSWYrm }, + { X86::VPMINSWZ256rr , X86::VPMINSWYrr }, + { X86::VPMINUBZ256rm , X86::VPMINUBYrm }, + { X86::VPMINUBZ256rr , X86::VPMINUBYrr }, + { X86::VPMINUDZ256rm , X86::VPMINUDYrm }, + { X86::VPMINUDZ256rr , X86::VPMINUDYrr }, + { X86::VPMINUWZ256rm , X86::VPMINUWYrm }, + { X86::VPMINUWZ256rr , X86::VPMINUWYrr }, + { X86::VPMOVSXBDZ256rm , X86::VPMOVSXBDYrm }, + { X86::VPMOVSXBDZ256rr , X86::VPMOVSXBDYrr }, + { X86::VPMOVSXBQZ256rm , X86::VPMOVSXBQYrm }, + { X86::VPMOVSXBQZ256rr , X86::VPMOVSXBQYrr }, + { X86::VPMOVSXBWZ256rm , X86::VPMOVSXBWYrm }, + { X86::VPMOVSXBWZ256rr , X86::VPMOVSXBWYrr }, + { X86::VPMOVSXDQZ256rm , X86::VPMOVSXDQYrm }, + { X86::VPMOVSXDQZ256rr , X86::VPMOVSXDQYrr }, + { X86::VPMOVSXWDZ256rm , X86::VPMOVSXWDYrm }, + { X86::VPMOVSXWDZ256rr , X86::VPMOVSXWDYrr }, + { X86::VPMOVSXWQZ256rm , X86::VPMOVSXWQYrm }, + { X86::VPMOVSXWQZ256rr , X86::VPMOVSXWQYrr }, + { X86::VPMOVZXBDZ256rm , 
X86::VPMOVZXBDYrm }, + { X86::VPMOVZXBDZ256rr , X86::VPMOVZXBDYrr }, + { X86::VPMOVZXBQZ256rm , X86::VPMOVZXBQYrm }, + { X86::VPMOVZXBQZ256rr , X86::VPMOVZXBQYrr }, + { X86::VPMOVZXBWZ256rm , X86::VPMOVZXBWYrm }, + { X86::VPMOVZXBWZ256rr , X86::VPMOVZXBWYrr }, + { X86::VPMOVZXDQZ256rm , X86::VPMOVZXDQYrm }, + { X86::VPMOVZXDQZ256rr , X86::VPMOVZXDQYrr }, + { X86::VPMOVZXWDZ256rm , X86::VPMOVZXWDYrm }, + { X86::VPMOVZXWDZ256rr , X86::VPMOVZXWDYrr }, + { X86::VPMOVZXWQZ256rm , X86::VPMOVZXWQYrm }, + { X86::VPMOVZXWQZ256rr , X86::VPMOVZXWQYrr }, + { X86::VPMULDQZ256rm , X86::VPMULDQYrm }, + { X86::VPMULDQZ256rr , X86::VPMULDQYrr }, + { X86::VPMULHRSWZ256rm , X86::VPMULHRSWYrm }, + { X86::VPMULHRSWZ256rr , X86::VPMULHRSWYrr }, + { X86::VPMULHUWZ256rm , X86::VPMULHUWYrm }, + { X86::VPMULHUWZ256rr , X86::VPMULHUWYrr }, + { X86::VPMULHWZ256rm , X86::VPMULHWYrm }, + { X86::VPMULHWZ256rr , X86::VPMULHWYrr }, + { X86::VPMULLDZ256rm , X86::VPMULLDYrm }, + { X86::VPMULLDZ256rr , X86::VPMULLDYrr }, + { X86::VPMULLWZ256rm , X86::VPMULLWYrm }, + { X86::VPMULLWZ256rr , X86::VPMULLWYrr }, + { X86::VPMULUDQZ256rm , X86::VPMULUDQYrm }, + { X86::VPMULUDQZ256rr , X86::VPMULUDQYrr }, + { X86::VPORDZ256rm , X86::VPORYrm }, + { X86::VPORDZ256rr , X86::VPORYrr }, + { X86::VPORQZ256rm , X86::VPORYrm }, + { X86::VPORQZ256rr , X86::VPORYrr }, + { X86::VPSADBWZ256rm , X86::VPSADBWYrm }, + { X86::VPSADBWZ256rr , X86::VPSADBWYrr }, + { X86::VPSHUFBZ256rm , X86::VPSHUFBYrm }, + { X86::VPSHUFBZ256rr , X86::VPSHUFBYrr }, + { X86::VPSHUFDZ256mi , X86::VPSHUFDYmi }, + { X86::VPSHUFDZ256ri , X86::VPSHUFDYri }, + { X86::VPSHUFHWZ256mi , X86::VPSHUFHWYmi }, + { X86::VPSHUFHWZ256ri , X86::VPSHUFHWYri }, + { X86::VPSHUFLWZ256mi , X86::VPSHUFLWYmi }, + { X86::VPSHUFLWZ256ri , X86::VPSHUFLWYri }, + { X86::VPSLLDQZ256rr , X86::VPSLLDQYri }, + { X86::VPSLLDZ256ri , X86::VPSLLDYri }, + { X86::VPSLLDZ256rm , X86::VPSLLDYrm }, + { X86::VPSLLDZ256rr , X86::VPSLLDYrr }, + { X86::VPSLLQZ256ri , X86::VPSLLQYri }, + 
{ X86::VPSLLQZ256rm , X86::VPSLLQYrm }, + { X86::VPSLLQZ256rr , X86::VPSLLQYrr }, + { X86::VPSLLVDZ256rm , X86::VPSLLVDYrm }, + { X86::VPSLLVDZ256rr , X86::VPSLLVDYrr }, + { X86::VPSLLVQZ256rm , X86::VPSLLVQYrm }, + { X86::VPSLLVQZ256rr , X86::VPSLLVQYrr }, + { X86::VPSLLWZ256ri , X86::VPSLLWYri }, + { X86::VPSLLWZ256rm , X86::VPSLLWYrm }, + { X86::VPSLLWZ256rr , X86::VPSLLWYrr }, + { X86::VPSRADZ256ri , X86::VPSRADYri }, + { X86::VPSRADZ256rm , X86::VPSRADYrm }, + { X86::VPSRADZ256rr , X86::VPSRADYrr }, + { X86::VPSRAVDZ256rm , X86::VPSRAVDYrm }, + { X86::VPSRAVDZ256rr , X86::VPSRAVDYrr }, + { X86::VPSRAWZ256ri , X86::VPSRAWYri }, + { X86::VPSRAWZ256rm , X86::VPSRAWYrm }, + { X86::VPSRAWZ256rr , X86::VPSRAWYrr }, + { X86::VPSRLDQZ256rr , X86::VPSRLDQYri }, + { X86::VPSRLDZ256ri , X86::VPSRLDYri }, + { X86::VPSRLDZ256rm , X86::VPSRLDYrm }, + { X86::VPSRLDZ256rr , X86::VPSRLDYrr }, + { X86::VPSRLQZ256ri , X86::VPSRLQYri }, + { X86::VPSRLQZ256rm , X86::VPSRLQYrm }, + { X86::VPSRLQZ256rr , X86::VPSRLQYrr }, + { X86::VPSRLVDZ256rm , X86::VPSRLVDYrm }, + { X86::VPSRLVDZ256rr , X86::VPSRLVDYrr }, + { X86::VPSRLVQZ256rm , X86::VPSRLVQYrm }, + { X86::VPSRLVQZ256rr , X86::VPSRLVQYrr }, + { X86::VPSRLWZ256ri , X86::VPSRLWYri }, + { X86::VPSRLWZ256rm , X86::VPSRLWYrm }, + { X86::VPSRLWZ256rr , X86::VPSRLWYrr }, + { X86::VPSUBBZ256rm , X86::VPSUBBYrm }, + { X86::VPSUBBZ256rr , X86::VPSUBBYrr }, + { X86::VPSUBDZ256rm , X86::VPSUBDYrm }, + { X86::VPSUBDZ256rr , X86::VPSUBDYrr }, + { X86::VPSUBQZ256rm , X86::VPSUBQYrm }, + { X86::VPSUBQZ256rr , X86::VPSUBQYrr }, + { X86::VPSUBSBZ256rm , X86::VPSUBSBYrm }, + { X86::VPSUBSBZ256rr , X86::VPSUBSBYrr }, + { X86::VPSUBSWZ256rm , X86::VPSUBSWYrm }, + { X86::VPSUBSWZ256rr , X86::VPSUBSWYrr }, + { X86::VPSUBUSBZ256rm , X86::VPSUBUSBYrm }, + { X86::VPSUBUSBZ256rr , X86::VPSUBUSBYrr }, + { X86::VPSUBUSWZ256rm , X86::VPSUBUSWYrm }, + { X86::VPSUBUSWZ256rr , X86::VPSUBUSWYrr }, + { X86::VPSUBWZ256rm , X86::VPSUBWYrm }, + { X86::VPSUBWZ256rr , 
X86::VPSUBWYrr }, + { X86::VPUNPCKHBWZ256rm , X86::VPUNPCKHBWYrm }, + { X86::VPUNPCKHBWZ256rr , X86::VPUNPCKHBWYrr }, + { X86::VPUNPCKHDQZ256rm , X86::VPUNPCKHDQYrm }, + { X86::VPUNPCKHDQZ256rr , X86::VPUNPCKHDQYrr }, + { X86::VPUNPCKHQDQZ256rm , X86::VPUNPCKHQDQYrm }, + { X86::VPUNPCKHQDQZ256rr , X86::VPUNPCKHQDQYrr }, + { X86::VPUNPCKHWDZ256rm , X86::VPUNPCKHWDYrm }, + { X86::VPUNPCKHWDZ256rr , X86::VPUNPCKHWDYrr }, + { X86::VPUNPCKLBWZ256rm , X86::VPUNPCKLBWYrm }, + { X86::VPUNPCKLBWZ256rr , X86::VPUNPCKLBWYrr }, + { X86::VPUNPCKLDQZ256rm , X86::VPUNPCKLDQYrm }, + { X86::VPUNPCKLDQZ256rr , X86::VPUNPCKLDQYrr }, + { X86::VPUNPCKLQDQZ256rm , X86::VPUNPCKLQDQYrm }, + { X86::VPUNPCKLQDQZ256rr , X86::VPUNPCKLQDQYrr }, + { X86::VPUNPCKLWDZ256rm , X86::VPUNPCKLWDYrm }, + { X86::VPUNPCKLWDZ256rr , X86::VPUNPCKLWDYrr }, + { X86::VPXORDZ256rm , X86::VPXORYrm }, + { X86::VPXORDZ256rr , X86::VPXORYrr }, + { X86::VPXORQZ256rm , X86::VPXORYrm }, + { X86::VPXORQZ256rr , X86::VPXORYrr }, + { X86::VSHUFPDZ256rmi , X86::VSHUFPDYrmi }, + { X86::VSHUFPDZ256rri , X86::VSHUFPDYrri }, + { X86::VSHUFPSZ256rmi , X86::VSHUFPSYrmi }, + { X86::VSHUFPSZ256rri , X86::VSHUFPSYrri }, + { X86::VSQRTPDZ256m , X86::VSQRTPDYm }, + { X86::VSQRTPDZ256r , X86::VSQRTPDYr }, + { X86::VSQRTPSZ256m , X86::VSQRTPSYm }, + { X86::VSQRTPSZ256r , X86::VSQRTPSYr }, + { X86::VSUBPDZ256rm , X86::VSUBPDYrm }, + { X86::VSUBPDZ256rr , X86::VSUBPDYrr }, + { X86::VSUBPSZ256rm , X86::VSUBPSYrm }, + { X86::VSUBPSZ256rr , X86::VSUBPSYrr }, + { X86::VUNPCKHPDZ256rm , X86::VUNPCKHPDYrm }, + { X86::VUNPCKHPDZ256rr , X86::VUNPCKHPDYrr }, + { X86::VUNPCKHPSZ256rm , X86::VUNPCKHPSYrm }, + { X86::VUNPCKHPSZ256rr , X86::VUNPCKHPSYrr }, + { X86::VUNPCKLPDZ256rm , X86::VUNPCKLPDYrm }, + { X86::VUNPCKLPDZ256rr , X86::VUNPCKLPDYrr }, + { X86::VUNPCKLPSZ256rm , X86::VUNPCKLPSYrm }, + { X86::VUNPCKLPSZ256rr , X86::VUNPCKLPSYrr }, + { X86::VXORPDZ256rm , X86::VXORPDYrm }, + { X86::VXORPDZ256rr , X86::VXORPDYrr }, + { X86::VXORPSZ256rm 
, X86::VXORPSYrm }, + { X86::VXORPSZ256rr , X86::VXORPSYrr }, +}; + +#endif \ No newline at end of file Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -16,6 +16,7 @@ #include "X86RegisterInfo.h" #include "X86ShuffleDecodeConstantPool.h" #include "InstPrinter/X86ATTInstPrinter.h" +#include "InstPrinter/X86InstComments.h" #include "MCTargetDesc/X86BaseInfo.h" #include "Utils/X86ShuffleDecode.h" #include "llvm/ADT/Optional.h" @@ -1290,6 +1291,13 @@ X86MCInstLower MCInstLowering(*MF, *this); const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo(); + // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that + // are compressed from EVEX encoding to VEX encoding. + if (TM.Options.MCOptions.ShowMCEncoding) { + if (MI->getAsmPrinterFlags() & AC_EVEX_2_VEX) + OutStreamer->AddComment("EVEX TO VEX Compression ", false); + } + switch (MI->getOpcode()) { case TargetOpcode::DBG_VALUE: llvm_unreachable("Should be handled target independently"); Index: lib/Target/X86/X86TargetMachine.cpp =================================================================== --- lib/Target/X86/X86TargetMachine.cpp +++ lib/Target/X86/X86TargetMachine.cpp @@ -46,6 +46,7 @@ initializeGlobalISel(PR); initializeWinEHStatePassPass(PR); initializeFixupBWInstPassPass(PR); + initializeEvexToVexInstPassPass(PR); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { @@ -399,5 +400,6 @@ addPass(createX86FixupBWInsts()); addPass(createX86PadShortFunctions()); addPass(createX86FixupLEAs()); + addPass(createX86EvexToVexInsts()); } } Index: test/CodeGen/X86/avx-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/avx-intrinsics-x86.ll +++ test/CodeGen/X86/avx-intrinsics-x86.ll @@ -102,7 +102,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_comieq_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcomisd %xmm1, %xmm0 ##
encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1] +; AVX512VL-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] ; AVX512VL-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0] ; AVX512VL-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1] ; AVX512VL-NEXT: andb %al, %cl ## encoding: [0x20,0xc1] @@ -125,7 +125,7 @@ ; AVX512VL-LABEL: test_x86_sse2_comige_sd: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1] +; AVX512VL-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] ; AVX512VL-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] @@ -145,7 +145,7 @@ ; AVX512VL-LABEL: test_x86_sse2_comigt_sd: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1] +; AVX512VL-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] ; AVX512VL-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] @@ -165,7 +165,7 @@ ; AVX512VL-LABEL: test_x86_sse2_comile_sd: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vcomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc8] +; AVX512VL-NEXT: vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8] ; AVX512VL-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] @@ -185,7 +185,7 @@ ; AVX512VL-LABEL: test_x86_sse2_comilt_sd: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -;
AVX512VL-NEXT: vcomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc8] +; AVX512VL-NEXT: vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8] ; AVX512VL-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] @@ -206,7 +206,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_comineq_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1] +; AVX512VL-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] ; AVX512VL-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0] ; AVX512VL-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] ; AVX512VL-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] @@ -226,7 +226,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_cvtdq2ps: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5b,0xc0] +; AVX512VL-NEXT: vcvtdq2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -242,7 +242,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_cvtpd2dq: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0] +; AVX512VL-NEXT: vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -258,7 +258,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_cvtpd2ps: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0] +; AVX512VL-NEXT: vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x float>
@llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -285,7 +285,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_cvtsd2si: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvtsd2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7f,0x08,0x2d,0xc0] +; AVX512VL-NEXT: vcvtsd2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; [#uses=1] ret i32 %res @@ -312,7 +312,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_cvtsi2sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x2a,0x44,0x24,0x01] +; AVX512VL-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res @@ -339,7 +339,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_cvttpd2dq: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0] +; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -355,7 +355,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_cvttps2dq: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5b,0xc0] +; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -371,7 +371,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_cvttsd2si: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvttsd2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7f,0x08,0x2c,0xc0] 
+; AVX512VL-NEXT: vcvttsd2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; [#uses=1] ret i32 %res @@ -388,7 +388,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_max_pd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5f,0xc1] +; AVX512VL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res @@ -415,7 +415,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_min_pd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vminpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5d,0xc1] +; AVX512VL-NEXT: vminpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res @@ -455,7 +455,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_packssdw_128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0xc1] +; AVX512VL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -471,7 +471,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_packsswb_128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x63,0xc1] +; AVX512VL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 
x i8>> [#uses=1] ret <16 x i8> %res @@ -487,7 +487,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_packuswb_128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x67,0xc1] +; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -503,7 +503,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_padds_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xec,0xc1] +; AVX512VL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -519,7 +519,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_padds_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xed,0xc1] +; AVX512VL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -535,7 +535,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_paddus_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdc,0xc1] +; AVX512VL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -551,7 +551,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_paddus_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdd,0xc1] +; 
AVX512VL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -567,7 +567,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pavg_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe0,0xc1] +; AVX512VL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -583,7 +583,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pavg_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe3,0xc1] +; AVX512VL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -599,7 +599,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pmadd_wd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf5,0xc1] +; AVX512VL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -615,7 +615,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pmaxs_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xee,0xc1] +; AVX512VL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> 
[#uses=1] ret <8 x i16> %res @@ -631,7 +631,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pmaxu_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xde,0xc1] +; AVX512VL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -647,7 +647,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pmins_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xea,0xc1] +; AVX512VL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -663,7 +663,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pminu_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xda,0xc1] +; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -690,7 +690,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pmulh_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe5,0xc1] +; AVX512VL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -706,7 +706,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pmulhu_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe4,0xc1] +; AVX512VL-NEXT: vpmulhuw %xmm1, 
%xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -722,7 +722,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pmulu_dq: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf4,0xc1] +; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -738,7 +738,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psad_bw: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf6,0xc1] +; AVX512VL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -754,7 +754,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psll_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf2,0xc1] +; AVX512VL-NEXT: vpslld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -770,7 +770,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psll_q: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf3,0xc1] +; AVX512VL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -786,7 
+786,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psll_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf1,0xc1] +; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -802,7 +802,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pslli_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpslld $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xf0,0x07] +; AVX512VL-NEXT: vpslld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -818,7 +818,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pslli_q: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsllq $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x73,0xf0,0x07] +; AVX512VL-NEXT: vpsllq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -834,7 +834,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_pslli_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xf0,0x07] +; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -850,7 +850,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psra_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe2,0xc1] +; AVX512VL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0xe2,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -866,7 +866,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psra_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe1,0xc1] +; AVX512VL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -882,7 +882,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psrai_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrad $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xe0,0x07] +; AVX512VL-NEXT: vpsrad $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -898,7 +898,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psrai_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsraw $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xe0,0x07] +; AVX512VL-NEXT: vpsraw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -914,7 +914,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psrl_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd2,0xc1] +; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -930,7 +930,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psrl_q: ; AVX512VL: ## 
BB#0: -; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd3,0xc1] +; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -946,7 +946,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psrl_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd1,0xc1] +; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -962,7 +962,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psrli_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrld $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xd0,0x07] +; AVX512VL-NEXT: vpsrld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -978,7 +978,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psrli_q: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlq $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x73,0xd0,0x07] +; AVX512VL-NEXT: vpsrlq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -994,7 +994,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psrli_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlw $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xd0,0x07] +; AVX512VL-NEXT: vpsrlw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call 
<8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1010,7 +1010,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psubs_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe8,0xc1] +; AVX512VL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -1026,7 +1026,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psubs_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe9,0xc1] +; AVX512VL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1042,7 +1042,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psubus_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd8,0xc1] +; AVX512VL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -1058,7 +1058,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_psubus_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd9,0xc1] +; AVX512VL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1100,7 +1100,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_ucomieq_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vucomisd %xmm1, 
%xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc1] +; AVX512VL-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1] ; AVX512VL-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0] ; AVX512VL-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1] ; AVX512VL-NEXT: andb %al, %cl ## encoding: [0x20,0xc1] @@ -1123,7 +1123,7 @@ ; AVX512VL-LABEL: test_x86_sse2_ucomige_sd: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc1] +; AVX512VL-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1] ; AVX512VL-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -1143,7 +1143,7 @@ ; AVX512VL-LABEL: test_x86_sse2_ucomigt_sd: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc1] +; AVX512VL-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1] ; AVX512VL-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -1163,7 +1163,7 @@ ; AVX512VL-LABEL: test_x86_sse2_ucomile_sd: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vucomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc8] +; AVX512VL-NEXT: vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8] ; AVX512VL-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -1183,7 +1183,7 @@ ; AVX512VL-LABEL: test_x86_sse2_ucomilt_sd: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## 
encoding: [0x31,0xc0] -; AVX512VL-NEXT: vucomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc8] +; AVX512VL-NEXT: vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8] ; AVX512VL-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -1204,7 +1204,7 @@ ; ; AVX512VL-LABEL: test_x86_sse2_ucomineq_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc1] +; AVX512VL-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1] ; AVX512VL-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0] ; AVX512VL-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] ; AVX512VL-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] @@ -1376,7 +1376,7 @@ ; ; AVX512VL-LABEL: test_x86_sse41_packusdw: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2b,0xc1] +; AVX512VL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1414,7 +1414,7 @@ ; ; AVX512VL-LABEL: test_x86_sse41_pmaxsb: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3c,0xc1] +; AVX512VL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3c,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -1430,7 +1430,7 @@ ; ; AVX512VL-LABEL: test_x86_sse41_pmaxsd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3d,0xc1] +; AVX512VL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: 
[0xc4,0xe2,0x79,0x3d,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1446,7 +1446,7 @@ ; ; AVX512VL-LABEL: test_x86_sse41_pmaxud: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3f,0xc1] +; AVX512VL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3f,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1462,7 +1462,7 @@ ; ; AVX512VL-LABEL: test_x86_sse41_pmaxuw: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3e,0xc1] +; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1478,7 +1478,7 @@ ; ; AVX512VL-LABEL: test_x86_sse41_pminsb: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x38,0xc1] +; AVX512VL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x38,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -1494,7 +1494,7 @@ ; ; AVX512VL-LABEL: test_x86_sse41_pminsd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x39,0xc1] +; AVX512VL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x39,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1510,7 +1510,7 @@ ; 
; AVX512VL-LABEL: test_x86_sse41_pminud: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminud %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3b,0xc1] +; AVX512VL-NEXT: vpminud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3b,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1526,7 +1526,7 @@ ; ; AVX512VL-LABEL: test_x86_sse41_pminuw: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3a,0xc1] +; AVX512VL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1542,7 +1542,7 @@ ; ; AVX512VL-LABEL: test_x86_sse41_pmuldq: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1] +; AVX512VL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -1663,7 +1663,7 @@ ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08] ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX512VL-NEXT: vmovdqu8 (%eax), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x00] +; AVX512VL-NEXT: vmovdqu (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x00] ; AVX512VL-NEXT: movl $7, %eax ## encoding: [0xb8,0x07,0x00,0x00,0x00] ; AVX512VL-NEXT: movl $7, %edx ## encoding: [0xba,0x07,0x00,0x00,0x00] ; AVX512VL-NEXT: vpcmpestri $7, (%ecx), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x61,0x01,0x07] @@ -1816,7 +1816,7 @@ ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: 
movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08] ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04] -; AVX512VL-NEXT: vmovdqu8 (%ecx), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x01] +; AVX512VL-NEXT: vmovdqu (%ecx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] ; AVX512VL-NEXT: vpcmpistri $7, (%eax), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x63,0x00,0x07] ; AVX512VL-NEXT: movl %ecx, %eax ## encoding: [0x89,0xc8] ; AVX512VL-NEXT: retl ## encoding: [0xc3] @@ -1949,7 +1949,7 @@ ; ; AVX512VL-LABEL: test_x86_sse_comieq_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1] +; AVX512VL-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] ; AVX512VL-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0] ; AVX512VL-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1] ; AVX512VL-NEXT: andb %al, %cl ## encoding: [0x20,0xc1] @@ -1972,7 +1972,7 @@ ; AVX512VL-LABEL: test_x86_sse_comige_ss: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1] +; AVX512VL-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] ; AVX512VL-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -1992,7 +1992,7 @@ ; AVX512VL-LABEL: test_x86_sse_comigt_ss: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1] +; AVX512VL-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] ; AVX512VL-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -2012,7 +2012,7 @@ ; 
AVX512VL-LABEL: test_x86_sse_comile_ss: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vcomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc8] +; AVX512VL-NEXT: vcomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8] ; AVX512VL-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -2032,7 +2032,7 @@ ; AVX512VL-LABEL: test_x86_sse_comilt_ss: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vcomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc8] +; AVX512VL-NEXT: vcomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8] ; AVX512VL-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -2053,7 +2053,7 @@ ; ; AVX512VL-LABEL: test_x86_sse_comineq_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1] +; AVX512VL-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] ; AVX512VL-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0] ; AVX512VL-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] ; AVX512VL-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] @@ -2075,7 +2075,7 @@ ; AVX512VL-LABEL: test_x86_sse_cvtsi2ss: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl $7, %eax ## encoding: [0xb8,0x07,0x00,0x00,0x00] -; AVX512VL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x2a,0xc0] +; AVX512VL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -2091,7 +2091,7 @@ ; ; AVX512VL-LABEL: 
test_x86_sse_cvtss2si: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvtss2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7e,0x08,0x2d,0xc0] +; AVX512VL-NEXT: vcvtss2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; [#uses=1] ret i32 %res @@ -2107,7 +2107,7 @@ ; ; AVX512VL-LABEL: test_x86_sse_cvttss2si: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvttss2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7e,0x08,0x2c,0xc0] +; AVX512VL-NEXT: vcvttss2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; [#uses=1] ret i32 %res @@ -2136,7 +2136,7 @@ ; ; AVX512VL-LABEL: test_x86_sse_max_ps: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5f,0xc1] +; AVX512VL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -2163,7 +2163,7 @@ ; ; AVX512VL-LABEL: test_x86_sse_min_ps: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vminps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5d,0xc1] +; AVX512VL-NEXT: vminps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -2294,7 +2294,7 @@ ; ; AVX512VL-LABEL: test_x86_sse_ucomieq_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1] +; AVX512VL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] ; AVX512VL-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0] ; AVX512VL-NEXT: sete %cl ## 
encoding: [0x0f,0x94,0xc1] ; AVX512VL-NEXT: andb %al, %cl ## encoding: [0x20,0xc1] @@ -2317,7 +2317,7 @@ ; AVX512VL-LABEL: test_x86_sse_ucomige_ss: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1] +; AVX512VL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] ; AVX512VL-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -2337,7 +2337,7 @@ ; AVX512VL-LABEL: test_x86_sse_ucomigt_ss: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1] +; AVX512VL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] ; AVX512VL-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -2357,7 +2357,7 @@ ; AVX512VL-LABEL: test_x86_sse_ucomile_ss: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vucomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc8] +; AVX512VL-NEXT: vucomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] ; AVX512VL-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -2377,7 +2377,7 @@ ; AVX512VL-LABEL: test_x86_sse_ucomilt_ss: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; AVX512VL-NEXT: vucomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc8] +; AVX512VL-NEXT: vucomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] ; AVX512VL-NEXT: seta %al ## encoding: 
[0x0f,0x97,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -2398,7 +2398,7 @@ ; ; AVX512VL-LABEL: test_x86_sse_ucomineq_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1] +; AVX512VL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] ; AVX512VL-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0] ; AVX512VL-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] ; AVX512VL-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] @@ -2418,7 +2418,7 @@ ; ; AVX512VL-LABEL: test_x86_ssse3_pabs_b_128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpabsb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1c,0xc0] +; AVX512VL-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -2434,7 +2434,7 @@ ; ; AVX512VL-LABEL: test_x86_ssse3_pabs_d_128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpabsd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1e,0xc0] +; AVX512VL-NEXT: vpabsd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -2450,7 +2450,7 @@ ; ; AVX512VL-LABEL: test_x86_ssse3_pabs_w_128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpabsw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1d,0xc0] +; AVX512VL-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -2532,7 +2532,7 @@ ; ; AVX512VL-LABEL: test_x86_ssse3_pmadd_ub_sw_128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaddubsw %xmm1, %xmm0, 
%xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x04,0xc1] +; AVX512VL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -2552,8 +2552,8 @@ ; AVX512VL-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX512VL-NEXT: vmovdqu8 (%eax), %xmm1 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x08] -; AVX512VL-NEXT: vpmaddubsw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0x04,0xc0] +; AVX512VL-NEXT: vmovdqu (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x08] +; AVX512VL-NEXT: vpmaddubsw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x04,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %a0 = load <16 x i8>, <16 x i8>* %ptr %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1] @@ -2569,7 +2569,7 @@ ; ; AVX512VL-LABEL: test_x86_ssse3_pmul_hr_sw_128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x0b,0xc1] +; AVX512VL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -2585,7 +2585,7 @@ ; ; AVX512VL-LABEL: test_x86_ssse3_pshuf_b_128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x00,0xc1] +; AVX512VL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 
ret <16 x i8> %res @@ -2772,7 +2772,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_cvt_pd2_ps_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvtpd2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x5a,0xc0] +; AVX512VL-NEXT: vcvtpd2ps %ymm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -2789,7 +2789,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_cvt_pd2dq_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvtpd2dq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x28,0xe6,0xc0] +; AVX512VL-NEXT: vcvtpd2dq %ymm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -2816,7 +2816,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5b,0xc0] +; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1] ret <8 x float> %res @@ -2833,7 +2833,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_cvtt_pd2dq_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xe6,0xc0] +; AVX512VL-NEXT: vcvttpd2dq %ymm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -2849,7 +2849,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x5b,0xc0] +; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xfe,0x5b,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -3042,7 +3042,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_max_pd_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x5f,0xc1] +; AVX512VL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5f,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] ret <4 x double> %res @@ -3058,7 +3058,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_max_ps_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5f,0xc1] +; AVX512VL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] ret <8 x float> %res @@ -3074,7 +3074,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_min_pd_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vminpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x5d,0xc1] +; AVX512VL-NEXT: vminpd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5d,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] ret <4 x double> %res @@ -3090,7 +3090,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_min_ps_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vminps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5d,0xc1] +; AVX512VL-NEXT: vminps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> 
[#uses=1] ret <8 x float> %res @@ -3320,7 +3320,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x0d,0xc1] +; AVX512VL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res @@ -3336,7 +3336,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x0d,0xc1] +; AVX512VL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1] ret <4 x double> %res @@ -3352,7 +3352,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpermilpd $9, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xc0,0x09] +; AVX512VL-NEXT: vpermilpd $9, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x09] ; AVX512VL-NEXT: ## ymm0 = ymm0[1,0,2,3] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> ) ; <<4 x double>> [#uses=1] @@ -3367,7 +3367,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x0c,0xc1] +; AVX512VL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -3382,7 +3382,7 @@ ; AVX512VL-LABEL: 
test_x86_avx_vpermilvar_ps_load: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX512VL-NEXT: vpermilps (%eax), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x0c,0x00] +; AVX512VL-NEXT: vpermilps (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x00] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %a2 = load <4 x i32>, <4 x i32>* %a1 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1] @@ -3399,7 +3399,7 @@ ; ; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x0c,0xc1] +; AVX512VL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] ret <8 x float> %res @@ -3747,9 +3747,9 @@ ; AVX512VL-LABEL: movnt_dq: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX512VL-NEXT: vpaddq LCPI247_0, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI247_0, kind: FK_Data_4 -; AVX512VL-NEXT: vmovntdq %ymm0, (%eax) ## encoding: [0x62,0xf1,0x7d,0x28,0xe7,0x00] +; AVX512VL-NEXT: vpaddq LCPI247_0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI247_0, kind: FK_Data_4 +; AVX512VL-NEXT: vmovntdq %ymm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %a2 = add <2 x i64> %a1, %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> @@ -3769,7 +3769,7 @@ ; AVX512VL-LABEL: movnt_ps: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; 
AVX512VL-NEXT: vmovntps %ymm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x28,0x2b,0x00] +; AVX512VL-NEXT: vmovntps %ymm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x00] ; AVX512VL-NEXT: retl ## encoding: [0xc3] tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind ret void @@ -3790,9 +3790,9 @@ ; AVX512VL-LABEL: movnt_pd: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX512VL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x57,0xc9] -; AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1] -; AVX512VL-NEXT: vmovntpd %ymm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x28,0x2b,0x00] +; AVX512VL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x57,0xc9] +; AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1] +; AVX512VL-NEXT: vmovntpd %ymm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x00] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %a2 = fadd <4 x double> %a1, tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind Index: test/CodeGen/X86/avx2-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/avx2-intrinsics-x86.ll +++ test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -10,7 +10,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_packssdw: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0xc1] +; AVX512VL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -26,7 +26,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_packsswb: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: 
[0x62,0xf1,0x7d,0x28,0x63,0xc1] +; AVX512VL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -42,7 +42,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_packuswb: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x67,0xc1] +; AVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -58,7 +58,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_padds_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xec,0xc1] +; AVX512VL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -74,7 +74,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_padds_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xed,0xc1] +; AVX512VL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -90,7 +90,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_paddus_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdc,0xc1] +; AVX512VL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> 
@llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -106,7 +106,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_paddus_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdd,0xc1] +; AVX512VL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -122,7 +122,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pavg_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe0,0xc1] +; AVX512VL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -138,7 +138,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pavg_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe3,0xc1] +; AVX512VL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -154,7 +154,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmadd_wd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf5,0xc1] +; AVX512VL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -170,7 +170,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmaxs_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 
## encoding: [0x62,0xf1,0x7d,0x28,0xee,0xc1] +; AVX512VL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xee,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -186,7 +186,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmaxu_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xde,0xc1] +; AVX512VL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -202,7 +202,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmins_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xea,0xc1] +; AVX512VL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xea,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -218,7 +218,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pminu_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminub %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xda,0xc1] +; AVX512VL-NEXT: vpminub %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -251,7 +251,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmulh_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe5,0xc1] +; AVX512VL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> 
@llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -267,7 +267,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmulhu_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe4,0xc1] +; AVX512VL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -283,7 +283,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmulu_dq: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xf4,0xc1] +; AVX512VL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1] ret <4 x i64> %res @@ -299,7 +299,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psad_bw: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf6,0xc1] +; AVX512VL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf6,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1] ret <4 x i64> %res @@ -315,7 +315,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psll_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf2,0xc1] +; AVX512VL-NEXT: vpslld %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -331,7 +331,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psll_q: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 ## 
encoding: [0x62,0xf1,0xfd,0x28,0xf3,0xc1] +; AVX512VL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1] ret <4 x i64> %res @@ -347,7 +347,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psll_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf1,0xc1] +; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -363,7 +363,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pslli_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpslld $7, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xf0,0x07] +; AVX512VL-NEXT: vpslld $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -379,7 +379,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pslli_q: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsllq $7, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x73,0xf0,0x07] +; AVX512VL-NEXT: vpsllq $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xf0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1] ret <4 x i64> %res @@ -395,7 +395,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pslli_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x71,0xf0,0x07] +; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, 
i32 7) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -411,7 +411,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psra_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe2,0xc1] +; AVX512VL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -427,7 +427,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psra_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe1,0xc1] +; AVX512VL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -443,7 +443,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrai_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrad $7, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xe0,0x07] +; AVX512VL-NEXT: vpsrad $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xe0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -459,7 +459,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrai_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsraw $7, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x71,0xe0,0x07] +; AVX512VL-NEXT: vpsraw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -475,7 +475,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrl_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd2,0xc1] +; AVX512VL-NEXT: vpsrld 
%xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -491,7 +491,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrl_q: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd3,0xc1] +; AVX512VL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1] ret <4 x i64> %res @@ -507,7 +507,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrl_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd1,0xc1] +; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -523,7 +523,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrli_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrld $7, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xd0,0x07] +; AVX512VL-NEXT: vpsrld $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -539,7 +539,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrli_q: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlq $7, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x73,0xd0,0x07] +; AVX512VL-NEXT: vpsrlq $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xd0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1] ret <4 x i64> %res @@ -555,7 +555,7 
@@ ; ; AVX512VL-LABEL: test_x86_avx2_psrli_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x71,0xd0,0x07] +; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x07] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -571,7 +571,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psubs_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe8,0xc1] +; AVX512VL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -587,7 +587,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psubs_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe9,0xc1] +; AVX512VL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -603,7 +603,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psubus_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd8,0xc1] +; AVX512VL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -619,7 +619,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psubus_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd9,0xc1] +; AVX512VL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xfd,0xd9,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -635,7 +635,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pabs_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpabsb %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1c,0xc0] +; AVX512VL-NEXT: vpabsb %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -651,7 +651,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pabs_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpabsd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1e,0xc0] +; AVX512VL-NEXT: vpabsd %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -667,7 +667,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pabs_w: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpabsw %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1d,0xc0] +; AVX512VL-NEXT: vpabsw %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -749,7 +749,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x04,0xc1] +; AVX512VL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -768,8 +768,8 @@ ; AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0: ; AVX512VL: 
## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX512VL-NEXT: vmovdqu8 (%eax), %ymm1 ## encoding: [0x62,0xf1,0x7f,0x28,0x6f,0x08] -; AVX512VL-NEXT: vpmaddubsw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x04,0xc0] +; AVX512VL-NEXT: vmovdqu (%eax), %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x08] +; AVX512VL-NEXT: vpmaddubsw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %a0 = load <32 x i8>, <32 x i8>* %ptr %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1] @@ -784,7 +784,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmul_hr_sw: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x0b,0xc1] +; AVX512VL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -800,7 +800,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pshuf_b: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x00,0xc1] +; AVX512VL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <32 x i8> %res @@ -851,7 +851,7 @@ ; AVX512VL-LABEL: test_x86_avx2_movntdqa: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX512VL-NEXT: vmovntdqa (%eax), %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x2a,0x00] +; AVX512VL-NEXT: vmovntdqa (%eax), %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2a,0x00] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i64> 
@llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1] ret <4 x i64> %res @@ -878,7 +878,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_packusdw: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x2b,0xc1] +; AVX512VL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -917,7 +917,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmaxsb: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3c,0xc1] +; AVX512VL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -933,7 +933,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmaxsd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3d,0xc1] +; AVX512VL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -949,7 +949,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmaxud: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3f,0xc1] +; AVX512VL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -965,7 +965,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pmaxuw: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 ## 
encoding: [0x62,0xf2,0x7d,0x28,0x3e,0xc1] +; AVX512VL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3e,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -981,7 +981,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pminsb: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x38,0xc1] +; AVX512VL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] ret <32 x i8> %res @@ -997,7 +997,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pminsd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x39,0xc1] +; AVX512VL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -1013,7 +1013,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pminud: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminud %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3b,0xc1] +; AVX512VL-NEXT: vpminud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -1029,7 +1029,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_pminuw: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3a,0xc1] +; AVX512VL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3a,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <16 x 
i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] ret <16 x i16> %res @@ -1079,7 +1079,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_permd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x36,0xc0] +; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -1098,7 +1098,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_permps: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x16,0xc0] +; AVX512VL-NEXT: vpermps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] ret <8 x float> %res @@ -1236,7 +1236,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psllv_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x47,0xc1] +; AVX512VL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1252,7 +1252,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psllv_d_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x47,0xc1] +; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -1268,7 +1268,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psllv_q: ; AVX512VL: ## BB#0: -; 
AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x47,0xc1] +; AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -1284,7 +1284,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psllv_q_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x47,0xc1] +; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1] ret <4 x i64> %res @@ -1300,7 +1300,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrlv_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x45,0xc1] +; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1316,7 +1316,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrlv_d_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x45,0xc1] +; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -1332,7 +1332,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrlv_q: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x45,0xc1] +; AVX512VL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: 
[0xc4,0xe2,0xf9,0x45,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -1348,7 +1348,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrlv_q_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x45,0xc1] +; AVX512VL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1] ret <4 x i64> %res @@ -1364,7 +1364,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x46,0xc1] +; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1382,11 +1382,11 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d_const: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} xmm0 = [2,9,4294967284,23] -; AVX512VL-NEXT: ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI91_0, kind: FK_Data_4 -; AVX512VL-NEXT: vpsravd LCPI91_1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x46,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI91_1, kind: FK_Data_4 +; AVX512VL-NEXT: vmovdqa LCPI91_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23] +; AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI91_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpsravd LCPI91_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI91_1, kind: FK_Data_4 ; 
AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> , <4 x i32> ) ret <4 x i32> %res @@ -1401,7 +1401,7 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0xc1] +; AVX512VL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xc1] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res @@ -1419,11 +1419,11 @@ ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] -; AVX512VL-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI93_0, kind: FK_Data_4 -; AVX512VL-NEXT: vpsravd LCPI93_1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI93_1, kind: FK_Data_4 +; AVX512VL-NEXT: vmovdqa LCPI93_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] +; AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI93_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpsravd LCPI93_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI93_1, kind: FK_Data_4 ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> , <8 x i32> ) ret <8 x i32> %res @@ -1667,10 +1667,10 @@ ; AVX512VL-LABEL: test_gather_mask: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX512VL-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] +; AVX512VL-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression 
encoding: [0xc5,0xfc,0x28,0xda] ; AVX512VL-NEXT: vgatherdps %ymm3, (%eax,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x88] ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08] -; AVX512VL-NEXT: vmovups %ymm2, (%eax) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x10] +; AVX512VL-NEXT: vmovups %ymm2, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %a_i8 = bitcast float* %a to i8* %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, Index: test/CodeGen/X86/avx2-vbroadcast.ll =================================================================== --- test/CodeGen/X86/avx2-vbroadcast.ll +++ test/CodeGen/X86/avx2-vbroadcast.ll @@ -1140,7 +1140,7 @@ ; X32-AVX512VL-NEXT: vmovaps %xmm0, (%esp) ; X32-AVX512VL-NEXT: vpbroadcastb (%eax), %xmm1 ; X32-AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) -; X32-AVX512VL-NEXT: vmovdqa32 %xmm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) ; X32-AVX512VL-NEXT: addl $60, %esp ; X32-AVX512VL-NEXT: retl ; @@ -1152,7 +1152,7 @@ ; X64-AVX512VL-NEXT: vmovd %eax, %xmm1 ; X64-AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1 ; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; X64-AVX512VL-NEXT: vmovdqa32 %xmm1, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) ; X64-AVX512VL-NEXT: retq eintry: %__a.addr.i = alloca <2 x i64>, align 16 @@ -1234,7 +1234,7 @@ ; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp) ; X32-AVX512VL-NEXT: vpbroadcastb (%eax), %ymm1 ; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) -; X32-AVX512VL-NEXT: vmovdqa32 %ymm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp) ; X32-AVX512VL-NEXT: movl %ebp, %esp ; X32-AVX512VL-NEXT: popl %ebp ; X32-AVX512VL-NEXT: retl @@ -1257,7 +1257,7 @@ ; X64-AVX512VL-NEXT: vmovd %eax, %xmm1 ; X64-AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1 ; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) -; X64-AVX512VL-NEXT: vmovdqa32 %ymm1, 
{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) ; X64-AVX512VL-NEXT: movq %rbp, %rsp ; X64-AVX512VL-NEXT: popq %rbp ; X64-AVX512VL-NEXT: retq @@ -1312,7 +1312,7 @@ ; X32-AVX512VL-NEXT: vmovaps %xmm0, (%esp) ; X32-AVX512VL-NEXT: vpbroadcastw (%eax), %xmm1 ; X32-AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) -; X32-AVX512VL-NEXT: vmovdqa32 %xmm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) ; X32-AVX512VL-NEXT: addl $60, %esp ; X32-AVX512VL-NEXT: retl ; @@ -1324,7 +1324,7 @@ ; X64-AVX512VL-NEXT: vmovd %eax, %xmm1 ; X64-AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1 ; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; X64-AVX512VL-NEXT: vmovdqa32 %xmm1, -{{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) ; X64-AVX512VL-NEXT: retq entry: %__a.addr.i = alloca <2 x i64>, align 16 @@ -1406,7 +1406,7 @@ ; X32-AVX512VL-NEXT: vmovaps %ymm0, (%esp) ; X32-AVX512VL-NEXT: vpbroadcastw (%eax), %ymm1 ; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) -; X32-AVX512VL-NEXT: vmovdqa32 %ymm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp) ; X32-AVX512VL-NEXT: movl %ebp, %esp ; X32-AVX512VL-NEXT: popl %ebp ; X32-AVX512VL-NEXT: retl @@ -1429,7 +1429,7 @@ ; X64-AVX512VL-NEXT: vmovd %eax, %xmm1 ; X64-AVX512VL-NEXT: vpbroadcastw %xmm1, %ymm1 ; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) -; X64-AVX512VL-NEXT: vmovdqa32 %ymm1, {{[0-9]+}}(%rsp) +; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) ; X64-AVX512VL-NEXT: movq %rbp, %rsp ; X64-AVX512VL-NEXT: popq %rbp ; X64-AVX512VL-NEXT: retq @@ -1650,7 +1650,7 @@ ; X32-AVX512VL-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 ; X32-AVX512VL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ; X32-AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) -; X32-AVX512VL-NEXT: vmovdqa32 %xmm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp) ; X32-AVX512VL-NEXT: addl $60, %esp ; X32-AVX512VL-NEXT: retl ; @@ -1754,7 +1754,7 @@ ; X32-AVX512VL-NEXT: vpinsrd $3, 
%eax, %xmm1, %xmm1 ; X32-AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm1, %ymm1 ; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) -; X32-AVX512VL-NEXT: vmovdqa32 %ymm1, {{[0-9]+}}(%esp) +; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp) ; X32-AVX512VL-NEXT: movl %ebp, %esp ; X32-AVX512VL-NEXT: popl %ebp ; X32-AVX512VL-NEXT: retl Index: test/CodeGen/X86/avx512-arith.ll =================================================================== --- test/CodeGen/X86/avx512-arith.ll +++ test/CodeGen/X86/avx512-arith.ll @@ -90,38 +90,41 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { ; AVX512F-LABEL: imulq512: ; AVX512F: ## BB#0: -; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm2 -; AVX512F-NEXT: vpmuludq %zmm0, %zmm2, %zmm2 +; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm2 ; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm3 ; AVX512F-NEXT: vpmuludq %zmm3, %zmm1, %zmm3 -; AVX512F-NEXT: vpaddq %zmm2, %zmm3, %zmm2 -; AVX512F-NEXT: vpsllq $32, %zmm2, %zmm2 +; AVX512F-NEXT: vpsllq $32, %zmm3, %zmm3 +; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm1 ; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm0 -; AVX512F-NEXT: vpaddq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpsllq $32, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddq %zmm0, %zmm3, %zmm0 +; AVX512F-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: imulq512: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm2 -; AVX512VL-NEXT: vpmuludq %zmm0, %zmm2, %zmm2 +; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm2 ; AVX512VL-NEXT: vpsrlq $32, %zmm0, %zmm3 ; AVX512VL-NEXT: vpmuludq %zmm3, %zmm1, %zmm3 -; AVX512VL-NEXT: vpaddq %zmm2, %zmm3, %zmm2 -; AVX512VL-NEXT: vpsllq $32, %zmm2, %zmm2 +; AVX512VL-NEXT: vpsllq $32, %zmm3, %zmm3 +; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm1 ; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm0 -; AVX512VL-NEXT: vpaddq %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsllq $32, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddq %zmm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: imulq512: ; 
AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm2 -; AVX512BW-NEXT: vpmuludq %zmm0, %zmm2, %zmm2 +; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm2 ; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm3 ; AVX512BW-NEXT: vpmuludq %zmm3, %zmm1, %zmm3 -; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2 +; AVX512BW-NEXT: vpsllq $32, %zmm3, %zmm3 +; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm1 ; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllq $32, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: imulq512: @@ -140,38 +143,41 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) { ; AVX512F-LABEL: imulq256: ; AVX512F: ## BB#0: -; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm2 -; AVX512F-NEXT: vpmuludq %ymm0, %ymm2, %ymm2 +; AVX512F-NEXT: vpmuludq %ymm0, %ymm1, %ymm2 ; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm3 ; AVX512F-NEXT: vpmuludq %ymm3, %ymm1, %ymm3 -; AVX512F-NEXT: vpaddq %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpsllq $32, %ymm2, %ymm2 +; AVX512F-NEXT: vpsllq $32, %ymm3, %ymm3 +; AVX512F-NEXT: vpsrlq $32, %ymm1, %ymm1 ; AVX512F-NEXT: vpmuludq %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpaddq %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllq $32, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddq %ymm0, %ymm3, %ymm0 +; AVX512F-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: imulq256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm2 -; AVX512VL-NEXT: vpmuludq %ymm0, %ymm2, %ymm2 +; AVX512VL-NEXT: vpmuludq %ymm0, %ymm1, %ymm2 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm3 ; AVX512VL-NEXT: vpmuludq %ymm3, %ymm1, %ymm3 -; AVX512VL-NEXT: vpaddq %ymm2, %ymm3, %ymm2 -; AVX512VL-NEXT: vpsllq $32, %ymm2, %ymm2 +; AVX512VL-NEXT: vpsllq $32, %ymm3, %ymm3 +; AVX512VL-NEXT: vpsrlq $32, %ymm1, %ymm1 ; AVX512VL-NEXT: vpmuludq %ymm0, %ymm1, %ymm0 -; AVX512VL-NEXT: vpaddq %ymm2, %ymm0, %ymm0 +; 
AVX512VL-NEXT: vpsllq $32, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddq %ymm0, %ymm3, %ymm0 +; AVX512VL-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: imulq256: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm2 -; AVX512BW-NEXT: vpmuludq %ymm0, %ymm2, %ymm2 +; AVX512BW-NEXT: vpmuludq %ymm0, %ymm1, %ymm2 ; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm3 ; AVX512BW-NEXT: vpmuludq %ymm3, %ymm1, %ymm3 -; AVX512BW-NEXT: vpaddq %ymm2, %ymm3, %ymm2 -; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2 +; AVX512BW-NEXT: vpsllq $32, %ymm3, %ymm3 +; AVX512BW-NEXT: vpsrlq $32, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmuludq %ymm0, %ymm1, %ymm0 -; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0 +; AVX512BW-NEXT: vpsllq $32, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddq %ymm0, %ymm3, %ymm0 +; AVX512BW-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: imulq256: @@ -193,38 +199,41 @@ define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) { ; AVX512F-LABEL: imulq128: ; AVX512F: ## BB#0: -; AVX512F-NEXT: vpsrlq $32, %xmm1, %xmm2 -; AVX512F-NEXT: vpmuludq %xmm0, %xmm2, %xmm2 +; AVX512F-NEXT: vpmuludq %xmm0, %xmm1, %xmm2 ; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm3 ; AVX512F-NEXT: vpmuludq %xmm3, %xmm1, %xmm3 -; AVX512F-NEXT: vpaddq %xmm2, %xmm3, %xmm2 -; AVX512F-NEXT: vpsllq $32, %xmm2, %xmm2 +; AVX512F-NEXT: vpsllq $32, %xmm3, %xmm3 +; AVX512F-NEXT: vpsrlq $32, %xmm1, %xmm1 ; AVX512F-NEXT: vpmuludq %xmm0, %xmm1, %xmm0 -; AVX512F-NEXT: vpaddq %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddq %xmm0, %xmm3, %xmm0 +; AVX512F-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: imulq128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm2 -; AVX512VL-NEXT: vpmuludq %xmm0, %xmm2, %xmm2 +; AVX512VL-NEXT: vpmuludq %xmm0, %xmm1, %xmm2 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm3 ; AVX512VL-NEXT: vpmuludq %xmm3, %xmm1, %xmm3 -; AVX512VL-NEXT: vpaddq %xmm2, %xmm3, %xmm2 -; AVX512VL-NEXT: vpsllq $32, %xmm2, %xmm2 
+; AVX512VL-NEXT: vpsllq $32, %xmm3, %xmm3 +; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm1 ; AVX512VL-NEXT: vpmuludq %xmm0, %xmm1, %xmm0 -; AVX512VL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq %xmm0, %xmm3, %xmm0 +; AVX512VL-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: imulq128: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm2 -; AVX512BW-NEXT: vpmuludq %xmm0, %xmm2, %xmm2 +; AVX512BW-NEXT: vpmuludq %xmm0, %xmm1, %xmm2 ; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm3 ; AVX512BW-NEXT: vpmuludq %xmm3, %xmm1, %xmm3 -; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2 -; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2 +; AVX512BW-NEXT: vpsllq $32, %xmm3, %xmm3 +; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmuludq %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0 +; AVX512BW-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddq %xmm0, %xmm3, %xmm0 +; AVX512BW-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: imulq128: @@ -724,7 +733,7 @@ ; ; AVX512VL-LABEL: test_mask_vminpd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpxord %ymm4, %ymm4, %ymm4 +; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4 ; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 ; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} ; AVX512VL-NEXT: retq @@ -747,7 +756,7 @@ ; ; SKX-LABEL: test_mask_vminpd: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %ymm4, %ymm4, %ymm4 +; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4 ; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 ; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq @@ -787,7 +796,7 @@ ; ; AVX512VL-LABEL: test_mask_vmaxpd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpxord %ymm4, %ymm4, %ymm4 +; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4 ; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 ; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} ; AVX512VL-NEXT: retq @@ -810,7 +819,7 @@ ; ; SKX-LABEL: test_mask_vmaxpd: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %ymm4, %ymm4, %ymm4 +; SKX-NEXT: 
vpxor %ymm4, %ymm4, %ymm4 ; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 ; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq Index: test/CodeGen/X86/avx512-cvt.ll =================================================================== --- test/CodeGen/X86/avx512-cvt.ll +++ test/CodeGen/X86/avx512-cvt.ll @@ -1037,7 +1037,7 @@ ; ; SKX-LABEL: uitofp_8i1_float: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; SKX-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} ; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 @@ -1060,7 +1060,7 @@ ; ; SKX-LABEL: uitofp_8i1_double: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; SKX-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} ; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 @@ -1081,7 +1081,7 @@ ; ; SKX-LABEL: uitofp_4i1_float: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} ; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 @@ -1102,7 +1102,7 @@ ; ; SKX-LABEL: uitofp_4i1_double: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} ; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0 @@ -1131,7 +1131,7 @@ ; ; SKX-LABEL: uitofp_2i1_float: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} @@ -1155,7 +1155,7 @@ ; ; SKX-LABEL: uitofp_2i1_double: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 
; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} Index: test/CodeGen/X86/avx512-ext.ll =================================================================== --- test/CodeGen/X86/avx512-ext.ll +++ test/CodeGen/X86/avx512-ext.ll @@ -1996,9 +1996,9 @@ ; ; SKX-LABEL: zext_4xi1_to_4x32: ; SKX: ## BB#0: -; SKX-NEXT: vmovdqa64 {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] -; SKX-NEXT: vpandq %xmm2, %xmm1, %xmm1 -; SKX-NEXT: vpandq %xmm2, %xmm0, %xmm0 +; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 +; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} ; SKX-NEXT: retq @@ -2019,9 +2019,9 @@ ; ; SKX-LABEL: zext_2xi1_to_2xi64: ; SKX: ## BB#0: -; SKX-NEXT: vmovdqa64 {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] -; SKX-NEXT: vpandq %xmm2, %xmm1, %xmm1 -; SKX-NEXT: vpandq %xmm2, %xmm0, %xmm0 +; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 +; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 ; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} ; SKX-NEXT: retq Index: test/CodeGen/X86/avx512-gather-scatter-intrin.ll =================================================================== --- test/CodeGen/X86/avx512-gather-scatter-intrin.ll +++ test/CodeGen/X86/avx512-gather-scatter-intrin.ll @@ -332,7 +332,7 @@ ; CHECK-LABEL: test_int_x86_avx512_gather3div4_di: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm2 +; CHECK-NEXT: vmovdqa %ymm0, %ymm2 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1} ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1} @@ -369,7 +369,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ; CHECK-NEXT: kxnorw %k0, %k0, %k2 -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm2 +; CHECK-NEXT: vmovdqa %xmm0, %xmm2 ; CHECK-NEXT: vpgatherqd 
(%rdi,%xmm1,4), %xmm2 {%k2} ; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1} ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 @@ -404,7 +404,7 @@ ; CHECK-LABEL: test_int_x86_avx512_gather3div8_si: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm2 +; CHECK-NEXT: vmovdqa %xmm0, %xmm2 ; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2} ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1} @@ -507,7 +507,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ; CHECK-NEXT: kxnorw %k0, %k0, %k2 -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm2 +; CHECK-NEXT: vmovdqa %xmm0, %xmm2 ; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2} ; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1} ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 @@ -542,7 +542,7 @@ ; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm2 +; CHECK-NEXT: vmovdqa %ymm0, %ymm2 ; CHECK-NEXT: kmovq %k1, %k2 ; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2} ; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1} Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -544,7 +544,7 @@ ; SKX-NEXT: vpmovm2b %k1, %zmm0 ; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 ; SKX-NEXT: vpmovm2b %k0, %zmm1 -; SKX-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; SKX-NEXT: vmovdqu {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 @@ -612,7 +612,7 @@ ; SKX-NEXT: vpmovm2b %k1, %zmm0 ; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 ; SKX-NEXT: vpmovm2b %k0, 
%zmm1 -; SKX-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; SKX-NEXT: vmovdqu {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 Index: test/CodeGen/X86/avx512-masked_memop-16-8.ll =================================================================== --- test/CodeGen/X86/avx512-masked_memop-16-8.ll +++ test/CodeGen/X86/avx512-masked_memop-16-8.ll @@ -21,7 +21,7 @@ ; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 ; CHECK-NEXT: vpmovb2m %ymm0, %k1 ; CHECK-NEXT: vmovdqu8 (%rdi), %ymm1 {%k1} -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %res = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> %val) ret <32 x i8> %res Index: test/CodeGen/X86/avx512-mov.ll =================================================================== --- test/CodeGen/X86/avx512-mov.ll +++ test/CodeGen/X86/avx512-mov.ll @@ -13,7 +13,7 @@ define <4 x i32> @test2(i32 %x) { ; CHECK-LABEL: test2: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7] +; CHECK-NEXT: vmovd %edi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = insertelement <4 x i32>undef, i32 %x, i32 0 ret <4 x i32>%res @@ -22,7 +22,7 @@ define <2 x i64> @test3(i64 %x) { ; CHECK-LABEL: test3: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7] +; CHECK-NEXT: vmovq %rdi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = insertelement <2 x i64>undef, i64 %x, i32 0 ret <2 x i64>%res @@ -31,7 +31,7 @@ define 
<4 x i32> @test4(i32* %x) { ; CHECK-LABEL: test4: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07] +; CHECK-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] ; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: retq ## encoding: [0xc3] %y = load i32, i32* %x @@ -42,7 +42,7 @@ define void @test5(float %x, float* %y) { ; CHECK-LABEL: test5: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovss %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x11,0x07] +; CHECK-NEXT: vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] store float %x, float* %y, align 4 ret void @@ -51,7 +51,7 @@ define void @test6(double %x, double* %y) { ; CHECK-LABEL: test6: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovsd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x08,0x11,0x07] +; CHECK-NEXT: vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] store double %x, double* %y, align 8 ret void @@ -60,7 +60,7 @@ define float @test7(i32* %x) { ; CHECK-LABEL: test7: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07] +; CHECK-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] ; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: retq ## encoding: [0xc3] %y = load i32, i32* %x @@ -71,7 +71,7 @@ define i32 @test8(<4 x i32> %x) { ; CHECK-LABEL: test8: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0] +; CHECK-NEXT: vmovd %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = extractelement <4 x i32> %x, i32 0 ret i32 %res @@ -80,7 +80,7 @@ define i64 @test9(<2 x i64> %x) { ; CHECK-LABEL: test9: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovq %xmm0, %rax ## encoding: [0x62,0xf1,0xfd,0x08,0x7e,0xc0] +; CHECK-NEXT: vmovq 
%xmm0, %rax ## EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = extractelement <2 x i64> %x, i32 0 ret i64 %res @@ -89,7 +89,7 @@ define <4 x i32> @test10(i32* %x) { ; CHECK-LABEL: test10: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07] +; CHECK-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] ; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: retq ## encoding: [0xc3] %y = load i32, i32* %x, align 4 @@ -100,7 +100,7 @@ define <4 x float> @test11(float* %x) { ; CHECK-LABEL: test11: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07] +; CHECK-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] ; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: retq ## encoding: [0xc3] %y = load float, float* %x, align 4 @@ -111,7 +111,7 @@ define <2 x double> @test12(double* %x) { ; CHECK-LABEL: test12: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovsd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x10,0x07] +; CHECK-NEXT: vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] ; CHECK-NEXT: ## xmm0 = mem[0],zero ; CHECK-NEXT: retq ## encoding: [0xc3] %y = load double, double* %x, align 8 @@ -122,7 +122,7 @@ define <2 x i64> @test13(i64 %x) { ; CHECK-LABEL: test13: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7] +; CHECK-NEXT: vmovq %rdi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0 ret <2 x i64>%res @@ -131,7 +131,7 @@ define <4 x i32> @test14(i32 %x) { ; CHECK-LABEL: test14: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7] +; CHECK-NEXT: vmovd %edi, %xmm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0x6e,0xc7] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0 ret <4 x i32>%res @@ -140,7 +140,7 @@ define <4 x i32> @test15(i32* %x) { ; CHECK-LABEL: test15: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07] +; CHECK-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] ; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: retq ## encoding: [0xc3] %y = load i32, i32* %x, align 4 Index: test/CodeGen/X86/avx512-scalar.ll =================================================================== --- test/CodeGen/X86/avx512-scalar.ll +++ test/CodeGen/X86/avx512-scalar.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx --show-mc-encoding | FileCheck %s --check-prefix AVX ; AVX512-LABEL: @test_fdiv -; AVX512: vdivss %xmm{{.*}} ## encoding: [0x62 +; AVX512: vdivss %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 ; AVX-LABEL: @test_fdiv ; AVX: vdivss %xmm{{.*}} ## encoding: [0xc5 @@ -13,7 +13,7 @@ } ; AVX512-LABEL: @test_fsub -; AVX512: vsubss %xmm{{.*}} ## encoding: [0x62 +; AVX512: vsubss %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 ; AVX-LABEL: @test_fsub ; AVX: vsubss %xmm{{.*}} ## encoding: [0xc5 @@ -23,7 +23,7 @@ } ; AVX512-LABEL: @test_fadd -; AVX512: vaddsd %xmm{{.*}} ## encoding: [0x62 +; AVX512: vaddsd %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 ; AVX-LABEL: @test_fadd ; AVX: vaddsd %xmm{{.*}} ## encoding: [0xc5 @@ -50,7 +50,7 @@ } ; AVX512-LABEL: @test_sqrt -; AVX512: vsqrtsd %xmm{{.*}} ## encoding: [0x62 +; AVX512: vsqrtsd %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 ; AVX-LABEL: @test_sqrt ; AVX: vsqrtsd %xmm{{.*}} ## encoding: [0xc5 @@ -70,7 +70,7 @@ } ; AVX512-LABEL: @test_vmax -; AVX512: vmaxss %xmm{{.*}} ## encoding: [0x62 +; AVX512: vmaxss %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 ; AVX-LABEL: @test_vmax ; AVX: vmaxss %xmm{{.*}} ## encoding: 
[0xc5 @@ -92,7 +92,7 @@ } ; AVX512-SKX-LABEL: @zero_float -; AVX512-SKX: vxorps %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## encoding: [0x62, +; AVX512-SKX: vxorps %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 ; AVX512-KNL-LABEL: @zero_float ; AVX512-KNL: vxorps %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## encoding: [0xc5, ; AVX-LABEL: @zero_float @@ -104,7 +104,7 @@ } ; AVX512-SKX-LABEL: @zero_double -; AVX512-SKX: vxorpd %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## encoding: [0x62, +; AVX512-SKX: vxorpd %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 ; AVX512-KNL-LABEL: @zero_double ; AVX512-KNL: vxorpd %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## encoding: [0xc5, ; AVX-LABEL: @zero_double Index: test/CodeGen/X86/avx512-vbroadcasti128.ll =================================================================== --- test/CodeGen/X86/avx512-vbroadcasti128.ll +++ test/CodeGen/X86/avx512-vbroadcasti128.ll @@ -234,23 +234,23 @@ define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) { ; X64-AVX512VL-LABEL: PR29088: ; X64-AVX512VL: ## BB#0: -; X64-AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512VL-NEXT: vpxord %ymm1, %ymm1, %ymm1 -; X64-AVX512VL-NEXT: vmovdqa32 %ymm1, (%rsi) +; X64-AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512VL-NEXT: vpxor %ymm1, %ymm1, %ymm1 +; X64-AVX512VL-NEXT: vmovdqa %ymm1, (%rsi) ; X64-AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512VL-NEXT: retq ; ; X64-AVX512BWVL-LABEL: PR29088: ; X64-AVX512BWVL: ## BB#0: -; X64-AVX512BWVL-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512BWVL-NEXT: vpxord %ymm1, %ymm1, %ymm1 -; X64-AVX512BWVL-NEXT: vmovdqa32 %ymm1, (%rsi) +; X64-AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512BWVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 +; X64-AVX512BWVL-NEXT: vmovdqa %ymm1, (%rsi) ; X64-AVX512BWVL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512BWVL-NEXT: retq ; ; X64-AVX512DQVL-LABEL: PR29088: ; X64-AVX512DQVL: ## BB#0: -; X64-AVX512DQVL-NEXT: vmovdqa64 (%rdi), %xmm0 +; 
X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %xmm0 ; X64-AVX512DQVL-NEXT: vxorps %ymm1, %ymm1, %ymm1 ; X64-AVX512DQVL-NEXT: vmovaps %ymm1, (%rsi) ; X64-AVX512DQVL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 Index: test/CodeGen/X86/avx512-vbroadcasti256.ll =================================================================== --- test/CodeGen/X86/avx512-vbroadcasti256.ll +++ test/CodeGen/X86/avx512-vbroadcasti256.ll @@ -78,7 +78,7 @@ define <32 x i16> @test_broadcast_16i16_32i16(<16 x i16> *%p) nounwind { ; X64-AVX512VL-LABEL: test_broadcast_16i16_32i16: ; X64-AVX512VL: ## BB#0: -; X64-AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm1 +; X64-AVX512VL-NEXT: vmovdqa (%rdi), %ymm1 ; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm0 ; X64-AVX512VL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1 ; X64-AVX512VL-NEXT: retq @@ -91,7 +91,7 @@ ; ; X64-AVX512DQVL-LABEL: test_broadcast_16i16_32i16: ; X64-AVX512DQVL: ## BB#0: -; X64-AVX512DQVL-NEXT: vmovdqa64 (%rdi), %ymm1 +; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %ymm1 ; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm0 ; X64-AVX512DQVL-NEXT: vpaddw {{.*}}(%rip), %ymm1, %ymm1 ; X64-AVX512DQVL-NEXT: retq @@ -104,7 +104,7 @@ define <64 x i8> @test_broadcast_32i8_64i8(<32 x i8> *%p) nounwind { ; X64-AVX512VL-LABEL: test_broadcast_32i8_64i8: ; X64-AVX512VL: ## BB#0: -; X64-AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm1 +; X64-AVX512VL-NEXT: vmovdqa (%rdi), %ymm1 ; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm0 ; X64-AVX512VL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm1 ; X64-AVX512VL-NEXT: retq @@ -117,7 +117,7 @@ ; ; X64-AVX512DQVL-LABEL: test_broadcast_32i8_64i8: ; X64-AVX512DQVL: ## BB#0: -; X64-AVX512DQVL-NEXT: vmovdqa64 (%rdi), %ymm1 +; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %ymm1 ; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm0 ; X64-AVX512DQVL-NEXT: vpaddb {{.*}}(%rip), %ymm1, %ymm1 ; X64-AVX512DQVL-NEXT: retq Index: test/CodeGen/X86/avx512-vec-cmp.ll =================================================================== --- 
test/CodeGen/X86/avx512-vec-cmp.ll +++ test/CodeGen/X86/avx512-vec-cmp.ll @@ -1190,7 +1190,7 @@ ; ; SKX-LABEL: test44: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] ; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 @@ -1213,7 +1213,7 @@ ; ; SKX-LABEL: test45: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7] ; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7] ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 Index: test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -6,12 +6,12 @@ define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpbroadcastb %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x78,0xd0] +; CHECK-NEXT: vpbroadcastb %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xd0] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8] ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0] -; CHECK-NEXT: vpaddb %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc9] -; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfc,0xc1] +; CHECK-NEXT: vpaddb %ymm1, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc9] +; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression 
encoding: [0xc5,0xfd,0xfc,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1) %res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) @@ -26,12 +26,12 @@ define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpbroadcastb %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x78,0xd0] +; CHECK-NEXT: vpbroadcastb %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8] ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0] -; CHECK-NEXT: vpaddb %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc9] -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc9] +; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) %res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) @@ -46,12 +46,12 @@ define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x79,0xd0] +; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} ## encoding: 
[0x62,0xf2,0x7d,0x29,0x79,0xc8] ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0] -; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc9] -; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1] +; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc9] +; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1) %res1 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) @@ -66,12 +66,12 @@ define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpbroadcastw %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x79,0xd0] +; CHECK-NEXT: vpbroadcastw %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8] ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0] -; CHECK-NEXT: vpaddw %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc9] -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1] +; CHECK-NEXT: vpaddw %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc9] +; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) %res1 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) @@ -128,7 +128,7 @@ ; CHECK: ## 
BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu8 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqu8 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7f,0x08,0x7f,0x06] +; CHECK-NEXT: vmovdqu %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr1, <16 x i8> %x1, i16 %x2) call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr2, <16 x i8> %x1, i16 -1) @@ -142,7 +142,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca] ; CHECK-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqu8 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7f,0x28,0x7f,0x06] +; CHECK-NEXT: vmovdqu %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2) call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1) @@ -156,7 +156,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu16 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqu16 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xff,0x08,0x7f,0x06] +; CHECK-NEXT: vmovdqu %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr1, <8 x i16> %x1, i8 %x2) call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr2, <8 x i16> %x1, i8 -1) @@ -170,7 +170,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqu16 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xff,0x28,0x7f,0x06] +; CHECK-NEXT: vmovdqu %ymm0, (%rsi) ## EVEX TO VEX 
Compression encoding: [0xc5,0xfe,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2) call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1) @@ -182,11 +182,11 @@ define <8 x i16>@test_int_x86_avx512_mask_loadu_w_128(i8* %ptr, i8* %ptr2, <8 x i16> %x1, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_loadu_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu16 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu16 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0x06] ; CHECK-NEXT: vmovdqu16 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x6f,0x0f] -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1] +; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> %x1, i8 -1) %res = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr2, <8 x i16> %res0, i8 %mask) @@ -200,11 +200,11 @@ define <16 x i16>@test_int_x86_avx512_mask_loadu_w_256(i8* %ptr, i8* %ptr2, <16 x i16> %x1, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_loadu_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xff,0x28,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu16 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x6f,0x06] ; CHECK-NEXT: vmovdqu16 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x0f] -; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1] +; 
CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> %x1, i16 -1) %res = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr2, <16 x i16> %res0, i16 %mask) @@ -218,11 +218,11 @@ define <16 x i8>@test_int_x86_avx512_mask_loadu_b_128(i8* %ptr, i8* %ptr2, <16 x i8> %x1, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_loadu_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu8 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu8 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x06] ; CHECK-NEXT: vmovdqu8 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x0f] -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> %x1, i16 -1) %res = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr2, <16 x i8> %res0, i16 %mask) @@ -236,11 +236,11 @@ define <32 x i8>@test_int_x86_avx512_mask_loadu_b_256(i8* %ptr, i8* %ptr2, <32 x i8> %x1, i32 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_loadu_b_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7f,0x28,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] ; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca] ; CHECK-NEXT: vmovdqu8 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x06] ; CHECK-NEXT: vmovdqu8 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x0f] -; CHECK-NEXT: vpaddb 
%ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfc,0xc1] +; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> %x1, i32 -1) %res = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr2, <32 x i8> %res0, i32 %mask) @@ -254,15 +254,15 @@ define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_palignr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf3,0x7d,0x08,0x0f,0xd9,0x02] +; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x02] ; CHECK-NEXT: ## xmm3 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02] ; CHECK-NEXT: ## xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] ; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] -; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] -; CHECK-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc3] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4) %res1 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4) @@ -277,15 
+277,15 @@ define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_palignr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0x7d,0x28,0x0f,0xd9,0x02] +; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x0f,0xd9,0x02] ; CHECK-NEXT: ## ymm3 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02] ; CHECK-NEXT: ## ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] ; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] -; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] -; CHECK-NEXT: vpaddb %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfc,0xc3] +; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4) %res1 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4) @@ -300,15 +300,15 @@ define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pshufh_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpshufhw $3, %xmm0, 
%xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x70,0xd0,0x03] +; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xd0,0x03] ; CHECK-NEXT: ## xmm2 = xmm0[0,1,2,3,7,4,4,4] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4] ; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) @@ -323,15 +323,15 @@ define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pshufh_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x70,0xd0,0x03] +; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x70,0xd0,0x03] ; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03] ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] ; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: 
[0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) @@ -346,15 +346,15 @@ define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pshufl_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7f,0x08,0x70,0xd0,0x03] +; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xd0,0x03] ; CHECK-NEXT: ## xmm2 = xmm0[3,0,0,0,4,5,6,7] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7] ; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x 
i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) @@ -369,15 +369,15 @@ define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pshufl_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7f,0x28,0x70,0xd0,0x03] +; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xff,0x70,0xd0,0x03] ; CHECK-NEXT: ## ymm2 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03] ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] ; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) @@ -484,12 +484,12 @@ define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm3 ## encoding: 
[0x62,0xf1,0x7d,0x08,0x68,0xd9] +; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc3] +; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) @@ -502,12 +502,12 @@ define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x60,0xd9] +; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; CHECK-NEXT: vpaddb %xmm3, %xmm2, 
%xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc3] +; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) @@ -520,12 +520,12 @@ define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x68,0xd9] +; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x68,0xd9] ; CHECK-NEXT: ## ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc3] +; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> 
@llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) @@ -538,12 +538,12 @@ define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x60,0xd9] +; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x60,0xd9] ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc3] +; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) @@ -556,12 +556,12 @@ define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm3 ## encoding: 
[0x62,0xf1,0x7d,0x08,0x61,0xd9] +; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -574,12 +574,12 @@ define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x69,0xd9] +; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x 
i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -592,12 +592,12 @@ define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x61,0xd9] +; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x61,0xd9] ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] -; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc3] +; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -610,12 +610,12 @@ define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x69,0xd9] +; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x69,0xd9] ; CHECK-NEXT: ## ymm3 = 
ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] -; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc3] +; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -626,7 +626,7 @@ define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_mask_add_epi16_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1] +; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) ret <8 x i16> %res @@ -637,7 +637,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -656,7 
+656,7 @@ define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_add_epi16_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07] +; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) @@ -668,7 +668,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -691,7 +691,7 @@ define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: test_mask_add_epi16_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1] +; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) ret <16 x i16> %res @@ -702,7 +702,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -721,7 +721,7 @@ define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_add_epi16_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07] +; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) @@ -733,7 +733,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -756,7 +756,7 @@ define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_mask_sub_epi16_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1] +; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) ret <8 x i16> %res @@ -767,7 +767,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} 
## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -786,7 +786,7 @@ define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_sub_epi16_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07] +; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) @@ -798,7 +798,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -821,7 +821,7 @@ define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: test_mask_sub_epi16_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1] +; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> 
zeroinitializer, i16 -1) ret <16 x i16> %res @@ -832,7 +832,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -851,7 +851,7 @@ define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_sub_epi16_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07] +; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) @@ -863,7 +863,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -1081,7 +1081,7 @@ define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_mask_mullo_epi16_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1] +; CHECK-NEXT: vpmullw 
%xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) ret <8 x i16> %res @@ -1092,7 +1092,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -1111,7 +1111,7 @@ define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_mullo_epi16_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07] +; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) @@ -1123,7 +1123,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -1146,7 +1146,7 @@ define <16 x i16> 
@test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: test_mask_mullo_epi16_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1] +; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) ret <16 x i16> %res @@ -1157,7 +1157,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -1176,7 +1176,7 @@ define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_mullo_epi16_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07] +; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) @@ -1188,7 +1188,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; 
CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -1216,7 +1216,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1] ; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1] -; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2 ,i16 %mask) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) @@ -1229,10 +1229,10 @@ define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_b_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmaxsb %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x3c,0xd9] +; CHECK-NEXT: vpmaxsb %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xd9] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1] -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc3] +; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) @@ -1245,10 +1245,10 @@ define <8 x 
i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0xee,0xd9] +; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -1264,7 +1264,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1] ; CHECK-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) @@ -1280,7 +1280,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1] ; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1] -; 
CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) @@ -1293,10 +1293,10 @@ define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_b_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmaxub %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0xde,0xd9] +; CHECK-NEXT: vpmaxub %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xd9] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmaxub %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1] -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc3] +; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) @@ -1309,10 +1309,10 @@ define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x3e,0xd9] +; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: 
[0x62,0xf2,0x7d,0x09,0x3e,0xd1] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -1328,7 +1328,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1] ; CHECK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) @@ -1344,7 +1344,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1] ; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1] -; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) @@ -1357,10 +1357,10 @@ define 
<32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_b_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpminsb %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x38,0xd9] +; CHECK-NEXT: vpminsb %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xd9] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpminsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1] -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc3] +; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) @@ -1373,10 +1373,10 @@ define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0xea,0xd9] +; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -1392,7 +1392,7 @@ ; 
CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1] ; CHECK-NEXT: vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) @@ -1408,7 +1408,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1] ; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1] -; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) @@ -1421,10 +1421,10 @@ define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_b_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpminub %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0xda,0xd9] +; CHECK-NEXT: vpminub %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xd9] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpminub %ymm1, %ymm0, %ymm2 {%k1} ## encoding: 
[0x62,0xf1,0x7d,0x29,0xda,0xd1] -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc3] +; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) @@ -1437,10 +1437,10 @@ define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x3a,0xd9] +; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -1456,7 +1456,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1] ; CHECK-NEXT: vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call 
<16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) @@ -1469,12 +1469,12 @@ define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0xd1,0xd9] +; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1] ; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1] +; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xcb] +; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -1489,12 +1489,12 @@ define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0xd1,0xd9] +; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlw %xmm1, 
%ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1] ; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1] -; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xcb] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -1509,12 +1509,12 @@ define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0xe1,0xd9] +; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1] ; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> 
@llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) @@ -1529,12 +1529,12 @@ define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0xe1,0xd9] +; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1] ; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -1549,12 +1549,12 @@ define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0xf1,0xd9] +; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1] ; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} ## 
encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) @@ -1569,12 +1569,12 @@ define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0xf1,0xd9] +; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1] ; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -1589,12 
+1589,12 @@ define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_wi_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0x71,0xd0,0x03] +; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] ; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x03] -; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xca] -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1] +; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xca] +; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) @@ -1609,12 +1609,12 @@ define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_wi_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0x71,0xd0,0x03] +; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] ; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x03] -; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xca] -; 
CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xca] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1) @@ -1629,12 +1629,12 @@ define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsraw $3, %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0x71,0xe0,0x03] +; CHECK-NEXT: vpsraw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] ; CHECK-NEXT: vpsraw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xe0,0x03] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) @@ -1649,12 +1649,12 @@ define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_256: ; 
CHECK: ## BB#0: -; CHECK-NEXT: vpsraw $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0x71,0xe0,0x03] +; CHECK-NEXT: vpsraw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] ; CHECK-NEXT: vpsraw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xe0,0x03] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) @@ -1669,12 +1669,12 @@ define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsllw $3, %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0x71,0xf0,0x03] +; CHECK-NEXT: vpsllw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] ; CHECK-NEXT: vpsllw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xf0,0x03] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression 
encoding: [0xc5,0xf1,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) @@ -1689,12 +1689,12 @@ define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsllw $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0x71,0xf0,0x03] +; CHECK-NEXT: vpsllw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] ; CHECK-NEXT: vpsllw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xf0,0x03] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) @@ -1709,10 +1709,10 @@ define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_b_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x00,0xd9] +; CHECK-NEXT: vpshufb %xmm1, 
%xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1] -; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc3] +; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) @@ -1725,10 +1725,10 @@ define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_b_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x00,0xd9] +; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xd9] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1] -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc3] +; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) @@ -1741,15 +1741,15 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxbw %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x30,0xd0] +; CHECK-NEXT: vpmovzxbw %xmm0, 
%xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xd0] ; CHECK-NEXT: ## xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2) @@ -1764,15 +1764,15 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxbw %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x30,0xd0] +; CHECK-NEXT: vpmovzxbw %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xd0] ; CHECK-NEXT: ## ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8] ; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; CHECK-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0] ; CHECK-NEXT: ## ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2) @@ -1788,12 +1788,12 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxbw %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x20,0xd0] +; CHECK-NEXT: vpmovsxbw %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8] ; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: 
[0x62,0xf1,0x75,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2) @@ -1808,12 +1808,12 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxbw %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x20,0xd0] +; CHECK-NEXT: vpmovsxbw %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8] ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2) @@ -1828,12 +1828,12 @@ define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128: ; 
CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxdq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x25,0xd0] +; CHECK-NEXT: vpmovsxdq %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8] ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc2] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2) @@ -1848,12 +1848,12 @@ define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxdq %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x25,0xd0] +; CHECK-NEXT: vpmovsxdq %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x25,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8] ; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm2, %ymm0, 
%ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2) Index: test/CodeGen/X86/avx512bwvl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -22,11 +22,11 @@ ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vpcmpordb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x07] ; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7] +; CHECK-NEXT: vmovd %edi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] ; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] ; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] ; CHECK-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03] -; CHECK-NEXT: vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8] +; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] ; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] ; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] @@ -71,11 +71,11 @@ ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vpcmpordb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x07] ; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7] +; CHECK-NEXT: vmovd %edi, 
%xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] ; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] ; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] ; CHECK-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03] -; CHECK-NEXT: vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8] +; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] ; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] ; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] @@ -121,11 +121,11 @@ ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vpcmpordub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x07] ; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7] +; CHECK-NEXT: vmovd %edi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] ; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] ; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] ; CHECK-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03] -; CHECK-NEXT: vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8] +; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] ; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] ; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] @@ -170,11 +170,11 @@ ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] 
; CHECK-NEXT: vpcmpordub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x07] ; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7] +; CHECK-NEXT: vmovd %edi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] ; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] ; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] ; CHECK-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03] -; CHECK-NEXT: vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8] +; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] ; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] ; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] @@ -214,20 +214,20 @@ ; CHECK-NEXT: vpcmpordw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x07] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] -; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] +; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: 
[0xc5,0xf8,0x93,0xc6] -; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] +; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] +; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] +; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] +; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] +; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 @@ -262,20 +262,20 @@ ; CHECK-NEXT: vpcmpordw %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3f,0xd9,0x07] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] -; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0x6e,0xc1] +; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] -; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] +; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] +; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] +; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] +; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] +; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] +; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 @@ -311,20 +311,20 @@ ; CHECK-NEXT: vpcmporduw 
%ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x07] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] -; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] +; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] -; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] +; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] +; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] +; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] +; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; 
CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] +; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 @@ -359,20 +359,20 @@ ; CHECK-NEXT: vpcmporduw %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x2b,0x3e,0xd9,0x07] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] -; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] -; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] +; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] +; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] +; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] +; 
CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] +; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] +; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 @@ -502,20 +502,20 @@ ; CHECK-NEXT: vpcmpordb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x07] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] -; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] +; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] -; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] +; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0xc4,0xc0,0x03] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] +; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] +; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] +; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] +; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 @@ -550,20 +550,20 @@ ; CHECK-NEXT: vpcmpordb %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3f,0xd9,0x07] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] -; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] -; 
CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] +; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] +; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] +; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] +; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] +; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] +; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 @@ -599,20 +599,20 @@ ; CHECK-NEXT: vpcmpordub %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x07] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; 
CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] -; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] +; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] -; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] +; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] +; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] +; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] +; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] +; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 
; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 @@ -647,20 +647,20 @@ ; CHECK-NEXT: vpcmpordub %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x0b,0x3e,0xd9,0x07] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] -; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] -; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] +; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] +; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] +; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] +; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrw $6, %eax, 
%xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] +; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] +; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 @@ -696,20 +696,20 @@ ; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x07] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc1,0x00] -; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] +; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06] +; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: 
[0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08] +; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a] +; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c] +; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e] +; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1) %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 @@ -744,20 +744,20 @@ ; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xd9,0x07] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc1,0x00] -; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] -; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: 
[0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] +; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06] +; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08] +; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a] +; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c] +; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e] +; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask) %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 @@ -793,20 +793,20 @@ ; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x07] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; CHECK-NEXT: vpinsrb $0, 
%ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc1,0x00] -; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] -; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] +; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06] +; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08] +; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a] +; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c] +; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e] +; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## EVEX TO 
VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1) %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 @@ -841,20 +841,20 @@ ; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xd9,0x07] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc1,0x00] -; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] +; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] -; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] +; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06] +; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08] +; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a] +; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x79,0x20,0xc0,0x0a] ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c] +; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e] +; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask) %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 @@ -929,10 +929,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8] +; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] ; CHECK-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x98,0xd9] -; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0xa8,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -946,10 +946,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: 
kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xda] +; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xda] ; CHECK-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd9] -; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0xa8,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -963,10 +963,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd9] +; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] ; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0xa8,0xda] -; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0xa8,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vfmadd213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa8,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 
= call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -978,10 +978,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8] +; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] ; CHECK-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x98,0xd9] -; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0xa8,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -995,10 +995,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xda] +; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xda] ; CHECK-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd9] -; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0xa8,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## 
EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1012,10 +1012,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd9] +; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9] ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0xa8,0xda] -; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0xa8,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vfmadd213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa8,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1027,10 +1027,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8] ; CHECK-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x98,0xd9] -; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0xa8,0xca] -; 
CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1044,10 +1044,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xda] ; CHECK-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd9] -; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0xa8,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1061,10 +1061,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: 
[0xc5,0xf8,0x28,0xd9] ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0xa8,0xda] -; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0xa8,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vfmadd213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa8,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1076,10 +1076,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8] ; CHECK-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x98,0xd9] -; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0xa8,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1093,10 +1093,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256: ; CHECK: ## BB#0: ; 
CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda] ; CHECK-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd9] -; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0xa8,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1110,10 +1110,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0xa8,0xda] -; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0xa8,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vfmadd213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa8,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 
%res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1128,10 +1128,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xda] +; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xda] ; CHECK-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd9] -; CHECK-NEXT: vfmsub213pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0xaa,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vfmsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaa,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -1146,10 +1146,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xda] +; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xda] ; CHECK-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd9] -; CHECK-NEXT: vfmsub213pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0xaa,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vfmsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xaa,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, 
%ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1163,10 +1163,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xda] ; CHECK-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd9] -; CHECK-NEXT: vfmsub213ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0xaa,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vfmsub213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaa,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1180,10 +1180,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda] ; CHECK-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd9] -; CHECK-NEXT: vfmsub213ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0xaa,0xca] -; 
CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vfmsub213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xaa,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1292,10 +1292,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8] +; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] ; CHECK-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x9e,0xd9] -; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0xae,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xae,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -1309,10 +1309,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xda] +; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## EVEX TO VEX Compression 
encoding: [0xc5,0xf9,0x28,0xda] ; CHECK-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd9] -; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0xae,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xae,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -1324,10 +1324,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8] +; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] ; CHECK-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x9e,0xd9] -; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0xae,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xae,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1341,10 +1341,10 @@ ; CHECK-LABEL: 
test_int_x86_avx512_mask3_vfnmsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xda] +; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xda] ; CHECK-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd9] -; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0xae,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xae,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1356,10 +1356,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8] ; CHECK-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xd9] -; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0xae,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xae,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> 
@llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1373,10 +1373,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xda] ; CHECK-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd9] -; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0xae,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xae,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1388,10 +1388,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8] ; CHECK-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xd9] -; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0xae,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm0, %ymm1 ## 
EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xae,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1405,10 +1405,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda] ; CHECK-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd9] -; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0xae,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xae,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1420,10 +1420,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8] +; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] ; CHECK-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x9c,0xd9] -; 
CHECK-NEXT: vfnmadd213pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0xac,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xac,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -1435,10 +1435,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8] +; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] ; CHECK-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x9c,0xd9] -; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0xac,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xac,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1450,10 +1450,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## 
encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8] ; CHECK-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xd9] -; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0xac,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xac,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1465,10 +1465,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8] ; CHECK-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xd9] -; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0xac,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xac,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x 
float> %x2, i8 -1) @@ -1528,10 +1528,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8] +; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] ; CHECK-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x96,0xd9] -; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0xa6,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -1545,10 +1545,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xda] +; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xda] ; CHECK-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd9] -; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0xa6,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; 
CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -1562,10 +1562,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd9] +; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] ; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0xa6,0xda] -; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0xa6,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa6,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -1577,10 +1577,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8] +; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] ; CHECK-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x96,0xd9] -; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0xa6,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, 
%ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1594,10 +1594,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xda] +; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xda] ; CHECK-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd9] -; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0xa6,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1611,10 +1611,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd9] +; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression 
encoding: [0xc5,0xfd,0x28,0xd9] ; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0xa6,0xda] -; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0xa6,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa6,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1626,10 +1626,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8] ; CHECK-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x96,0xd9] -; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0xa6,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1643,10 +1643,10 @@ ; CHECK-LABEL: 
test_int_x86_avx512_mask3_vfmaddsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xda] ; CHECK-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd9] -; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0xa6,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1660,10 +1660,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9] ; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0xa6,0xda] -; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0xa6,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa6,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> 
@llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1675,10 +1675,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8] ; CHECK-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x6d,0x29,0x96,0xd9] -; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0xa6,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1692,10 +1692,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda] ; CHECK-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd9] -; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0xa6,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213ps 
%ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1709,10 +1709,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] ; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0xa6,0xda] -; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0xa6,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa6,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1726,10 +1726,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xda] +; CHECK-NEXT: vmovapd %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xda] ; CHECK-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm3 {%k1} ## 
encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd9] -; CHECK-NEXT: vfmsubadd213pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0xa7,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vfmsubadd213pd %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa7,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -1743,10 +1743,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xda] +; CHECK-NEXT: vmovapd %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xda] ; CHECK-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd9] -; CHECK-NEXT: vfmsubadd213pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0xa7,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vfmsubadd213pd %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0xa7,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1760,10 +1760,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## 
encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xda] +; CHECK-NEXT: vmovaps %xmm2, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xda] ; CHECK-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd9] -; CHECK-NEXT: vfmsubadd213ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0xa7,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vfmsubadd213ps %xmm2, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa7,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1777,10 +1777,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xda] +; CHECK-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda] ; CHECK-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd9] -; CHECK-NEXT: vfmsubadd213ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0xa7,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vfmsubadd213ps %ymm2, %ymm0, %ymm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xa7,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = 
call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1802,7 +1802,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; CHECK-LABEL: test_mask_vfmadd128_ps_rz: ; CHECK: ## BB#0: -; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2] +; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind ret <4 x float> %res @@ -1833,7 +1833,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) { ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz: ; CHECK: ## BB#0: -; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0x07] +; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %a2 = load <4 x float>, <4 x float>* %ptr_a2 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind @@ -1843,7 +1843,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) { ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza: ; CHECK: ## BB#0: -; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0x07] +; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind @@ -1921,7 +1921,7 @@ define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 
x double> %a1, <2 x double> %a2) { ; CHECK-LABEL: test_mask_vfmadd128_pd_rz: ; CHECK: ## BB#0: -; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2] +; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind ret <2 x double> %res @@ -1941,7 +1941,7 @@ define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) { ; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz: ; CHECK: ## BB#0: -; CHECK-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0x07] +; CHECK-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %a2 = load <2 x double>, <2 x double>* %ptr_a2 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind @@ -1961,7 +1961,7 @@ define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { ; CHECK-LABEL: test_mask_vfmadd256_pd_rz: ; CHECK: ## BB#0: -; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2] +; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind ret <4 x double> %res @@ -1981,7 +1981,7 @@ define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) { ; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz: ; CHECK: ## BB#0: -; CHECK-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0x07] +; CHECK-NEXT: vfmadd213pd 
(%rdi), %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %a2 = load <4 x double>, <4 x double>* %ptr_a2 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind @@ -1991,7 +1991,7 @@ define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_mask_packs_epi32_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0xc1] +; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) ret <8 x i16> %res @@ -2002,7 +2002,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -2021,7 +2021,7 @@ define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_packs_epi32_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0x07] +; CHECK-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) @@ -2033,7 +2033,7 @@ ; CHECK: ## BB#0: ; 
CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) @@ -2068,7 +2068,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -2095,7 +2095,7 @@ define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: test_mask_packs_epi32_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0xc1] +; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) ret <16 x i16> %res @@ -2106,7 +2106,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res 
= call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -2125,7 +2125,7 @@ define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_packs_epi32_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0x07] +; CHECK-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) @@ -2137,7 +2137,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) @@ -2172,7 +2172,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -2199,7 +2199,7 @@ define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_mask_packs_epi16_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ## 
encoding: [0x62,0xf1,0x7d,0x08,0x63,0xc1] +; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) ret <16 x i8> %res @@ -2210,7 +2210,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -2229,7 +2229,7 @@ define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_packs_epi16_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x63,0x07] +; CHECK-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) @@ -2241,7 +2241,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> 
%passThru, i16 %mask) @@ -2264,7 +2264,7 @@ define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: test_mask_packs_epi16_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x63,0xc1] +; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) ret <32 x i8> %res @@ -2275,7 +2275,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -2294,7 +2294,7 @@ define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_packs_epi16_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x63,0x07] +; CHECK-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) @@ -2306,7 +2306,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, 
%ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) @@ -2330,7 +2330,7 @@ define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_mask_packus_epi32_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2b,0xc1] +; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) ret <8 x i16> %res @@ -2341,7 +2341,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -2360,7 +2360,7 @@ define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_packus_epi32_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2b,0x07] +; CHECK-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) @@ -2372,7 +2372,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## 
encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) @@ -2407,7 +2407,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -2434,7 +2434,7 @@ define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: test_mask_packus_epi32_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x2b,0xc1] +; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) ret <16 x i16> %res @@ -2445,7 +2445,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> 
@llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -2464,7 +2464,7 @@ define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_packus_epi32_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x2b,0x07] +; CHECK-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) @@ -2476,7 +2476,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) @@ -2511,7 +2511,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -2538,7 +2538,7 @@ define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_mask_packus_epi16_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ## encoding: 
[0x62,0xf1,0x7d,0x08,0x67,0xc1] +; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) ret <16 x i8> %res @@ -2549,7 +2549,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -2568,7 +2568,7 @@ define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_packus_epi16_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x67,0x07] +; CHECK-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) @@ -2580,7 +2580,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 
%mask) @@ -2603,7 +2603,7 @@ define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: test_mask_packus_epi16_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x67,0xc1] +; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) ret <32 x i8> %res @@ -2614,7 +2614,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -2633,7 +2633,7 @@ define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_packus_epi16_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x67,0x07] +; CHECK-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) @@ -2645,7 +2645,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX 
TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) @@ -2668,7 +2668,7 @@ define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_mask_adds_epi16_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xed,0xc1] +; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) ret <8 x i16> %res @@ -2679,7 +2679,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -2698,7 +2698,7 @@ define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_adds_epi16_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xed,0x07] +; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) @@ -2710,7 +2710,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: 
vpaddsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -2733,7 +2733,7 @@ define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: test_mask_adds_epi16_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xed,0xc1] +; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) ret <16 x i16> %res @@ -2744,7 +2744,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -2763,7 +2763,7 @@ define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_adds_epi16_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xed,0x07] +; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x 
i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) @@ -2775,7 +2775,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -2798,7 +2798,7 @@ define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_mask_subs_epi16_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe9,0xc1] +; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) ret <8 x i16> %res @@ -2809,7 +2809,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -2828,7 +2828,7 @@ define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_subs_epi16_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 ## encoding: 
[0x62,0xf1,0x7d,0x08,0xe9,0x07] +; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) @@ -2840,7 +2840,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -2863,7 +2863,7 @@ define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: test_mask_subs_epi16_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe9,0xc1] +; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) ret <16 x i16> %res @@ -2874,7 +2874,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ 
-2893,7 +2893,7 @@ define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_subs_epi16_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe9,0x07] +; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) @@ -2905,7 +2905,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -2928,7 +2928,7 @@ define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_mask_adds_epu16_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdd,0xc1] +; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) ret <8 x i16> %res @@ -2939,7 +2939,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -2958,7 +2958,7 @@ define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_adds_epu16_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdd,0x07] +; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) @@ -2970,7 +2970,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -2993,7 +2993,7 @@ define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: test_mask_adds_epu16_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdd,0xc1] +; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) ret <16 x i16> %res @@ -3004,7 +3004,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; 
CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -3023,7 +3023,7 @@ define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_adds_epu16_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdd,0x07] +; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) @@ -3035,7 +3035,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -3058,7 +3058,7 @@ define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_mask_subs_epu16_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd9,0xc1] +; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call 
<8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) ret <8 x i16> %res @@ -3069,7 +3069,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) ret <8 x i16> %res @@ -3088,7 +3088,7 @@ define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_subs_epu16_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd9,0x07] +; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) @@ -3100,7 +3100,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i16>, <8 x i16>* %ptr_b %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) @@ -3123,7 +3123,7 @@ define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: test_mask_subs_epu16_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubusw %ymm1, 
%ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd9,0xc1] +; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) ret <16 x i16> %res @@ -3134,7 +3134,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) ret <16 x i16> %res @@ -3153,7 +3153,7 @@ define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { ; CHECK-LABEL: test_mask_subs_epu16_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd9,0x07] +; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) @@ -3165,7 +3165,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i16>, <16 x i16>* %ptr_b %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, 
<16 x i16> %b, <16 x i16> %passThru, i16 %mask) @@ -3188,7 +3188,7 @@ define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_mask_adds_epi8_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xec,0xc1] +; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) ret <16 x i8> %res @@ -3199,7 +3199,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -3218,7 +3218,7 @@ define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { ; CHECK-LABEL: test_mask_adds_epi8_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xec,0x07] +; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) @@ -3230,7 +3230,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## 
EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) @@ -3253,7 +3253,7 @@ define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) { ; CHECK-LABEL: test_mask_adds_epi8_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xec,0xc1] +; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) ret <32 x i8> %res @@ -3264,7 +3264,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -3283,7 +3283,7 @@ define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { ; CHECK-LABEL: test_mask_adds_epi8_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xec,0x07] +; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) @@ -3295,7 +3295,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: 
vpaddsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) @@ -3318,7 +3318,7 @@ define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_mask_subs_epi8_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe8,0xc1] +; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) ret <16 x i8> %res @@ -3329,7 +3329,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -3348,7 +3348,7 @@ define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { ; CHECK-LABEL: test_mask_subs_epi8_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe8,0x07] +; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> 
@llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) @@ -3360,7 +3360,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) @@ -3383,7 +3383,7 @@ define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) { ; CHECK-LABEL: test_mask_subs_epi8_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe8,0xc1] +; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) ret <32 x i8> %res @@ -3394,7 +3394,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -3413,7 +3413,7 @@ define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { ; CHECK-LABEL: test_mask_subs_epi8_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe8,0x07] +; 
CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) @@ -3425,7 +3425,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) @@ -3448,7 +3448,7 @@ define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_mask_adds_epu8_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdc,0xc1] +; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) ret <16 x i8> %res @@ -3459,7 +3459,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -3478,7 +3478,7 @@ define <16 x i8> 
@test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { ; CHECK-LABEL: test_mask_adds_epu8_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdc,0x07] +; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) @@ -3490,7 +3490,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) @@ -3513,7 +3513,7 @@ define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) { ; CHECK-LABEL: test_mask_adds_epu8_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdc,0xc1] +; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) ret <32 x i8> %res @@ -3524,7 +3524,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; 
CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -3543,7 +3543,7 @@ define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { ; CHECK-LABEL: test_mask_adds_epu8_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdc,0x07] +; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) @@ -3555,7 +3555,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) @@ -3578,7 +3578,7 @@ define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_mask_subs_epu8_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd8,0xc1] +; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) ret <16 x i8> %res @@ -3589,7 +3589,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: 
[0x62,0xf1,0x7d,0x09,0xd8,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) ret <16 x i8> %res @@ -3608,7 +3608,7 @@ define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { ; CHECK-LABEL: test_mask_subs_epu8_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd8,0x07] +; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) @@ -3620,7 +3620,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <16 x i8>, <16 x i8>* %ptr_b %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) @@ -3643,7 +3643,7 @@ define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) { ; CHECK-LABEL: test_mask_subs_epu8_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd8,0xc1] +; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> 
zeroinitializer, i32 -1) ret <32 x i8> %res @@ -3654,7 +3654,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) ret <32 x i8> %res @@ -3673,7 +3673,7 @@ define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { ; CHECK-LABEL: test_mask_subs_epu8_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd8,0x07] +; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) @@ -3685,7 +3685,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <32 x i8>, <32 x i8>* %ptr_b %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) @@ -3711,10 +3711,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9] +; CHECK-NEXT: vmovdqa 
%xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xda] ; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xca] -; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1] +; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -3728,10 +3728,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xda] ; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xca] -; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1] +; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -3745,10 +3745,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO 
VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xda] ; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xca] -; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1] +; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -3762,10 +3762,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xda] ; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xca] -; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1] +; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -3779,10 +3779,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x75,0xda] ; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x75,0xca] -; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1] +; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -3796,10 +3796,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x75,0xda] ; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x75,0xca] -; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1] +; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -3814,8 +3814,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1] -; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe0,0xc1] -; CHECK-NEXT: vpaddb %xmm0, %xmm2, 
%xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] +; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) @@ -3830,8 +3830,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1] -; CHECK-NEXT: vpavgb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe0,0xc1] -; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] +; CHECK-NEXT: vpavgb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xc1] +; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) @@ -3846,8 +3846,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1] -; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe3,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] +; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> 
@llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -3862,8 +3862,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1] -; CHECK-NEXT: vpavgw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe3,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpavgw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xc1] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -3878,8 +3878,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpabsb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8] -; CHECK-NEXT: vpabsb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1c,0xc0] -; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfc,0xc0] +; CHECK-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) @@ -3894,8 +3894,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpabsb %ymm0, %ymm1 {%k1} ## encoding: 
[0x62,0xf2,0x7d,0x29,0x1c,0xc8] -; CHECK-NEXT: vpabsb %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1c,0xc0] -; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfc,0xc0] +; CHECK-NEXT: vpabsb %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0] +; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1) @@ -3910,8 +3910,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpabsw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8] -; CHECK-NEXT: vpabsw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1d,0xc0] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] +; CHECK-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) @@ -3926,8 +3926,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpabsw %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8] -; CHECK-NEXT: vpabsw %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1d,0xc0] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] +; CHECK-NEXT: vpabsw %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: 
[0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1) @@ -3942,8 +3942,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] -; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe4,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] +; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -3958,8 +3958,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] -; CHECK-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe4,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -3974,8 +3974,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; 
CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] -; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe5,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] +; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -3990,8 +3990,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] -; CHECK-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe5,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -4006,8 +4006,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] -; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x0b,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] +; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## EVEX 
TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -4022,8 +4022,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] -; CHECK-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x0b,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) @@ -4040,8 +4040,8 @@ ; CHECK-NEXT: vpmovwb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] ; CHECK-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] ; CHECK-NEXT: vpmovwb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc0] -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] -; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] +; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <16 x i8> 
@llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) @@ -4074,8 +4074,8 @@ ; CHECK-NEXT: vpmovswb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] ; CHECK-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] ; CHECK-NEXT: vpmovswb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc0] -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] -; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] +; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) @@ -4108,8 +4108,8 @@ ; CHECK-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] ; CHECK-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] ; CHECK-NEXT: vpmovuswb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc0] -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] -; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] +; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) @@ -4142,8 +4142,8 @@ ; CHECK-NEXT: vpmovwb %ymm0, 
%xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] ; CHECK-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc2] ; CHECK-NEXT: vpmovwb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0] -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] -; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] +; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) @@ -4176,8 +4176,8 @@ ; CHECK-NEXT: vpmovswb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1] ; CHECK-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc2] ; CHECK-NEXT: vpmovswb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0xc0] -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] -; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] +; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) @@ -4210,8 +4210,8 @@ ; CHECK-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1] ; CHECK-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc2] ; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 ## encoding: 
[0x62,0xf2,0x7e,0x28,0x10,0xc0] -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] -; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] +; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) @@ -4242,8 +4242,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] -; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf5,0xc1] -; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] +; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1] +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1) @@ -4258,8 +4258,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] -; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf5,0xc1] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] +; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, 
%ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1) @@ -4274,8 +4274,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] -; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x04,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] +; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1) @@ -4290,8 +4290,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] -; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x04,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] +; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> 
%x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1) @@ -4308,8 +4308,8 @@ ; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] ; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x02] ; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x02] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1] +; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xcb] +; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4) %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> zeroinitializer, i8 %x4) @@ -4328,8 +4328,8 @@ ; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] ; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x02] ; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x02] -; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xcb] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4) %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 
2, <16 x i16> zeroinitializer, i16 %x4) @@ -4444,8 +4444,8 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1] ; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -4464,8 +4464,8 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1] ; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) @@ -4484,8 +4484,8 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsravw 
%ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1] ; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -4504,8 +4504,8 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1] ; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) @@ -4524,8 +4524,8 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1] ; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: 
[0x62,0xf2,0xfd,0xa9,0x12,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -4544,8 +4544,8 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1] ; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) @@ -4564,8 +4564,8 @@ ; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0] ; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xd8] ; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] -; 
CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xcb] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) @@ -4584,8 +4584,8 @@ ; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0] ; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xd8] ; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0] -; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xcb] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -4752,8 +4752,8 @@ ; CHECK-NEXT: vpbroadcastb %dil, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] ; CHECK-NEXT: vpbroadcastb %dil, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf] ; CHECK-NEXT: vpbroadcastb %dil, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7] -; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] -; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfc,0xc0] +; CHECK-NEXT: 
vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask) @@ -4772,8 +4772,8 @@ ; CHECK-NEXT: vpbroadcastb %dil, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] ; CHECK-NEXT: vpbroadcastb %dil, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf] ; CHECK-NEXT: vpbroadcastb %dil, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7] -; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] -; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask) @@ -4792,8 +4792,8 @@ ; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] ; CHECK-NEXT: vpbroadcastw %di, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf] ; CHECK-NEXT: vpbroadcastw %di, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: 
retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask) @@ -4812,8 +4812,8 @@ ; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] ; CHECK-NEXT: vpbroadcastw %di, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf] ; CHECK-NEXT: vpbroadcastw %di, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask) Index: test/CodeGen/X86/avx512bwvl-mov.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-mov.ll +++ test/CodeGen/X86/avx512bwvl-mov.ll @@ -4,7 +4,7 @@ define <32 x i8> @test_256_1(i8 * %addr) { ; CHECK-LABEL: test_256_1: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7f,0x28,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <32 x i8>* %res = load <32 x i8>, <32 x i8>* %vaddr, align 1 @@ -14,7 +14,7 @@ define void @test_256_2(i8 * %addr, <32 x i8> %data) { ; CHECK-LABEL: test_256_2: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu8 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7f,0x28,0x7f,0x07] +; CHECK-NEXT: vmovdqu %ymm0, (%rdi) ## EVEX TO VEX Compression 
encoding: [0xc5,0xfe,0x7f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <32 x i8>* store <32 x i8>%data, <32 x i8>* %vaddr, align 1 @@ -24,7 +24,7 @@ define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) { ; CHECK-LABEL: test_256_3: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqb %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xca,0x04] ; CHECK-NEXT: vpblendmb (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x66,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -38,7 +38,7 @@ define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) { ; CHECK-LABEL: test_256_4: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9] ; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc9,0x04] ; CHECK-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -52,7 +52,7 @@ define <16 x i16> @test_256_5(i8 * %addr) { ; CHECK-LABEL: test_256_5: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xff,0x28,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <16 x i16>* %res = load <16 x i16>, <16 x i16>* %vaddr, align 1 @@ -62,7 +62,7 @@ define void @test_256_6(i8 * %addr, <16 x i16> %data) { ; CHECK-LABEL: test_256_6: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu16 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x28,0x7f,0x07] +; CHECK-NEXT: vmovdqu %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x07] ; CHECK-NEXT: retq ## encoding: 
[0xc3] %vaddr = bitcast i8* %addr to <16 x i16>* store <16 x i16>%data, <16 x i16>* %vaddr, align 1 @@ -72,7 +72,7 @@ define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) { ; CHECK-LABEL: test_256_7: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqw %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x3f,0xca,0x04] ; CHECK-NEXT: vpblendmw (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x66,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -86,7 +86,7 @@ define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) { ; CHECK-LABEL: test_256_8: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9] ; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc9,0x04] ; CHECK-NEXT: vmovdqu16 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -100,7 +100,7 @@ define <16 x i8> @test_128_1(i8 * %addr) { ; CHECK-LABEL: test_128_1: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu8 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <16 x i8>* %res = load <16 x i8>, <16 x i8>* %vaddr, align 1 @@ -110,7 +110,7 @@ define void @test_128_2(i8 * %addr, <16 x i8> %data) { ; CHECK-LABEL: test_128_2: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu8 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7f,0x08,0x7f,0x07] +; CHECK-NEXT: vmovdqu %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <16 x i8>* store <16 
x i8>%data, <16 x i8>* %vaddr, align 1 @@ -120,7 +120,7 @@ define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) { ; CHECK-LABEL: test_128_3: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqb %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x3f,0xca,0x04] ; CHECK-NEXT: vpblendmb (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x66,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -134,7 +134,7 @@ define <16 x i8> @test_128_4(i8 * %addr, <16 x i8> %mask1) { ; CHECK-LABEL: test_128_4: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] ; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc9,0x04] ; CHECK-NEXT: vmovdqu8 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -148,7 +148,7 @@ define <8 x i16> @test_128_5(i8 * %addr) { ; CHECK-LABEL: test_128_5: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu16 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i16>* %res = load <8 x i16>, <8 x i16>* %vaddr, align 1 @@ -158,7 +158,7 @@ define void @test_128_6(i8 * %addr, <8 x i16> %data) { ; CHECK-LABEL: test_128_6: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu16 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x08,0x7f,0x07] +; CHECK-NEXT: vmovdqu %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i16>* store <8 x i16>%data, <8 x i16>* %vaddr, align 1 @@ -168,7 +168,7 @@ 
define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) { ; CHECK-LABEL: test_128_7: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqw %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x3f,0xca,0x04] ; CHECK-NEXT: vpblendmw (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x66,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -182,7 +182,7 @@ define <8 x i16> @test_128_8(i8 * %addr, <8 x i16> %mask1) { ; CHECK-LABEL: test_128_8: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] ; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x04] ; CHECK-NEXT: vmovdqu16 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] Index: test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll @@ -4,7 +4,7 @@ define <4 x float> @test_mask_andnot_ps_rr_128(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: test_mask_andnot_ps_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0xc1] +; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) ret <4 x float> %res @@ -15,7 +15,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm2 {%k1} ## 
encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ret <4 x float> %res @@ -34,7 +34,7 @@ define <4 x float> @test_mask_andnot_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) { ; CHECK-LABEL: test_mask_andnot_ps_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0x07] +; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) @@ -46,7 +46,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandnps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) @@ -81,7 +81,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i 
= insertelement <4 x float> undef, float %q, i32 0 @@ -108,7 +108,7 @@ define <8 x float> @test_mask_andnot_ps_rr_256(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: test_mask_andnot_ps_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0xc1] +; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) ret <8 x float> %res @@ -119,7 +119,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1] -; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) ret <8 x float> %res @@ -138,7 +138,7 @@ define <8 x float> @test_mask_andnot_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { ; CHECK-LABEL: test_mask_andnot_ps_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0x07] +; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x55,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) @@ -150,7 +150,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; 
CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) @@ -185,7 +185,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 @@ -316,7 +316,7 @@ define <4 x float> @test_mask_and_ps_rr_128(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: test_mask_and_ps_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x54,0xc1] +; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) ret <4 x float> %res @@ -327,7 +327,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vandps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ret <4 x float> %res @@ -346,7 +346,7 @@ define <4 x float> @test_mask_and_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) { ; 
CHECK-LABEL: test_mask_and_ps_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vandps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x54,0x07] +; CHECK-NEXT: vandps (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) @@ -358,7 +358,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) @@ -393,7 +393,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 @@ -420,7 +420,7 @@ define <8 x float> @test_mask_and_ps_rr_256(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: test_mask_and_ps_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x54,0xc1] +; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x 
float> %b, <8 x float> zeroinitializer, i8 -1) ret <8 x float> %res @@ -431,7 +431,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vandps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1] -; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) ret <8 x float> %res @@ -450,7 +450,7 @@ define <8 x float> @test_mask_and_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { ; CHECK-LABEL: test_mask_and_ps_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vandps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x54,0x07] +; CHECK-NEXT: vandps (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x54,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) @@ -462,7 +462,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) @@ -497,7 +497,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## 
encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 @@ -628,7 +628,7 @@ define <4 x float> @test_mask_or_ps_rr_128(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: test_mask_or_ps_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x56,0xc1] +; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) ret <4 x float> %res @@ -639,7 +639,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ret <4 x float> %res @@ -658,7 +658,7 @@ define <4 x float> @test_mask_or_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) { ; CHECK-LABEL: test_mask_or_ps_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vorps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x56,0x07] +; CHECK-NEXT: vorps (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) @@ -670,7 +670,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] 
; CHECK-NEXT: vorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) @@ -705,7 +705,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 @@ -732,7 +732,7 @@ define <8 x float> @test_mask_or_ps_rr_256(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: test_mask_or_ps_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x56,0xc1] +; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) ret <8 x float> %res @@ -743,7 +743,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1] -; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> 
%b, <8 x float> %passThru, i8 %mask) ret <8 x float> %res @@ -762,7 +762,7 @@ define <8 x float> @test_mask_or_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { ; CHECK-LABEL: test_mask_or_ps_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vorps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x56,0x07] +; CHECK-NEXT: vorps (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x56,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) @@ -774,7 +774,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) @@ -809,7 +809,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 @@ -940,7 +940,7 @@ define <4 x float> @test_mask_xor_ps_rr_128(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: test_mask_xor_ps_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x57,0xc1] +; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xf8,0x57,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) ret <4 x float> %res @@ -951,7 +951,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) ret <4 x float> %res @@ -970,7 +970,7 @@ define <4 x float> @test_mask_xor_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) { ; CHECK-LABEL: test_mask_xor_ps_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vxorps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x57,0x07] +; CHECK-NEXT: vxorps (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1) @@ -982,7 +982,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x float>, <4 x float>* %ptr_b %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) @@ -1017,7 +1017,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: 
[0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 @@ -1044,7 +1044,7 @@ define <8 x float> @test_mask_xor_ps_rr_256(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: test_mask_xor_ps_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x57,0xc1] +; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) ret <8 x float> %res @@ -1055,7 +1055,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1] -; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) ret <8 x float> %res @@ -1074,7 +1074,7 @@ define <8 x float> @test_mask_xor_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) { ; CHECK-LABEL: test_mask_xor_ps_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vxorps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x57,0x07] +; CHECK-NEXT: vxorps (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x57,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> 
@llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1) @@ -1086,7 +1086,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0x0f] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x float>, <8 x float>* %ptr_b %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) @@ -1121,7 +1121,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load float, float* %ptr_b %vecinit.i = insertelement <8 x float> undef, float %q, i32 0 @@ -1366,7 +1366,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vpmullq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) ret <4 x i64> %res @@ -1397,7 +1397,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: 
[0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i64>, <4 x i64>* %ptr_b %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) @@ -1432,7 +1432,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 @@ -1470,7 +1470,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vpmullq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) ret <2 x i64> %res @@ -1501,7 +1501,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <2 x i64>, <2 x i64>* %ptr_b %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) @@ -1536,7 +1536,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: 
kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce] ; CHECK-NEXT: vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 Index: test/CodeGen/X86/avx512dqvl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -9,7 +9,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtpd2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8] ; CHECK-NEXT: vcvtpd2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1) @@ -25,7 +25,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtpd2qq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8] ; CHECK-NEXT: vcvtpd2qq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> 
%x1, i8 -1) @@ -41,7 +41,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtpd2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8] ; CHECK-NEXT: vcvtpd2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1) @@ -57,7 +57,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtpd2uqq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8] ; CHECK-NEXT: vcvtpd2uqq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1) @@ -73,7 +73,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtps2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8] ; CHECK-NEXT: vcvtps2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> 
@llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1) @@ -89,7 +89,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtps2qq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8] ; CHECK-NEXT: vcvtps2qq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1) @@ -105,7 +105,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtps2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8] ; CHECK-NEXT: vcvtps2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1) @@ -121,7 +121,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtps2uqq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8] ; CHECK-NEXT: vcvtps2uqq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> 
%x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1) @@ -137,7 +137,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8] ; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xc0] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1) @@ -153,7 +153,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8] ; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xc0] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1) @@ -169,7 +169,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8] ; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> 
@llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1) @@ -182,10 +182,10 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8] -; CHECK-NEXT: vmovq %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc9] +; CHECK-NEXT: vmovq %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9] ; CHECK-NEXT: ## xmm1 = xmm1[0],zero ; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> @@ -203,7 +203,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8] ; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1) @@ -219,7 +219,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8] ; CHECK-NEXT: vcvttpd2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0] 
-; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1) @@ -235,7 +235,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8] ; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1) @@ -251,7 +251,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8] ; CHECK-NEXT: vcvttpd2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1) @@ -267,7 +267,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8] ; CHECK-NEXT: vcvttpd2uqq 
%ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1) @@ -283,7 +283,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8] ; CHECK-NEXT: vcvttps2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1) @@ -299,7 +299,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8] ; CHECK-NEXT: vcvttps2qq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1) @@ -315,7 +315,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm1 {%k1} ## encoding: 
[0x62,0xf1,0xfe,0x09,0x7a,0xc8] ; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xc0] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1) @@ -331,7 +331,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8] ; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xc0] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1) @@ -347,7 +347,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8] ; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1) @@ -360,10 +360,10 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## 
encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8] -; CHECK-NEXT: vmovq %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc9] +; CHECK-NEXT: vmovq %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9] ; CHECK-NEXT: ## xmm1 = xmm1[0],zero ; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> @@ -381,7 +381,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8] ; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1) @@ -397,7 +397,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8] ; CHECK-NEXT: vcvttps2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = 
call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1) @@ -413,7 +413,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8] ; CHECK-NEXT: vcvttps2uqq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1) @@ -429,7 +429,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vreducepd $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04] ; CHECK-NEXT: vreducepd $8, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1) @@ -445,7 +445,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vreducepd $4, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04] ; CHECK-NEXT: vreducepd $0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, 
%ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1) @@ -461,7 +461,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vreduceps $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04] ; CHECK-NEXT: vreduceps $88, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1) @@ -477,7 +477,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vreduceps $11, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b] ; CHECK-NEXT: vreduceps $11, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0b] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1) @@ -493,7 +493,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04] ; CHECK-NEXT: vrangepd $8, %xmm1, %xmm0, 
%xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08] -; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4) %res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1) @@ -509,7 +509,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04] ; CHECK-NEXT: vrangepd $88, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58] -; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 4, <4 x double> %x3, i8 %x4) %res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1) @@ -525,7 +525,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04] ; CHECK-NEXT: vrangeps $88, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58] -; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4) %res1 = call <4 x float> 
@llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1) @@ -541,7 +541,7 @@ ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04] ; CHECK-NEXT: vrangeps $88, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58] -; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4) %res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1) @@ -558,8 +558,8 @@ ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01] ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc2,0x01] ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x19,0xc0,0x01] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] -; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc2] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> %x2, i8 %x3) %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3) @@ -578,8 +578,8 @@ ; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01] ; CHECK-NEXT: vinsertf64x2 
$1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xd9,0x01] ; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x18,0xc1,0x01] -; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] -; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4) %res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1) @@ -598,8 +598,8 @@ ; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01] ; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xd9,0x01] ; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x38,0xc1,0x01] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc3] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4) %res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1) @@ -693,8 +693,8 @@ ; CHECK-NEXT: ## ymm2 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm0 ## encoding: 
[0x62,0xf2,0x7d,0x28,0x19,0xc0] ; CHECK-NEXT: ## ymm0 = xmm0[0,1,0,1,0,1,0,1] -; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca] +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %x3) @@ -716,8 +716,8 @@ ; CHECK-NEXT: ## ymm2 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x59,0xc0] ; CHECK-NEXT: ## ymm0 = xmm0[0,1,0,1,0,1,0,1] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %y_64 = load i64, i64 * %y_ptr %y_v2i64 = insertelement <2 x i64> undef, i64 %y_64, i32 0 @@ -739,8 +739,8 @@ ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x59,0xc8] ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x59,0xd0] ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x59,0xc0] -; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: 
[0xc5,0xf1,0xfe,0xca] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x3) @@ -858,8 +858,8 @@ ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,0,1] ; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc0,0x00] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,0,1] -; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1] -; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1] +; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 -1) @@ -883,8 +883,8 @@ ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,0,1] ; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc0,0x00] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,0,1] -; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res1 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 -1) Index: test/CodeGen/X86/avx512ifmavl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512ifmavl-intrinsics.ll +++ 
test/CodeGen/X86/avx512ifmavl-intrinsics.ll @@ -11,7 +11,7 @@ ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1} ; CHECK-NEXT: vmovaps %xmm0, %xmm4 ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm4 -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z} ; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 @@ -39,7 +39,7 @@ ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1} ; CHECK-NEXT: vmovaps %ymm0, %ymm4 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4 -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z} ; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 @@ -67,7 +67,7 @@ ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1} {z} ; CHECK-NEXT: vmovaps %xmm0, %xmm4 ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm4 -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z} ; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z} ; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 @@ -95,7 +95,7 @@ ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1} {z} ; CHECK-NEXT: vmovaps %ymm0, %ymm4 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4 -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z} ; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z} ; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 @@ -123,7 +123,7 @@ ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1} ; CHECK-NEXT: vmovaps %xmm0, %xmm4 ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm4 -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z} ; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 @@ -151,7 +151,7 @@ ; 
CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1} ; CHECK-NEXT: vmovaps %ymm0, %ymm4 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4 -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z} ; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 @@ -179,7 +179,7 @@ ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1} {z} ; CHECK-NEXT: vmovaps %xmm0, %xmm4 ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm4 -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z} ; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z} ; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 @@ -207,7 +207,7 @@ ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1} {z} ; CHECK-NEXT: vmovaps %ymm0, %ymm4 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4 -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z} ; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z} ; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 Index: test/CodeGen/X86/avx512vbmivl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vbmivl-intrinsics.ll +++ test/CodeGen/X86/avx512vbmivl-intrinsics.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0] ; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0x8d,0xd8] ; CHECK-NEXT: vpermb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0x8d,0xc0] -; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc0] -; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression 
encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3) @@ -30,8 +30,8 @@ ; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0] ; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xd8] ; CHECK-NEXT: vpermb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x8d,0xc0] -; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc0] -; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3) @@ -50,8 +50,8 @@ ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1] ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x83,0xd9] ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x83,0xc1] -; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc0] -; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call 
<16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3) @@ -70,8 +70,8 @@ ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1] ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xd9] ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x83,0xc1] -; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc0] -; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3) @@ -87,13 +87,13 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x75,0xda] ; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x75,0xca] -; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] ; CHECK-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: 
[0x62,0xf2,0x7d,0x89,0x75,0xe2] -; CHECK-NEXT: vpaddb %xmm1, %xmm4, %xmm0 ## encoding: [0x62,0xf1,0x5d,0x08,0xfc,0xc1] -; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm1, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3) @@ -109,13 +109,13 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x75,0xda] ; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x75,0xca] -; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] +; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4] ; CHECK-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2] -; CHECK-NEXT: vpaddb %ymm1, %ymm4, %ymm0 ## encoding: [0x62,0xf1,0x5d,0x28,0xfc,0xc1] -; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm1, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc1] +; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x 
i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3) @@ -131,13 +131,13 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xda] ; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xca] -; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] ; CHECK-NEXT: vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2] -; CHECK-NEXT: vpaddb %xmm1, %xmm4, %xmm0 ## encoding: [0x62,0xf1,0x5d,0x08,0xfc,0xc1] -; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm1, %xmm4, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) %res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3) @@ -153,13 +153,13 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; CHECK-NEXT: vpermt2b 
%ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xda] ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xca] -; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] +; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4] ; CHECK-NEXT: vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2] -; CHECK-NEXT: vpaddb %ymm1, %ymm4, %ymm0 ## encoding: [0x62,0xf1,0x5d,0x28,0xfc,0xc1] -; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm1, %ymm4, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc1] +; CHECK-NEXT: vpaddb %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) %res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3) Index: test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -10,7 +10,7 @@ ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8] ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0] ; CHECK-NEXT: vpaddd (%rsi){1to8}, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x38,0xfe,0x0e] -; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] +; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %y_32 = load i32, i32 * %y_ptr %y = insertelement <4 x i32> undef, i32 %y_32, i32 0 @@ -27,12 +27,12 @@ define <4 x 
i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpbroadcastd %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x58,0xd0] +; CHECK-NEXT: vpbroadcastd %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8] ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0] -; CHECK-NEXT: vpaddd %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc9] -; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] +; CHECK-NEXT: vpaddd %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc9] +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) @@ -47,12 +47,12 @@ define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpbroadcastq %xmm0, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x59,0xd0] +; CHECK-NEXT: vpbroadcastq %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8] ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0] -; CHECK-NEXT: vpaddq %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc9] -; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1] 
+; CHECK-NEXT: vpaddq %ymm1, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc9] +; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1) %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 %mask) @@ -67,12 +67,12 @@ define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpbroadcastq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0xfd,0x08,0x59,0xd0] +; CHECK-NEXT: vpbroadcastq %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8] ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0] -; CHECK-NEXT: vpaddq %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc9] -; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc1] +; CHECK-NEXT: vpaddq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc9] +; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 -1) %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 %mask) @@ -87,12 +87,12 @@ define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask ) { ; CHECK-LABEL: test_x86_vbroadcast_sd_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vbroadcastsd %xmm0, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x19,0xd0] +; CHECK-NEXT: vbroadcastsd %xmm0, %ymm2 ## EVEX TO VEX Compression 
encoding: [0xc4,0xe2,0x7d,0x19,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8] ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0] -; CHECK-NEXT: vaddpd %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc9] -; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1] +; CHECK-NEXT: vaddpd %ymm1, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9] +; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1) %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask) @@ -107,12 +107,12 @@ define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask ) { ; CHECK-LABEL: test_x86_vbroadcast_ss_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vbroadcastss %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x18,0xd0] +; CHECK-NEXT: vbroadcastss %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8] ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0] -; CHECK-NEXT: vaddps %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc9] -; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] +; CHECK-NEXT: vaddps %ymm1, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9] +; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x 
float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1) %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask) @@ -127,12 +127,12 @@ define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask ) { ; CHECK-LABEL: test_x86_vbroadcast_ss_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vbroadcastss %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x18,0xd0] +; CHECK-NEXT: vbroadcastss %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8] ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0] -; CHECK-NEXT: vaddps %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc9] -; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] +; CHECK-NEXT: vaddps %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc9] +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1) %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask) @@ -147,15 +147,15 @@ define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovsldup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x12,0xd0] +; CHECK-NEXT: vmovsldup %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x12,0xd0] ; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovsldup %xmm0, %xmm1 
{%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovsldup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0,0,2,2] -; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] +; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1) @@ -170,15 +170,15 @@ define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovsldup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x12,0xd0] +; CHECK-NEXT: vmovsldup %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x12,0xd0] ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8] ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovsldup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] -; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] +; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xfc,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2) %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1) @@ -193,15 +193,15 @@ define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovshdup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x16,0xd0] +; CHECK-NEXT: vmovshdup %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x16,0xd0] ; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovshdup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[1,1,3,3] -; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] +; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1) @@ -216,15 +216,15 @@ define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovshdup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x16,0xd0] +; CHECK-NEXT: vmovshdup %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: 
[0xc5,0xfe,0x16,0xd0] ; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8] ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovshdup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] -; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] +; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca] +; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2) %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1) @@ -238,15 +238,15 @@ define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovddup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0xff,0x08,0x12,0xd0] +; CHECK-NEXT: vmovddup %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xd0] ; CHECK-NEXT: ## xmm2 = xmm0[0,0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovddup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0,0] ; CHECK-NEXT: vmovddup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0,0] -; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc1] +; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ## EVEX TO 
VEX Compression encoding: [0xc5,0xf1,0x58,0xca] +; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2) %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1) @@ -261,15 +261,15 @@ define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovddup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0xff,0x28,0x12,0xd0] +; CHECK-NEXT: vmovddup %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xff,0x12,0xd0] ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmovddup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8] ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovddup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,0,2,2] -; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1] +; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xca] +; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2) %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1) @@ -284,15 +284,15 @@ define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpermilpd $6, %ymm0, 
%ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xd0,0x06] +; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xd0,0x06] ; CHECK-NEXT: ## ymm2 = ymm0[0,1,3,2] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06] ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,3,2] ; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,1,3,2] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] -; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3) @@ -307,15 +307,15 @@ define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xd0,0x01] +; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xd0,0x01] ; CHECK-NEXT: ## xmm2 = xmm0[1,0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,0] ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01] ; CHECK-NEXT: ## xmm0 {%k1} {z} = 
xmm0[1,0] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] -; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc2] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3) @@ -330,15 +330,15 @@ define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xd0,0x16] +; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xd0,0x16] ; CHECK-NEXT: ## ymm2 = ymm0[2,1,1,0,6,5,5,4] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16] ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4] ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] -; CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc2] +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> 
%x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3) @@ -353,15 +353,15 @@ define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xd0,0x16] +; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xd0,0x16] ; CHECK-NEXT: ## xmm2 = xmm0[2,1,1,0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[2,1,1,0] ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[2,1,1,0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3) @@ -376,15 +376,15 @@ define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpermpd $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x01,0xd0,0x03] +; CHECK-NEXT: vpermpd $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xd0,0x03] ; 
CHECK-NEXT: ## ymm2 = ymm0[3,0,0,0] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03] ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0] ; CHECK-NEXT: vpermpd $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[3,0,0,0] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] -; CHECK-NEXT: vaddpd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc2] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3) @@ -399,15 +399,15 @@ define <4 x i64>@test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpermq $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x00,0xd0,0x03] +; CHECK-NEXT: vpermq $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x00,0xd0,0x03] ; CHECK-NEXT: ## ymm2 = ymm0[3,0,0,0] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpermq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03] ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0] ; CHECK-NEXT: vpermq $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[3,0,0,0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2] +; 
CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3) @@ -424,7 +424,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovapd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07] -; CHECK-NEXT: vmovapd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x06] +; CHECK-NEXT: vmovapd %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2) call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1) @@ -438,7 +438,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovapd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07] -; CHECK-NEXT: vmovapd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x06] +; CHECK-NEXT: vmovapd %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2) call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1) @@ -452,7 +452,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovupd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07] -; CHECK-NEXT: vmovupd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x06] +; CHECK-NEXT: vmovupd %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] 
call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2) call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1) @@ -466,7 +466,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovupd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07] -; CHECK-NEXT: vmovupd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x06] +; CHECK-NEXT: vmovupd %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2) call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1) @@ -480,7 +480,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovaps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07] -; CHECK-NEXT: vmovaps %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x06] +; CHECK-NEXT: vmovaps %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2) call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1) @@ -494,7 +494,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovaps %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07] -; CHECK-NEXT: vmovaps %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x06] +; CHECK-NEXT: vmovaps %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2) call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1) @@ -508,7 +508,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovups 
%xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07] -; CHECK-NEXT: vmovups %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x06] +; CHECK-NEXT: vmovups %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2) call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1) @@ -522,7 +522,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovups %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07] -; CHECK-NEXT: vmovups %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x06] +; CHECK-NEXT: vmovups %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2) call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1) @@ -536,7 +536,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqu64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x06] +; CHECK-NEXT: vmovdqu %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2) call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1) @@ -550,7 +550,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqu64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x06] +; CHECK-NEXT: vmovdqu %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call 
void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2) call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1) @@ -564,7 +564,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqu32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x06] +; CHECK-NEXT: vmovdqu %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2) call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1) @@ -578,7 +578,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqu32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x06] +; CHECK-NEXT: vmovdqu %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2) call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1) @@ -592,7 +592,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqa64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqa64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x06] +; CHECK-NEXT: vmovdqa %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2) call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1) @@ -606,7 +606,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqa64 %ymm0, (%rdi) 
{%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqa64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x06] +; CHECK-NEXT: vmovdqa %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2) call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1) @@ -620,7 +620,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07] -; CHECK-NEXT: vmovdqa32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x06] +; CHECK-NEXT: vmovdqa %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2) call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1) @@ -634,7 +634,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqa32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07] -; CHECK-NEXT: vmovdqa32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x06] +; CHECK-NEXT: vmovdqa %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2) call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1) @@ -644,11 +644,11 @@ define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_aligned_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07] +; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovaps 
(%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07] ; CHECK-NEXT: vmovaps (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x0f] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1) %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask) @@ -662,11 +662,11 @@ define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_unaligned_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07] +; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07] ; CHECK-NEXT: vmovups (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x0f] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1) %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask) @@ -680,11 +680,11 @@ define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_aligned_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovapd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0x07] +; CHECK-NEXT: vmovapd (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x07] ; CHECK-NEXT: kmovw %esi, 
%k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07] ; CHECK-NEXT: vmovapd (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x0f] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1) %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask) @@ -698,11 +698,11 @@ define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_unaligned_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovupd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x10,0x07] +; CHECK-NEXT: vmovupd (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07] ; CHECK-NEXT: vmovupd (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x0f] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1) %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask) @@ -716,11 +716,11 @@ define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_aligned_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07] +; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xf8,0x28,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07] ; CHECK-NEXT: vmovaps (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x0f] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1) %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask) @@ -734,11 +734,11 @@ define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_unaligned_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07] +; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07] ; CHECK-NEXT: vmovups (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x0f] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1) %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask) @@ -752,11 +752,11 @@ define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_aligned_pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovapd (%rdi), %xmm0 ## encoding: 
[0x62,0xf1,0xfd,0x08,0x28,0x07] +; CHECK-NEXT: vmovapd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07] ; CHECK-NEXT: vmovapd (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x0f] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1) %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask) @@ -770,11 +770,11 @@ define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_unaligned_pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovupd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x10,0x07] +; CHECK-NEXT: vmovupd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07] ; CHECK-NEXT: vmovupd (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x0f] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1) %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask) @@ -790,11 +790,11 @@ define <4 x i32> @test_mask_load_unaligned_d_128(i8* %ptr, i8* %ptr2, <4 x i32> %data, i8 %mask) { ; CHECK-LABEL: 
test_mask_load_unaligned_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu32 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x06] ; CHECK-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x0f] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr2, <4 x i32> %res, i8 %mask) @@ -808,11 +808,11 @@ define <8 x i32> @test_mask_load_unaligned_d_256(i8* %ptr, i8* %ptr2, <8 x i32> %data, i8 %mask) { ; CHECK-LABEL: test_mask_load_unaligned_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu32 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x06] ; CHECK-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x0f] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1) %res1 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr2, <8 x i32> %res, i8 %mask) @@ -826,11 +826,11 @@ define <2 x i64> 
@test_mask_load_unaligned_q_128(i8* %ptr, i8* %ptr2, <2 x i64> %data, i8 %mask) { ; CHECK-LABEL: test_mask_load_unaligned_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu64 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x06] ; CHECK-NEXT: vmovdqu64 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x0f] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1) %res1 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr2, <2 x i64> %res, i8 %mask) @@ -844,11 +844,11 @@ define <4 x i64> @test_mask_load_unaligned_q_256(i8* %ptr, i8* %ptr2, <4 x i64> %data, i8 %mask) { ; CHECK-LABEL: test_mask_load_unaligned_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07] +; CHECK-NEXT: vmovdqu (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] ; CHECK-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca] ; CHECK-NEXT: vmovdqu64 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x06] ; CHECK-NEXT: vmovdqu64 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x0f] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1) %res1 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* 
%ptr2, <4 x i64> %res, i8 %mask) @@ -862,11 +862,11 @@ define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_aligned_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07] +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07] ; CHECK-NEXT: vmovdqa32 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x0f] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> %res, i8 %mask) @@ -880,11 +880,11 @@ define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_aligned_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07] +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07] ; CHECK-NEXT: vmovdqa32 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x0f] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1) %res1 = call <8 x i32> 
@llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> %res, i8 %mask) @@ -898,11 +898,11 @@ define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_aligned_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07] +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07] ; CHECK-NEXT: vmovdqa64 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x0f] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1) %res1 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> %res, i8 %mask) @@ -916,11 +916,11 @@ define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) { ; CHECK-LABEL: test_mask_load_aligned_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07] +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07] ; CHECK-NEXT: vmovdqa64 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x0f] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> 
zeroinitializer, i8 -1) %res1 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> %res, i8 %mask) @@ -934,15 +934,15 @@ define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpshufd $3, %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7d,0x08,0x70,0xd0,0x03] +; CHECK-NEXT: vpshufd $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x70,0xd0,0x03] ; CHECK-NEXT: ## xmm2 = xmm0[3,0,0,0] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpshufd $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[3,0,0,0] ; CHECK-NEXT: vpshufd $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[3,0,0,0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3) @@ -957,15 +957,15 @@ define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpshufd $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7d,0x28,0x70,0xd0,0x03] +; CHECK-NEXT: vpshufd $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x70,0xd0,0x03] ; CHECK-NEXT: ## ymm2 = ymm0[3,0,0,0,7,4,4,4] ; CHECK-NEXT: kmovw %esi, %k1 ## 
encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpshufd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03] ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4] ; CHECK-NEXT: vpshufd $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc2] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3) @@ -1196,12 +1196,12 @@ define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xd9] +; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[1],xmm1[1] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[1],xmm1[1] -; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3] +; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> 
@llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -1214,12 +1214,12 @@ define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xd9] +; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xd9] ; CHECK-NEXT: ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] -; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3] +; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1232,12 +1232,12 @@ define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xd9] +; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## 
encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3] +; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1250,12 +1250,12 @@ define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xd9] +; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xd9] ; CHECK-NEXT: ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3] +; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1268,12 +1268,12 @@ define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xd9] +; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm3 ## EVEX 
TO VEX Compression encoding: [0xc5,0xf9,0x14,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0] -; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3] +; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -1286,12 +1286,12 @@ define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xd9] +; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xd9] ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3] +; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1304,12 +1304,12 @@ define <4 x 
float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xd9] +; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3] +; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1322,12 +1322,12 @@ define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xd9] +; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xd9] ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3] +; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 
## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -1340,12 +1340,12 @@ define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xd9] +; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6a,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3] +; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) @@ -1358,12 +1358,12 @@ define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xd9] +; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x62,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 
; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3] +; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) @@ -1376,12 +1376,12 @@ define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xd9] +; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6a,0xd9] ; CHECK-NEXT: ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) @@ -1394,12 +1394,12 @@ define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: 
test_int_x86_avx512_mask_punpckld_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xd9] +; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x62,0xd9] ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) @@ -1412,12 +1412,12 @@ define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6d,0xd9] +; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6d,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[1],xmm1[1] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1] ; CHECK-NEXT: ## xmm2 = xmm0[1],xmm1[1] -; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> 
%x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) @@ -1430,12 +1430,12 @@ define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xd9] +; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xd9] ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1] ; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[0] -; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) @@ -1448,12 +1448,12 @@ define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xd9] +; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6c,0xd9] ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1] ; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## encoding: 
[0x62,0xf1,0xed,0x28,0xd4,0xc3] +; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) @@ -1466,12 +1466,12 @@ define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xd9] +; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6d,0xd9] ; CHECK-NEXT: ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1] ; CHECK-NEXT: ## ymm2 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] -; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc3] +; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) @@ -1482,7 +1482,7 @@ define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_mask_and_epi32_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpandd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0xc1] +; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> 
%b, <4 x i32> zeroinitializer, i8 -1) ret <4 x i32> %res @@ -1493,7 +1493,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res @@ -1512,7 +1512,7 @@ define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_and_epi32_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpandd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0x07] +; CHECK-NEXT: vpand (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) @@ -1524,7 +1524,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -1559,7 +1559,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: 
[0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -1586,7 +1586,7 @@ define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: test_mask_and_epi32_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpandd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0xc1] +; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdb,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -1597,7 +1597,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -1616,7 +1616,7 @@ define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_and_epi32_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpandd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0x07] +; CHECK-NEXT: vpand (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdb,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) @@ -1628,7 +1628,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandd (%rdi), 
%ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -1663,7 +1663,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -1690,7 +1690,7 @@ define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_mask_or_epi32_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0xc1] +; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xeb,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) ret <4 x i32> %res @@ -1701,7 +1701,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res 
@@ -1720,7 +1720,7 @@ define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_or_epi32_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0x07] +; CHECK-NEXT: vpor (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xeb,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) @@ -1732,7 +1732,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -1767,7 +1767,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -1794,7 +1794,7 @@ define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: test_mask_or_epi32_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0xc1] +; CHECK-NEXT: vpor %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xeb,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = 
call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -1805,7 +1805,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -1824,7 +1824,7 @@ define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_or_epi32_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0x07] +; CHECK-NEXT: vpor (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xeb,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) @@ -1836,7 +1836,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -1871,7 +1871,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f] -; CHECK-NEXT: 
vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -1898,7 +1898,7 @@ define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_mask_xor_epi32_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0xc1] +; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) ret <4 x i32> %res @@ -1909,7 +1909,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpxord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res @@ -1928,7 +1928,7 @@ define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_xor_epi32_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0x07] +; CHECK-NEXT: vpxor (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) @@ -1940,7 +1940,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -1975,7 +1975,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -2002,7 +2002,7 @@ define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: test_mask_xor_epi32_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0xc1] +; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xef,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -2013,7 +2013,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x 
i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -2032,7 +2032,7 @@ define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_xor_epi32_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0x07] +; CHECK-NEXT: vpxor (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xef,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) @@ -2044,7 +2044,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -2079,7 +2079,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -2117,7 +2117,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandnd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## 
EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res @@ -2148,7 +2148,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -2183,7 +2183,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -2221,7 +2221,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandnd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -2252,7 +2252,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnd (%rdi), %ymm0, 
%ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -2287,7 +2287,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -2325,7 +2325,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandnq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) ret <2 x i64> %res @@ -2356,7 +2356,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <2 x i64>, <2 x i64>* %ptr_b %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x 
i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) @@ -2391,7 +2391,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 @@ -2429,7 +2429,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpandnq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) ret <4 x i64> %res @@ -2460,7 +2460,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i64>, <4 x i64>* %ptr_b %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) @@ -2495,7 +2495,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, 
%ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 @@ -2522,7 +2522,7 @@ define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_mask_add_epi32_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) ret <4 x i32> %res @@ -2533,7 +2533,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res @@ -2552,7 +2552,7 @@ define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_add_epi32_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0x07] +; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) @@ -2564,7 +2564,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: 
[0x62,0xf1,0x7d,0x09,0xfe,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -2599,7 +2599,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -2626,7 +2626,7 @@ define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_mask_sub_epi32_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0xc1] +; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) ret <4 x i32> %res @@ -2637,7 +2637,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) ret <4 x i32> %res @@ -2656,7 +2656,7 @@ 
define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_sub_epi32_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0x07] +; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) @@ -2668,7 +2668,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <4 x i32>, <4 x i32>* %ptr_b %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) @@ -2703,7 +2703,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 @@ -2730,7 +2730,7 @@ define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: test_mask_sub_epi32_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0xc1] +; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 
x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -2741,7 +2741,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -2760,7 +2760,7 @@ define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_sub_epi32_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0x07] +; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) @@ -2772,7 +2772,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -2807,7 +2807,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f] -; 
CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -2834,7 +2834,7 @@ define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: test_mask_add_epi32_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] +; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -2845,7 +2845,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) ret <8 x i32> %res @@ -2864,7 +2864,7 @@ define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_add_epi32_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0x07] +; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) @@ -2876,7 +2876,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load <8 x i32>, <8 x i32>* %ptr_b %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) @@ -2911,7 +2911,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i32, i32* %ptr_b %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 @@ -2950,7 +2950,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1] -; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -2959,7 +2959,7 @@ define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_add_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] +; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x 
float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) ret <8 x float> %res @@ -2981,7 +2981,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -2990,7 +2990,7 @@ define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_add_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) ret <4 x float> %res @@ -3012,7 +3012,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1] -; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -3021,7 +3021,7 @@ define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_sub_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5c,0xc1] +; CHECK-NEXT: 
vsubps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5c,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) ret <8 x float> %res @@ -3043,7 +3043,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -3052,7 +3052,7 @@ define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_sub_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5c,0xc1] +; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) ret <4 x float> %res @@ -3074,7 +3074,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1] -; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -3083,7 +3083,7 @@ define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 
x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_mul_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x59,0xc1] +; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x59,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) ret <8 x float> %res @@ -3105,7 +3105,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -3114,7 +3114,7 @@ define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_mul_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x59,0xc1] +; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) ret <4 x float> %res @@ -3136,7 +3136,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1] -; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> 
@llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -3145,7 +3145,7 @@ define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_div_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5e,0xc1] +; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5e,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) ret <8 x float> %res @@ -3167,7 +3167,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -3176,7 +3176,7 @@ define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_div_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5e,0xc1] +; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) ret <4 x float> %res @@ -3188,15 +3188,15 @@ define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vshufpd $1, %xmm1, 
%xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0xc6,0xd9,0x01] +; CHECK-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xd9,0x01] ; CHECK-NEXT: ## xmm3 = xmm0[1],xmm1[0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[1],xmm1[0] ; CHECK-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[1],xmm1[0] -; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xcb] -; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc1] +; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xcb] +; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 %x4) %res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 -1) @@ -3211,12 +3211,12 @@ define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0xc6,0xd9,0x06] +; CHECK-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xc6,0xd9,0x06] ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[1],ymm0[3],ymm1[2] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2] -; CHECK-NEXT: vaddpd %ymm3, %ymm2, 
%ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3] +; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 %x4) %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 -1) @@ -3229,12 +3229,12 @@ define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0xc6,0xd9,0x16] +; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xd9,0x16] ; CHECK-NEXT: ## xmm3 = xmm0[2,1],xmm1[1,0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16] ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2,1],xmm1[1,0] -; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3] +; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4) %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1) @@ -3247,12 +3247,12 @@ define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) { ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0xc6,0xd9,0x16] +; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression 
encoding: [0xc5,0xfc,0xc6,0xd9,0x16] ; CHECK-NEXT: ## ymm3 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] -; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3] +; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4) %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1) @@ -3268,7 +3268,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1] ; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1] -; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2 ,i8 %mask) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) @@ -3281,10 +3281,10 @@ define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmaxsd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x3d,0xd9] +; CHECK-NEXT: vpmaxsd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1] -; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) @@ -3300,7 +3300,7 @@ ; CHECK-NEXT: vpmaxsq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1] -; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) @@ -3316,7 +3316,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1] ; CHECK-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> 
zeroinitializer, i8 %mask) @@ -3332,7 +3332,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxud %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1] ; CHECK-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1] -; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) @@ -3345,10 +3345,10 @@ define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x3f,0xd9] +; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1] -; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) @@ -3364,7 +3364,7 @@ ; CHECK-NEXT: vpmaxuq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} ## 
encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1] -; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) @@ -3380,7 +3380,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1] ; CHECK-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) @@ -3396,7 +3396,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1] ; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1] -; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) @@ -3409,10 +3409,10 @@ define <8 
x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpminsd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x39,0xd9] +; CHECK-NEXT: vpminsd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminsd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1] -; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) @@ -3428,7 +3428,7 @@ ; CHECK-NEXT: vpminsq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x39,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminsq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1] -; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) @@ -3444,7 +3444,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminsq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1] ; CHECK-NEXT: vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 
## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) @@ -3460,7 +3460,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1] ; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1] -; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) @@ -3473,10 +3473,10 @@ define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpminud %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x3b,0xd9] +; CHECK-NEXT: vpminud %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminud %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1] -; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> 
@llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) @@ -3492,7 +3492,7 @@ ; CHECK-NEXT: vpminuq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminuq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1] -; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) @@ -3508,7 +3508,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpminuq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1] ; CHECK-NEXT: vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) @@ -3521,12 +3521,12 @@ define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0xd3,0xd9] +; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 ## EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0xd3,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1] ; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1] -; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xcb] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) @@ -3541,12 +3541,12 @@ define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0xd3,0xd9] +; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1] ; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1] -; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xcb] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call 
<4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1) @@ -3561,12 +3561,12 @@ define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0xd2,0xd9] +; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1] ; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1] -; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xcb] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) @@ -3581,12 +3581,12 @@ define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0xd2,0xd9] +; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1} ## encoding: 
[0x62,0xf1,0x7d,0x29,0xd2,0xd1] ; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1] -; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] -; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xcb] +; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1) @@ -3601,12 +3601,12 @@ define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0xe2,0xd9] +; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1] ; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1] -; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc3] +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 
x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) @@ -3621,12 +3621,12 @@ define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0xe2,0xd9] +; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1] ; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) @@ -3641,12 +3641,12 @@ define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0xf2,0xd9] +; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1] ; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1] -; CHECK-NEXT: vpaddd 
%xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc3] +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) @@ -3661,12 +3661,12 @@ define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0xf2,0xd9] +; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1] ; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) @@ -3681,12 +3681,12 @@ define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> 
%x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psll_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0xf3,0xd9] +; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1] ; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc3] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) @@ -3701,12 +3701,12 @@ define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlq $3, %xmm0, %xmm2 ## encoding: [0x62,0xf1,0xed,0x08,0x73,0xd0,0x03] +; CHECK-NEXT: vpsrlq $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsrlq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03] ; CHECK-NEXT: vpsrlq $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x03] -; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] -; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ## encoding: 
[0x62,0xf1,0xfd,0x08,0xd4,0xc1] +; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xca] +; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1) @@ -3721,12 +3721,12 @@ define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlq $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0xed,0x28,0x73,0xd0,0x03] +; CHECK-NEXT: vpsrlq $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsrlq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03] ; CHECK-NEXT: vpsrlq $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x03] -; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] -; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1] +; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xca] +; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1) @@ -3741,12 +3741,12 @@ define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrld $3, %xmm0, %xmm2 ## encoding: 
[0x62,0xf1,0x6d,0x08,0x72,0xd0,0x03] +; CHECK-NEXT: vpsrld $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsrld $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03] ; CHECK-NEXT: vpsrld $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x03] -; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca] -; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] +; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1) @@ -3761,12 +3761,12 @@ define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrld $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0x72,0xd0,0x03] +; CHECK-NEXT: vpsrld $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsrld $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03] ; CHECK-NEXT: vpsrld $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x03] -; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] -; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] +; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xca] +; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xfd,0xfe,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1) @@ -3781,12 +3781,12 @@ define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpslld $3, %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0x72,0xf0,0x03] +; CHECK-NEXT: vpslld $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xf0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpslld $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03] ; CHECK-NEXT: vpslld $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x72,0xf0,0x03] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3) @@ -3801,12 +3801,12 @@ define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpslld $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0x72,0xf0,0x03] +; CHECK-NEXT: vpslld $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xf0,0x03] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpslld $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03] ; CHECK-NEXT: vpslld $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xf0,0x03] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc2] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3) @@ -3821,12 +3821,12 @@ define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrlv2_di: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x45,0xd9] +; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1] ; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1] -; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc3] +; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x 
i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) @@ -3841,12 +3841,12 @@ define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_di: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x45,0xd9] +; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1] ; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc3] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) @@ -3861,12 +3861,12 @@ define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_si: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x45,0xd9] +; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1] ; CHECK-NEXT: 
vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1] -; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc3] +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) @@ -3881,12 +3881,12 @@ define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_si: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x45,0xd9] +; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1] ; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> 
zeroinitializer, i8 %x3) @@ -3901,12 +3901,12 @@ define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrav4_si: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x46,0xd9] +; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1] ; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1] -; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc3] +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) @@ -3921,12 +3921,12 @@ define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0xd9] +; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1] ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1] -; CHECK-NEXT: vpaddd %ymm0, 
%ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) @@ -3939,11 +3939,11 @@ define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() { ; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si_const: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] -; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A] -; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI276_0-4, kind: reloc_riprel_4byte -; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A] -; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI276_1-4, kind: reloc_riprel_4byte +; CHECK-NEXT: vmovdqa {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] +; CHECK-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; CHECK-NEXT: ## fixup A - offset: 4, value: LCPI276_0-4, kind: reloc_riprel_4byte +; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; CHECK-NEXT: ## fixup A - offset: 5, value: LCPI276_1-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> , <8 x i32> , <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -3954,12 +3954,12 @@ define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: 
test_int_x86_avx512_mask_psllv2_di: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x47,0xd9] +; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1] ; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1] -; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc3] +; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) @@ -3974,12 +3974,12 @@ define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_di: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x47,0xd9] +; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1] ; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc3] +; CHECK-NEXT: vpaddq %ymm0, 
%ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) @@ -3994,12 +3994,12 @@ define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_si: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x47,0xd9] +; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1] ; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1] -; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc3] +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) @@ -4014,12 +4014,12 @@ define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_si: ; CHECK: ## BB#0: -; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm3 ## encoding: 
[0x62,0xf2,0x7d,0x28,0x47,0xd9] +; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1] ; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) @@ -4034,15 +4034,15 @@ define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxbd %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x31,0xd0] +; CHECK-NEXT: vpmovzxbd %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xd0] ; CHECK-NEXT: ## xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; CHECK-NEXT: vpmovzxbd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0] ; CHECK-NEXT: ## xmm0 {%k1} {z} = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2) @@ -4057,15 +4057,15 @@ define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxbd %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x31,0xd0] +; CHECK-NEXT: vpmovzxbd %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x31,0xd0] ; CHECK-NEXT: ## ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8] ; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero ; CHECK-NEXT: vpmovzxbd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0] ; CHECK-NEXT: ## ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; CHECK-NEXT: vpaddd 
%ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc2] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2) @@ -4080,15 +4080,15 @@ define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxbq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x32,0xd0] +; CHECK-NEXT: vpmovzxbq %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xd0] ; CHECK-NEXT: ## xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vpmovzxbq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc2] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x 
i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2) @@ -4103,15 +4103,15 @@ define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxbq %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x32,0xd0] +; CHECK-NEXT: vpmovzxbq %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x32,0xd0] ; CHECK-NEXT: ## ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8] ; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vpmovzxbq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0] ; CHECK-NEXT: ## ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, 
<4 x i64> zeroinitializer, i8 %x2) @@ -4126,15 +4126,15 @@ define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxdq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x35,0xd0] +; CHECK-NEXT: vpmovzxdq %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xd0] ; CHECK-NEXT: ## xmm2 = xmm0[0],zero,xmm0[1],zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero ; CHECK-NEXT: vpmovzxdq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc2] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2) @@ -4149,15 +4149,15 @@ define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxdq %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x35,0xd0] +; CHECK-NEXT: vpmovzxdq %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x35,0xd0] ; CHECK-NEXT: ## ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxdq %xmm0, %ymm1 
{%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8] ; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; CHECK-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0] ; CHECK-NEXT: ## ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2) @@ -4172,15 +4172,15 @@ define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxwd %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x33,0xd0] +; CHECK-NEXT: vpmovzxwd %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xd0] ; CHECK-NEXT: ## xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; CHECK-NEXT: vpmovzxwd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd 
%xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2) @@ -4195,15 +4195,15 @@ define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxwd %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x33,0xd0] +; CHECK-NEXT: vpmovzxwd %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xd0] ; CHECK-NEXT: ## ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8] ; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0] ; CHECK-NEXT: ## ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc2] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) %res1 = call <8 x i32> 
@llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2) @@ -4218,15 +4218,15 @@ define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxwq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x34,0xd0] +; CHECK-NEXT: vpmovzxwq %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xd0] ; CHECK-NEXT: ## xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8] ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; CHECK-NEXT: vpmovzxwq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc2] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2) @@ -4241,15 +4241,15 @@ define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovzxwq %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x34,0xd0] +; CHECK-NEXT: vpmovzxwq %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x34,0xd0] ; CHECK-NEXT: ## ymm2 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8] ; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; CHECK-NEXT: vpmovzxwq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0] ; CHECK-NEXT: ## ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2) @@ -4264,12 +4264,12 @@ define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxbd %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x21,0xd0] +; CHECK-NEXT: vpmovsxbd %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8] ; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: 
[0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2) @@ -4284,12 +4284,12 @@ define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxbd %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x21,0xd0] +; CHECK-NEXT: vpmovsxbd %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8] ; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc2] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2) @@ -4304,12 +4304,12 @@ define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxbq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x22,0xd0] +; 
CHECK-NEXT: vpmovsxbq %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8] ; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc2] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2) @@ -4324,12 +4324,12 @@ define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxbq %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x22,0xd0] +; CHECK-NEXT: vpmovsxbq %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8] ; CHECK-NEXT: vpmovsxbq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: 
[0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2) @@ -4344,12 +4344,12 @@ define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxwd %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x23,0xd0] +; CHECK-NEXT: vpmovsxwd %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8] ; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2) @@ -4364,12 +4364,12 @@ define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxwd %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x23,0xd0] +; CHECK-NEXT: vpmovsxwd %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x23,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8] ; CHECK-NEXT: 
vpmovsxwd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc2] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2) @@ -4384,12 +4384,12 @@ define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxwq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x24,0xd0] +; CHECK-NEXT: vpmovsxwq %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8] ; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc2] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2) @@ -4404,12 +4404,12 @@ define <4 x 
i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsxwq %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x24,0xd0] +; CHECK-NEXT: vpmovsxwq %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x24,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8] ; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2) @@ -4428,8 +4428,8 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1] ; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1] -; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc3] +; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 
%x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) @@ -4448,8 +4448,8 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1] ; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc3] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) @@ -4468,8 +4468,8 @@ ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03] ; CHECK-NEXT: vpsraq $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x03] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc2] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3) @@ -4488,8 +4488,8 @@ ; CHECK-NEXT: kmovw 
%esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03] ; CHECK-NEXT: vpsraq $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x03] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3) @@ -4508,8 +4508,8 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1] ; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1] -; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc3] +; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) @@ -4522,9 +4522,9 @@ define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128_const: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = 
[2,18446744073709551607] -; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A] -; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI304_0-4, kind: reloc_riprel_4byte +; CHECK-NEXT: vmovdqa {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [2,18446744073709551607] +; CHECK-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; CHECK-NEXT: ## fixup A - offset: 4, value: LCPI304_0-4, kind: reloc_riprel_4byte ; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A] ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI304_1-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -4541,8 +4541,8 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1] ; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc3] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) @@ -4557,10 +4557,10 @@ define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0xe6,0xd0] +; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8] -; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc2] +; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) @@ -4573,10 +4573,10 @@ define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0xe6,0xd0] +; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8] -; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc2] +; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) @@ -4592,7 +4592,7 @@ ; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8] -; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc2] +; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call 
<2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) @@ -4608,7 +4608,7 @@ ; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8] -; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc2] +; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) @@ -4628,8 +4628,8 @@ ; CHECK-NEXT: ## xmm2 {%k1} = xmm1[2,3],xmm0[0,1] ; CHECK-NEXT: valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1] -; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xcb] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 %x4) %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 -1) @@ -4649,7 +4649,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: valignd $6, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06] ; CHECK-NEXT: ## ymm2 {%k1} = 
ymm1[6,7],ymm0[0,1,2,3,4,5] -; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 %x4) %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 -1) @@ -4667,7 +4667,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: valignq $1, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01] ; CHECK-NEXT: ## xmm2 {%k1} = xmm1[1],xmm0[0] -; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 %x4) %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 -1) @@ -4685,7 +4685,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: valignq $3, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03] ; CHECK-NEXT: ## ymm2 {%k1} = ymm1[3],ymm0[0,1,2] -; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc3] +; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 %x4) %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 -1) @@ -4698,12 +4698,12 @@ define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> 
%x1, <4 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x0d,0xd9] +; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1] ; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1] -; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] -; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) @@ -4718,12 +4718,12 @@ define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x0d,0xd9] +; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1] ; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1] -; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: 
[0x62,0xf1,0xed,0x08,0x58,0xc0] -; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc3] +; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3) @@ -4738,12 +4738,12 @@ define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0x7d,0x28,0x0c,0xd9] +; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1] ; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1] -; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] -; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc3] +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) @@ -4758,12 +4758,12 @@ 
define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x0c,0xd9] +; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xd9] ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1] ; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1] -; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3) Index: test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics.ll +++ test/CodeGen/X86/avx512vl-intrinsics.ll @@ -834,7 +834,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcompresspd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> 
@llvm.x86.avx512.mask.compress.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask) ret <4 x double> %res @@ -902,15 +902,15 @@ ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: movq _xmm@{{.*}}(%rip), %rax ## encoding: [0x48,0x8b,0x05,A,A,A,A] ; CHECK-NEXT: ## fixup A - offset: 3, value: _xmm@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load -; CHECK-NEXT: vmovdqa32 (%rax), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x00] +; CHECK-NEXT: vmovdqa (%rax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00] ; CHECK-NEXT: movq _k8@{{.*}}(%rip), %rax ## encoding: [0x48,0x8b,0x05,A,A,A,A] ; CHECK-NEXT: ## fixup A - offset: 3, value: _k8@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load ; CHECK-NEXT: movzbl (%rax), %eax ## encoding: [0x0f,0xb6,0x00] ; CHECK-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; CHECK-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0] -; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] -; CHECK-NEXT: vmovdqa32 %xmm0, -{{[0-9]+}}(%rsp) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x84,0x24,0xd8,0xff,0xff,0xff] -; CHECK-NEXT: vmovdqa32 %xmm1, -{{[0-9]+}}(%rsp) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x8c,0x24,0xe8,0xff,0xff,0xff] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] +; CHECK-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x44,0x24,0xd8] +; CHECK-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x4c,0x24,0xe8] ; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] entry: @@ -981,7 +981,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vexpandpd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX 
TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask) ret <4 x double> %res @@ -1068,7 +1068,7 @@ define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { ; CHECK-LABEL: test_mask_mul_epi32_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1] +; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) ret < 2 x i64> %res @@ -1079,7 +1079,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) ret < 2 x i64> %res @@ -1098,7 +1098,7 @@ define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_mul_epi32_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0x07] +; CHECK-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 4 x i32>, < 4 x i32>* %ptr_b %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) @@ -1110,7 +1110,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: 
vpmuldq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 4 x i32>, < 4 x i32>* %ptr_b %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) @@ -1146,7 +1146,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 @@ -1175,7 +1175,7 @@ define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { ; CHECK-LABEL: test_mask_mul_epi32_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0xc1] +; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) ret < 4 x i64> %res @@ -1186,7 +1186,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> 
%b, < 4 x i64> %passThru, i8 %mask) ret < 4 x i64> %res @@ -1205,7 +1205,7 @@ define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_mul_epi32_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0x07] +; CHECK-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 8 x i32>, < 8 x i32>* %ptr_b %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) @@ -1217,7 +1217,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 8 x i32>, < 8 x i32>* %ptr_b %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) @@ -1253,7 +1253,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 @@ -1282,7 +1282,7 @@ define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { ; CHECK-LABEL: test_mask_mul_epu32_rr_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf4,0xc1] +; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 
## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) ret < 2 x i64> %res @@ -1293,7 +1293,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) ret < 2 x i64> %res @@ -1312,7 +1312,7 @@ define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_mul_epu32_rm_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf4,0x07] +; CHECK-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 4 x i32>, < 4 x i32>* %ptr_b %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) @@ -1324,7 +1324,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 4 x i32>, < 4 x i32>* %ptr_b %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) @@ -1360,7 +1360,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw 
%esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 @@ -1389,7 +1389,7 @@ define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { ; CHECK-LABEL: test_mask_mul_epu32_rr_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xf4,0xc1] +; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) ret < 4 x i64> %res @@ -1400,7 +1400,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) ret < 4 x i64> %res @@ -1419,7 +1419,7 @@ define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { ; CHECK-LABEL: test_mask_mul_epu32_rm_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xf4,0x07] +; CHECK-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 8 x i32>, < 8 x i32>* %ptr_b %res = call < 4 x 
i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) @@ -1431,7 +1431,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %b = load < 8 x i32>, < 8 x i32>* %ptr_b %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) @@ -1467,7 +1467,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] ; CHECK-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %q = load i64, i64* %ptr_b %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 @@ -1552,7 +1552,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1] -; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -1561,7 +1561,7 @@ define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_max_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5f,0xc1] +; 
CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) ret <8 x float> %res @@ -1583,7 +1583,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -1592,7 +1592,7 @@ define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_max_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5f,0xc1] +; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) ret <4 x float> %res @@ -1614,7 +1614,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1] -; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc2] +; CHECK-NEXT: vmovaps %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) ret <8 x float> %res @@ -1623,7 +1623,7 @@ define <8 x float> @test_mm512_min_ps_256(<8 x 
float> %a0, <8 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_min_ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5d,0xc1] +; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) ret <8 x float> %res @@ -1645,7 +1645,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1] -; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc2] +; CHECK-NEXT: vmovaps %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) ret <4 x float> %res @@ -1654,7 +1654,7 @@ define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { ; CHECK-LABEL: test_mm512_min_ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5d,0xc1] +; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) ret <4 x float> %res @@ -1711,10 +1711,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xda] 
; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xca] -; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc1] +; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) @@ -1728,10 +1728,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xda] ; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xca] -; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc1] +; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) @@ -1745,10 +1745,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xda] ; CHECK-NEXT: vpermt2d %ymm2, 
%ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xca] -; CHECK-NEXT: vpaddd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc1] +; CHECK-NEXT: vpaddd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) @@ -1762,10 +1762,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd9] +; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xda] ; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xca] -; CHECK-NEXT: vpaddd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc1] +; CHECK-NEXT: vpaddd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) @@ -1779,10 +1779,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd9] +; CHECK-NEXT: vmovapd %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd9] ; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x77,0xda] ; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 ## encoding: 
[0x62,0xf2,0xfd,0x08,0x77,0xca] -; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] +; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1) @@ -1796,10 +1796,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd9] +; CHECK-NEXT: vmovapd %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd9] ; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x77,0xda] ; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x77,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] +; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) @@ -1813,10 +1813,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd9] +; CHECK-NEXT: vmovaps %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd9] ; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xda] ; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 ## 
encoding: [0x62,0xf2,0x7d,0x08,0x77,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] +; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1) @@ -1829,7 +1829,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %x1cast = bitcast <2 x i64> %x1 to <4 x i32> %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3) @@ -1842,10 +1842,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd9] +; CHECK-NEXT: vmovaps %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd9] ; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x77,0xda] ; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x77,0xca] -; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] +; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> 
@llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) @@ -1861,7 +1861,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpabsq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8] ; CHECK-NEXT: vpabsq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) @@ -1877,7 +1877,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpabsq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8] ; CHECK-NEXT: vpabsq %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) @@ -1892,8 +1892,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpabsd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8] -; CHECK-NEXT: vpabsd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1e,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpabsd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq 
## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) @@ -1908,8 +1908,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpabsd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8] -; CHECK-NEXT: vpabsd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1e,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpabsd %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) @@ -1925,7 +1925,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vscalefpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1] ; CHECK-NEXT: vscalefpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x2c,0xc1] -; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) @@ -1941,7 +1941,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vscalefpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x2c,0xd1] ; CHECK-NEXT: vscalefpd %ymm1, %ymm0, %ymm0 ## encoding: 
[0x62,0xf2,0xfd,0x28,0x2c,0xc1] -; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) @@ -1957,7 +1957,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vscalefps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1] ; CHECK-NEXT: vscalefps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2c,0xc1] -; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) @@ -1973,7 +1973,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vscalefps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2c,0xd1] ; CHECK-NEXT: vscalefps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x2c,0xc1] -; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) @@ -2398,8 +2398,8 @@ 
; CHECK-NEXT: vpmovqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1] ; CHECK-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc2] ; CHECK-NEXT: vpmovqd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc0] -; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) @@ -2432,8 +2432,8 @@ ; CHECK-NEXT: vpmovsqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1] ; CHECK-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc2] ; CHECK-NEXT: vpmovsqd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc0] -; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) @@ -2466,8 +2466,8 @@ ; CHECK-NEXT: vpmovusqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1] ; CHECK-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc2] ; CHECK-NEXT: vpmovusqd %xmm0, %xmm0 ## encoding: 
[0x62,0xf2,0x7e,0x08,0x15,0xc0] -; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) @@ -2500,8 +2500,8 @@ ; CHECK-NEXT: vpmovqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1] ; CHECK-NEXT: vpmovqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc2] ; CHECK-NEXT: vpmovqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc0] -; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) @@ -2534,8 +2534,8 @@ ; CHECK-NEXT: vpmovsqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x25,0xc1] ; CHECK-NEXT: vpmovsqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x25,0xc2] ; CHECK-NEXT: vpmovsqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0xc0] -; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd 
%xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) @@ -2568,8 +2568,8 @@ ; CHECK-NEXT: vpmovusqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x15,0xc1] ; CHECK-NEXT: vpmovusqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x15,0xc2] ; CHECK-NEXT: vpmovusqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0xc0] -; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] -; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1] +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) @@ -3008,8 +3008,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8] -; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5b,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 
%x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1) @@ -3024,8 +3024,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8] -; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5b,0xc0] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] +; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1) @@ -3040,8 +3040,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8] -; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) @@ -3054,10 +3054,10 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8] -; CHECK-NEXT: vmovq %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc9] +; CHECK-NEXT: vmovq %xmm1, 
%xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9] ; CHECK-NEXT: ## xmm1 = xmm1[0],zero -; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> @@ -3074,8 +3074,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8] -; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x28,0xe6,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) @@ -3090,8 +3090,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8] -; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x5a,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1) @@ -3106,8 +3106,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8] -; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1) @@ -3120,10 +3120,10 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8] -; CHECK-NEXT: vmovq %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc9] +; CHECK-NEXT: vmovq %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9] ; CHECK-NEXT: ## xmm1 = xmm1[0],zero -; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) %res1 = 
shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> @@ -3141,7 +3141,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8] ; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) @@ -3154,10 +3154,10 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8] -; CHECK-NEXT: vmovq %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc9] +; CHECK-NEXT: vmovq %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9] ; CHECK-NEXT: ## xmm1 = xmm1[0],zero ; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> @@ -3175,7 +3175,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x29,0x79,0xc8] ; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x28,0x79,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; 
CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) @@ -3190,8 +3190,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x5b,0xc8] -; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x5b,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) @@ -3206,8 +3206,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2dq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x5b,0xc8] -; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x5b,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5b,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) @@ -3222,8 +3222,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## 
encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8] -; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5a,0xc0] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] +; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2) %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1) @@ -3238,8 +3238,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8] -; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5a,0xc0] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2) %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1) @@ -3255,7 +3255,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x79,0xc8] ; CHECK-NEXT: vcvtps2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x79,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: 
retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) @@ -3271,7 +3271,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2udq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x79,0xc8] ; CHECK-NEXT: vcvtps2udq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x79,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) @@ -3286,8 +3286,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8] -; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) @@ -3300,10 +3300,10 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8] -; CHECK-NEXT: vmovq %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc9] +; CHECK-NEXT: vmovq 
%xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9] ; CHECK-NEXT: ## xmm1 = xmm1[0],zero -; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> @@ -3320,8 +3320,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8] -; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xe6,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) @@ -3337,7 +3337,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8] ; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x 
i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) @@ -3350,10 +3350,10 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8] -; CHECK-NEXT: vmovq %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc9] +; CHECK-NEXT: vmovq %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc9] ; CHECK-NEXT: ## xmm1 = xmm1[0],zero ; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = shufflevector <4 x i32> %res, <4 x i32> zeroinitializer, <4 x i32> @@ -3371,7 +3371,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8] ; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) @@ -3386,8 +3386,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8] -; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ## 
encoding: [0x62,0xf1,0x7e,0x08,0x5b,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) @@ -3402,8 +3402,8 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8] -; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x5b,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) @@ -3419,7 +3419,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8] ; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) %res1 = call <4 x i32> 
@llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) @@ -3435,7 +3435,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8] ; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) @@ -3451,7 +3451,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8] ; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xc0] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1) @@ -3467,7 +3467,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8] ; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xc0] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x 
i32> %x0, <8 x float> %x1, i8 %x2) %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1) @@ -3483,7 +3483,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrndscalepd $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x09,0xc8,0x04] ; CHECK-NEXT: vrndscalepd $88, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x09,0xc0,0x58] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3) %res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1) @@ -3499,7 +3499,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrndscalepd $4, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x09,0xc8,0x04] ; CHECK-NEXT: vrndscalepd $88, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x09,0xc0,0x58] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1) @@ -3515,7 +3515,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrndscaleps $88, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x08,0xc8,0x58] ; CHECK-NEXT: vrndscaleps $4, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x08,0xc0,0x04] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps 
%xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1) @@ -3531,7 +3531,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrndscaleps $5, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x08,0xc8,0x05] ; CHECK-NEXT: vrndscaleps $66, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x08,0xc0,0x42] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1) @@ -3551,8 +3551,8 @@ ; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7] ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] -; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] -; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4) %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1) @@ -3574,8 +3574,8 @@ 
; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] -; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] -; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4) %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1) @@ -3595,7 +3595,7 @@ ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7] ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc1,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4) %res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1) @@ -3613,7 +3613,7 @@ ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc1,0x16] ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] ; CHECK-NEXT: retq 
## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4) %res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1) @@ -3630,8 +3630,8 @@ ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01] ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc2,0x01] ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x19,0xc0,0x01] -; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] +; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca] +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3) @@ -3650,8 +3650,8 @@ ; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x26,0xc8,0x0b] ; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x26,0xd0,0x0b] ; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x26,0xc0,0x0b] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] -; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> 
@llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %x3) %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> zeroinitializer, i8 %x3) @@ -3669,7 +3669,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x26,0xc8,0x0b] ; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x26,0xc0,0x0b] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 -1) @@ -3685,7 +3685,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x26,0xc8,0x0b] ; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x26,0xc0,0x0b] -; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 %x3) %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 -1) @@ -3701,7 +3701,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x26,0xc8,0x0b] ; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x26,0xc0,0x0b] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 
## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1) @@ -3718,8 +3718,8 @@ ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01] ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xd9,0x01] ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x18,0xc1,0x01] -; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] -; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4) %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1) @@ -3738,8 +3738,8 @@ ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01] ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xd9,0x01] ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x38,0xc1,0x01] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xed,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4) @@ -3756,10 +3756,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8] +; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21] -; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1) @@ -3773,10 +3773,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8] +; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21] -; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 
## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4) %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1) @@ -3790,10 +3790,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8] +; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21] -; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1) @@ -3807,10 +3807,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8] +; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21] -; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0] +; 
CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1) @@ -3824,10 +3824,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8] +; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21] -; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1) @@ -3841,10 +3841,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd8] +; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21] -; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 ## encoding: 
[0x62,0xf1,0xe5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4) %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1) @@ -3858,10 +3858,10 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8] +; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21] -; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1) @@ -3875,10 +3875,10 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xd8] +; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xda,0x21] ; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21] -; CHECK-NEXT: 
vpaddq %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4) %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1) @@ -3889,7 +3889,7 @@ define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) { ; CHECK-LABEL: test_x86_vcvtph2ps_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x13,0xc0] +; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1) ret <4 x float> %res @@ -3900,7 +3900,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtph2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask) ret <4 x float> %res @@ -3922,7 +3922,7 @@ define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) { ; CHECK-LABEL: test_x86_vcvtph2ps_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtph2ps %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x13,0xc0] +; CHECK-NEXT: vcvtph2ps %xmm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1) ret <8 x float> %res @@ -3933,7 +3933,7 @@ ; 
CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtph2ps %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask) ret <8 x float> %res @@ -3957,7 +3957,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2ph $2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x02] ; CHECK-NEXT: vcvtps2ph $2, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc2,0x02] -; CHECK-NEXT: vcvtps2ph $2, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1d,0xc0,0x02] +; CHECK-NEXT: vcvtps2ph $2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x02] ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -3977,7 +3977,7 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vcvtps2ph $2, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x02] ; CHECK-NEXT: vcvtps2ph $2, %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc2,0x02] -; CHECK-NEXT: vcvtps2ph $2, %ymm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1d,0xc0,0x02] +; CHECK-NEXT: vcvtps2ph $2, %ymm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x02] ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -4015,7 +4015,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrsqrt14ps 
%ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x4e,0xc8] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask) ret <8 x float> %res @@ -4045,7 +4045,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrsqrt14ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x4e,0xc8] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask) ret <4 x float> %res @@ -4078,7 +4078,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrcp14ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x4c,0xc8] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask) ret <8 x float> %res @@ -4108,7 +4108,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrcp14ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x4c,0xc8] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask) ret <4 x float> %res @@ -4141,7 
+4141,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrsqrt14pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x4e,0xc8] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask) ret <4 x double> %res @@ -4171,7 +4171,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrsqrt14pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x4e,0xc8] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask) ret <2 x double> %res @@ -4204,7 +4204,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrcp14pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x4c,0xc8] -; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xc1] +; CHECK-NEXT: vmovaps %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask) ret <4 x double> %res @@ -4234,7 +4234,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vrcp14pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x4c,0xc8] -; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xc1] +; CHECK-NEXT: vmovaps %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res 
= call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask) ret <2 x double> %res @@ -4256,8 +4256,8 @@ ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc0,0x00] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3,0,1,2,3] -; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] -; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1] +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 -1) %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask) @@ -4280,8 +4280,8 @@ ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc0,0x00] ; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3,0,1,2,3] -; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1) %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) @@ -4300,8 +4300,8 @@ ; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1] ; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: 
[0x62,0xf2,0x7d,0x89,0x14,0xd9] ; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x14,0xc1] -; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xcb] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) @@ -4320,8 +4320,8 @@ ; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1] ; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xd9] ; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x14,0xc1] -; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xcb] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) @@ -4340,8 +4340,8 @@ ; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1] ; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x14,0xd9] ; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0 ## encoding: 
[0x62,0xf2,0xfd,0x08,0x14,0xc1] -; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xcb] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) @@ -4360,8 +4360,8 @@ ; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1] ; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xd9] ; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x14,0xc1] -; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xcb] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) @@ -4380,8 +4380,8 @@ ; CHECK-NEXT: vprold $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] ; CHECK-NEXT: vprold $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x03] ; CHECK-NEXT: vprold $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x03] -; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: 
[0x62,0xf1,0x75,0x08,0xfe,0xca] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3) @@ -4400,8 +4400,8 @@ ; CHECK-NEXT: vprold $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] ; CHECK-NEXT: vprold $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x03] ; CHECK-NEXT: vprold $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x03] -; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xca] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3) @@ -4420,8 +4420,8 @@ ; CHECK-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] ; CHECK-NEXT: vprolq $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x03] ; CHECK-NEXT: vprolq $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x03] -; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: 
vpaddq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xca] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3) @@ -4440,8 +4440,8 @@ ; CHECK-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] ; CHECK-NEXT: vprolq $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x03] ; CHECK-NEXT: vprolq $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x03] -; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xca] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3) @@ -4460,8 +4460,8 @@ ; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1] ; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x15,0xd9] ; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x15,0xc1] -; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xcb] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX 
Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) @@ -4480,8 +4480,8 @@ ; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1] ; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xd9] ; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x15,0xc1] -; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xcb] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) @@ -4500,8 +4500,8 @@ ; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1] ; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x15,0xd9] ; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x15,0xc1] -; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xcb] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> 
@llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) @@ -4520,8 +4520,8 @@ ; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1] ; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xd9] ; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x15,0xc1] -; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xcb] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) @@ -4540,8 +4540,8 @@ ; CHECK-NEXT: vprord $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] ; CHECK-NEXT: vprord $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x03] ; CHECK-NEXT: vprord $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x03] -; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> 
@llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3) @@ -4560,8 +4560,8 @@ ; CHECK-NEXT: vprord $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] ; CHECK-NEXT: vprord $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x03] ; CHECK-NEXT: vprord $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x03] -; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xca] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3) @@ -4580,8 +4580,8 @@ ; CHECK-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] ; CHECK-NEXT: vprorq $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x03] ; CHECK-NEXT: vprorq $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x03] -; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xca] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3) @@ -4600,8 +4600,8 @@ ; CHECK-NEXT: vprorq $3, 
%ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] ; CHECK-NEXT: vprorq $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x03] ; CHECK-NEXT: vprorq $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x03] -; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xca] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3) @@ -4620,8 +4620,8 @@ ; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0] ; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xd8] ; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0] -; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xcb] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xcb] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) @@ -4640,8 +4640,8 @@ ; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0] ; CHECK-NEXT: vpermq %ymm0, 
%ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xd8] ; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x36,0xc0] -; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xcb] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) %res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) @@ -4659,9 +4659,9 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0] ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xd8] -; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x16,0xc0] -; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xcb] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] +; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0] +; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xcb] +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) @@ -4679,9 +4679,9 @@ ; CHECK-NEXT: kmovw %edi, %k1 ## 
encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0] ; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xd8] -; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x36,0xc0] -; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xc0] +; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xcb] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) @@ -4697,13 +4697,13 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8] +; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05] -; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] ; CHECK-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04] ; CHECK-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03] -; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm1 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xcc] -; 
CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xcc] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1,<2 x i64> %x2, i32 5, i8 %x4) %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> zeroinitializer, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 %x4) @@ -4719,11 +4719,11 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0xd8] +; CHECK-NEXT: vmovapd %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05] -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03] -; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4) %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 3, i8 %x4) @@ -4739,13 +4739,13 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8] +; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04] -; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] +; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4] ; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05] ; CHECK-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03] -; CHECK-NEXT: vaddpd %ymm4, %ymm3, %ymm1 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xcc] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm4, %ymm3, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xcc] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 4, i8 %x4) %res1 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> zeroinitializer, <4 x double> %x1, <4 x i64> %x2 , i32 5, i8 %x4) @@ -4761,14 +4761,14 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0xd8] +; CHECK-NEXT: vmovapd %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8] ; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05] -; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] -; CHECK-NEXT: vmovapd %ymm0, %ymm5 ## encoding: 
[0x62,0xf1,0xfd,0x28,0x28,0xe8] +; CHECK-NEXT: vpxor %ymm4, %ymm4, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xe4] +; CHECK-NEXT: vmovapd %ymm0, %ymm5 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xe8] ; CHECK-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04] ; CHECK-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03] -; CHECK-NEXT: vaddpd %ymm5, %ymm3, %ymm1 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xcd] -; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm5, %ymm3, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xcd] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 5, i8 %x4) %res1 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> zeroinitializer, i32 4, i8 %x4) @@ -4784,14 +4784,14 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05] -; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xe0] +; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05] -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX 
Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05] -; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] -; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc4] +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc4] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4) %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4) @@ -4807,14 +4807,14 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xd8] +; CHECK-NEXT: vmovaps %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05] -; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0xe0] +; CHECK-NEXT: vmovaps %xmm0, %xmm4 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05] -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05] -; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] -; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 ## encoding: 
[0x62,0xf1,0x7c,0x08,0x58,0xc4] +; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc4] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4) %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4) @@ -4830,14 +4830,14 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05] -; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xe0] +; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05] -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05] -; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] -; CHECK-NEXT: vaddps %ymm4, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc4] +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm4, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc4] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> 
@llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4) %res1 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4) @@ -4853,14 +4853,14 @@ ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xd8] +; CHECK-NEXT: vmovaps %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05] -; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0xe0] +; CHECK-NEXT: vmovaps %ymm0, %ymm4 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xe0] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05] -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05] -; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] -; CHECK-NEXT: vaddps %ymm4, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc4] +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm4, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc4] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4) %res1 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4) @@ -5023,8 +5023,8 @@ ; CHECK-NEXT: vpbroadcastd 
%edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7] ; CHECK-NEXT: vpbroadcastd %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xcf] ; CHECK-NEXT: vpbroadcastd %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xd7] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1) %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask) @@ -5043,8 +5043,8 @@ ; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7] ; CHECK-NEXT: vpbroadcastd %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xcf] ; CHECK-NEXT: vpbroadcastd %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xd7] -; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1) %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask) @@ -5063,8 +5063,8 @@ ; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7] ; CHECK-NEXT: vpbroadcastq %rdi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xcf] ; CHECK-NEXT: vpbroadcastq %rdi, %ymm2 
## encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xd7] -; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 -1) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 %mask) @@ -5083,8 +5083,8 @@ ; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7] ; CHECK-NEXT: vpbroadcastq %rdi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xcf] ; CHECK-NEXT: vpbroadcastq %rdi, %xmm2 ## encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xd7] -; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 -1) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 %mask) @@ -5108,7 +5108,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> 
@llvm.x86.avx512.psra.q.128(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] %mask.cast = bitcast i8 %mask to <8 x i1> @@ -5144,7 +5144,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1] %mask.cast = bitcast i8 %mask to <8 x i1> @@ -5180,7 +5180,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsraq $7, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x07] -; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] %mask.cast = bitcast i8 %mask to <8 x i1> @@ -5216,7 +5216,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsraq $7, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x07] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc1] +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1] %mask.cast = bitcast i8 %mask to <8 x i1> @@ -5252,7 +5252,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1] -; 
CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %a0, <2 x i64> %a1) %mask.cast = bitcast i8 %mask to <8 x i1> @@ -5290,7 +5290,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0xc2] +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %a0, <4 x i64> %a1) %mask.cast = bitcast i8 %mask to <8 x i1> Index: test/CodeGen/X86/avx512vl-logic.ll =================================================================== --- test/CodeGen/X86/avx512vl-logic.ll +++ test/CodeGen/X86/avx512vl-logic.ll @@ -8,7 +8,7 @@ ; CHECK-LABEL: vpandd256: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; CHECK-NEXT: vpandd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. @@ -35,7 +35,7 @@ ; CHECK-LABEL: vpord256: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; CHECK-NEXT: vpord %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpor %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. @@ -48,7 +48,7 @@ ; CHECK-LABEL: vpxord256: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; CHECK-NEXT: vpxord %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. 
@@ -61,7 +61,7 @@ ; CHECK-LABEL: vpandq256: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0 -; CHECK-NEXT: vpandq %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. @@ -88,7 +88,7 @@ ; CHECK-LABEL: vporq256: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0 -; CHECK-NEXT: vporq %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpor %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. @@ -101,7 +101,7 @@ ; CHECK-LABEL: vpxorq256: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0 -; CHECK-NEXT: vpxorq %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. @@ -116,7 +116,7 @@ ; CHECK-LABEL: vpandd128: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; CHECK-NEXT: vpandd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. @@ -143,7 +143,7 @@ ; CHECK-LABEL: vpord128: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; CHECK-NEXT: vpord %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. @@ -156,7 +156,7 @@ ; CHECK-LABEL: vpxord128: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; CHECK-NEXT: vpxord %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. @@ -169,7 +169,7 @@ ; CHECK-LABEL: vpandq128: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: vpandq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. 
@@ -196,7 +196,7 @@ ; CHECK-LABEL: vporq128: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: vporq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. @@ -209,7 +209,7 @@ ; CHECK-LABEL: vpxorq128: ; CHECK: ## BB#0: ## %entry ; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: vpxorq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq entry: ; Force the execution domain with an add. Index: test/CodeGen/X86/avx512vl-mov.ll =================================================================== --- test/CodeGen/X86/avx512vl-mov.ll +++ test/CodeGen/X86/avx512vl-mov.ll @@ -4,7 +4,7 @@ define <8 x i32> @test_256_1(i8 * %addr) { ; CHECK-LABEL: test_256_1: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07] +; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i32>* %res = load <8 x i32>, <8 x i32>* %vaddr, align 1 @@ -14,7 +14,7 @@ define <8 x i32> @test_256_2(i8 * %addr) { ; CHECK-LABEL: test_256_2: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07] +; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i32>* %res = load <8 x i32>, <8 x i32>* %vaddr, align 32 @@ -24,7 +24,7 @@ define void @test_256_3(i8 * %addr, <4 x i64> %data) { ; CHECK-LABEL: test_256_3: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07] +; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i64>* store <4 x i64>%data, <4 x i64>* %vaddr, align 32 @@ -34,7 +34,7 @@ 
define void @test_256_4(i8 * %addr, <8 x i32> %data) { ; CHECK-LABEL: test_256_4: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07] +; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i32>* store <8 x i32>%data, <8 x i32>* %vaddr, align 1 @@ -44,7 +44,7 @@ define void @test_256_5(i8 * %addr, <8 x i32> %data) { ; CHECK-LABEL: test_256_5: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07] +; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x i32>* store <8 x i32>%data, <8 x i32>* %vaddr, align 32 @@ -54,7 +54,7 @@ define <4 x i64> @test_256_6(i8 * %addr) { ; CHECK-LABEL: test_256_6: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07] +; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i64>* %res = load <4 x i64>, <4 x i64>* %vaddr, align 32 @@ -64,7 +64,7 @@ define void @test_256_7(i8 * %addr, <4 x i64> %data) { ; CHECK-LABEL: test_256_7: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07] +; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i64>* store <4 x i64>%data, <4 x i64>* %vaddr, align 1 @@ -74,7 +74,7 @@ define <4 x i64> @test_256_8(i8 * %addr) { ; CHECK-LABEL: test_256_8: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07] +; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] 
%vaddr = bitcast i8* %addr to <4 x i64>* %res = load <4 x i64>, <4 x i64>* %vaddr, align 1 @@ -84,7 +84,7 @@ define void @test_256_9(i8 * %addr, <4 x double> %data) { ; CHECK-LABEL: test_256_9: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07] +; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x double>* store <4 x double>%data, <4 x double>* %vaddr, align 32 @@ -94,7 +94,7 @@ define <4 x double> @test_256_10(i8 * %addr) { ; CHECK-LABEL: test_256_10: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07] +; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x double>* %res = load <4 x double>, <4 x double>* %vaddr, align 32 @@ -104,7 +104,7 @@ define void @test_256_11(i8 * %addr, <8 x float> %data) { ; CHECK-LABEL: test_256_11: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07] +; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x float>* store <8 x float>%data, <8 x float>* %vaddr, align 32 @@ -114,7 +114,7 @@ define <8 x float> @test_256_12(i8 * %addr) { ; CHECK-LABEL: test_256_12: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07] +; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x float>* %res = load <8 x float>, <8 x float>* %vaddr, align 32 @@ -124,7 +124,7 @@ define void @test_256_13(i8 * %addr, <4 x double> %data) { ; CHECK-LABEL: test_256_13: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: 
[0x62,0xf1,0x7c,0x28,0x11,0x07] +; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x double>* store <4 x double>%data, <4 x double>* %vaddr, align 1 @@ -134,7 +134,7 @@ define <4 x double> @test_256_14(i8 * %addr) { ; CHECK-LABEL: test_256_14: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07] +; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x double>* %res = load <4 x double>, <4 x double>* %vaddr, align 1 @@ -144,7 +144,7 @@ define void @test_256_15(i8 * %addr, <8 x float> %data) { ; CHECK-LABEL: test_256_15: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07] +; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x float>* store <8 x float>%data, <8 x float>* %vaddr, align 1 @@ -154,7 +154,7 @@ define <8 x float> @test_256_16(i8 * %addr) { ; CHECK-LABEL: test_256_16: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07] +; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <8 x float>* %res = load <8 x float>, <8 x float>* %vaddr, align 1 @@ -164,7 +164,7 @@ define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) { ; CHECK-LABEL: test_256_17: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04] ; CHECK-NEXT: 
vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -178,7 +178,7 @@ define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) { ; CHECK-LABEL: test_256_18: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04] ; CHECK-NEXT: vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -192,7 +192,7 @@ define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) { ; CHECK-LABEL: test_256_19: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9] ; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -206,7 +206,7 @@ define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) { ; CHECK-LABEL: test_256_20: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9] ; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -220,7 +220,7 @@ define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) { ; CHECK-LABEL: test_256_21: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; 
CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] ; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -234,7 +234,7 @@ define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) { ; CHECK-LABEL: test_256_22: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] ; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -248,7 +248,7 @@ define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) { ; CHECK-LABEL: test_256_23: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9] ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -262,7 +262,7 @@ define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) { ; CHECK-LABEL: test_256_24: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9] ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -276,7 +276,7 @@ define <8 x float> 
@test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) { ; CHECK-LABEL: test_256_25: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04] ; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07] @@ -291,7 +291,7 @@ define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) { ; CHECK-LABEL: test_256_26: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07] ; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04] ; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07] @@ -306,7 +306,7 @@ define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) { ; CHECK-LABEL: test_256_27: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9] ; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07] ; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04] ; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07] @@ -321,7 +321,7 @@ define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) { ; CHECK-LABEL: test_256_28: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: 
[0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9] ; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07] ; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04] ; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07] @@ -336,7 +336,7 @@ define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) { ; CHECK-LABEL: test_256_29: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] ; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -350,7 +350,7 @@ define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) { ; CHECK-LABEL: test_256_30: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04] ; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -364,7 +364,7 @@ define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) { ; CHECK-LABEL: test_256_31: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9] ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} {z} ## 
encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -378,7 +378,7 @@ define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) { ; CHECK-LABEL: test_256_32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9] +; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9] ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -392,7 +392,7 @@ define <4 x i32> @test_128_1(i8 * %addr) { ; CHECK-LABEL: test_128_1: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07] +; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i32>* %res = load <4 x i32>, <4 x i32>* %vaddr, align 1 @@ -402,7 +402,7 @@ define <4 x i32> @test_128_2(i8 * %addr) { ; CHECK-LABEL: test_128_2: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07] +; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i32>* %res = load <4 x i32>, <4 x i32>* %vaddr, align 16 @@ -412,7 +412,7 @@ define void @test_128_3(i8 * %addr, <2 x i64> %data) { ; CHECK-LABEL: test_128_3: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07] +; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x i64>* store <2 x i64>%data, <2 x i64>* %vaddr, align 16 @@ -422,7 +422,7 @@ define void @test_128_4(i8 * %addr, <4 x i32> %data) { ; CHECK-LABEL: test_128_4: ; CHECK: ## 
BB#0: -; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07] +; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i32>* store <4 x i32>%data, <4 x i32>* %vaddr, align 1 @@ -432,7 +432,7 @@ define void @test_128_5(i8 * %addr, <4 x i32> %data) { ; CHECK-LABEL: test_128_5: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07] +; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x i32>* store <4 x i32>%data, <4 x i32>* %vaddr, align 16 @@ -442,7 +442,7 @@ define <2 x i64> @test_128_6(i8 * %addr) { ; CHECK-LABEL: test_128_6: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07] +; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x i64>* %res = load <2 x i64>, <2 x i64>* %vaddr, align 16 @@ -452,7 +452,7 @@ define void @test_128_7(i8 * %addr, <2 x i64> %data) { ; CHECK-LABEL: test_128_7: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07] +; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x i64>* store <2 x i64>%data, <2 x i64>* %vaddr, align 1 @@ -462,7 +462,7 @@ define <2 x i64> @test_128_8(i8 * %addr) { ; CHECK-LABEL: test_128_8: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07] +; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x i64>* %res = load <2 x i64>, <2 x i64>* %vaddr, align 
1 @@ -472,7 +472,7 @@ define void @test_128_9(i8 * %addr, <2 x double> %data) { ; CHECK-LABEL: test_128_9: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07] +; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x double>* store <2 x double>%data, <2 x double>* %vaddr, align 16 @@ -482,7 +482,7 @@ define <2 x double> @test_128_10(i8 * %addr) { ; CHECK-LABEL: test_128_10: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07] +; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x double>* %res = load <2 x double>, <2 x double>* %vaddr, align 16 @@ -492,7 +492,7 @@ define void @test_128_11(i8 * %addr, <4 x float> %data) { ; CHECK-LABEL: test_128_11: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07] +; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x float>* store <4 x float>%data, <4 x float>* %vaddr, align 16 @@ -502,7 +502,7 @@ define <4 x float> @test_128_12(i8 * %addr) { ; CHECK-LABEL: test_128_12: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07] +; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x float>* %res = load <4 x float>, <4 x float>* %vaddr, align 16 @@ -512,7 +512,7 @@ define void @test_128_13(i8 * %addr, <2 x double> %data) { ; CHECK-LABEL: test_128_13: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07] +; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX 
Compression encoding: [0xc5,0xf8,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x double>* store <2 x double>%data, <2 x double>* %vaddr, align 1 @@ -522,7 +522,7 @@ define <2 x double> @test_128_14(i8 * %addr) { ; CHECK-LABEL: test_128_14: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07] +; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <2 x double>* %res = load <2 x double>, <2 x double>* %vaddr, align 1 @@ -532,7 +532,7 @@ define void @test_128_15(i8 * %addr, <4 x float> %data) { ; CHECK-LABEL: test_128_15: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07] +; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x float>* store <4 x float>%data, <4 x float>* %vaddr, align 1 @@ -542,7 +542,7 @@ define <4 x float> @test_128_16(i8 * %addr) { ; CHECK-LABEL: test_128_16: ; CHECK: ## BB#0: -; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07] +; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] %vaddr = bitcast i8* %addr to <4 x float>* %res = load <4 x float>, <4 x float>* %vaddr, align 1 @@ -552,7 +552,7 @@ define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) { ; CHECK-LABEL: test_128_17: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] ; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07] ; 
CHECK-NEXT: retq ## encoding: [0xc3] @@ -566,7 +566,7 @@ define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) { ; CHECK-LABEL: test_128_18: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] ; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -580,7 +580,7 @@ define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) { ; CHECK-LABEL: test_128_19: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] ; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -594,7 +594,7 @@ define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) { ; CHECK-LABEL: test_128_20: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] ; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -608,7 +608,7 @@ define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) { ; CHECK-LABEL: test_128_21: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: 
[0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] ; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -622,7 +622,7 @@ define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) { ; CHECK-LABEL: test_128_22: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] ; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -636,7 +636,7 @@ define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) { ; CHECK-LABEL: test_128_23: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] ; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -650,7 +650,7 @@ define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) { ; CHECK-LABEL: test_128_24: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] ; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -664,7 +664,7 @@ define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) { ; 
CHECK-LABEL: test_128_25: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] ; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -678,7 +678,7 @@ define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) { ; CHECK-LABEL: test_128_26: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04] ; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -692,7 +692,7 @@ define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) { ; CHECK-LABEL: test_128_27: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] ; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -706,7 +706,7 @@ define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) { ; CHECK-LABEL: test_128_28: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] ; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovups (%rdi), 
%xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -720,7 +720,7 @@ define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) { ; CHECK-LABEL: test_128_29: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] ; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -734,7 +734,7 @@ define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) { ; CHECK-LABEL: test_128_30: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] ; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04] ; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -748,7 +748,7 @@ define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) { ; CHECK-LABEL: test_128_31: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] ; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -762,7 +762,7 @@ define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) { ; CHECK-LABEL: test_128_32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9] +; 
CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] ; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04] ; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] Index: test/CodeGen/X86/avx512vl-nontemporal.ll =================================================================== --- test/CodeGen/X86/avx512vl-nontemporal.ll +++ test/CodeGen/X86/avx512vl-nontemporal.ll @@ -1,15 +1,15 @@ ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s define void @f256(<8 x float> %A, <8 x float> %AA, i8* %B, <4 x double> %C, <4 x double> %CC, i32 %D, <4 x i64> %E, <4 x i64> %EE) { -; CHECK: vmovntps %ymm{{.*}} ## encoding: [0x62 +; CHECK: vmovntps %ymm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 %cast = bitcast i8* %B to <8 x float>* %A2 = fadd <8 x float> %A, %AA store <8 x float> %A2, <8 x float>* %cast, align 64, !nontemporal !0 -; CHECK: vmovntdq %ymm{{.*}} ## encoding: [0x62 +; CHECK: vmovntdq %ymm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 %cast1 = bitcast i8* %B to <4 x i64>* %E2 = add <4 x i64> %E, %EE store <4 x i64> %E2, <4 x i64>* %cast1, align 64, !nontemporal !0 -; CHECK: vmovntpd %ymm{{.*}} ## encoding: [0x62 +; CHECK: vmovntpd %ymm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 %cast2 = bitcast i8* %B to <4 x double>* %C2 = fadd <4 x double> %C, %CC store <4 x double> %C2, <4 x double>* %cast2, align 64, !nontemporal !0 @@ -17,15 +17,15 @@ } define void @f128(<4 x float> %A, <4 x float> %AA, i8* %B, <2 x double> %C, <2 x double> %CC, i32 %D, <2 x i64> %E, <2 x i64> %EE) { -; CHECK: vmovntps %xmm{{.*}} ## encoding: [0x62 +; CHECK: vmovntps %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 %cast = bitcast i8* %B to <4 x float>* %A2 = fadd <4 x float> %A, %AA store <4 x float> %A2, <4 x float>* %cast, align 64, !nontemporal !0 -; CHECK: vmovntdq 
%xmm{{.*}} ## encoding: [0x62 +; CHECK: vmovntdq %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 %cast1 = bitcast i8* %B to <2 x i64>* %E2 = add <2 x i64> %E, %EE store <2 x i64> %E2, <2 x i64>* %cast1, align 64, !nontemporal !0 -; CHECK: vmovntpd %xmm{{.*}} ## encoding: [0x62 +; CHECK: vmovntpd %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5 %cast2 = bitcast i8* %B to <2 x double>* %C2 = fadd <2 x double> %C, %CC store <2 x double> %C2, <2 x double>* %cast2, align 64, !nontemporal !0 Index: test/CodeGen/X86/avx512vl-vbroadcast.ll =================================================================== --- test/CodeGen/X86/avx512vl-vbroadcast.ll +++ test/CodeGen/X86/avx512vl-vbroadcast.ll @@ -73,7 +73,7 @@ define <8 x float> @_ss8xfloat_mask(<8 x float> %i, float %a, <8 x i32> %mask1) { ; CHECK-LABEL: _ss8xfloat_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpxord %ymm3, %ymm3, %ymm3 +; CHECK-NEXT: vpxor %ymm3, %ymm3, %ymm3 ; CHECK-NEXT: vpcmpneqd %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -87,7 +87,7 @@ define <8 x float> @_ss8xfloat_maskz(float %a, <8 x i32> %mask1) { ; CHECK-LABEL: _ss8xfloat_maskz: ; CHECK: # BB#0: -; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 +; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq @@ -111,7 +111,7 @@ define <4 x float> @_ss4xfloat_mask(<4 x float> %i, float %a, <4 x i32> %mask1) { ; CHECK-LABEL: _ss4xfloat_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpxord %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vpcmpneqd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vbroadcastss %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq @@ -125,7 +125,7 @@ define <4 x float> @_ss4xfloat_maskz(float %a, <4 x i32> %mask1) { ; CHECK-LABEL: _ss4xfloat_maskz: ; CHECK: # BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ; CHECK-NEXT: 
vbroadcastss %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq @@ -149,7 +149,7 @@ define <4 x double> @_ss4xdouble_mask(<4 x double> %i, double %a, <4 x i32> %mask1) { ; CHECK-LABEL: _ss4xdouble_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpxord %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; CHECK-NEXT: vpcmpneqd %xmm3, %xmm2, %k1 ; CHECK-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1} ; CHECK-NEXT: retq @@ -163,7 +163,7 @@ define <4 x double> @_ss4xdouble_maskz(double %a, <4 x i32> %mask1) { ; CHECK-LABEL: _ss4xdouble_maskz: ; CHECK: # BB#0: -; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq Index: test/CodeGen/X86/compress_expand.ll =================================================================== --- test/CodeGen/X86/compress_expand.ll +++ test/CodeGen/X86/compress_expand.ll @@ -238,7 +238,7 @@ define <2 x float> @test13(float* %base, <2 x float> %src0, <2 x i32> %trigger) { ; SKX-LABEL: test13: ; SKX: # BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] ; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0 ; SKX-NEXT: kshiftlb $6, %k0, %k0 @@ -268,7 +268,7 @@ define void @test14(float* %base, <2 x float> %V, <2 x i32> %trigger) { ; SKX-LABEL: test14: ; SKX: # BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] ; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0 ; SKX-NEXT: kshiftlb $6, %k0, %k0 @@ -314,7 +314,7 @@ ; SKX-LABEL: test16: ; SKX: # BB#0: ; SKX-NEXT: vextracti32x8 $1, %zmm2, %ymm3 -; SKX-NEXT: vpxord %ymm4, %ymm4, %ymm4 +; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4 ; SKX-NEXT: vpcmpeqd %ymm4, %ymm3, %k1 ; SKX-NEXT: vpcmpeqd %ymm4, %ymm2, %k2 ; SKX-NEXT: kmovb %k2, %eax Index: test/CodeGen/X86/evex-to-vex-compress.mir 
=================================================================== --- test/CodeGen/X86/evex-to-vex-compress.mir +++ test/CodeGen/X86/evex-to-vex-compress.mir @@ -0,0 +1,4485 @@ +# RUN: llc -march=x86-64 -run-pass x86-evex-to-vex-compress -verify-machineinstrs -mcpu=skx -o - %s | FileCheck %s +# This test verifies VEX encoding for AVX-512 instructions that use registers of low indexes and +# do not use zmm or mask registers and have a corresponding AVX/AVX2 opcode + +--- | + define void @evex_z256_to_vex_test() { ret void } + define void @evex_z128_to_vex_test() { ret void } + define void @evex_scalar_to_vex_test() { ret void } + define void @evex_z256_to_evex_test() { ret void } + define void @evex_z128_to_evex_test() { ret void } + define void @evex_scalar_to_evex_test() { ret void } +... +--- + # CHECK-LABEL: name: evex_z256_to_vex_test + # CHECK: bb.0: + +name: evex_z256_to_vex_test +body: | + bb.0: + ; CHECK: VMOVAPDYmr %rdi, 1, _, 0, _, %ymm0 + VMOVAPDZ256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: %ymm0 = VMOVAPDYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVAPDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVAPDYrr %ymm0 + %ymm0 = VMOVAPDZ256rr %ymm0 + ; CHECK: %ymm0 = VMOVAPDYrr_REV %ymm0 + %ymm0 = VMOVAPDZ256rr_REV %ymm0 + ; CHECK: VMOVAPSYmr %rdi, 1, _, 0, _, %ymm0 + VMOVAPSZ256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: %ymm0 = VMOVAPSYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVAPSZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVAPSYrr %ymm0 + %ymm0 = VMOVAPSZ256rr %ymm0 + ; CHECK: %ymm0 = VMOVAPSYrr_REV %ymm0 + %ymm0 = VMOVAPSZ256rr_REV %ymm0 + ; CHECK: %ymm0 = VMOVDDUPYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVDDUPZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVDDUPYrr %ymm0 + %ymm0 = VMOVDDUPZ256rr %ymm0 + ; CHECK: VMOVDQAYmr %rdi, 1, _, 0, _, %ymm0 + VMOVDQA32Z256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: %ymm0 = VMOVDQAYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVDQA32Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVDQAYrr %ymm0 + %ymm0 = VMOVDQA32Z256rr %ymm0 + ; CHECK: %ymm0 = 
VMOVDQAYrr_REV %ymm0 + %ymm0 = VMOVDQA32Z256rr_REV %ymm0 + ; CHECK: VMOVDQAYmr %rdi, 1, _, 0, _, %ymm0 + VMOVDQA64Z256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: %ymm0 = VMOVDQAYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVDQA64Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVDQAYrr %ymm0 + %ymm0 = VMOVDQA64Z256rr %ymm0 + ; CHECK: %ymm0 = VMOVDQAYrr_REV %ymm0 + %ymm0 = VMOVDQA64Z256rr_REV %ymm0 + ; CHECK: VMOVDQUYmr %rdi, 1, _, 0, _, %ymm0 + VMOVDQU16Z256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: %ymm0 = VMOVDQUYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVDQU16Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVDQUYrr %ymm0 + %ymm0 = VMOVDQU16Z256rr %ymm0 + ; CHECK: %ymm0 = VMOVDQUYrr_REV %ymm0 + %ymm0 = VMOVDQU16Z256rr_REV %ymm0 + ; CHECK: VMOVDQUYmr %rdi, 1, _, 0, _, %ymm0 + VMOVDQU32Z256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: %ymm0 = VMOVDQUYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVDQU32Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVDQUYrr %ymm0 + %ymm0 = VMOVDQU32Z256rr %ymm0 + ; CHECK: %ymm0 = VMOVDQUYrr_REV %ymm0 + %ymm0 = VMOVDQU32Z256rr_REV %ymm0 + ; CHECK: VMOVDQUYmr %rdi, 1, _, 0, _, %ymm0 + VMOVDQU64Z256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: %ymm0 = VMOVDQUYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVDQU64Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVDQUYrr %ymm0 + %ymm0 = VMOVDQU64Z256rr %ymm0 + ; CHECK: %ymm0 = VMOVDQUYrr_REV %ymm0 + %ymm0 = VMOVDQU64Z256rr_REV %ymm0 + ; CHECK: VMOVDQUYmr %rdi, 1, _, 0, _, %ymm0 + VMOVDQU8Z256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: %ymm0 = VMOVDQUYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVDQU8Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVDQUYrr %ymm0 + %ymm0 = VMOVDQU8Z256rr %ymm0 + ; CHECK: %ymm0 = VMOVDQUYrr_REV %ymm0 + %ymm0 = VMOVDQU8Z256rr_REV %ymm0 + ; CHECK: %ymm0 = VMOVNTDQAYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVNTDQAZ256rm %rip, 1, _, %rax, _ + ; CHECK: VMOVNTDQYmr %rdi, 1, _, 0, _, %ymm0 + VMOVNTDQZ256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: VMOVNTPDYmr %rdi, 1, _, 0, _, %ymm0 + VMOVNTPDZ256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: VMOVNTPSYmr %rdi, 1, 
_, 0, _, %ymm0 + VMOVNTPSZ256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: %ymm0 = VMOVSHDUPYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVSHDUPZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVSHDUPYrr %ymm0 + %ymm0 = VMOVSHDUPZ256rr %ymm0 + ; CHECK: %ymm0 = VMOVSLDUPYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVSLDUPZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVSLDUPYrr %ymm0 + %ymm0 = VMOVSLDUPZ256rr %ymm0 + ; CHECK: VMOVUPDYmr %rdi, 1, _, 0, _, %ymm0 + VMOVUPDZ256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: %ymm0 = VMOVUPDYrm %rip, 1, _, %rax, _ + %ymm0 = VMOVUPDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMOVUPDYrr %ymm0 + %ymm0 = VMOVUPDZ256rr %ymm0 + ; CHECK: %ymm0 = VMOVUPDYrr_REV %ymm0 + %ymm0 = VMOVUPDZ256rr_REV %ymm0 + ; CHECK: VMOVUPSYmr %rdi, 1, _, 0, _, %ymm0 + VMOVUPSZ256mr %rdi, 1, _, 0, _, %ymm0 + ; CHECK: %ymm0 = VPANDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPANDDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPANDYrr %ymm0, %ymm1 + %ymm0 = VPANDDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPANDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPANDQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPANDYrr %ymm0, %ymm1 + %ymm0 = VPANDQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPAVGBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPAVGBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPAVGBYrr %ymm0, %ymm1 + %ymm0 = VPAVGBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPAVGWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPAVGWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPAVGWYrr %ymm0, %ymm1 + %ymm0 = VPAVGWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPADDBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPADDBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPADDBYrr %ymm0, %ymm1 + %ymm0 = VPADDBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPADDDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPADDDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPADDDYrr %ymm0, %ymm1 + %ymm0 = VPADDDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPADDQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPADDQZ256rm %ymm0, %rip, 1, _, 
%rax, _ + ; CHECK: %ymm0 = VPADDQYrr %ymm0, %ymm1 + %ymm0 = VPADDQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPADDSBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPADDSBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPADDSBYrr %ymm0, %ymm1 + %ymm0 = VPADDSBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPADDSWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPADDSWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPADDSWYrr %ymm0, %ymm1 + %ymm0 = VPADDSWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPADDUSBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPADDUSBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPADDUSBYrr %ymm0, %ymm1 + %ymm0 = VPADDUSBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPADDUSWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPADDUSWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPADDUSWYrr %ymm0, %ymm1 + %ymm0 = VPADDUSWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPADDWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPADDWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPADDWYrr %ymm0, %ymm1 + %ymm0 = VPADDWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VMULPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VMULPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMULPDYrr %ymm0, %ymm1 + %ymm0 = VMULPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VMULPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VMULPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMULPSYrr %ymm0, %ymm1 + %ymm0 = VMULPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VORPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VORPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VORPDYrr %ymm0, %ymm1 + %ymm0 = VORPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VORPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VORPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VORPSYrr %ymm0, %ymm1 + %ymm0 = VORPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMADDUBSWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMADDUBSWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMADDUBSWYrr %ymm0, %ymm1 + %ymm0 = VPMADDUBSWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMADDWDYrm %ymm0, 
%rip, 1, _, %rax, _ + %ymm0 = VPMADDWDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMADDWDYrr %ymm0, %ymm1 + %ymm0 = VPMADDWDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMAXSBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMAXSBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMAXSBYrr %ymm0, %ymm1 + %ymm0 = VPMAXSBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMAXSDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMAXSDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMAXSDYrr %ymm0, %ymm1 + %ymm0 = VPMAXSDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMAXSWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMAXSWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMAXSWYrr %ymm0, %ymm1 + %ymm0 = VPMAXSWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMAXUBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMAXUBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMAXUBYrr %ymm0, %ymm1 + %ymm0 = VPMAXUBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMAXUDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMAXUDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMAXUDYrr %ymm0, %ymm1 + %ymm0 = VPMAXUDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMAXUWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMAXUWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMAXUWYrr %ymm0, %ymm1 + %ymm0 = VPMAXUWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMINSBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMINSBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMINSBYrr %ymm0, %ymm1 + %ymm0 = VPMINSBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMINSDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMINSDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMINSDYrr %ymm0, %ymm1 + %ymm0 = VPMINSDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMINSWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMINSWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMINSWYrr %ymm0, %ymm1 + %ymm0 = VPMINSWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMINUBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMINUBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMINUBYrr %ymm0, %ymm1 + 
%ymm0 = VPMINUBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMINUDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMINUDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMINUDYrr %ymm0, %ymm1 + %ymm0 = VPMINUDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMINUWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMINUWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMINUWYrr %ymm0, %ymm1 + %ymm0 = VPMINUWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMULDQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMULDQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMULDQYrr %ymm0, %ymm1 + %ymm0 = VPMULDQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMULHRSWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMULHRSWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMULHRSWYrr %ymm0, %ymm1 + %ymm0 = VPMULHRSWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMULHUWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMULHUWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMULHUWYrr %ymm0, %ymm1 + %ymm0 = VPMULHUWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMULHWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMULHWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMULHWYrr %ymm0, %ymm1 + %ymm0 = VPMULHWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMULLDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMULLDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMULLDYrr %ymm0, %ymm1 + %ymm0 = VPMULLDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMULLWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMULLWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMULLWYrr %ymm0, %ymm1 + %ymm0 = VPMULLWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPMULUDQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPMULUDQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMULUDQYrr %ymm0, %ymm1 + %ymm0 = VPMULUDQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPORYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPORDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPORYrr %ymm0, %ymm1 + %ymm0 = VPORDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPORYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPORQZ256rm 
%ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPORYrr %ymm0, %ymm1 + %ymm0 = VPORQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSUBBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSUBBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSUBBYrr %ymm0, %ymm1 + %ymm0 = VPSUBBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSUBDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSUBDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSUBDYrr %ymm0, %ymm1 + %ymm0 = VPSUBDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSUBQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSUBQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSUBQYrr %ymm0, %ymm1 + %ymm0 = VPSUBQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSUBSBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSUBSBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSUBSBYrr %ymm0, %ymm1 + %ymm0 = VPSUBSBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSUBSWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSUBSWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSUBSWYrr %ymm0, %ymm1 + %ymm0 = VPSUBSWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSUBUSBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSUBUSBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSUBUSBYrr %ymm0, %ymm1 + %ymm0 = VPSUBUSBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSUBUSWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSUBUSWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSUBUSWYrr %ymm0, %ymm1 + %ymm0 = VPSUBUSWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSUBWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSUBWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSUBWYrr %ymm0, %ymm1 + %ymm0 = VPSUBWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPXORYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPXORDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPXORYrr %ymm0, %ymm1 + %ymm0 = VPXORDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPXORYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPXORQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPXORYrr %ymm0, %ymm1 + %ymm0 = VPXORQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VADDPDYrm %ymm0, 
%rip, 1, _, %rax, _ + %ymm0 = VADDPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VADDPDYrr %ymm0, %ymm1 + %ymm0 = VADDPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VADDPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VADDPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VADDPSYrr %ymm0, %ymm1 + %ymm0 = VADDPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VANDNPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VANDNPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VANDNPDYrr %ymm0, %ymm1 + %ymm0 = VANDNPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VANDNPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VANDNPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VANDNPSYrr %ymm0, %ymm1 + %ymm0 = VANDNPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VANDPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VANDPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VANDPDYrr %ymm0, %ymm1 + %ymm0 = VANDPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VANDPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VANDPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VANDPSYrr %ymm0, %ymm1 + %ymm0 = VANDPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VDIVPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VDIVPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VDIVPDYrr %ymm0, %ymm1 + %ymm0 = VDIVPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VDIVPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VDIVPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VDIVPSYrr %ymm0, %ymm1 + %ymm0 = VDIVPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VMAXCPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VMAXCPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMAXCPDYrr %ymm0, %ymm1 + %ymm0 = VMAXCPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VMAXCPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VMAXCPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMAXCPSYrr %ymm0, %ymm1 + %ymm0 = VMAXCPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VMAXPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VMAXPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMAXPDYrr %ymm0, %ymm1 + %ymm0 = VMAXPDZ256rr %ymm0, 
%ymm1 + ; CHECK: %ymm0 = VMAXPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VMAXPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMAXPSYrr %ymm0, %ymm1 + %ymm0 = VMAXPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VMINCPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VMINCPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMINCPDYrr %ymm0, %ymm1 + %ymm0 = VMINCPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VMINCPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VMINCPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMINCPSYrr %ymm0, %ymm1 + %ymm0 = VMINCPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VMINPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VMINPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMINPDYrr %ymm0, %ymm1 + %ymm0 = VMINPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VMINPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VMINPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VMINPSYrr %ymm0, %ymm1 + %ymm0 = VMINPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VXORPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VXORPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VXORPDYrr %ymm0, %ymm1 + %ymm0 = VXORPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VXORPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VXORPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VXORPSYrr %ymm0, %ymm1 + %ymm0 = VXORPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPACKSSDWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPACKSSDWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPACKSSDWYrr %ymm0, %ymm1 + %ymm0 = VPACKSSDWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPACKSSWBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPACKSSWBZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPACKSSWBYrr %ymm0, %ymm1 + %ymm0 = VPACKSSWBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPACKUSDWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPACKUSDWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPACKUSDWYrr %ymm0, %ymm1 + %ymm0 = VPACKUSDWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPACKUSWBYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPACKUSWBZ256rm %ymm0, %rip, 1, _, %rax, 
_ + ; CHECK: %ymm0 = VPACKUSWBYrr %ymm0, %ymm1 + %ymm0 = VPACKUSWBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VUNPCKHPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VUNPCKHPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VUNPCKHPDYrr %ymm0, %ymm1 + %ymm0 = VUNPCKHPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VUNPCKHPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VUNPCKHPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VUNPCKHPSYrr %ymm0, %ymm1 + %ymm0 = VUNPCKHPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VUNPCKLPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VUNPCKLPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VUNPCKLPDYrr %ymm0, %ymm1 + %ymm0 = VUNPCKLPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VUNPCKLPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VUNPCKLPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VUNPCKLPSYrr %ymm0, %ymm1 + %ymm0 = VUNPCKLPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VSUBPDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VSUBPDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VSUBPDYrr %ymm0, %ymm1 + %ymm0 = VSUBPDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VSUBPSYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VSUBPSZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VSUBPSYrr %ymm0, %ymm1 + %ymm0 = VSUBPSZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPUNPCKHBWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPUNPCKHBWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPUNPCKHBWYrr %ymm0, %ymm1 + %ymm0 = VPUNPCKHBWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPUNPCKHDQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPUNPCKHDQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPUNPCKHDQYrr %ymm0, %ymm1 + %ymm0 = VPUNPCKHDQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPUNPCKHQDQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPUNPCKHQDQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPUNPCKHQDQYrr %ymm0, %ymm1 + %ymm0 = VPUNPCKHQDQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPUNPCKHWDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPUNPCKHWDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPUNPCKHWDYrr %ymm0, 
%ymm1 + %ymm0 = VPUNPCKHWDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPUNPCKLBWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPUNPCKLBWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPUNPCKLBWYrr %ymm0, %ymm1 + %ymm0 = VPUNPCKLBWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPUNPCKLDQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPUNPCKLDQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPUNPCKLDQYrr %ymm0, %ymm1 + %ymm0 = VPUNPCKLDQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPUNPCKLQDQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPUNPCKLQDQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPUNPCKLQDQYrr %ymm0, %ymm1 + %ymm0 = VPUNPCKLQDQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPUNPCKLWDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPUNPCKLWDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPUNPCKLWDYrr %ymm0, %ymm1 + %ymm0 = VPUNPCKLWDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VFMADD132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADD132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADD132PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADD132PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMADD132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADD132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADD132PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADD132PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMADD213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADD213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADD213PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADD213PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMADD213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADD213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADD213PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADD213PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMADD231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADD231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADD231PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADD231PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMADD231PSYm %ymm0, 
%ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADD231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADD231PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADD231PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMADDSUB132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADDSUB132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADDSUB132PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADDSUB132PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMADDSUB132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADDSUB132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADDSUB132PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADDSUB132PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMADDSUB213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADDSUB213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADDSUB213PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADDSUB213PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMADDSUB213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADDSUB213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADDSUB213PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADDSUB213PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMADDSUB231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADDSUB231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADDSUB231PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADDSUB231PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMADDSUB231PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMADDSUB231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMADDSUB231PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMADDSUB231PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUB132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUB132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMSUB132PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMSUB132PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUB132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUB132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMSUB132PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = 
VFMSUB132PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUB213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUB213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMSUB213PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMSUB213PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUB213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUB213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMSUB213PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMSUB213PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUB231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUB231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMSUB231PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMSUB231PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUB231PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUB231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMSUB231PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMSUB231PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUBADD132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUBADD132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMSUBADD132PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMSUBADD132PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUBADD132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUBADD132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMSUBADD132PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMSUBADD132PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUBADD213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUBADD213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMSUBADD213PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMSUBADD213PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUBADD213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUBADD213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMSUBADD213PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMSUBADD213PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUBADD231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUBADD231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: 
%ymm0 = VFMSUBADD231PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMSUBADD231PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFMSUBADD231PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFMSUBADD231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFMSUBADD231PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFMSUBADD231PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMADD132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFNMADD132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMADD132PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMADD132PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMADD132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFNMADD132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMADD132PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMADD132PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMADD213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFNMADD213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMADD213PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMADD213PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMADD213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFNMADD213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMADD213PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMADD213PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMADD231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFNMADD231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMADD231PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMADD231PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMADD231PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFNMADD231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMADD231PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMADD231PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMSUB132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFNMSUB132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMSUB132PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMSUB132PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMSUB132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = 
VFNMSUB132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMSUB132PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMSUB132PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMSUB213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFNMSUB213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMSUB213PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMSUB213PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMSUB213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFNMSUB213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMSUB213PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMSUB213PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMSUB231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFNMSUB231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMSUB231PDYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMSUB231PDZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VFNMSUB231PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _ + %ymm0 = VFNMSUB231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _ + ; CHECK: %ymm0 = VFNMSUB231PSYr %ymm0, %ymm1, %ymm2 + %ymm0 = VFNMSUB231PSZ256r %ymm0, %ymm1, %ymm2 + ; CHECK: %ymm0 = VPSRADYri %ymm0, 7 + %ymm0 = VPSRADZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPSRADYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSRADZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSRADYrr %ymm0, %xmm1 + %ymm0 = VPSRADZ256rr %ymm0, %xmm1 + ; CHECK: %ymm0 = VPSRAVDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSRAVDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSRAVDYrr %ymm0, %ymm1 + %ymm0 = VPSRAVDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSRAWYri %ymm0, 7 + %ymm0 = VPSRAWZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPSRAWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSRAWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSRAWYrr %ymm0, %xmm1 + %ymm0 = VPSRAWZ256rr %ymm0, %xmm1 + ; CHECK: %ymm0 = VPSRLDQYri %ymm0, %ymm1 + %ymm0 = VPSRLDQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSRLDYri %ymm0, 7 + %ymm0 = VPSRLDZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPSRLDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSRLDZ256rm %ymm0, %rip, 1, _, 
%rax, _ + ; CHECK: %ymm0 = VPSRLDYrr %ymm0, %xmm1 + %ymm0 = VPSRLDZ256rr %ymm0, %xmm1 + ; CHECK: %ymm0 = VPSRLQYri %ymm0, 7 + %ymm0 = VPSRLQZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPSRLQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSRLQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSRLQYrr %ymm0, %xmm1 + %ymm0 = VPSRLQZ256rr %ymm0, %xmm1 + ; CHECK: %ymm0 = VPSRLVDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSRLVDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSRLVDYrr %ymm0, %ymm1 + %ymm0 = VPSRLVDZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSRLVQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSRLVQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSRLVQYrr %ymm0, %ymm1 + %ymm0 = VPSRLVQZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSRLWYri %ymm0, 7 + %ymm0 = VPSRLWZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPSRLWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSRLWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSRLWYrr %ymm0, %xmm1 + %ymm0 = VPSRLWZ256rr %ymm0, %xmm1 + ; CHECK: %ymm0 = VPMOVSXBDYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVSXBDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVSXBDYrr %xmm0 + %ymm0 = VPMOVSXBDZ256rr %xmm0 + ; CHECK: %ymm0 = VPMOVSXBQYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVSXBQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVSXBQYrr %xmm0 + %ymm0 = VPMOVSXBQZ256rr %xmm0 + ; CHECK: %ymm0 = VPMOVSXBWYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVSXBWZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVSXBWYrr %xmm0 + %ymm0 = VPMOVSXBWZ256rr %xmm0 + ; CHECK: %ymm0 = VPMOVSXDQYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVSXDQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVSXDQYrr %xmm0 + %ymm0 = VPMOVSXDQZ256rr %xmm0 + ; CHECK: %ymm0 = VPMOVSXWDYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVSXWDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVSXWDYrr %xmm0 + %ymm0 = VPMOVSXWDZ256rr %xmm0 + ; CHECK: %ymm0 = VPMOVSXWQYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVSXWQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVSXWQYrr %xmm0 + %ymm0 = VPMOVSXWQZ256rr %xmm0 + ; CHECK: %ymm0 = 
VPMOVZXBDYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVZXBDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVZXBDYrr %xmm0 + %ymm0 = VPMOVZXBDZ256rr %xmm0 + ; CHECK: %ymm0 = VPMOVZXBQYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVZXBQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVZXBQYrr %xmm0 + %ymm0 = VPMOVZXBQZ256rr %xmm0 + ; CHECK: %ymm0 = VPMOVZXBWYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVZXBWZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVZXBWYrr %xmm0 + %ymm0 = VPMOVZXBWZ256rr %xmm0 + ; CHECK: %ymm0 = VPMOVZXDQYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVZXDQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVZXDQYrr %xmm0 + %ymm0 = VPMOVZXDQZ256rr %xmm0 + ; CHECK: %ymm0 = VPMOVZXWDYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVZXWDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVZXWDYrr %xmm0 + %ymm0 = VPMOVZXWDZ256rr %xmm0 + ; CHECK: %ymm0 = VPMOVZXWQYrm %rip, 1, _, %rax, _ + %ymm0 = VPMOVZXWQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPMOVZXWQYrr %xmm0 + %ymm0 = VPMOVZXWQZ256rr %xmm0 + ; CHECK: %ymm0 = VBROADCASTSDYrm %rip, 1, _, %rax, _ + %ymm0 = VBROADCASTSDZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0 + %ymm0 = VBROADCASTSDZ256r %xmm0 + ; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0 + %ymm0 = VBROADCASTSDZ256r_s %xmm0 + ; CHECK: %ymm0 = VBROADCASTSSYrm %rip, 1, _, %rax, _ + %ymm0 = VBROADCASTSSZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VBROADCASTSSYrr %xmm0 + %ymm0 = VBROADCASTSSZ256r %xmm0 + ; CHECK: %ymm0 = VBROADCASTSSYrr %xmm0 + %ymm0 = VBROADCASTSSZ256r_s %xmm0 + ; CHECK: %ymm0 = VPBROADCASTBYrm %rip, 1, _, %rax, _ + %ymm0 = VPBROADCASTBZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPBROADCASTBYrr %xmm0 + %ymm0 = VPBROADCASTBZ256r %xmm0 + ; CHECK: %ymm0 = VPBROADCASTDYrm %rip, 1, _, %rax, _ + %ymm0 = VPBROADCASTDZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPBROADCASTDYrr %xmm0 + %ymm0 = VPBROADCASTDZ256r %xmm0 + ; CHECK: %ymm0 = VPBROADCASTWYrm %rip, 1, _, %rax, _ + %ymm0 = VPBROADCASTWZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPBROADCASTWYrr %xmm0 
+ %ymm0 = VPBROADCASTWZ256r %xmm0 + ; CHECK: %ymm0 = VPBROADCASTQYrm %rip, 1, _, %rax, _ + %ymm0 = VPBROADCASTQZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPBROADCASTQYrr %xmm0 + %ymm0 = VPBROADCASTQZ256r %xmm0 + ; CHECK: %ymm0 = VPABSBYrm %rip, 1, _, %rax, _ + %ymm0 = VPABSBZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPABSBYrr %ymm0 + %ymm0 = VPABSBZ256rr %ymm0 + ; CHECK: %ymm0 = VPABSDYrm %rip, 1, _, %rax, _ + %ymm0 = VPABSDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPABSDYrr %ymm0 + %ymm0 = VPABSDZ256rr %ymm0 + ; CHECK: %ymm0 = VPABSWYrm %rip, 1, _, %rax, _ + %ymm0 = VPABSWZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPABSWYrr %ymm0 + %ymm0 = VPABSWZ256rr %ymm0 + ; CHECK: %ymm0 = VPSADBWYrm %ymm0, 1, _, %rax, _, _ + %ymm0 = VPSADBWZ256rm %ymm0, 1, _, %rax, _, _ + ; CHECK: %ymm0 = VPSADBWYrr %ymm0, %ymm1 + %ymm0 = VPSADBWZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPERMDYrm %ymm0, %rdi, 1, _, 0, _ + %ymm0 = VPERMDZ256rm %ymm0, %rdi, 1, _, 0, _ + ; CHECK: %ymm0 = VPERMDYrr %ymm1, %ymm0 + %ymm0 = VPERMDZ256rr %ymm1, %ymm0 + ; CHECK: %ymm0 = VPERMILPDYmi %rdi, 1, _, 0, _, _ + %ymm0 = VPERMILPDZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm0 = VPERMILPDYri %ymm0, 7 + %ymm0 = VPERMILPDZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPERMILPDYrm %ymm0, %rdi, 1, _, 0, _ + %ymm0 = VPERMILPDZ256rm %ymm0, %rdi, 1, _, 0, _ + ; CHECK: %ymm0 = VPERMILPDYrr %ymm1, %ymm0 + %ymm0 = VPERMILPDZ256rr %ymm1, %ymm0 + ; CHECK: %ymm0 = VPERMILPSYmi %rdi, 1, _, 0, _, _ + %ymm0 = VPERMILPSZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm0 = VPERMILPSYri %ymm0, 7 + %ymm0 = VPERMILPSZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPERMILPSYrm %ymm0, %rdi, 1, _, 0, _ + %ymm0 = VPERMILPSZ256rm %ymm0, %rdi, 1, _, 0, _ + ; CHECK: %ymm0 = VPERMILPSYrr %ymm1, %ymm0 + %ymm0 = VPERMILPSZ256rr %ymm1, %ymm0 + ; CHECK: %ymm0 = VPERMPDYmi %rdi, 1, _, 0, _, _ + %ymm0 = VPERMPDZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm0 = VPERMPDYri %ymm0, 7 + %ymm0 = VPERMPDZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPERMPSYrm %ymm0, %rdi, 1, _, 0, _ + %ymm0 = 
VPERMPSZ256rm %ymm0, %rdi, 1, _, 0, _ + ; CHECK: %ymm0 = VPERMPSYrr %ymm1, %ymm0 + %ymm0 = VPERMPSZ256rr %ymm1, %ymm0 + ; CHECK: %ymm0 = VPERMQYmi %rdi, 1, _, 0, _, _ + %ymm0 = VPERMQZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm0 = VPERMQYri %ymm0, 7 + %ymm0 = VPERMQZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPSLLDQYri %ymm0, 14 + %ymm0 = VPSLLDQZ256rr %ymm0, 14 + ; CHECK: %ymm0 = VPSLLDYri %ymm0, 7 + %ymm0 = VPSLLDZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPSLLDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSLLDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSLLDYrr %ymm0, 14 + %ymm0 = VPSLLDZ256rr %ymm0, 14 + ; CHECK: %ymm0 = VPSLLQYri %ymm0, 7 + %ymm0 = VPSLLQZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPSLLQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSLLQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSLLQYrr %ymm0, 14 + %ymm0 = VPSLLQZ256rr %ymm0, 14 + ; CHECK: %ymm0 = VPSLLVDYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSLLVDZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSLLVDYrr %ymm0, 14 + %ymm0 = VPSLLVDZ256rr %ymm0, 14 + ; CHECK: %ymm0 = VPSLLVQYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSLLVQZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSLLVQYrr %ymm0, 14 + %ymm0 = VPSLLVQZ256rr %ymm0, 14 + ; CHECK: %ymm0 = VPSLLWYri %ymm0, 7 + %ymm0 = VPSLLWZ256ri %ymm0, 7 + ; CHECK: %ymm0 = VPSLLWYrm %ymm0, %rip, 1, _, %rax, _ + %ymm0 = VPSLLWZ256rm %ymm0, %rip, 1, _, %rax, _ + ; CHECK: %ymm0 = VPSLLWYrr %ymm0, 14 + %ymm0 = VPSLLWZ256rr %ymm0, 14 + ; CHECK: %ymm0 = VCVTDQ2PDYrm %rdi, %ymm0, 1, _, 0 + %ymm0 = VCVTDQ2PDZ256rm %rdi, %ymm0, 1, _, 0 + ; CHECK: %ymm0 = VCVTDQ2PDYrr %xmm0 + %ymm0 = VCVTDQ2PDZ256rr %xmm0 + ; CHECK: %ymm0 = VCVTDQ2PSYrm %rdi, %ymm0, 1, _, 0 + %ymm0 = VCVTDQ2PSZ256rm %rdi, %ymm0, 1, _, 0 + ; CHECK: %ymm0 = VCVTDQ2PSYrr %ymm0 + %ymm0 = VCVTDQ2PSZ256rr %ymm0 + ; CHECK: %xmm0 = VCVTPD2DQYrm %rdi, %ymm0, 1, _, 0 + %xmm0 = VCVTPD2DQZ256rm %rdi, %ymm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTPD2DQYrr %ymm0 + %xmm0 = VCVTPD2DQZ256rr %ymm0 + ; CHECK: %xmm0 = 
VCVTPD2PSYrm %rdi, %ymm0, 1, _, 0 + %xmm0 = VCVTPD2PSZ256rm %rdi, %ymm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTPD2PSYrr %ymm0 + %xmm0 = VCVTPD2PSZ256rr %ymm0 + ; CHECK: %ymm0 = VCVTPS2DQYrm %rdi, %ymm0, 1, _, 0 + %ymm0 = VCVTPS2DQZ256rm %rdi, %ymm0, 1, _, 0 + ; CHECK: %ymm0 = VCVTPS2DQYrr %ymm0 + %ymm0 = VCVTPS2DQZ256rr %ymm0 + ; CHECK: %ymm0 = VCVTPS2PDYrm %rdi, %ymm0, 1, _, 0 + %ymm0 = VCVTPS2PDZ256rm %rdi, %ymm0, 1, _, 0 + ; CHECK: %ymm0 = VCVTPS2PDYrr %xmm0 + %ymm0 = VCVTPS2PDZ256rr %xmm0 + ; CHECK: VCVTPS2PHYmr %rdi, %ymm0, 1, _, 0, _, _ + VCVTPS2PHZ256mr %rdi, %ymm0, 1, _, 0, _, _ + ; CHECK: %xmm0 = VCVTPS2PHYrr %ymm0, _ + %xmm0 = VCVTPS2PHZ256rr %ymm0, _ + ; CHECK: %ymm0 = VCVTPH2PSYrm %rdi, %ymm0, 1, _, 0 + %ymm0 = VCVTPH2PSZ256rm %rdi, %ymm0, 1, _, 0 + ; CHECK: %ymm0 = VCVTPH2PSYrr %xmm0 + %ymm0 = VCVTPH2PSZ256rr %xmm0 + ; CHECK: %xmm0 = VCVTTPD2DQYrm %rdi, %ymm0, 1, _, 0 + %xmm0 = VCVTTPD2DQZ256rm %rdi, %ymm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTTPD2DQYrr %ymm0 + %xmm0 = VCVTTPD2DQZ256rr %ymm0 + ; CHECK: %ymm0 = VCVTTPS2DQYrm %rdi, %ymm0, 1, _, 0 + %ymm0 = VCVTTPS2DQZ256rm %rdi, %ymm0, 1, _, 0 + ; CHECK: %ymm0 = VCVTTPS2DQYrr %ymm0 + %ymm0 = VCVTTPS2DQZ256rr %ymm0 + ; CHECK: %ymm0 = VSQRTPDYm %rdi, _, _, _, _ + %ymm0 = VSQRTPDZ256m %rdi, _, _, _, _ + ; CHECK: %ymm0 = VSQRTPDYr %ymm0 + %ymm0 = VSQRTPDZ256r %ymm0 + ; CHECK: %ymm0 = VSQRTPSYm %rdi, _, _, _, _ + %ymm0 = VSQRTPSZ256m %rdi, _, _, _, _ + ; CHECK: %ymm0 = VSQRTPSYr %ymm0 + %ymm0 = VSQRTPSZ256r %ymm0 + ; CHECK: %ymm0 = VPALIGNRYrmi %ymm0, %rdi, _, _, _, _, _ + %ymm0 = VPALIGNRZ256rmi %ymm0, %rdi, _, _, _, _, _ + ; CHECK: %ymm0 = VPALIGNRYrri %ymm0, %ymm1, _ + %ymm0 = VPALIGNRZ256rri %ymm0, %ymm1, _ + ; CHECK: %ymm0 = VMOVUPSYrm %rdi, 1, _, 0, _ + %ymm0 = VMOVUPSZ256rm %rdi, 1, _, 0, _ + ; CHECK: %ymm0 = VMOVUPSYrr %ymm0 + %ymm0 = VMOVUPSZ256rr %ymm0 + ; CHECK: %ymm0 = VMOVUPSYrr_REV %ymm0 + %ymm0 = VMOVUPSZ256rr_REV %ymm0 + ; CHECK: %ymm0 = VPSHUFBYrm %ymm0, _, _, _, _, _ + %ymm0 = VPSHUFBZ256rm %ymm0, _, _, 
_, _, _ + ; CHECK: %ymm0 = VPSHUFBYrr %ymm0, %ymm1 + %ymm0 = VPSHUFBZ256rr %ymm0, %ymm1 + ; CHECK: %ymm0 = VPSHUFDYmi %rdi, 1, _, 0, _, _ + %ymm0 = VPSHUFDZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm0 = VPSHUFDYri %ymm0, -24 + %ymm0 = VPSHUFDZ256ri %ymm0, -24 + ; CHECK: %ymm0 = VPSHUFHWYmi %rdi, 1, _, 0, _, _ + %ymm0 = VPSHUFHWZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm0 = VPSHUFHWYri %ymm0, -24 + %ymm0 = VPSHUFHWZ256ri %ymm0, -24 + ; CHECK: %ymm0 = VPSHUFLWYmi %rdi, 1, _, 0, _, _ + %ymm0 = VPSHUFLWZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm0 = VPSHUFLWYri %ymm0, -24 + %ymm0 = VPSHUFLWZ256ri %ymm0, -24 + ; CHECK: %ymm0 = VSHUFPDYrmi %ymm0, _, _, _, _, _, _ + %ymm0 = VSHUFPDZ256rmi %ymm0, _, _, _, _, _, _ + ; CHECK: %ymm0 = VSHUFPDYrri %ymm0, _, _ + %ymm0 = VSHUFPDZ256rri %ymm0, _, _ + ; CHECK: %ymm0 = VSHUFPSYrmi %ymm0, _, _, _, _, _, _ + %ymm0 = VSHUFPSZ256rmi %ymm0, _, _, _, _, _, _ + ; CHECK: %ymm0 = VSHUFPSYrri %ymm0, _, _ + %ymm0 = VSHUFPSZ256rri %ymm0, _, _ + + RET 0, %zmm0, %zmm1 +... 
+--- + # CHECK-LABEL: name: evex_z128_to_vex_test + # CHECK: bb.0: + +name: evex_z128_to_vex_test +body: | + bb.0: + ; CHECK: VMOVAPDmr %rdi, 1, _, 0, _, %xmm0 + VMOVAPDZ128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVAPDrm %rip, 1, _, %rax, _ + %xmm0 = VMOVAPDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMOVAPDrr %xmm0 + %xmm0 = VMOVAPDZ128rr %xmm0 + ; CHECK: VMOVAPSmr %rdi, 1, _, 0, _, %xmm0 + VMOVAPSZ128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVAPSrm %rip, 1, _, %rax, _ + %xmm0 = VMOVAPSZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMOVAPSrr %xmm0 + %xmm0 = VMOVAPSZ128rr %xmm0 + ; CHECK: VMOVDQAmr %rdi, 1, _, 0, _, %xmm0 + VMOVDQA32Z128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVDQArm %rip, 1, _, %rax, _ + %xmm0 = VMOVDQA32Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMOVDQArr %xmm0 + %xmm0 = VMOVDQA32Z128rr %xmm0 + ; CHECK: VMOVDQAmr %rdi, 1, _, 0, _, %xmm0 + VMOVDQA64Z128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVDQArm %rip, 1, _, %rax, _ + %xmm0 = VMOVDQA64Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMOVDQArr %xmm0 + %xmm0 = VMOVDQA64Z128rr %xmm0 + ; CHECK: VMOVDQUmr %rdi, 1, _, 0, _, %xmm0 + VMOVDQU16Z128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVDQUrm %rip, 1, _, %rax, _ + %xmm0 = VMOVDQU16Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMOVDQUrr %xmm0 + %xmm0 = VMOVDQU16Z128rr %xmm0 + ; CHECK: VMOVDQUmr %rdi, 1, _, 0, _, %xmm0 + VMOVDQU32Z128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVDQUrm %rip, 1, _, %rax, _ + %xmm0 = VMOVDQU32Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMOVDQUrr %xmm0 + %xmm0 = VMOVDQU32Z128rr %xmm0 + ; CHECK: VMOVDQUmr %rdi, 1, _, 0, _, %xmm0 + VMOVDQU64Z128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVDQUrm %rip, 1, _, %rax, _ + %xmm0 = VMOVDQU64Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMOVDQUrr %xmm0 + %xmm0 = VMOVDQU64Z128rr %xmm0 + ; CHECK: VMOVDQUmr %rdi, 1, _, 0, _, %xmm0 + VMOVDQU8Z128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVDQUrm %rip, 1, _, %rax, _ + %xmm0 = 
VMOVDQU8Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMOVDQUrr %xmm0 + %xmm0 = VMOVDQU8Z128rr %xmm0 + ; CHECK: %xmm0 = VMOVDQUrr_REV %xmm0 + %xmm0 = VMOVDQU8Z128rr_REV %xmm0 + ; CHECK: %xmm0 = VMOVNTDQArm %rip, 1, _, %rax, _ + %xmm0 = VMOVNTDQAZ128rm %rip, 1, _, %rax, _ + ; CHECK: VMOVUPDmr %rdi, 1, _, 0, _, %xmm0 + VMOVUPDZ128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVUPDrm %rip, 1, _, %rax, _ + %xmm0 = VMOVUPDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMOVUPDrr %xmm0 + %xmm0 = VMOVUPDZ128rr %xmm0 + ; CHECK: %xmm0 = VMOVUPDrr_REV %xmm0 + %xmm0 = VMOVUPDZ128rr_REV %xmm0 + ; CHECK: VMOVUPSmr %rdi, 1, _, 0, _, %xmm0 + VMOVUPSZ128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVUPSrm %rip, 1, _, %rax, _ + %xmm0 = VMOVUPSZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMOVUPSrr %xmm0 + %xmm0 = VMOVUPSZ128rr %xmm0 + ; CHECK: %xmm0 = VMOVUPSrr_REV %xmm0 + %xmm0 = VMOVUPSZ128rr_REV %xmm0 + ; CHECK: VMOVNTDQmr %rdi, 1, _, 0, _, %xmm0 + VMOVNTDQZ128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: VMOVNTPDmr %rdi, 1, _, 0, _, %xmm0 + VMOVNTPDZ128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: VMOVNTPSmr %rdi, 1, _, 0, _, %xmm0 + VMOVNTPSZ128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVAPDrr_REV %xmm0 + %xmm0 = VMOVAPDZ128rr_REV %xmm0 + ; CHECK: %xmm0 = VMOVAPSrr_REV %xmm0 + %xmm0 = VMOVAPSZ128rr_REV %xmm0 + ; CHECK: %xmm0 = VMOVDQArr_REV %xmm0 + %xmm0 = VMOVDQA32Z128rr_REV %xmm0 + ; CHECK: %xmm0 = VMOVDQArr_REV %xmm0 + %xmm0 = VMOVDQA64Z128rr_REV %xmm0 + ; CHECK: %xmm0 = VMOVDQUrr_REV %xmm0 + %xmm0 = VMOVDQU16Z128rr_REV %xmm0 + ; CHECK: %xmm0 = VMOVDQUrr_REV %xmm0 + %xmm0 = VMOVDQU32Z128rr_REV %xmm0 + ; CHECK: %xmm0 = VMOVDQUrr_REV %xmm0 + %xmm0 = VMOVDQU64Z128rr_REV %xmm0 + ; CHECK: %xmm0 = VPMOVSXBDrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVSXBDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVSXBDrr %xmm0 + %xmm0 = VPMOVSXBDZ128rr %xmm0 + ; CHECK: %xmm0 = VPMOVSXBQrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVSXBQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVSXBQrr %xmm0 + %xmm0 = 
VPMOVSXBQZ128rr %xmm0 + ; CHECK: %xmm0 = VPMOVSXBWrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVSXBWZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVSXBWrr %xmm0 + %xmm0 = VPMOVSXBWZ128rr %xmm0 + ; CHECK: %xmm0 = VPMOVSXDQrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVSXDQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVSXDQrr %xmm0 + %xmm0 = VPMOVSXDQZ128rr %xmm0 + ; CHECK: %xmm0 = VPMOVSXWDrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVSXWDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVSXWDrr %xmm0 + %xmm0 = VPMOVSXWDZ128rr %xmm0 + ; CHECK: %xmm0 = VPMOVSXWQrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVSXWQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVSXWQrr %xmm0 + %xmm0 = VPMOVSXWQZ128rr %xmm0 + ; CHECK: %xmm0 = VPMOVZXBDrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVZXBDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVZXBDrr %xmm0 + %xmm0 = VPMOVZXBDZ128rr %xmm0 + ; CHECK: %xmm0 = VPMOVZXBQrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVZXBQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVZXBQrr %xmm0 + %xmm0 = VPMOVZXBQZ128rr %xmm0 + ; CHECK: %xmm0 = VPMOVZXBWrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVZXBWZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVZXBWrr %xmm0 + %xmm0 = VPMOVZXBWZ128rr %xmm0 + ; CHECK: %xmm0 = VPMOVZXDQrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVZXDQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVZXDQrr %xmm0 + %xmm0 = VPMOVZXDQZ128rr %xmm0 + ; CHECK: %xmm0 = VPMOVZXWDrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVZXWDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVZXWDrr %xmm0 + %xmm0 = VPMOVZXWDZ128rr %xmm0 + ; CHECK: %xmm0 = VPMOVZXWQrm %rip, 1, _, %rax, _ + %xmm0 = VPMOVZXWQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMOVZXWQrr %xmm0 + %xmm0 = VPMOVZXWQZ128rr %xmm0 + ; CHECK: VMOVHPDmr %rdi, 1, _, 0, _, %xmm0 + VMOVHPDZ128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVHPDrm %xmm0, %rdi, 1, _, 0, _ + %xmm0 = VMOVHPDZ128rm %xmm0, %rdi, 1, _, 0, _ + ; CHECK: VMOVHPSmr %rdi, 1, _, 0, _, %xmm0 + VMOVHPSZ128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVHPSrm %xmm0, %rdi, 1, 
_, 0, _ + %xmm0 = VMOVHPSZ128rm %xmm0, %rdi, 1, _, 0, _ + ; CHECK: VMOVLPDmr %rdi, 1, _, 0, _, %xmm0 + VMOVLPDZ128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVLPDrm %xmm0, %rdi, 1, _, 0, _ + %xmm0 = VMOVLPDZ128rm %xmm0, %rdi, 1, _, 0, _ + ; CHECK: VMOVLPSmr %rdi, 1, _, 0, _, %xmm0 + VMOVLPSZ128mr %rdi, 1, _, 0, _, %xmm0 + ; CHECK: %xmm0 = VMOVLPSrm %xmm0, %rdi, 1, _, 0, _ + %xmm0 = VMOVLPSZ128rm %xmm0, %rdi, 1, _, 0, _ + ; CHECK: %xmm0 = VMAXCPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMAXCPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMAXCPDrr %xmm0, %xmm1 + %xmm0 = VMAXCPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMAXCPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMAXCPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMAXCPSrr %xmm0, %xmm1 + %xmm0 = VMAXCPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMAXPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMAXPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMAXPDrr %xmm0, %xmm1 + %xmm0 = VMAXPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMAXPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMAXPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMAXPSrr %xmm0, %xmm1 + %xmm0 = VMAXPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMINCPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMINCPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMINCPDrr %xmm0, %xmm1 + %xmm0 = VMINCPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMINCPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMINCPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMINCPSrr %xmm0, %xmm1 + %xmm0 = VMINCPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMINPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMINPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMINPDrr %xmm0, %xmm1 + %xmm0 = VMINPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMINPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMINPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMINPSrr %xmm0, %xmm1 + %xmm0 = VMINPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMULPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMULPDZ128rm %xmm0, %rip, 1, _, 
%rax, _ + ; CHECK: %xmm0 = VMULPDrr %xmm0, %xmm1 + %xmm0 = VMULPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMULPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMULPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMULPSrr %xmm0, %xmm1 + %xmm0 = VMULPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VORPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VORPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VORPDrr %xmm0, %xmm1 + %xmm0 = VORPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VORPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VORPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VORPSrr %xmm0, %xmm1 + %xmm0 = VORPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPADDBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPADDBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPADDBrr %xmm0, %xmm1 + %xmm0 = VPADDBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPADDDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPADDDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPADDDrr %xmm0, %xmm1 + %xmm0 = VPADDDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPADDQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPADDQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPADDQrr %xmm0, %xmm1 + %xmm0 = VPADDQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPADDSBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPADDSBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPADDSBrr %xmm0, %xmm1 + %xmm0 = VPADDSBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPADDSWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPADDSWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPADDSWrr %xmm0, %xmm1 + %xmm0 = VPADDSWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPADDUSBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPADDUSBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPADDUSBrr %xmm0, %xmm1 + %xmm0 = VPADDUSBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPADDUSWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPADDUSWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPADDUSWrr %xmm0, %xmm1 + %xmm0 = VPADDUSWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPADDWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPADDWZ128rm 
%xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPADDWrr %xmm0, %xmm1 + %xmm0 = VPADDWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPANDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPANDDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPANDrr %xmm0, %xmm1 + %xmm0 = VPANDDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPANDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPANDQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPANDrr %xmm0, %xmm1 + %xmm0 = VPANDQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPAVGBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPAVGBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPAVGBrr %xmm0, %xmm1 + %xmm0 = VPAVGBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPAVGWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPAVGWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPAVGWrr %xmm0, %xmm1 + %xmm0 = VPAVGWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMAXSBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMAXSBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMAXSBrr %xmm0, %xmm1 + %xmm0 = VPMAXSBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMAXSDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMAXSDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMAXSDrr %xmm0, %xmm1 + %xmm0 = VPMAXSDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMAXSWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMAXSWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMAXSWrr %xmm0, %xmm1 + %xmm0 = VPMAXSWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMAXUBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMAXUBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMAXUBrr %xmm0, %xmm1 + %xmm0 = VPMAXUBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMAXUDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMAXUDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMAXUDrr %xmm0, %xmm1 + %xmm0 = VPMAXUDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMAXUWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMAXUWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMAXUWrr %xmm0, %xmm1 + %xmm0 = VPMAXUWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMINSBrm %xmm0, %rip, 1, _, %rax, _ 
+ %xmm0 = VPMINSBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMINSBrr %xmm0, %xmm1 + %xmm0 = VPMINSBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMINSDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMINSDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMINSDrr %xmm0, %xmm1 + %xmm0 = VPMINSDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMINSWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMINSWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMINSWrr %xmm0, %xmm1 + %xmm0 = VPMINSWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMINUBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMINUBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMINUBrr %xmm0, %xmm1 + %xmm0 = VPMINUBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMINUDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMINUDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMINUDrr %xmm0, %xmm1 + %xmm0 = VPMINUDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMINUWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMINUWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMINUWrr %xmm0, %xmm1 + %xmm0 = VPMINUWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMULDQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMULDQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMULDQrr %xmm0, %xmm1 + %xmm0 = VPMULDQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMULHRSWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMULHRSWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMULHRSWrr %xmm0, %xmm1 + %xmm0 = VPMULHRSWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMULHUWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMULHUWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMULHUWrr %xmm0, %xmm1 + %xmm0 = VPMULHUWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMULHWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMULHWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMULHWrr %xmm0, %xmm1 + %xmm0 = VPMULHWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMULLDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMULLDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMULLDrr %xmm0, %xmm1 + %xmm0 = VPMULLDZ128rr %xmm0, 
%xmm1 + ; CHECK: %xmm0 = VPMULLWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMULLWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMULLWrr %xmm0, %xmm1 + %xmm0 = VPMULLWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMULUDQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMULUDQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMULUDQrr %xmm0, %xmm1 + %xmm0 = VPMULUDQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPORrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPORDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPORrr %xmm0, %xmm1 + %xmm0 = VPORDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPORrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPORQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPORrr %xmm0, %xmm1 + %xmm0 = VPORQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPSUBBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSUBBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSUBBrr %xmm0, %xmm1 + %xmm0 = VPSUBBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPSUBDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSUBDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSUBDrr %xmm0, %xmm1 + %xmm0 = VPSUBDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPSUBQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSUBQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSUBQrr %xmm0, %xmm1 + %xmm0 = VPSUBQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPSUBSBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSUBSBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSUBSBrr %xmm0, %xmm1 + %xmm0 = VPSUBSBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPSUBSWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSUBSWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSUBSWrr %xmm0, %xmm1 + %xmm0 = VPSUBSWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPSUBUSBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSUBUSBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSUBUSBrr %xmm0, %xmm1 + %xmm0 = VPSUBUSBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPSUBUSWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSUBUSWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSUBUSWrr %xmm0, %xmm1 + 
%xmm0 = VPSUBUSWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPSUBWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSUBWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSUBWrr %xmm0, %xmm1 + %xmm0 = VPSUBWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VADDPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VADDPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VADDPDrr %xmm0, %xmm1 + %xmm0 = VADDPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VADDPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VADDPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VADDPSrr %xmm0, %xmm1 + %xmm0 = VADDPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VANDNPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VANDNPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VANDNPDrr %xmm0, %xmm1 + %xmm0 = VANDNPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VANDNPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VANDNPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VANDNPSrr %xmm0, %xmm1 + %xmm0 = VANDNPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VANDPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VANDPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VANDPDrr %xmm0, %xmm1 + %xmm0 = VANDPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VANDPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VANDPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VANDPSrr %xmm0, %xmm1 + %xmm0 = VANDPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VDIVPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VDIVPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VDIVPDrr %xmm0, %xmm1 + %xmm0 = VDIVPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VDIVPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VDIVPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VDIVPSrr %xmm0, %xmm1 + %xmm0 = VDIVPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPXORrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPXORDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPXORrr %xmm0, %xmm1 + %xmm0 = VPXORDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPXORrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPXORQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPXORrr %xmm0, 
%xmm1 + %xmm0 = VPXORQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VSUBPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VSUBPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VSUBPDrr %xmm0, %xmm1 + %xmm0 = VSUBPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VSUBPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VSUBPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VSUBPSrr %xmm0, %xmm1 + %xmm0 = VSUBPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VXORPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VXORPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VXORPDrr %xmm0, %xmm1 + %xmm0 = VXORPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VXORPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VXORPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VXORPSrr %xmm0, %xmm1 + %xmm0 = VXORPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMADDUBSWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMADDUBSWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMADDUBSWrr %xmm0, %xmm1 + %xmm0 = VPMADDUBSWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPMADDWDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPMADDWDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPMADDWDrr %xmm0, %xmm1 + %xmm0 = VPMADDWDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPACKSSDWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPACKSSDWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPACKSSDWrr %xmm0, %xmm1 + %xmm0 = VPACKSSDWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPACKSSWBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPACKSSWBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPACKSSWBrr %xmm0, %xmm1 + %xmm0 = VPACKSSWBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPACKUSDWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPACKUSDWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPACKUSDWrr %xmm0, %xmm1 + %xmm0 = VPACKUSDWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPACKUSWBrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPACKUSWBZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPACKUSWBrr %xmm0, %xmm1 + %xmm0 = VPACKUSWBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPUNPCKHBWrm %xmm0, %rip, 1, _, %rax, _ + 
%xmm0 = VPUNPCKHBWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPUNPCKHBWrr %xmm0, %xmm1 + %xmm0 = VPUNPCKHBWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPUNPCKHDQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPUNPCKHDQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPUNPCKHDQrr %xmm0, %xmm1 + %xmm0 = VPUNPCKHDQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPUNPCKHQDQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPUNPCKHQDQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPUNPCKHQDQrr %xmm0, %xmm1 + %xmm0 = VPUNPCKHQDQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPUNPCKHWDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPUNPCKHWDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPUNPCKHWDrr %xmm0, %xmm1 + %xmm0 = VPUNPCKHWDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPUNPCKLBWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPUNPCKLBWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPUNPCKLBWrr %xmm0, %xmm1 + %xmm0 = VPUNPCKLBWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPUNPCKLDQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPUNPCKLDQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPUNPCKLDQrr %xmm0, %xmm1 + %xmm0 = VPUNPCKLDQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPUNPCKLQDQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPUNPCKLQDQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPUNPCKLQDQrr %xmm0, %xmm1 + %xmm0 = VPUNPCKLQDQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPUNPCKLWDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPUNPCKLWDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPUNPCKLWDrr %xmm0, %xmm1 + %xmm0 = VPUNPCKLWDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VUNPCKHPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VUNPCKHPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VUNPCKHPDrr %xmm0, %xmm1 + %xmm0 = VUNPCKHPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VUNPCKHPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VUNPCKHPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VUNPCKHPSrr %xmm0, %xmm1 + %xmm0 = VUNPCKHPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VUNPCKLPDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = 
VUNPCKLPDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VUNPCKLPDrr %xmm0, %xmm1 + %xmm0 = VUNPCKLPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VUNPCKLPSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VUNPCKLPSZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VUNPCKLPSrr %xmm0, %xmm1 + %xmm0 = VUNPCKLPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VFMADD132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD132PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD132PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD132PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD132PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD213PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD213PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD213PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD213PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD231PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD231PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD231PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD231PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADDSUB132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADDSUB132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADDSUB132PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADDSUB132PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADDSUB132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADDSUB132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ 
+ ; CHECK: %xmm0 = VFMADDSUB132PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADDSUB132PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADDSUB213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADDSUB213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADDSUB213PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADDSUB213PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADDSUB213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADDSUB213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADDSUB213PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADDSUB213PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADDSUB231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADDSUB231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADDSUB231PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADDSUB231PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADDSUB231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADDSUB231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADDSUB231PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADDSUB231PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB132PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB132PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB132PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB132PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB213PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB213PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB213PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB213PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = 
VFMSUB231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB231PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB231PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB231PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB231PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUBADD132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUBADD132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUBADD132PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUBADD132PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUBADD132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUBADD132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUBADD132PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUBADD132PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUBADD213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUBADD213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUBADD213PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUBADD213PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUBADD213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUBADD213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUBADD213PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUBADD213PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUBADD231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUBADD231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUBADD231PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUBADD231PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUBADD231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUBADD231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUBADD231PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUBADD231PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD132PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD132PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 
= VFNMADD132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD132PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD132PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD213PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD213PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD213PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD213PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD231PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD231PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD231PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD231PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB132PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB132PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB132PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB132PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB213PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB213PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB213PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB213PSZ128r %xmm0, %xmm1, %xmm2 
+ ; CHECK: %xmm0 = VFNMSUB231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB231PDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB231PDZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB231PSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB231PSZ128r %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VPSLLDri %xmm0, 7 + %xmm0 = VPSLLDZ128ri %xmm0, 7 + ; CHECK: %xmm0 = VPSLLDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSLLDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSLLDrr %xmm0, 14 + %xmm0 = VPSLLDZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSLLQri %xmm0, 7 + %xmm0 = VPSLLQZ128ri %xmm0, 7 + ; CHECK: %xmm0 = VPSLLQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSLLQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSLLQrr %xmm0, 14 + %xmm0 = VPSLLQZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSLLVDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSLLVDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSLLVDrr %xmm0, 14 + %xmm0 = VPSLLVDZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSLLVQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSLLVQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSLLVQrr %xmm0, 14 + %xmm0 = VPSLLVQZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSLLWri %xmm0, 7 + %xmm0 = VPSLLWZ128ri %xmm0, 7 + ; CHECK: %xmm0 = VPSLLWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSLLWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSLLWrr %xmm0, 14 + %xmm0 = VPSLLWZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSRADri %xmm0, 7 + %xmm0 = VPSRADZ128ri %xmm0, 7 + ; CHECK: %xmm0 = VPSRADrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSRADZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSRADrr %xmm0, 14 + %xmm0 = VPSRADZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSRAVDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSRAVDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSRAVDrr %xmm0, 14 + %xmm0 = VPSRAVDZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSRAWri 
%xmm0, 7 + %xmm0 = VPSRAWZ128ri %xmm0, 7 + ; CHECK: %xmm0 = VPSRAWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSRAWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSRAWrr %xmm0, 14 + %xmm0 = VPSRAWZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSRLDQri %xmm0, 14 + %xmm0 = VPSRLDQZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSRLDri %xmm0, 7 + %xmm0 = VPSRLDZ128ri %xmm0, 7 + ; CHECK: %xmm0 = VPSRLDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSRLDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSRLDrr %xmm0, 14 + %xmm0 = VPSRLDZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSRLQri %xmm0, 7 + %xmm0 = VPSRLQZ128ri %xmm0, 7 + ; CHECK: %xmm0 = VPSRLQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSRLQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSRLQrr %xmm0, 14 + %xmm0 = VPSRLQZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSRLVDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSRLVDZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSRLVDrr %xmm0, 14 + %xmm0 = VPSRLVDZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSRLVQrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSRLVQZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSRLVQrr %xmm0, 14 + %xmm0 = VPSRLVQZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPSRLWri %xmm0, 7 + %xmm0 = VPSRLWZ128ri %xmm0, 7 + ; CHECK: %xmm0 = VPSRLWrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VPSRLWZ128rm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPSRLWrr %xmm0, 14 + %xmm0 = VPSRLWZ128rr %xmm0, 14 + ; CHECK: %xmm0 = VPERMILPDmi %rdi, 1, _, 0, _, _ + %xmm0 = VPERMILPDZ128mi %rdi, 1, _, 0, _, _ + ; CHECK: %xmm0 = VPERMILPDri %xmm0, 9 + %xmm0 = VPERMILPDZ128ri %xmm0, 9 + ; CHECK: %xmm0 = VPERMILPDrm %xmm0, %rdi, 1, _, 0, _ + %xmm0 = VPERMILPDZ128rm %xmm0, %rdi, 1, _, 0, _ + ; CHECK: %xmm0 = VPERMILPDrr %xmm0, %xmm1 + %xmm0 = VPERMILPDZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPERMILPSmi %rdi, 1, _, 0, _, _ + %xmm0 = VPERMILPSZ128mi %rdi, 1, _, 0, _, _ + ; CHECK: %xmm0 = VPERMILPSri %xmm0, 9 + %xmm0 = VPERMILPSZ128ri %xmm0, 9 + ; CHECK: %xmm0 = VPERMILPSrm %xmm0, %rdi, 1, _, 0, _ + %xmm0 = VPERMILPSZ128rm 
%xmm0, %rdi, 1, _, 0, _ + ; CHECK: %xmm0 = VPERMILPSrr %xmm0, %xmm1 + %xmm0 = VPERMILPSZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VCVTPH2PSrm %rdi, %xmm0, 1, _, 0 + %xmm0 = VCVTPH2PSZ128rm %rdi, %xmm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTPH2PSrr %xmm0 + %xmm0 = VCVTPH2PSZ128rr %xmm0 + ; CHECK: %xmm0 = VCVTDQ2PDrm %rdi, %xmm0, 1, _, 0 + %xmm0 = VCVTDQ2PDZ128rm %rdi, %xmm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTDQ2PDrr %xmm0 + %xmm0 = VCVTDQ2PDZ128rr %xmm0 + ; CHECK: %xmm0 = VCVTDQ2PSrm %rdi, %xmm0, 1, _, 0 + %xmm0 = VCVTDQ2PSZ128rm %rdi, %xmm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTDQ2PSrr %xmm0 + %xmm0 = VCVTDQ2PSZ128rr %xmm0 + ; CHECK: %xmm0 = VCVTPD2DQrm %rdi, %xmm0, 1, _, 0 + %xmm0 = VCVTPD2DQZ128rm %rdi, %xmm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTPD2DQrr %xmm0 + %xmm0 = VCVTPD2DQZ128rr %xmm0 + ; CHECK: %xmm0 = VCVTPD2PSrm %rdi, %xmm0, 1, _, 0 + %xmm0 = VCVTPD2PSZ128rm %rdi, %xmm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTPD2PSrr %xmm0 + %xmm0 = VCVTPD2PSZ128rr %xmm0 + ; CHECK: %xmm0 = VCVTPS2DQrm %rdi, %xmm0, 1, _, 0 + %xmm0 = VCVTPS2DQZ128rm %rdi, %xmm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTPS2DQrr %xmm0 + %xmm0 = VCVTPS2DQZ128rr %xmm0 + ; CHECK: %xmm0 = VCVTPS2PDrm %rdi, %xmm0, 1, _, 0 + %xmm0 = VCVTPS2PDZ128rm %rdi, %xmm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTPS2PDrr %xmm0 + %xmm0 = VCVTPS2PDZ128rr %xmm0 + ; CHECK: %xmm0 = VCVTTPD2DQrm %rdi, %xmm0, 1, _, 0 + %xmm0 = VCVTTPD2DQZ128rm %rdi, %xmm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTTPD2DQrr %xmm0 + %xmm0 = VCVTTPD2DQZ128rr %xmm0 + ; CHECK: %xmm0 = VCVTTPS2DQrm %rdi, %xmm0, 1, _, 0 + %xmm0 = VCVTTPS2DQZ128rm %rdi, %xmm0, 1, _, 0 + ; CHECK: %xmm0 = VCVTTPS2DQrr %xmm0 + %xmm0 = VCVTTPS2DQZ128rr %xmm0 + ; CHECK: %xmm0 = VSQRTPDm %rdi, _, _, _, _ + %xmm0 = VSQRTPDZ128m %rdi, _, _, _, _ + ; CHECK: %xmm0 = VSQRTPDr %xmm0 + %xmm0 = VSQRTPDZ128r %xmm0 + ; CHECK: %xmm0 = VSQRTPSm %rdi, _, _, _, _ + %xmm0 = VSQRTPSZ128m %rdi, _, _, _, _ + ; CHECK: %xmm0 = VSQRTPSr %xmm0 + %xmm0 = VSQRTPSZ128r %xmm0 + ; CHECK: %xmm0 = VMOVDDUPrm %rdi, 1, _, 0, _ + %xmm0 = VMOVDDUPZ128rm 
%rdi, 1, _, 0, _ + ; CHECK: %xmm0 = VMOVDDUPrr %xmm0 + %xmm0 = VMOVDDUPZ128rr %xmm0 + ; CHECK: %xmm0 = VMOVSHDUPrm %rdi, 1, _, 0, _ + %xmm0 = VMOVSHDUPZ128rm %rdi, 1, _, 0, _ + ; CHECK: %xmm0 = VMOVSHDUPrr %xmm0 + %xmm0 = VMOVSHDUPZ128rr %xmm0 + ; CHECK: %xmm0 = VMOVSLDUPrm %rdi, 1, _, 0, _ + %xmm0 = VMOVSLDUPZ128rm %rdi, 1, _, 0, _ + ; CHECK: %xmm0 = VMOVSLDUPrr %xmm0 + %xmm0 = VMOVSLDUPZ128rr %xmm0 + ; CHECK: %xmm0 = VPSHUFBrm %xmm0, _, _, _, _, _ + %xmm0 = VPSHUFBZ128rm %xmm0, _, _, _, _, _ + ; CHECK: %xmm0 = VPSHUFBrr %xmm0, %xmm1 + %xmm0 = VPSHUFBZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VPSHUFDmi %rdi, 1, _, 0, _, _ + %xmm0 = VPSHUFDZ128mi %rdi, 1, _, 0, _, _ + ; CHECK: %xmm0 = VPSHUFDri %xmm0, -24 + %xmm0 = VPSHUFDZ128ri %xmm0, -24 + ; CHECK: %xmm0 = VPSHUFHWmi %rdi, 1, _, 0, _, _ + %xmm0 = VPSHUFHWZ128mi %rdi, 1, _, 0, _, _ + ; CHECK: %xmm0 = VPSHUFHWri %xmm0, -24 + %xmm0 = VPSHUFHWZ128ri %xmm0, -24 + ; CHECK: %xmm0 = VPSHUFLWmi %rdi, 1, _, 0, _, _ + %xmm0 = VPSHUFLWZ128mi %rdi, 1, _, 0, _, _ + ; CHECK: %xmm0 = VPSHUFLWri %xmm0, -24 + %xmm0 = VPSHUFLWZ128ri %xmm0, -24 + ; CHECK: %xmm0 = VPSLLDQri %xmm0, %xmm1 + %xmm0 = VPSLLDQZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VSHUFPDrmi %xmm0, _, _, _, _, _, _ + %xmm0 = VSHUFPDZ128rmi %xmm0, _, _, _, _, _, _ + ; CHECK: %xmm0 = VSHUFPDrri %xmm0, _, _ + %xmm0 = VSHUFPDZ128rri %xmm0, _, _ + ; CHECK: %xmm0 = VSHUFPSrmi %xmm0, _, _, _, _, _, _ + %xmm0 = VSHUFPSZ128rmi %xmm0, _, _, _, _, _, _ + ; CHECK: %xmm0 = VSHUFPSrri %xmm0, _, _ + %xmm0 = VSHUFPSZ128rri %xmm0, _, _ + ; CHECK: %xmm0 = VPSADBWrm %xmm0, 1, _, %rax, _, _ + %xmm0 = VPSADBWZ128rm %xmm0, 1, _, %rax, _, _ + ; CHECK: %xmm0 = VPSADBWrr %xmm0, %xmm1 + %xmm0 = VPSADBWZ128rr %xmm0, %xmm1 + ; CHECK: %xmm0 = VBROADCASTSSrm %rip, _, _, _, _ + %xmm0 = VBROADCASTSSZ128m %rip, _, _, _, _ + ; CHECK: %xmm0 = VBROADCASTSSrr %xmm0 + %xmm0 = VBROADCASTSSZ128r %xmm0 + ; CHECK: %xmm0 = VBROADCASTSSrr %xmm0 + %xmm0 = VBROADCASTSSZ128r_s %xmm0 + ; CHECK: %xmm0 = VPBROADCASTBrm %rip, 
_, _, _, _ + %xmm0 = VPBROADCASTBZ128m %rip, _, _, _, _ + ; CHECK: %xmm0 = VPBROADCASTBrr %xmm0 + %xmm0 = VPBROADCASTBZ128r %xmm0 + ; CHECK: %xmm0 = VPBROADCASTDrm %rip, _, _, _, _ + %xmm0 = VPBROADCASTDZ128m %rip, _, _, _, _ + ; CHECK: %xmm0 = VPBROADCASTDrr %xmm0 + %xmm0 = VPBROADCASTDZ128r %xmm0 + ; CHECK: %xmm0 = VPBROADCASTQrm %rip, _, _, _, _ + %xmm0 = VPBROADCASTQZ128m %rip, _, _, _, _ + ; CHECK: %xmm0 = VPBROADCASTQrr %xmm0 + %xmm0 = VPBROADCASTQZ128r %xmm0 + ; CHECK: %xmm0 = VPBROADCASTWrm %rip, _, _, _, _ + %xmm0 = VPBROADCASTWZ128m %rip, _, _, _, _ + ; CHECK: %xmm0 = VPBROADCASTWrr %xmm0 + %xmm0 = VPBROADCASTWZ128r %xmm0 + ; CHECK: %xmm0 = VCVTPS2PHrr %xmm0, 2 + %xmm0 = VCVTPS2PHZ128rr %xmm0, 2 + ; CHECK: VCVTPS2PHmr %rdi, %xmm0, 1, _, 0, _, _ + VCVTPS2PHZ128mr %rdi, %xmm0, 1, _, 0, _, _ + ; CHECK: %xmm0 = VPABSBrm %rip, 1, _, %rax, _ + %xmm0 = VPABSBZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPABSBrr %xmm0 + %xmm0 = VPABSBZ128rr %xmm0 + ; CHECK: %xmm0 = VPABSDrm %rip, 1, _, %rax, _ + %xmm0 = VPABSDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPABSDrr %xmm0 + %xmm0 = VPABSDZ128rr %xmm0 + ; CHECK: %xmm0 = VPABSWrm %rip, 1, _, %rax, _ + %xmm0 = VPABSWZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VPABSWrr %xmm0 + %xmm0 = VPABSWZ128rr %xmm0 + ; CHECK: %xmm0 = VPALIGNRrmi %xmm0, _, _, _, _, _, _ + %xmm0 = VPALIGNRZ128rmi %xmm0, _, _, _, _, _, _ + ; CHECK: %xmm0 = VPALIGNRrri %xmm0, %xmm1, 15 + %xmm0 = VPALIGNRZ128rri %xmm0, %xmm1, 15 + + RET 0, %zmm0, %zmm1 +... 
+--- + # CHECK-LABEL: name: evex_scalar_to_vex_test + # CHECK: bb.0: + +name: evex_scalar_to_vex_test +body: | + bb.0: + + ; CHECK: %xmm0 = VADDSDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VADDSDZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VADDSDrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VADDSDZrm_Int %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VADDSDrr %xmm0, %xmm1 + %xmm0 = VADDSDZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VADDSDrr_Int %xmm0, %xmm1 + %xmm0 = VADDSDZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VADDSSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VADDSSZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VADDSSrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VADDSSZrm_Int %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VADDSSrr %xmm0, %xmm1 + %xmm0 = VADDSSZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VADDSSrr_Int %xmm0, %xmm1 + %xmm0 = VADDSSZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VDIVSDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VDIVSDZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VDIVSDrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VDIVSDZrm_Int %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VDIVSDrr %xmm0, %xmm1 + %xmm0 = VDIVSDZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VDIVSDrr_Int %xmm0, %xmm1 + %xmm0 = VDIVSDZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VDIVSSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VDIVSSZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VDIVSSrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VDIVSSZrm_Int %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VDIVSSrr %xmm0, %xmm1 + %xmm0 = VDIVSSZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VDIVSSrr_Int %xmm0, %xmm1 + %xmm0 = VDIVSSZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VMAXCSDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMAXCSDZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMAXCSDrr %xmm0, %xmm1 + %xmm0 = VMAXCSDZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMAXCSSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMAXCSSZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMAXCSSrr %xmm0, %xmm1 + %xmm0 = VMAXCSSZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMAXSDrm %xmm0, %rip, 1, _, 
%rax, _ + %xmm0 = VMAXSDZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMAXSDrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMAXSDZrm_Int %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMAXSDrr %xmm0, %xmm1 + %xmm0 = VMAXSDZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMAXSDrr_Int %xmm0, %xmm1 + %xmm0 = VMAXSDZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VMAXSSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMAXSSZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMAXSSrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMAXSSZrm_Int %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMAXSSrr %xmm0, %xmm1 + %xmm0 = VMAXSSZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMAXSSrr_Int %xmm0, %xmm1 + %xmm0 = VMAXSSZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VMINCSDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMINCSDZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMINCSDrr %xmm0, %xmm1 + %xmm0 = VMINCSDZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMINCSSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMINCSSZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMINCSSrr %xmm0, %xmm1 + %xmm0 = VMINCSSZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMINSDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMINSDZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMINSDrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMINSDZrm_Int %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMINSDrr %xmm0, %xmm1 + %xmm0 = VMINSDZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMINSDrr_Int %xmm0, %xmm1 + %xmm0 = VMINSDZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VMINSSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMINSSZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMINSSrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMINSSZrm_Int %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMINSSrr %xmm0, %xmm1 + %xmm0 = VMINSSZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMINSSrr_Int %xmm0, %xmm1 + %xmm0 = VMINSSZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VMULSDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMULSDZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMULSDrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMULSDZrm_Int %xmm0, %rip, 1, _, %rax, _ + 
; CHECK: %xmm0 = VMULSDrr %xmm0, %xmm1 + %xmm0 = VMULSDZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMULSDrr_Int %xmm0, %xmm1 + %xmm0 = VMULSDZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VMULSSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMULSSZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMULSSrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VMULSSZrm_Int %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VMULSSrr %xmm0, %xmm1 + %xmm0 = VMULSSZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VMULSSrr_Int %xmm0, %xmm1 + %xmm0 = VMULSSZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VSUBSDrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VSUBSDZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VSUBSDrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VSUBSDZrm_Int %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VSUBSDrr %xmm0, %xmm1 + %xmm0 = VSUBSDZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VSUBSDrr_Int %xmm0, %xmm1 + %xmm0 = VSUBSDZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VSUBSSrm %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VSUBSSZrm %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VSUBSSrm_Int %xmm0, %rip, 1, _, %rax, _ + %xmm0 = VSUBSSZrm_Int %xmm0, %rip, 1, _, %rax, _ + ; CHECK: %xmm0 = VSUBSSrr %xmm0, %xmm1 + %xmm0 = VSUBSSZrr %xmm0, %xmm1 + ; CHECK: %xmm0 = VSUBSSrr_Int %xmm0, %xmm1 + %xmm0 = VSUBSSZrr_Int %xmm0, %xmm1 + ; CHECK: %xmm0 = VFMADD132SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD132SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD132SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD132SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD132SDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD132SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD132SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD132SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD132SSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD132SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD132SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD132SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD132SSr %xmm0, %xmm1, %xmm2 + 
%xmm0 = VFMADD132SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD132SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD132SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD213SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD213SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD213SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD213SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD213SDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD213SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD213SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD213SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD213SSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD213SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD213SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD213SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD213SSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD213SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD213SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD213SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD231SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD231SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD231SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD231SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD231SDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD231SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD231SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD231SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD231SSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD231SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD231SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMADD231SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMADD231SSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD231SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMADD231SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMADD231SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB132SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = 
VFMSUB132SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB132SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB132SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB132SDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB132SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB132SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB132SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB132SSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB132SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB132SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB132SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB132SSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB132SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB132SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB132SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB213SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB213SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB213SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB213SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB213SDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB213SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB213SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB213SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB213SSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB213SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB213SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB213SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB213SSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB213SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB213SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB213SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB231SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB231SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB231SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB231SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB231SDr %xmm0, %xmm1, %xmm2 
+ %xmm0 = VFMSUB231SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB231SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB231SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB231SSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB231SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB231SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFMSUB231SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFMSUB231SSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB231SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFMSUB231SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFMSUB231SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD132SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD132SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD132SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD132SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD132SDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD132SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD132SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD132SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD132SSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD132SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD132SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD132SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD132SSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD132SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD132SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD132SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD213SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD213SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD213SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD213SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD213SDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD213SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD213SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD213SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD213SSm %xmm0, %xmm0, 
%rsi, 1, _, 0, _ + %xmm0 = VFNMADD213SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD213SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD213SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD213SSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD213SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD213SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD213SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD231SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD231SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD231SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD231SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD231SDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD231SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD231SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD231SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD231SSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD231SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD231SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMADD231SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMADD231SSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD231SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMADD231SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMADD231SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB132SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB132SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB132SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB132SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB132SDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB132SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB132SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB132SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB132SSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB132SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB132SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB132SSZm_Int %xmm0, %xmm0, %rsi, 
1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB132SSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB132SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB132SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB132SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB213SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB213SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB213SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB213SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB213SDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB213SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB213SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB213SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB213SSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB213SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB213SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB213SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB213SSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB213SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB213SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB213SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB231SDm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB231SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB231SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB231SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB231SDr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB231SDZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB231SDr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB231SDZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB231SSm %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB231SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB231SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + %xmm0 = VFNMSUB231SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _ + ; CHECK: %xmm0 = VFNMSUB231SSr %xmm0, %xmm1, %xmm2 + %xmm0 = VFNMSUB231SSZr %xmm0, %xmm1, %xmm2 + ; CHECK: %xmm0 = VFNMSUB231SSr_Int %xmm0, %xmm1, %xmm2 + %xmm0 = 
VFNMSUB231SSZr_Int %xmm0, %xmm1, %xmm2 + ; CHECK: VPEXTRBmr %rdi, 1, _, 0, _, %xmm0, 3 + VPEXTRBZmr %rdi, 1, _, 0, _, %xmm0, 3 + ; CHECK: %eax = VPEXTRBrr %xmm0, 1 + %eax = VPEXTRBZrr %xmm0, 1 + ; CHECK: VPEXTRDmr %rdi, 1, _, 0, _, %xmm0, 3 + VPEXTRDZmr %rdi, 1, _, 0, _, %xmm0, 3 + ; CHECK: %eax = VPEXTRDrr %xmm0, 1 + %eax = VPEXTRDZrr %xmm0, 1 + ; CHECK: VPEXTRQmr %rdi, 1, _, 0, _, %xmm0, 3 + VPEXTRQZmr %rdi, 1, _, 0, _, %xmm0, 3 + ; CHECK: %rax = VPEXTRQrr %xmm0, 1 + %rax = VPEXTRQZrr %xmm0, 1 + ; CHECK: VPEXTRWmr %rdi, 1, _, 0, _, %xmm0, 3 + VPEXTRWZmr %rdi, 1, _, 0, _, %xmm0, 3 + ; CHECK: %eax = VPEXTRWri %xmm0, 1 + %eax = VPEXTRWZrr %xmm0, 1 + ; CHECK: %xmm0 = VPINSRBrm %xmm0, %rsi, 1, _, 0, _, 3 + %xmm0 = VPINSRBZrm %xmm0, %rsi, 1, _, 0, _, 3 + ; CHECK: %xmm0 = VPINSRBrr %xmm0, %edi, 5 + %xmm0 = VPINSRBZrr %xmm0, %edi, 5 + ; CHECK: %xmm0 = VPINSRDrm %xmm0, %rsi, 1, _, 0, _, 3 + %xmm0 = VPINSRDZrm %xmm0, %rsi, 1, _, 0, _, 3 + ; CHECK: %xmm0 = VPINSRDrr %xmm0, %edi, 5 + %xmm0 = VPINSRDZrr %xmm0, %edi, 5 + ; CHECK: %xmm0 = VPINSRQrm %xmm0, %rsi, 1, _, 0, _, 3 + %xmm0 = VPINSRQZrm %xmm0, %rsi, 1, _, 0, _, 3 + ; CHECK: %xmm0 = VPINSRQrr %xmm0, %rdi, 5 + %xmm0 = VPINSRQZrr %xmm0, %rdi, 5 + ; CHECK: %xmm0 = VPINSRWrmi %xmm0, %rsi, 1, _, 0, _, 3 + %xmm0 = VPINSRWZrm %xmm0, %rsi, 1, _, 0, _, 3 + ; CHECK: %xmm0 = VPINSRWrri %xmm0, %edi, 5 + %xmm0 = VPINSRWZrr %xmm0, %edi, 5 + ; CHECK: %xmm0 = VSQRTSDm %xmm0, _, _, _, _, _ + %xmm0 = VSQRTSDZm %xmm0, _, _, _, _, _ + ; CHECK: %xmm0 = VSQRTSDm_Int %xmm0, _, _, _, _, _ + %xmm0 = VSQRTSDZm_Int %xmm0, _, _, _, _, _ + ; CHECK: %xmm0 = VSQRTSDr %xmm0, _ + %xmm0 = VSQRTSDZr %xmm0, _ + ; CHECK: %xmm0 = VSQRTSDr_Int %xmm0, _ + %xmm0 = VSQRTSDZr_Int %xmm0, _ + ; CHECK: %xmm0 = VSQRTSSm %xmm0, _, _, _, _, _ + %xmm0 = VSQRTSSZm %xmm0, _, _, _, _, _ + ; CHECK: %xmm0 = VSQRTSSm_Int %xmm0, _, _, _, _, _ + %xmm0 = VSQRTSSZm_Int %xmm0, _, _, _, _, _ + ; CHECK: %xmm0 = VSQRTSSr %xmm0, _ + %xmm0 = VSQRTSSZr %xmm0, _ + ; CHECK: %xmm0 = 
VSQRTSSr_Int %xmm0, _ + %xmm0 = VSQRTSSZr_Int %xmm0, _ + ; CHECK: %rdi = VCVTSD2SI64rm %rdi, %xmm0, 1, _, 0 + %rdi = VCVTSD2SI64Zrm %rdi, %xmm0, 1, _, 0 + ; CHECK: %rdi = VCVTSD2SI64rr %xmm0 + %rdi = VCVTSD2SI64Zrr %xmm0 + ; CHECK: %edi = VCVTSD2SIrm %rdi, %xmm0, 1, _, 0 + %edi = VCVTSD2SIZrm %rdi, %xmm0, 1, _, 0 + ; CHECK: %edi = VCVTSD2SIrr %xmm0 + %edi = VCVTSD2SIZrr %xmm0 + ; CHECK: %xmm0 = VCVTSD2SSrm %xmm0, %rdi, 1, _, 0, _ + %xmm0 = VCVTSD2SSZrm %xmm0, %rdi, 1, _, 0, _ + ; CHECK: %xmm0 = VCVTSD2SSrr %xmm0, _ + %xmm0 = VCVTSD2SSZrr %xmm0, _ + ; CHECK: %xmm0 = VCVTSI2SDrm %xmm0, %rdi, 1, _, 0, _ + %xmm0 = VCVTSI2SDZrm %xmm0, %rdi, 1, _, 0, _ + ; CHECK: %xmm0 = Int_VCVTSI2SDrm %xmm0, %rdi, 1, _, 0, _ + %xmm0 = VCVTSI2SDZrm_Int %xmm0, %rdi, 1, _, 0, _ + ; CHECK: %xmm0 = VCVTSI2SDrr %xmm0, _ + %xmm0 = VCVTSI2SDZrr %xmm0, _ + ; CHECK: %xmm0 = Int_VCVTSI2SDrr %xmm0, _ + %xmm0 = VCVTSI2SDZrr_Int %xmm0, _ + ; CHECK: %xmm0 = VCVTSI2SSrm %xmm0, %rdi, 1, _, 0, _ + %xmm0 = VCVTSI2SSZrm %xmm0, %rdi, 1, _, 0, _ + ; CHECK: %xmm0 = Int_VCVTSI2SSrm %xmm0, %rdi, 1, _, 0, _ + %xmm0 = VCVTSI2SSZrm_Int %xmm0, %rdi, 1, _, 0, _ + ; CHECK: %xmm0 = VCVTSI2SSrr %xmm0, _ + %xmm0 = VCVTSI2SSZrr %xmm0, _ + ; CHECK: %xmm0 = Int_VCVTSI2SSrr %xmm0, _ + %xmm0 = VCVTSI2SSZrr_Int %xmm0, _ + ; CHECK: %xmm0 = VCVTSS2SDrm %xmm0, %rdi, 1, _, 0, _ + %xmm0 = VCVTSS2SDZrm %xmm0, %rdi, 1, _, 0, _ + ; CHECK: %xmm0 = VCVTSS2SDrr %xmm0, _ + %xmm0 = VCVTSS2SDZrr %xmm0, _ + ; CHECK: %rdi = VCVTSS2SI64rm %rdi, %xmm0, 1, _, 0 + %rdi = VCVTSS2SI64Zrm %rdi, %xmm0, 1, _, 0 + ; CHECK: %rdi = VCVTSS2SI64rr %xmm0 + %rdi = VCVTSS2SI64Zrr %xmm0 + ; CHECK: %edi = VCVTSS2SIrm %rdi, %xmm0, 1, _, 0 + %edi = VCVTSS2SIZrm %rdi, %xmm0, 1, _, 0 + ; CHECK: %edi = VCVTSS2SIrr %xmm0 + %edi = VCVTSS2SIZrr %xmm0 + ; CHECK: %rdi = VCVTTSD2SI64rm %rdi, %xmm0, 1, _, 0 + %rdi = VCVTTSD2SI64Zrm %rdi, %xmm0, 1, _, 0 + ; CHECK: %rdi = Int_VCVTTSD2SI64rm %rdi, %xmm0, 1, _, 0 + %rdi = VCVTTSD2SI64Zrm_Int %rdi, %xmm0, 1, _, 0 + ; CHECK: 
%rdi = VCVTTSD2SI64rr %xmm0 + %rdi = VCVTTSD2SI64Zrr %xmm0 + ; CHECK: %rdi = Int_VCVTTSD2SI64rr %xmm0 + %rdi = VCVTTSD2SI64Zrr_Int %xmm0 + ; CHECK: %edi = VCVTTSD2SIrm %rdi, %xmm0, 1, _, 0 + %edi = VCVTTSD2SIZrm %rdi, %xmm0, 1, _, 0 + ; CHECK: %edi = Int_VCVTTSD2SIrm %rdi, %xmm0, 1, _, 0 + %edi = VCVTTSD2SIZrm_Int %rdi, %xmm0, 1, _, 0 + ; CHECK: %edi = VCVTTSD2SIrr %xmm0 + %edi = VCVTTSD2SIZrr %xmm0 + ; CHECK: %edi = Int_VCVTTSD2SIrr %xmm0 + %edi = VCVTTSD2SIZrr_Int %xmm0 + ; CHECK: %rdi = VCVTTSS2SI64rm %rdi, %xmm0, 1, _, 0 + %rdi = VCVTTSS2SI64Zrm %rdi, %xmm0, 1, _, 0 + ; CHECK: %rdi = Int_VCVTTSS2SI64rm %rdi, %xmm0, 1, _, 0 + %rdi = VCVTTSS2SI64Zrm_Int %rdi, %xmm0, 1, _, 0 + ; CHECK: %rdi = VCVTTSS2SI64rr %xmm0 + %rdi = VCVTTSS2SI64Zrr %xmm0 + ; CHECK: %rdi = Int_VCVTTSS2SI64rr %xmm0 + %rdi = VCVTTSS2SI64Zrr_Int %xmm0 + ; CHECK: %edi = VCVTTSS2SIrm %rdi, %xmm0, 1, _, 0 + %edi = VCVTTSS2SIZrm %rdi, %xmm0, 1, _, 0 + ; CHECK: %edi = Int_VCVTTSS2SIrm %rdi, %xmm0, 1, _, 0 + %edi = VCVTTSS2SIZrm_Int %rdi, %xmm0, 1, _, 0 + ; CHECK: %edi = VCVTTSS2SIrr %xmm0 + %edi = VCVTTSS2SIZrr %xmm0 + ; CHECK: %edi = Int_VCVTTSS2SIrr %xmm0 + %edi = VCVTTSS2SIZrr_Int %xmm0 + ; CHECK: %xmm0 = VMOV64toSDrr %rdi + %xmm0 = VMOV64toSDZrr %rdi + ; CHECK: %xmm0 = VMOVDI2SSrm %rip, _, _, _, _ + %xmm0 = VMOVDI2SSZrm %rip, _, _, _, _ + ; CHECK: %xmm0 = VMOVDI2SSrr %eax + %xmm0 = VMOVDI2SSZrr %eax + ; CHECK: VMOVSDmr %rdi, %xmm0, _, _, _, _ + VMOVSDZmr %rdi, %xmm0, _, _, _, _ + ; CHECK: %xmm0 = VMOVSDrm %rip, _, _, _, _ + %xmm0 = VMOVSDZrm %rip, _, _, _, _ + ; CHECK: %xmm0 = VMOVSDrr %xmm0, _ + %xmm0 = VMOVSDZrr %xmm0, _ + ; CHECK: VMOVSSmr %rdi, %xmm0, _, _, _, _ + VMOVSSZmr %rdi, %xmm0, _, _, _, _ + ; CHECK: %xmm0 = VMOVSSrm %rip, _, _, _, _ + %xmm0 = VMOVSSZrm %rip, _, _, _, _ + ; CHECK: %xmm0 = VMOVSSrr %xmm0, _ + %xmm0 = VMOVSSZrr %xmm0, _ + ; CHECK: %xmm0 = VMOVSSrr_REV %xmm0, _ + %xmm0 = VMOVSSZrr_REV %xmm0, _ + ; CHECK: %xmm0 = VMOV64toPQIrr %rdi + %xmm0 = VMOV64toPQIZrr %rdi + ; CHECK: 
%xmm0 = VMOV64toSDrr %rdi + %xmm0 = VMOV64toSDZrr %rdi + ; CHECK: %xmm0 = VMOVDI2PDIrm %rip, _, _, _, _ + %xmm0 = VMOVDI2PDIZrm %rip, _, _, _, _ + ; CHECK: %xmm0 = VMOVDI2PDIrr %edi + %xmm0 = VMOVDI2PDIZrr %edi + ; CHECK: %xmm0 = VMOVLHPSrr %xmm0, _ + %xmm0 = VMOVLHPSZrr %xmm0, _ + ; CHECK: %xmm0 = VMOVHLPSrr %xmm0, _ + %xmm0 = VMOVHLPSZrr %xmm0, _ + ; CHECK: VMOVPDI2DImr %rdi, %xmm0, _, _, _, _ + VMOVPDI2DIZmr %rdi, %xmm0, _, _, _, _ + ; CHECK: %edi = VMOVPDI2DIrr %xmm0 + %edi = VMOVPDI2DIZrr %xmm0 + ; CHECK: VMOVPQI2QImr %rdi, %xmm0, _, _, _, _ + VMOVPQI2QIZmr %rdi, %xmm0, _, _, _, _ + ; CHECK: %rdi = VMOVPQIto64rr %xmm0 + %rdi = VMOVPQIto64Zrr %xmm0 + ; CHECK: %xmm0 = VMOVQI2PQIrm %rip, _, _, _, _ + %xmm0 = VMOVQI2PQIZrm %rip, _, _, _, _ + ; CHECK: %xmm0 = VMOVZPQILo2PQIrr %xmm0 + %xmm0 = VMOVZPQILo2PQIZrr %xmm0 + ; CHECK: Int_VCOMISDrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + Int_VCOMISDZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: Int_VCOMISDrr %xmm0, %xmm1, implicit-def %eflags + Int_VCOMISDZrr %xmm0, %xmm1, implicit-def %eflags + ; CHECK: Int_VCOMISSrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + Int_VCOMISSZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: Int_VCOMISSrr %xmm0, %xmm1, implicit-def %eflags + Int_VCOMISSZrr %xmm0, %xmm1, implicit-def %eflags + ; CHECK: Int_VUCOMISDrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + Int_VUCOMISDZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: Int_VUCOMISDrr %xmm0, %xmm1, implicit-def %eflags + Int_VUCOMISDZrr %xmm0, %xmm1, implicit-def %eflags + ; CHECK: Int_VUCOMISSrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + Int_VUCOMISSZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: Int_VUCOMISSrr %xmm0, %xmm1, implicit-def %eflags + Int_VUCOMISSZrr %xmm0, %xmm1, implicit-def %eflags + ; CHECK: VCOMISDrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + VCOMISDZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: VCOMISDrr %xmm0, %xmm1, implicit-def 
%eflags + VCOMISDZrr %xmm0, %xmm1, implicit-def %eflags + ; CHECK: VCOMISSrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + VCOMISSZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: VCOMISSrr %xmm0, %xmm1, implicit-def %eflags + VCOMISSZrr %xmm0, %xmm1, implicit-def %eflags + ; CHECK: VUCOMISDrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + VUCOMISDZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: VUCOMISDrr %xmm0, %xmm1, implicit-def %eflags + VUCOMISDZrr %xmm0, %xmm1, implicit-def %eflags + ; CHECK: VUCOMISSrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + VUCOMISSZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: VUCOMISSrr %xmm0, %xmm1, implicit-def %eflags + VUCOMISSZrr %xmm0, %xmm1, implicit-def %eflags + + RET 0, %zmm0, %zmm1 +... +--- + # CHECK-LABEL: name: evex_z256_to_evex_test + # CHECK: bb.0: + +name: evex_z256_to_evex_test +body: | + bb.0: + ; CHECK: VMOVAPDZ256mr %rdi, 1, _, 0, _, %ymm16 + VMOVAPDZ256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: %ymm16 = VMOVAPDZ256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVAPDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVAPDZ256rr %ymm16 + %ymm16 = VMOVAPDZ256rr %ymm16 + ; CHECK: %ymm16 = VMOVAPDZ256rr_REV %ymm16 + %ymm16 = VMOVAPDZ256rr_REV %ymm16 + ; CHECK: VMOVAPSZ256mr %rdi, 1, _, 0, _, %ymm16 + VMOVAPSZ256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: %ymm16 = VMOVAPSZ256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVAPSZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVAPSZ256rr %ymm16 + %ymm16 = VMOVAPSZ256rr %ymm16 + ; CHECK: %ymm16 = VMOVAPSZ256rr_REV %ymm16 + %ymm16 = VMOVAPSZ256rr_REV %ymm16 + ; CHECK: %ymm16 = VMOVDDUPZ256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVDDUPZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVDDUPZ256rr %ymm16 + %ymm16 = VMOVDDUPZ256rr %ymm16 + ; CHECK: VMOVDQA32Z256mr %rdi, 1, _, 0, _, %ymm16 + VMOVDQA32Z256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: %ymm16 = VMOVDQA32Z256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVDQA32Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVDQA32Z256rr 
%ymm16 + %ymm16 = VMOVDQA32Z256rr %ymm16 + ; CHECK: %ymm16 = VMOVDQA32Z256rr_REV %ymm16 + %ymm16 = VMOVDQA32Z256rr_REV %ymm16 + ; CHECK: VMOVDQA64Z256mr %rdi, 1, _, 0, _, %ymm16 + VMOVDQA64Z256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: %ymm16 = VMOVDQA64Z256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVDQA64Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVDQA64Z256rr %ymm16 + %ymm16 = VMOVDQA64Z256rr %ymm16 + ; CHECK: %ymm16 = VMOVDQA64Z256rr_REV %ymm16 + %ymm16 = VMOVDQA64Z256rr_REV %ymm16 + ; CHECK: VMOVDQU16Z256mr %rdi, 1, _, 0, _, %ymm16 + VMOVDQU16Z256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: %ymm16 = VMOVDQU16Z256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVDQU16Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVDQU16Z256rr %ymm16 + %ymm16 = VMOVDQU16Z256rr %ymm16 + ; CHECK: %ymm16 = VMOVDQU16Z256rr_REV %ymm16 + %ymm16 = VMOVDQU16Z256rr_REV %ymm16 + ; CHECK: VMOVDQU32Z256mr %rdi, 1, _, 0, _, %ymm16 + VMOVDQU32Z256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: %ymm16 = VMOVDQU32Z256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVDQU32Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVDQU32Z256rr %ymm16 + %ymm16 = VMOVDQU32Z256rr %ymm16 + ; CHECK: %ymm16 = VMOVDQU32Z256rr_REV %ymm16 + %ymm16 = VMOVDQU32Z256rr_REV %ymm16 + ; CHECK: VMOVDQU64Z256mr %rdi, 1, _, 0, _, %ymm16 + VMOVDQU64Z256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: %ymm16 = VMOVDQU64Z256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVDQU64Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVDQU64Z256rr %ymm16 + %ymm16 = VMOVDQU64Z256rr %ymm16 + ; CHECK: %ymm16 = VMOVDQU64Z256rr_REV %ymm16 + %ymm16 = VMOVDQU64Z256rr_REV %ymm16 + ; CHECK: VMOVDQU8Z256mr %rdi, 1, _, 0, _, %ymm16 + VMOVDQU8Z256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: %ymm16 = VMOVDQU8Z256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVDQU8Z256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVDQU8Z256rr %ymm16 + %ymm16 = VMOVDQU8Z256rr %ymm16 + ; CHECK: %ymm16 = VMOVDQU8Z256rr_REV %ymm16 + %ymm16 = VMOVDQU8Z256rr_REV %ymm16 + ; CHECK: %ymm16 = VMOVNTDQAZ256rm %rip, 1, _, %rax, _ + %ymm16 = 
VMOVNTDQAZ256rm %rip, 1, _, %rax, _ + ; CHECK: VMOVNTDQZ256mr %rdi, 1, _, 0, _, %ymm16 + VMOVNTDQZ256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: VMOVNTPDZ256mr %rdi, 1, _, 0, _, %ymm16 + VMOVNTPDZ256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: VMOVNTPSZ256mr %rdi, 1, _, 0, _, %ymm16 + VMOVNTPSZ256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: %ymm16 = VMOVSHDUPZ256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVSHDUPZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVSHDUPZ256rr %ymm16 + %ymm16 = VMOVSHDUPZ256rr %ymm16 + ; CHECK: %ymm16 = VMOVSLDUPZ256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVSLDUPZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVSLDUPZ256rr %ymm16 + %ymm16 = VMOVSLDUPZ256rr %ymm16 + ; CHECK: VMOVUPDZ256mr %rdi, 1, _, 0, _, %ymm16 + VMOVUPDZ256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: %ymm16 = VMOVUPDZ256rm %rip, 1, _, %rax, _ + %ymm16 = VMOVUPDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMOVUPDZ256rr %ymm16 + %ymm16 = VMOVUPDZ256rr %ymm16 + ; CHECK: %ymm16 = VMOVUPDZ256rr_REV %ymm16 + %ymm16 = VMOVUPDZ256rr_REV %ymm16 + ; CHECK: VMOVUPSZ256mr %rdi, 1, _, 0, _, %ymm16 + VMOVUPSZ256mr %rdi, 1, _, 0, _, %ymm16 + ; CHECK: %ymm16 = VPANDDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPANDDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPANDDZ256rr %ymm16, %ymm1 + %ymm16 = VPANDDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPANDQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPANDQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPANDQZ256rr %ymm16, %ymm1 + %ymm16 = VPANDQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPAVGBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPAVGBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPAVGBZ256rr %ymm16, %ymm1 + %ymm16 = VPAVGBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPAVGWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPAVGWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPAVGWZ256rr %ymm16, %ymm1 + %ymm16 = VPAVGWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPADDBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPADDBZ256rm %ymm16, 
%rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPADDBZ256rr %ymm16, %ymm1 + %ymm16 = VPADDBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPADDDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPADDDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPADDDZ256rr %ymm16, %ymm1 + %ymm16 = VPADDDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPADDQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPADDQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPADDQZ256rr %ymm16, %ymm1 + %ymm16 = VPADDQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPADDSBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPADDSBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPADDSBZ256rr %ymm16, %ymm1 + %ymm16 = VPADDSBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPADDSWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPADDSWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPADDSWZ256rr %ymm16, %ymm1 + %ymm16 = VPADDSWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPADDUSBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPADDUSBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPADDUSBZ256rr %ymm16, %ymm1 + %ymm16 = VPADDUSBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPADDUSWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPADDUSWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPADDUSWZ256rr %ymm16, %ymm1 + %ymm16 = VPADDUSWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPADDWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPADDWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPADDWZ256rr %ymm16, %ymm1 + %ymm16 = VPADDWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VMULPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VMULPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMULPDZ256rr %ymm16, %ymm1 + %ymm16 = VMULPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VMULPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VMULPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMULPSZ256rr %ymm16, %ymm1 + %ymm16 = VMULPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VORPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VORPDZ256rm 
%ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VORPDZ256rr %ymm16, %ymm1 + %ymm16 = VORPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VORPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VORPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VORPSZ256rr %ymm16, %ymm1 + %ymm16 = VORPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMADDUBSWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMADDUBSWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMADDUBSWZ256rr %ymm16, %ymm1 + %ymm16 = VPMADDUBSWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMADDWDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMADDWDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMADDWDZ256rr %ymm16, %ymm1 + %ymm16 = VPMADDWDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMAXSBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMAXSBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMAXSBZ256rr %ymm16, %ymm1 + %ymm16 = VPMAXSBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMAXSDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMAXSDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMAXSDZ256rr %ymm16, %ymm1 + %ymm16 = VPMAXSDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMAXSWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMAXSWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMAXSWZ256rr %ymm16, %ymm1 + %ymm16 = VPMAXSWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMAXUBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMAXUBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMAXUBZ256rr %ymm16, %ymm1 + %ymm16 = VPMAXUBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMAXUDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMAXUDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMAXUDZ256rr %ymm16, %ymm1 + %ymm16 = VPMAXUDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMAXUWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMAXUWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMAXUWZ256rr %ymm16, %ymm1 + %ymm16 = VPMAXUWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMINSBZ256rm %ymm16, %rip, 1, _, %rax, _ 
+ %ymm16 = VPMINSBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMINSBZ256rr %ymm16, %ymm1 + %ymm16 = VPMINSBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMINSDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMINSDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMINSDZ256rr %ymm16, %ymm1 + %ymm16 = VPMINSDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMINSWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMINSWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMINSWZ256rr %ymm16, %ymm1 + %ymm16 = VPMINSWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMINUBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMINUBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMINUBZ256rr %ymm16, %ymm1 + %ymm16 = VPMINUBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMINUDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMINUDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMINUDZ256rr %ymm16, %ymm1 + %ymm16 = VPMINUDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMINUWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMINUWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMINUWZ256rr %ymm16, %ymm1 + %ymm16 = VPMINUWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMULDQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMULDQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMULDQZ256rr %ymm16, %ymm1 + %ymm16 = VPMULDQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMULHRSWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMULHRSWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMULHRSWZ256rr %ymm16, %ymm1 + %ymm16 = VPMULHRSWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMULHUWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMULHUWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMULHUWZ256rr %ymm16, %ymm1 + %ymm16 = VPMULHUWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMULHWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMULHWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMULHWZ256rr %ymm16, %ymm1 + %ymm16 = VPMULHWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = 
VPMULLDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMULLDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMULLDZ256rr %ymm16, %ymm1 + %ymm16 = VPMULLDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMULLWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMULLWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMULLWZ256rr %ymm16, %ymm1 + %ymm16 = VPMULLWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPMULUDQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPMULUDQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMULUDQZ256rr %ymm16, %ymm1 + %ymm16 = VPMULUDQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPORDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPORDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPORDZ256rr %ymm16, %ymm1 + %ymm16 = VPORDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPORQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPORQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPORQZ256rr %ymm16, %ymm1 + %ymm16 = VPORQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPSUBBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSUBBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSUBBZ256rr %ymm16, %ymm1 + %ymm16 = VPSUBBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPSUBDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSUBDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSUBDZ256rr %ymm16, %ymm1 + %ymm16 = VPSUBDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPSUBQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSUBQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSUBQZ256rr %ymm16, %ymm1 + %ymm16 = VPSUBQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPSUBSBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSUBSBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSUBSBZ256rr %ymm16, %ymm1 + %ymm16 = VPSUBSBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPSUBSWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSUBSWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSUBSWZ256rr %ymm16, %ymm1 + %ymm16 = VPSUBSWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = 
VPSUBUSBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSUBUSBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSUBUSBZ256rr %ymm16, %ymm1 + %ymm16 = VPSUBUSBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPSUBUSWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSUBUSWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSUBUSWZ256rr %ymm16, %ymm1 + %ymm16 = VPSUBUSWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPSUBWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSUBWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSUBWZ256rr %ymm16, %ymm1 + %ymm16 = VPSUBWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPXORDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPXORDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPXORDZ256rr %ymm16, %ymm1 + %ymm16 = VPXORDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPXORQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPXORQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPXORQZ256rr %ymm16, %ymm1 + %ymm16 = VPXORQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VADDPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VADDPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VADDPDZ256rr %ymm16, %ymm1 + %ymm16 = VADDPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VADDPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VADDPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VADDPSZ256rr %ymm16, %ymm1 + %ymm16 = VADDPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VANDNPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VANDNPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VANDNPDZ256rr %ymm16, %ymm1 + %ymm16 = VANDNPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VANDNPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VANDNPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VANDNPSZ256rr %ymm16, %ymm1 + %ymm16 = VANDNPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VANDPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VANDPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VANDPDZ256rr %ymm16, %ymm1 + %ymm16 = VANDPDZ256rr %ymm16, %ymm1 + ; CHECK: 
%ymm16 = VANDPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VANDPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VANDPSZ256rr %ymm16, %ymm1 + %ymm16 = VANDPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VDIVPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VDIVPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VDIVPDZ256rr %ymm16, %ymm1 + %ymm16 = VDIVPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VDIVPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VDIVPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VDIVPSZ256rr %ymm16, %ymm1 + %ymm16 = VDIVPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VMAXCPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VMAXCPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMAXCPDZ256rr %ymm16, %ymm1 + %ymm16 = VMAXCPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VMAXCPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VMAXCPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMAXCPSZ256rr %ymm16, %ymm1 + %ymm16 = VMAXCPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VMAXPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VMAXPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMAXPDZ256rr %ymm16, %ymm1 + %ymm16 = VMAXPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VMAXPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VMAXPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMAXPSZ256rr %ymm16, %ymm1 + %ymm16 = VMAXPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VMINCPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VMINCPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMINCPDZ256rr %ymm16, %ymm1 + %ymm16 = VMINCPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VMINCPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VMINCPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMINCPSZ256rr %ymm16, %ymm1 + %ymm16 = VMINCPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VMINPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VMINPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMINPDZ256rr %ymm16, %ymm1 + %ymm16 = VMINPDZ256rr %ymm16, %ymm1 + ; CHECK: 
%ymm16 = VMINPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VMINPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VMINPSZ256rr %ymm16, %ymm1 + %ymm16 = VMINPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VXORPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VXORPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VXORPDZ256rr %ymm16, %ymm1 + %ymm16 = VXORPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VXORPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VXORPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VXORPSZ256rr %ymm16, %ymm1 + %ymm16 = VXORPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPACKSSDWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPACKSSDWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPACKSSDWZ256rr %ymm16, %ymm1 + %ymm16 = VPACKSSDWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPACKSSWBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPACKSSWBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPACKSSWBZ256rr %ymm16, %ymm1 + %ymm16 = VPACKSSWBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPACKUSDWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPACKUSDWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPACKUSDWZ256rr %ymm16, %ymm1 + %ymm16 = VPACKUSDWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPACKUSWBZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPACKUSWBZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPACKUSWBZ256rr %ymm16, %ymm1 + %ymm16 = VPACKUSWBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VUNPCKHPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VUNPCKHPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VUNPCKHPDZ256rr %ymm16, %ymm1 + %ymm16 = VUNPCKHPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VUNPCKHPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VUNPCKHPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VUNPCKHPSZ256rr %ymm16, %ymm1 + %ymm16 = VUNPCKHPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VUNPCKLPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VUNPCKLPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VUNPCKLPDZ256rr 
%ymm16, %ymm1 + %ymm16 = VUNPCKLPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VUNPCKLPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VUNPCKLPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VUNPCKLPSZ256rr %ymm16, %ymm1 + %ymm16 = VUNPCKLPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VSUBPDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VSUBPDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VSUBPDZ256rr %ymm16, %ymm1 + %ymm16 = VSUBPDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VSUBPSZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VSUBPSZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VSUBPSZ256rr %ymm16, %ymm1 + %ymm16 = VSUBPSZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPUNPCKHBWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPUNPCKHBWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPUNPCKHBWZ256rr %ymm16, %ymm1 + %ymm16 = VPUNPCKHBWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPUNPCKHDQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPUNPCKHDQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPUNPCKHDQZ256rr %ymm16, %ymm1 + %ymm16 = VPUNPCKHDQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPUNPCKHQDQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPUNPCKHQDQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPUNPCKHQDQZ256rr %ymm16, %ymm1 + %ymm16 = VPUNPCKHQDQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPUNPCKHWDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPUNPCKHWDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPUNPCKHWDZ256rr %ymm16, %ymm1 + %ymm16 = VPUNPCKHWDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPUNPCKLBWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPUNPCKLBWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPUNPCKLBWZ256rr %ymm16, %ymm1 + %ymm16 = VPUNPCKLBWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPUNPCKLDQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPUNPCKLDQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPUNPCKLDQZ256rr %ymm16, %ymm1 + %ymm16 = VPUNPCKLDQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPUNPCKLQDQZ256rm %ymm16, 
%rip, 1, _, %rax, _ + %ymm16 = VPUNPCKLQDQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPUNPCKLQDQZ256rr %ymm16, %ymm1 + %ymm16 = VPUNPCKLQDQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPUNPCKLWDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPUNPCKLWDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPUNPCKLWDZ256rr %ymm16, %ymm1 + %ymm16 = VPUNPCKLWDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VFMADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADD132PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMADD132PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADD132PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMADD132PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADD213PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMADD213PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADD213PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMADD213PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADD231PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMADD231PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADD231PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMADD231PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMADDSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADDSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADDSUB132PDZ256r %ymm16, 
%ymm1, %ymm2 + %ymm16 = VFMADDSUB132PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMADDSUB132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADDSUB132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADDSUB132PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMADDSUB132PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMADDSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADDSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADDSUB213PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMADDSUB213PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMADDSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADDSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADDSUB213PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMADDSUB213PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMADDSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADDSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADDSUB231PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMADDSUB231PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMADDSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMADDSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMADDSUB231PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMADDSUB231PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUB132PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMSUB132PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMSUB132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUB132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUB132PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMSUB132PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUB213PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = 
VFMSUB213PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUB213PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMSUB213PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUB231PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMSUB231PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUB231PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMSUB231PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMSUBADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUBADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUBADD132PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMSUBADD132PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMSUBADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUBADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUBADD132PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMSUBADD132PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMSUBADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUBADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUBADD213PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMSUBADD213PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMSUBADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUBADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUBADD213PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMSUBADD213PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFMSUBADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUBADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUBADD231PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMSUBADD231PDZ256r %ymm16, %ymm1, 
%ymm2 + ; CHECK: %ymm16 = VFMSUBADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFMSUBADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFMSUBADD231PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFMSUBADD231PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMADD132PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMADD132PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMADD132PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMADD132PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMADD213PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMADD213PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMADD213PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMADD213PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMADD231PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMADD231PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMADD231PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMADD231PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMSUB132PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMSUB132PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMSUB132PSZ256m %ymm16, 
%ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMSUB132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMSUB132PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMSUB132PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMSUB213PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMSUB213PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMSUB213PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMSUB213PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMSUB231PDZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMSUB231PDZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VFNMSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + %ymm16 = VFNMSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _ + ; CHECK: %ymm16 = VFNMSUB231PSZ256r %ymm16, %ymm1, %ymm2 + %ymm16 = VFNMSUB231PSZ256r %ymm16, %ymm1, %ymm2 + ; CHECK: %ymm16 = VPSRADZ256ri %ymm16, 7 + %ymm16 = VPSRADZ256ri %ymm16, 7 + ; CHECK: %ymm16 = VPSRADZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSRADZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSRADZ256rr %ymm16, %xmm1 + %ymm16 = VPSRADZ256rr %ymm16, %xmm1 + ; CHECK: %ymm16 = VPSRAVDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSRAVDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSRAVDZ256rr %ymm16, %ymm1 + %ymm16 = VPSRAVDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPSRAWZ256ri %ymm16, 7 + %ymm16 = VPSRAWZ256ri %ymm16, 7 + ; CHECK: %ymm16 = VPSRAWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSRAWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSRAWZ256rr %ymm16, %xmm1 + %ymm16 = VPSRAWZ256rr %ymm16, %xmm1 + ; CHECK: %ymm16 = VPSRLDQZ256rr %ymm16, %ymm1 + %ymm16 = VPSRLDQZ256rr %ymm16, 
%ymm1 + ; CHECK: %ymm16 = VPSRLDZ256ri %ymm16, 7 + %ymm16 = VPSRLDZ256ri %ymm16, 7 + ; CHECK: %ymm16 = VPSRLDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSRLDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSRLDZ256rr %ymm16, %xmm1 + %ymm16 = VPSRLDZ256rr %ymm16, %xmm1 + ; CHECK: %ymm16 = VPSRLQZ256ri %ymm16, 7 + %ymm16 = VPSRLQZ256ri %ymm16, 7 + ; CHECK: %ymm16 = VPSRLQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSRLQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSRLQZ256rr %ymm16, %xmm1 + %ymm16 = VPSRLQZ256rr %ymm16, %xmm1 + ; CHECK: %ymm16 = VPSRLVDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSRLVDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSRLVDZ256rr %ymm16, %ymm1 + %ymm16 = VPSRLVDZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPSRLVQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSRLVQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSRLVQZ256rr %ymm16, %ymm1 + %ymm16 = VPSRLVQZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPSRLWZ256ri %ymm16, 7 + %ymm16 = VPSRLWZ256ri %ymm16, 7 + ; CHECK: %ymm16 = VPSRLWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSRLWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSRLWZ256rr %ymm16, %xmm1 + %ymm16 = VPSRLWZ256rr %ymm16, %xmm1 + ; CHECK: %ymm16 = VPMOVSXBDZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPMOVSXBDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVSXBDZ256rr %xmm0 + %ymm16 = VPMOVSXBDZ256rr %xmm0 + ; CHECK: %ymm16 = VPMOVSXBQZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPMOVSXBQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVSXBQZ256rr %xmm0 + %ymm16 = VPMOVSXBQZ256rr %xmm0 + ; CHECK: %ymm16 = VPMOVSXBWZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPMOVSXBWZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVSXBWZ256rr %xmm0 + %ymm16 = VPMOVSXBWZ256rr %xmm0 + ; CHECK: %ymm16 = VPMOVSXDQZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPMOVSXDQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVSXDQZ256rr %xmm0 + %ymm16 = VPMOVSXDQZ256rr %xmm0 + ; CHECK: %ymm16 = VPMOVSXWDZ256rm %rip, 1, _, 
%rax, _ + %ymm16 = VPMOVSXWDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVSXWDZ256rr %xmm0 + %ymm16 = VPMOVSXWDZ256rr %xmm0 + ; CHECK: %ymm16 = VPMOVSXWQZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPMOVSXWQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVSXWQZ256rr %xmm0 + %ymm16 = VPMOVSXWQZ256rr %xmm0 + ; CHECK: %ymm16 = VPMOVZXBDZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPMOVZXBDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVZXBDZ256rr %xmm0 + %ymm16 = VPMOVZXBDZ256rr %xmm0 + ; CHECK: %ymm16 = VPMOVZXBQZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPMOVZXBQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVZXBQZ256rr %xmm0 + %ymm16 = VPMOVZXBQZ256rr %xmm0 + ; CHECK: %ymm16 = VPMOVZXBWZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPMOVZXBWZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVZXBWZ256rr %xmm0 + %ymm16 = VPMOVZXBWZ256rr %xmm0 + ; CHECK: %ymm16 = VPMOVZXDQZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPMOVZXDQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVZXDQZ256rr %xmm0 + %ymm16 = VPMOVZXDQZ256rr %xmm0 + ; CHECK: %ymm16 = VPMOVZXWDZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPMOVZXWDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVZXWDZ256rr %xmm0 + %ymm16 = VPMOVZXWDZ256rr %xmm0 + ; CHECK: %ymm16 = VPMOVZXWQZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPMOVZXWQZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPMOVZXWQZ256rr %xmm0 + %ymm16 = VPMOVZXWQZ256rr %xmm0 + ; CHECK: %ymm16 = VBROADCASTSDZ256m %rip, 1, _, %rax, _ + %ymm16 = VBROADCASTSDZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VBROADCASTSDZ256r %xmm0 + %ymm16 = VBROADCASTSDZ256r %xmm0 + ; CHECK: %ymm16 = VBROADCASTSDZ256r_s %xmm0 + %ymm16 = VBROADCASTSDZ256r_s %xmm0 + ; CHECK: %ymm16 = VBROADCASTSSZ256m %rip, 1, _, %rax, _ + %ymm16 = VBROADCASTSSZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VBROADCASTSSZ256r %xmm0 + %ymm16 = VBROADCASTSSZ256r %xmm0 + ; CHECK: %ymm16 = VBROADCASTSSZ256r_s %xmm0 + %ymm16 = VBROADCASTSSZ256r_s %xmm0 + ; CHECK: %ymm16 = VPBROADCASTBZ256m %rip, 1, _, %rax, _ + %ymm16 = 
VPBROADCASTBZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPBROADCASTBZ256r %xmm0 + %ymm16 = VPBROADCASTBZ256r %xmm0 + ; CHECK: %ymm16 = VPBROADCASTDZ256m %rip, 1, _, %rax, _ + %ymm16 = VPBROADCASTDZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPBROADCASTDZ256r %xmm0 + %ymm16 = VPBROADCASTDZ256r %xmm0 + ; CHECK: %ymm16 = VPBROADCASTWZ256m %rip, 1, _, %rax, _ + %ymm16 = VPBROADCASTWZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPBROADCASTWZ256r %xmm0 + %ymm16 = VPBROADCASTWZ256r %xmm0 + ; CHECK: %ymm16 = VPBROADCASTQZ256m %rip, 1, _, %rax, _ + %ymm16 = VPBROADCASTQZ256m %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPBROADCASTQZ256r %xmm0 + %ymm16 = VPBROADCASTQZ256r %xmm0 + ; CHECK: %ymm16 = VPABSBZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPABSBZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPABSBZ256rr %ymm16 + %ymm16 = VPABSBZ256rr %ymm16 + ; CHECK: %ymm16 = VPABSDZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPABSDZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPABSDZ256rr %ymm16 + %ymm16 = VPABSDZ256rr %ymm16 + ; CHECK: %ymm16 = VPABSWZ256rm %rip, 1, _, %rax, _ + %ymm16 = VPABSWZ256rm %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPABSWZ256rr %ymm16 + %ymm16 = VPABSWZ256rr %ymm16 + ; CHECK: %ymm16 = VPSADBWZ256rm %ymm16, 1, _, %rax, _, _ + %ymm16 = VPSADBWZ256rm %ymm16, 1, _, %rax, _, _ + ; CHECK: %ymm16 = VPSADBWZ256rr %ymm16, %ymm1 + %ymm16 = VPSADBWZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPERMDZ256rm %ymm16, %rdi, 1, _, 0, _ + %ymm16 = VPERMDZ256rm %ymm16, %rdi, 1, _, 0, _ + ; CHECK: %ymm16 = VPERMDZ256rr %ymm1, %ymm16 + %ymm16 = VPERMDZ256rr %ymm1, %ymm16 + ; CHECK: %ymm16 = VPERMILPDZ256mi %rdi, 1, _, 0, _, _ + %ymm16 = VPERMILPDZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm16 = VPERMILPDZ256ri %ymm16, 7 + %ymm16 = VPERMILPDZ256ri %ymm16, 7 + ; CHECK: %ymm16 = VPERMILPDZ256rm %ymm16, %rdi, 1, _, 0, _ + %ymm16 = VPERMILPDZ256rm %ymm16, %rdi, 1, _, 0, _ + ; CHECK: %ymm16 = VPERMILPDZ256rr %ymm1, %ymm16 + %ymm16 = VPERMILPDZ256rr %ymm1, %ymm16 + ; CHECK: %ymm16 = VPERMILPSZ256mi %rdi, 
1, _, 0, _, _ + %ymm16 = VPERMILPSZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm16 = VPERMILPSZ256ri %ymm16, 7 + %ymm16 = VPERMILPSZ256ri %ymm16, 7 + ; CHECK: %ymm16 = VPERMILPSZ256rm %ymm16, %rdi, 1, _, 0, _ + %ymm16 = VPERMILPSZ256rm %ymm16, %rdi, 1, _, 0, _ + ; CHECK: %ymm16 = VPERMILPSZ256rr %ymm1, %ymm16 + %ymm16 = VPERMILPSZ256rr %ymm1, %ymm16 + ; CHECK: %ymm16 = VPERMPDZ256mi %rdi, 1, _, 0, _, _ + %ymm16 = VPERMPDZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm16 = VPERMPDZ256ri %ymm16, 7 + %ymm16 = VPERMPDZ256ri %ymm16, 7 + ; CHECK: %ymm16 = VPERMPSZ256rm %ymm16, %rdi, 1, _, 0, _ + %ymm16 = VPERMPSZ256rm %ymm16, %rdi, 1, _, 0, _ + ; CHECK: %ymm16 = VPERMPSZ256rr %ymm1, %ymm16 + %ymm16 = VPERMPSZ256rr %ymm1, %ymm16 + ; CHECK: %ymm16 = VPERMQZ256mi %rdi, 1, _, 0, _, _ + %ymm16 = VPERMQZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm16 = VPERMQZ256ri %ymm16, 7 + %ymm16 = VPERMQZ256ri %ymm16, 7 + ; CHECK: %ymm16 = VPSLLDQZ256rr %ymm16, 14 + %ymm16 = VPSLLDQZ256rr %ymm16, 14 + ; CHECK: %ymm16 = VPSLLDZ256ri %ymm16, 7 + %ymm16 = VPSLLDZ256ri %ymm16, 7 + ; CHECK: %ymm16 = VPSLLDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSLLDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSLLDZ256rr %ymm16, 14 + %ymm16 = VPSLLDZ256rr %ymm16, 14 + ; CHECK: %ymm16 = VPSLLQZ256ri %ymm16, 7 + %ymm16 = VPSLLQZ256ri %ymm16, 7 + ; CHECK: %ymm16 = VPSLLQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSLLQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSLLQZ256rr %ymm16, 14 + %ymm16 = VPSLLQZ256rr %ymm16, 14 + ; CHECK: %ymm16 = VPSLLVDZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSLLVDZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSLLVDZ256rr %ymm16, 14 + %ymm16 = VPSLLVDZ256rr %ymm16, 14 + ; CHECK: %ymm16 = VPSLLVQZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSLLVQZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSLLVQZ256rr %ymm16, 14 + %ymm16 = VPSLLVQZ256rr %ymm16, 14 + ; CHECK: %ymm16 = VPSLLWZ256ri %ymm16, 7 + %ymm16 = VPSLLWZ256ri %ymm16, 7 + ; CHECK: 
%ymm16 = VPSLLWZ256rm %ymm16, %rip, 1, _, %rax, _ + %ymm16 = VPSLLWZ256rm %ymm16, %rip, 1, _, %rax, _ + ; CHECK: %ymm16 = VPSLLWZ256rr %ymm16, 14 + %ymm16 = VPSLLWZ256rr %ymm16, 14 + ; CHECK: %ymm16 = VCVTDQ2PDZ256rm %rdi, %ymm16, 1, _, 0 + %ymm16 = VCVTDQ2PDZ256rm %rdi, %ymm16, 1, _, 0 + ; CHECK: %ymm16 = VCVTDQ2PDZ256rr %xmm0 + %ymm16 = VCVTDQ2PDZ256rr %xmm0 + ; CHECK: %ymm16 = VCVTDQ2PSZ256rm %rdi, %ymm16, 1, _, 0 + %ymm16 = VCVTDQ2PSZ256rm %rdi, %ymm16, 1, _, 0 + ; CHECK: %ymm16 = VCVTDQ2PSZ256rr %ymm16 + %ymm16 = VCVTDQ2PSZ256rr %ymm16 + ; CHECK: %xmm0 = VCVTPD2DQZ256rm %rdi, %ymm16, 1, _, 0 + %xmm0 = VCVTPD2DQZ256rm %rdi, %ymm16, 1, _, 0 + ; CHECK: %xmm0 = VCVTPD2DQZ256rr %ymm16 + %xmm0 = VCVTPD2DQZ256rr %ymm16 + ; CHECK: %xmm0 = VCVTPD2PSZ256rm %rdi, %ymm16, 1, _, 0 + %xmm0 = VCVTPD2PSZ256rm %rdi, %ymm16, 1, _, 0 + ; CHECK: %xmm0 = VCVTPD2PSZ256rr %ymm16 + %xmm0 = VCVTPD2PSZ256rr %ymm16 + ; CHECK: %ymm16 = VCVTPS2DQZ256rm %rdi, %ymm16, 1, _, 0 + %ymm16 = VCVTPS2DQZ256rm %rdi, %ymm16, 1, _, 0 + ; CHECK: %ymm16 = VCVTPS2DQZ256rr %ymm16 + %ymm16 = VCVTPS2DQZ256rr %ymm16 + ; CHECK: %ymm16 = VCVTPS2PDZ256rm %rdi, %ymm16, 1, _, 0 + %ymm16 = VCVTPS2PDZ256rm %rdi, %ymm16, 1, _, 0 + ; CHECK: %ymm16 = VCVTPS2PDZ256rr %xmm0 + %ymm16 = VCVTPS2PDZ256rr %xmm0 + ; CHECK: VCVTPS2PHZ256mr %rdi, %ymm16, 1, _, 0, _, _ + VCVTPS2PHZ256mr %rdi, %ymm16, 1, _, 0, _, _ + ; CHECK: %xmm0 = VCVTPS2PHZ256rr %ymm16, _ + %xmm0 = VCVTPS2PHZ256rr %ymm16, _ + ; CHECK: %ymm16 = VCVTPH2PSZ256rm %rdi, %ymm16, 1, _, 0 + %ymm16 = VCVTPH2PSZ256rm %rdi, %ymm16, 1, _, 0 + ; CHECK: %ymm16 = VCVTPH2PSZ256rr %xmm0 + %ymm16 = VCVTPH2PSZ256rr %xmm0 + ; CHECK: %xmm0 = VCVTTPD2DQZ256rm %rdi, %ymm16, 1, _, 0 + %xmm0 = VCVTTPD2DQZ256rm %rdi, %ymm16, 1, _, 0 + ; CHECK: %xmm0 = VCVTTPD2DQZ256rr %ymm16 + %xmm0 = VCVTTPD2DQZ256rr %ymm16 + ; CHECK: %ymm16 = VCVTTPS2DQZ256rm %rdi, %ymm16, 1, _, 0 + %ymm16 = VCVTTPS2DQZ256rm %rdi, %ymm16, 1, _, 0 + ; CHECK: %ymm16 = VCVTTPS2DQZ256rr %ymm16 + %ymm16 = 
VCVTTPS2DQZ256rr %ymm16 + ; CHECK: %ymm16 = VSQRTPDZ256m %rdi, _, _, _, _ + %ymm16 = VSQRTPDZ256m %rdi, _, _, _, _ + ; CHECK: %ymm16 = VSQRTPDZ256r %ymm16 + %ymm16 = VSQRTPDZ256r %ymm16 + ; CHECK: %ymm16 = VSQRTPSZ256m %rdi, _, _, _, _ + %ymm16 = VSQRTPSZ256m %rdi, _, _, _, _ + ; CHECK: %ymm16 = VSQRTPSZ256r %ymm16 + %ymm16 = VSQRTPSZ256r %ymm16 + ; CHECK: %ymm16 = VPALIGNRZ256rmi %ymm16, %rdi, _, _, _, _, _ + %ymm16 = VPALIGNRZ256rmi %ymm16, %rdi, _, _, _, _, _ + ; CHECK: %ymm16 = VPALIGNRZ256rri %ymm16, %ymm1, _ + %ymm16 = VPALIGNRZ256rri %ymm16, %ymm1, _ + ; CHECK: %ymm16 = VMOVUPSZ256rm %rdi, 1, _, 0, _ + %ymm16 = VMOVUPSZ256rm %rdi, 1, _, 0, _ + ; CHECK: %ymm16 = VMOVUPSZ256rr %ymm16 + %ymm16 = VMOVUPSZ256rr %ymm16 + ; CHECK: %ymm16 = VMOVUPSZ256rr_REV %ymm16 + %ymm16 = VMOVUPSZ256rr_REV %ymm16 + ; CHECK: %ymm16 = VPSHUFBZ256rm %ymm16, _, _, _, _, _ + %ymm16 = VPSHUFBZ256rm %ymm16, _, _, _, _, _ + ; CHECK: %ymm16 = VPSHUFBZ256rr %ymm16, %ymm1 + %ymm16 = VPSHUFBZ256rr %ymm16, %ymm1 + ; CHECK: %ymm16 = VPSHUFDZ256mi %rdi, 1, _, 0, _, _ + %ymm16 = VPSHUFDZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm16 = VPSHUFDZ256ri %ymm16, -24 + %ymm16 = VPSHUFDZ256ri %ymm16, -24 + ; CHECK: %ymm16 = VPSHUFHWZ256mi %rdi, 1, _, 0, _, _ + %ymm16 = VPSHUFHWZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm16 = VPSHUFHWZ256ri %ymm16, -24 + %ymm16 = VPSHUFHWZ256ri %ymm16, -24 + ; CHECK: %ymm16 = VPSHUFLWZ256mi %rdi, 1, _, 0, _, _ + %ymm16 = VPSHUFLWZ256mi %rdi, 1, _, 0, _, _ + ; CHECK: %ymm16 = VPSHUFLWZ256ri %ymm16, -24 + %ymm16 = VPSHUFLWZ256ri %ymm16, -24 + ; CHECK: %ymm16 = VSHUFPDZ256rmi %ymm16, _, _, _, _, _, _ + %ymm16 = VSHUFPDZ256rmi %ymm16, _, _, _, _, _, _ + ; CHECK: %ymm16 = VSHUFPDZ256rri %ymm16, _, _ + %ymm16 = VSHUFPDZ256rri %ymm16, _, _ + ; CHECK: %ymm16 = VSHUFPSZ256rmi %ymm16, _, _, _, _, _, _ + %ymm16 = VSHUFPSZ256rmi %ymm16, _, _, _, _, _, _ + ; CHECK: %ymm16 = VSHUFPSZ256rri %ymm16, _, _ + %ymm16 = VSHUFPSZ256rri %ymm16, _, _ + + RET 0, %zmm0, %zmm1 +... 
+--- + # CHECK-LABEL: name: evex_z128_to_evex_test + # CHECK: bb.0: + +name: evex_z128_to_evex_test +body: | + bb.0: + ; CHECK: VMOVAPDZ128mr %rdi, 1, _, 0, _, %xmm16 + VMOVAPDZ128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVAPDZ128rm %rip, 1, _, %rax, _ + %xmm16 = VMOVAPDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMOVAPDZ128rr %xmm16 + %xmm16 = VMOVAPDZ128rr %xmm16 + ; CHECK: VMOVAPSZ128mr %rdi, 1, _, 0, _, %xmm16 + VMOVAPSZ128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVAPSZ128rm %rip, 1, _, %rax, _ + %xmm16 = VMOVAPSZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMOVAPSZ128rr %xmm16 + %xmm16 = VMOVAPSZ128rr %xmm16 + ; CHECK: VMOVDQA32Z128mr %rdi, 1, _, 0, _, %xmm16 + VMOVDQA32Z128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVDQA32Z128rm %rip, 1, _, %rax, _ + %xmm16 = VMOVDQA32Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMOVDQA32Z128rr %xmm16 + %xmm16 = VMOVDQA32Z128rr %xmm16 + ; CHECK: VMOVDQA64Z128mr %rdi, 1, _, 0, _, %xmm16 + VMOVDQA64Z128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVDQA64Z128rm %rip, 1, _, %rax, _ + %xmm16 = VMOVDQA64Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMOVDQA64Z128rr %xmm16 + %xmm16 = VMOVDQA64Z128rr %xmm16 + ; CHECK: VMOVDQU16Z128mr %rdi, 1, _, 0, _, %xmm16 + VMOVDQU16Z128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVDQU16Z128rm %rip, 1, _, %rax, _ + %xmm16 = VMOVDQU16Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMOVDQU16Z128rr %xmm16 + %xmm16 = VMOVDQU16Z128rr %xmm16 + ; CHECK: VMOVDQU32Z128mr %rdi, 1, _, 0, _, %xmm16 + VMOVDQU32Z128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVDQU32Z128rm %rip, 1, _, %rax, _ + %xmm16 = VMOVDQU32Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMOVDQU32Z128rr %xmm16 + %xmm16 = VMOVDQU32Z128rr %xmm16 + ; CHECK: VMOVDQU64Z128mr %rdi, 1, _, 0, _, %xmm16 + VMOVDQU64Z128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVDQU64Z128rm %rip, 1, _, %rax, _ + %xmm16 = VMOVDQU64Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMOVDQU64Z128rr %xmm16 + %xmm16 = 
VMOVDQU64Z128rr %xmm16 + ; CHECK: VMOVDQU8Z128mr %rdi, 1, _, 0, _, %xmm16 + VMOVDQU8Z128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVDQU8Z128rm %rip, 1, _, %rax, _ + %xmm16 = VMOVDQU8Z128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMOVDQU8Z128rr %xmm16 + %xmm16 = VMOVDQU8Z128rr %xmm16 + ; CHECK: %xmm16 = VMOVDQU8Z128rr_REV %xmm16 + %xmm16 = VMOVDQU8Z128rr_REV %xmm16 + ; CHECK: %xmm16 = VMOVNTDQAZ128rm %rip, 1, _, %rax, _ + %xmm16 = VMOVNTDQAZ128rm %rip, 1, _, %rax, _ + ; CHECK: VMOVUPDZ128mr %rdi, 1, _, 0, _, %xmm16 + VMOVUPDZ128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVUPDZ128rm %rip, 1, _, %rax, _ + %xmm16 = VMOVUPDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMOVUPDZ128rr %xmm16 + %xmm16 = VMOVUPDZ128rr %xmm16 + ; CHECK: %xmm16 = VMOVUPDZ128rr_REV %xmm16 + %xmm16 = VMOVUPDZ128rr_REV %xmm16 + ; CHECK: VMOVUPSZ128mr %rdi, 1, _, 0, _, %xmm16 + VMOVUPSZ128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVUPSZ128rm %rip, 1, _, %rax, _ + %xmm16 = VMOVUPSZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMOVUPSZ128rr %xmm16 + %xmm16 = VMOVUPSZ128rr %xmm16 + ; CHECK: %xmm16 = VMOVUPSZ128rr_REV %xmm16 + %xmm16 = VMOVUPSZ128rr_REV %xmm16 + ; CHECK: VMOVNTDQZ128mr %rdi, 1, _, 0, _, %xmm16 + VMOVNTDQZ128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: VMOVNTPDZ128mr %rdi, 1, _, 0, _, %xmm16 + VMOVNTPDZ128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: VMOVNTPSZ128mr %rdi, 1, _, 0, _, %xmm16 + VMOVNTPSZ128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVAPDZ128rr_REV %xmm16 + %xmm16 = VMOVAPDZ128rr_REV %xmm16 + ; CHECK: %xmm16 = VMOVAPSZ128rr_REV %xmm16 + %xmm16 = VMOVAPSZ128rr_REV %xmm16 + ; CHECK: %xmm16 = VMOVDQA32Z128rr_REV %xmm16 + %xmm16 = VMOVDQA32Z128rr_REV %xmm16 + ; CHECK: %xmm16 = VMOVDQA64Z128rr_REV %xmm16 + %xmm16 = VMOVDQA64Z128rr_REV %xmm16 + ; CHECK: %xmm16 = VMOVDQU16Z128rr_REV %xmm16 + %xmm16 = VMOVDQU16Z128rr_REV %xmm16 + ; CHECK: %xmm16 = VMOVDQU32Z128rr_REV %xmm16 + %xmm16 = VMOVDQU32Z128rr_REV %xmm16 + ; CHECK: %xmm16 = VMOVDQU64Z128rr_REV %xmm16 + 
%xmm16 = VMOVDQU64Z128rr_REV %xmm16 + ; CHECK: %xmm16 = VPMOVSXBDZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVSXBDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVSXBDZ128rr %xmm16 + %xmm16 = VPMOVSXBDZ128rr %xmm16 + ; CHECK: %xmm16 = VPMOVSXBQZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVSXBQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVSXBQZ128rr %xmm16 + %xmm16 = VPMOVSXBQZ128rr %xmm16 + ; CHECK: %xmm16 = VPMOVSXBWZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVSXBWZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVSXBWZ128rr %xmm16 + %xmm16 = VPMOVSXBWZ128rr %xmm16 + ; CHECK: %xmm16 = VPMOVSXDQZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVSXDQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVSXDQZ128rr %xmm16 + %xmm16 = VPMOVSXDQZ128rr %xmm16 + ; CHECK: %xmm16 = VPMOVSXWDZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVSXWDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVSXWDZ128rr %xmm16 + %xmm16 = VPMOVSXWDZ128rr %xmm16 + ; CHECK: %xmm16 = VPMOVSXWQZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVSXWQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVSXWQZ128rr %xmm16 + %xmm16 = VPMOVSXWQZ128rr %xmm16 + ; CHECK: %xmm16 = VPMOVZXBDZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVZXBDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVZXBDZ128rr %xmm16 + %xmm16 = VPMOVZXBDZ128rr %xmm16 + ; CHECK: %xmm16 = VPMOVZXBQZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVZXBQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVZXBQZ128rr %xmm16 + %xmm16 = VPMOVZXBQZ128rr %xmm16 + ; CHECK: %xmm16 = VPMOVZXBWZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVZXBWZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVZXBWZ128rr %xmm16 + %xmm16 = VPMOVZXBWZ128rr %xmm16 + ; CHECK: %xmm16 = VPMOVZXDQZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVZXDQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVZXDQZ128rr %xmm16 + %xmm16 = VPMOVZXDQZ128rr %xmm16 + ; CHECK: %xmm16 = VPMOVZXWDZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVZXWDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVZXWDZ128rr %xmm16 + %xmm16 = 
VPMOVZXWDZ128rr %xmm16 + ; CHECK: %xmm16 = VPMOVZXWQZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPMOVZXWQZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMOVZXWQZ128rr %xmm16 + %xmm16 = VPMOVZXWQZ128rr %xmm16 + ; CHECK: VMOVHPDZ128mr %rdi, 1, _, 0, _, %xmm16 + VMOVHPDZ128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVHPDZ128rm %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VMOVHPDZ128rm %xmm16, %rdi, 1, _, 0, _ + ; CHECK: VMOVHPSZ128mr %rdi, 1, _, 0, _, %xmm16 + VMOVHPSZ128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVHPSZ128rm %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VMOVHPSZ128rm %xmm16, %rdi, 1, _, 0, _ + ; CHECK: VMOVLPDZ128mr %rdi, 1, _, 0, _, %xmm16 + VMOVLPDZ128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVLPDZ128rm %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VMOVLPDZ128rm %xmm16, %rdi, 1, _, 0, _ + ; CHECK: VMOVLPSZ128mr %rdi, 1, _, 0, _, %xmm16 + VMOVLPSZ128mr %rdi, 1, _, 0, _, %xmm16 + ; CHECK: %xmm16 = VMOVLPSZ128rm %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VMOVLPSZ128rm %xmm16, %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VMAXCPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMAXCPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMAXCPDZ128rr %xmm16, %xmm1 + %xmm16 = VMAXCPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMAXCPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMAXCPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMAXCPSZ128rr %xmm16, %xmm1 + %xmm16 = VMAXCPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMAXPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMAXPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMAXPDZ128rr %xmm16, %xmm1 + %xmm16 = VMAXPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMAXPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMAXPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMAXPSZ128rr %xmm16, %xmm1 + %xmm16 = VMAXPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMINCPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMINCPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMINCPDZ128rr %xmm16, %xmm1 + %xmm16 = VMINCPDZ128rr %xmm16, 
%xmm1 + ; CHECK: %xmm16 = VMINCPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMINCPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMINCPSZ128rr %xmm16, %xmm1 + %xmm16 = VMINCPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMINPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMINPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMINPDZ128rr %xmm16, %xmm1 + %xmm16 = VMINPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMINPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMINPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMINPSZ128rr %xmm16, %xmm1 + %xmm16 = VMINPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMULPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMULPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMULPDZ128rr %xmm16, %xmm1 + %xmm16 = VMULPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMULPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMULPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMULPSZ128rr %xmm16, %xmm1 + %xmm16 = VMULPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VORPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VORPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VORPDZ128rr %xmm16, %xmm1 + %xmm16 = VORPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VORPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VORPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VORPSZ128rr %xmm16, %xmm1 + %xmm16 = VORPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPADDBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPADDBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPADDBZ128rr %xmm16, %xmm1 + %xmm16 = VPADDBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPADDDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPADDDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPADDDZ128rr %xmm16, %xmm1 + %xmm16 = VPADDDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPADDQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPADDQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPADDQZ128rr %xmm16, %xmm1 + %xmm16 = VPADDQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 
= VPADDSBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPADDSBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPADDSBZ128rr %xmm16, %xmm1 + %xmm16 = VPADDSBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPADDSWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPADDSWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPADDSWZ128rr %xmm16, %xmm1 + %xmm16 = VPADDSWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPADDUSBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPADDUSBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPADDUSBZ128rr %xmm16, %xmm1 + %xmm16 = VPADDUSBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPADDUSWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPADDUSWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPADDUSWZ128rr %xmm16, %xmm1 + %xmm16 = VPADDUSWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPADDWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPADDWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPADDWZ128rr %xmm16, %xmm1 + %xmm16 = VPADDWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPANDDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPANDDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPANDDZ128rr %xmm16, %xmm1 + %xmm16 = VPANDDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPANDQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPANDQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPANDQZ128rr %xmm16, %xmm1 + %xmm16 = VPANDQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPAVGBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPAVGBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPAVGBZ128rr %xmm16, %xmm1 + %xmm16 = VPAVGBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPAVGWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPAVGWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPAVGWZ128rr %xmm16, %xmm1 + %xmm16 = VPAVGWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMAXSBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMAXSBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMAXSBZ128rr %xmm16, %xmm1 + %xmm16 = VPMAXSBZ128rr %xmm16, %xmm1 + ; 
CHECK: %xmm16 = VPMAXSDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMAXSDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMAXSDZ128rr %xmm16, %xmm1 + %xmm16 = VPMAXSDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMAXSWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMAXSWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMAXSWZ128rr %xmm16, %xmm1 + %xmm16 = VPMAXSWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMAXUBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMAXUBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMAXUBZ128rr %xmm16, %xmm1 + %xmm16 = VPMAXUBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMAXUDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMAXUDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMAXUDZ128rr %xmm16, %xmm1 + %xmm16 = VPMAXUDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMAXUWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMAXUWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMAXUWZ128rr %xmm16, %xmm1 + %xmm16 = VPMAXUWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMINSBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMINSBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMINSBZ128rr %xmm16, %xmm1 + %xmm16 = VPMINSBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMINSDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMINSDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMINSDZ128rr %xmm16, %xmm1 + %xmm16 = VPMINSDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMINSWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMINSWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMINSWZ128rr %xmm16, %xmm1 + %xmm16 = VPMINSWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMINUBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMINUBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMINUBZ128rr %xmm16, %xmm1 + %xmm16 = VPMINUBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMINUDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMINUDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMINUDZ128rr %xmm16, %xmm1 + %xmm16 = 
VPMINUDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMINUWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMINUWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMINUWZ128rr %xmm16, %xmm1 + %xmm16 = VPMINUWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMULDQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMULDQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMULDQZ128rr %xmm16, %xmm1 + %xmm16 = VPMULDQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMULHRSWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMULHRSWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMULHRSWZ128rr %xmm16, %xmm1 + %xmm16 = VPMULHRSWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMULHUWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMULHUWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMULHUWZ128rr %xmm16, %xmm1 + %xmm16 = VPMULHUWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMULHWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMULHWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMULHWZ128rr %xmm16, %xmm1 + %xmm16 = VPMULHWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMULLDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMULLDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMULLDZ128rr %xmm16, %xmm1 + %xmm16 = VPMULLDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMULLWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMULLWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMULLWZ128rr %xmm16, %xmm1 + %xmm16 = VPMULLWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMULUDQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMULUDQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMULUDQZ128rr %xmm16, %xmm1 + %xmm16 = VPMULUDQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPORDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPORDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPORDZ128rr %xmm16, %xmm1 + %xmm16 = VPORDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPORQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPORQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPORQZ128rr 
%xmm16, %xmm1 + %xmm16 = VPORQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPSUBBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSUBBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSUBBZ128rr %xmm16, %xmm1 + %xmm16 = VPSUBBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPSUBDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSUBDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSUBDZ128rr %xmm16, %xmm1 + %xmm16 = VPSUBDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPSUBQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSUBQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSUBQZ128rr %xmm16, %xmm1 + %xmm16 = VPSUBQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPSUBSBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSUBSBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSUBSBZ128rr %xmm16, %xmm1 + %xmm16 = VPSUBSBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPSUBSWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSUBSWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSUBSWZ128rr %xmm16, %xmm1 + %xmm16 = VPSUBSWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPSUBUSBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSUBUSBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSUBUSBZ128rr %xmm16, %xmm1 + %xmm16 = VPSUBUSBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPSUBUSWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSUBUSWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSUBUSWZ128rr %xmm16, %xmm1 + %xmm16 = VPSUBUSWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPSUBWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSUBWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSUBWZ128rr %xmm16, %xmm1 + %xmm16 = VPSUBWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VADDPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VADDPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VADDPDZ128rr %xmm16, %xmm1 + %xmm16 = VADDPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VADDPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VADDPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = 
VADDPSZ128rr %xmm16, %xmm1 + %xmm16 = VADDPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VANDNPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VANDNPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VANDNPDZ128rr %xmm16, %xmm1 + %xmm16 = VANDNPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VANDNPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VANDNPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VANDNPSZ128rr %xmm16, %xmm1 + %xmm16 = VANDNPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VANDPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VANDPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VANDPDZ128rr %xmm16, %xmm1 + %xmm16 = VANDPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VANDPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VANDPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VANDPSZ128rr %xmm16, %xmm1 + %xmm16 = VANDPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VDIVPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VDIVPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VDIVPDZ128rr %xmm16, %xmm1 + %xmm16 = VDIVPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VDIVPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VDIVPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VDIVPSZ128rr %xmm16, %xmm1 + %xmm16 = VDIVPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPXORDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPXORDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPXORDZ128rr %xmm16, %xmm1 + %xmm16 = VPXORDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPXORQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPXORQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPXORQZ128rr %xmm16, %xmm1 + %xmm16 = VPXORQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VSUBPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VSUBPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VSUBPDZ128rr %xmm16, %xmm1 + %xmm16 = VSUBPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VSUBPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VSUBPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = 
VSUBPSZ128rr %xmm16, %xmm1 + %xmm16 = VSUBPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VXORPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VXORPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VXORPDZ128rr %xmm16, %xmm1 + %xmm16 = VXORPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VXORPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VXORPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VXORPSZ128rr %xmm16, %xmm1 + %xmm16 = VXORPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMADDUBSWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMADDUBSWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMADDUBSWZ128rr %xmm16, %xmm1 + %xmm16 = VPMADDUBSWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPMADDWDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPMADDWDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPMADDWDZ128rr %xmm16, %xmm1 + %xmm16 = VPMADDWDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPACKSSDWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPACKSSDWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPACKSSDWZ128rr %xmm16, %xmm1 + %xmm16 = VPACKSSDWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPACKSSWBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPACKSSWBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPACKSSWBZ128rr %xmm16, %xmm1 + %xmm16 = VPACKSSWBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPACKUSDWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPACKUSDWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPACKUSDWZ128rr %xmm16, %xmm1 + %xmm16 = VPACKUSDWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPACKUSWBZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPACKUSWBZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPACKUSWBZ128rr %xmm16, %xmm1 + %xmm16 = VPACKUSWBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPUNPCKHBWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPUNPCKHBWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPUNPCKHBWZ128rr %xmm16, %xmm1 + %xmm16 = VPUNPCKHBWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPUNPCKHDQZ128rm %xmm16, %rip, 1, _, %rax, _ 
+ %xmm16 = VPUNPCKHDQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPUNPCKHDQZ128rr %xmm16, %xmm1 + %xmm16 = VPUNPCKHDQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPUNPCKHQDQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPUNPCKHQDQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPUNPCKHQDQZ128rr %xmm16, %xmm1 + %xmm16 = VPUNPCKHQDQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPUNPCKHWDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPUNPCKHWDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPUNPCKHWDZ128rr %xmm16, %xmm1 + %xmm16 = VPUNPCKHWDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPUNPCKLBWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPUNPCKLBWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPUNPCKLBWZ128rr %xmm16, %xmm1 + %xmm16 = VPUNPCKLBWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPUNPCKLDQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPUNPCKLDQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPUNPCKLDQZ128rr %xmm16, %xmm1 + %xmm16 = VPUNPCKLDQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPUNPCKLQDQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPUNPCKLQDQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPUNPCKLQDQZ128rr %xmm16, %xmm1 + %xmm16 = VPUNPCKLQDQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPUNPCKLWDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPUNPCKLWDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPUNPCKLWDZ128rr %xmm16, %xmm1 + %xmm16 = VPUNPCKLWDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VUNPCKHPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VUNPCKHPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VUNPCKHPDZ128rr %xmm16, %xmm1 + %xmm16 = VUNPCKHPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VUNPCKHPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VUNPCKHPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VUNPCKHPSZ128rr %xmm16, %xmm1 + %xmm16 = VUNPCKHPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VUNPCKLPDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VUNPCKLPDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = 
VUNPCKLPDZ128rr %xmm16, %xmm1 + %xmm16 = VUNPCKLPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VUNPCKLPSZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VUNPCKLPSZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VUNPCKLPSZ128rr %xmm16, %xmm1 + %xmm16 = VUNPCKLPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VFMADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD132PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD132PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD132PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD132PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD213PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD213PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD213PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD213PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD231PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD231PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD231PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD231PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADDSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADDSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADDSUB132PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADDSUB132PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADDSUB132PSZ128m 
%xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADDSUB132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADDSUB132PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADDSUB132PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADDSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADDSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADDSUB213PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADDSUB213PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADDSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADDSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADDSUB213PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADDSUB213PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADDSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADDSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADDSUB231PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADDSUB231PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADDSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADDSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADDSUB231PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADDSUB231PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB132PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB132PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB132PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB132PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB213PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB213PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + 
%xmm16 = VFMSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB213PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB213PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB231PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB231PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB231PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB231PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUBADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUBADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUBADD132PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUBADD132PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUBADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUBADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUBADD132PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUBADD132PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUBADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUBADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUBADD213PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUBADD213PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUBADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUBADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUBADD213PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUBADD213PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUBADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUBADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUBADD231PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUBADD231PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUBADD231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUBADD231PSZ128m 
%xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUBADD231PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUBADD231PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD132PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD132PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD132PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD132PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD213PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD213PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD213PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD213PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD231PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD231PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD231PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD231PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB132PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB132PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = 
VFNMSUB132PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB132PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB213PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB213PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB213PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB213PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB231PDZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB231PDZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB231PSZ128r %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB231PSZ128r %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VPSLLDZ128ri %xmm16, 7 + %xmm16 = VPSLLDZ128ri %xmm16, 7 + ; CHECK: %xmm16 = VPSLLDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSLLDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSLLDZ128rr %xmm16, 14 + %xmm16 = VPSLLDZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPSLLQZ128ri %xmm16, 7 + %xmm16 = VPSLLQZ128ri %xmm16, 7 + ; CHECK: %xmm16 = VPSLLQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSLLQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSLLQZ128rr %xmm16, 14 + %xmm16 = VPSLLQZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPSLLVDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSLLVDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSLLVDZ128rr %xmm16, 14 + %xmm16 = VPSLLVDZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPSLLVQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSLLVQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSLLVQZ128rr %xmm16, 14 + %xmm16 = VPSLLVQZ128rr %xmm16, 14 + ; CHECK: 
%xmm16 = VPSLLWZ128ri %xmm16, 7 + %xmm16 = VPSLLWZ128ri %xmm16, 7 + ; CHECK: %xmm16 = VPSLLWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSLLWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSLLWZ128rr %xmm16, 14 + %xmm16 = VPSLLWZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPSRADZ128ri %xmm16, 7 + %xmm16 = VPSRADZ128ri %xmm16, 7 + ; CHECK: %xmm16 = VPSRADZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSRADZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSRADZ128rr %xmm16, 14 + %xmm16 = VPSRADZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPSRAVDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSRAVDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSRAVDZ128rr %xmm16, 14 + %xmm16 = VPSRAVDZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPSRAWZ128ri %xmm16, 7 + %xmm16 = VPSRAWZ128ri %xmm16, 7 + ; CHECK: %xmm16 = VPSRAWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSRAWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSRAWZ128rr %xmm16, 14 + %xmm16 = VPSRAWZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPSRLDQZ128rr %xmm16, 14 + %xmm16 = VPSRLDQZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPSRLDZ128ri %xmm16, 7 + %xmm16 = VPSRLDZ128ri %xmm16, 7 + ; CHECK: %xmm16 = VPSRLDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSRLDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSRLDZ128rr %xmm16, 14 + %xmm16 = VPSRLDZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPSRLQZ128ri %xmm16, 7 + %xmm16 = VPSRLQZ128ri %xmm16, 7 + ; CHECK: %xmm16 = VPSRLQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSRLQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSRLQZ128rr %xmm16, 14 + %xmm16 = VPSRLQZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPSRLVDZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSRLVDZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSRLVDZ128rr %xmm16, 14 + %xmm16 = VPSRLVDZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPSRLVQZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSRLVQZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSRLVQZ128rr %xmm16, 14 + %xmm16 = VPSRLVQZ128rr %xmm16, 
14 + ; CHECK: %xmm16 = VPSRLWZ128ri %xmm16, 7 + %xmm16 = VPSRLWZ128ri %xmm16, 7 + ; CHECK: %xmm16 = VPSRLWZ128rm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VPSRLWZ128rm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPSRLWZ128rr %xmm16, 14 + %xmm16 = VPSRLWZ128rr %xmm16, 14 + ; CHECK: %xmm16 = VPERMILPDZ128mi %rdi, 1, _, 0, _, _ + %xmm16 = VPERMILPDZ128mi %rdi, 1, _, 0, _, _ + ; CHECK: %xmm16 = VPERMILPDZ128ri %xmm16, 9 + %xmm16 = VPERMILPDZ128ri %xmm16, 9 + ; CHECK: %xmm16 = VPERMILPDZ128rm %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VPERMILPDZ128rm %xmm16, %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VPERMILPDZ128rr %xmm16, %xmm1 + %xmm16 = VPERMILPDZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPERMILPSZ128mi %rdi, 1, _, 0, _, _ + %xmm16 = VPERMILPSZ128mi %rdi, 1, _, 0, _, _ + ; CHECK: %xmm16 = VPERMILPSZ128ri %xmm16, 9 + %xmm16 = VPERMILPSZ128ri %xmm16, 9 + ; CHECK: %xmm16 = VPERMILPSZ128rm %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VPERMILPSZ128rm %xmm16, %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VPERMILPSZ128rr %xmm16, %xmm1 + %xmm16 = VPERMILPSZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VCVTPH2PSZ128rm %rdi, %xmm16, 1, _, 0 + %xmm16 = VCVTPH2PSZ128rm %rdi, %xmm16, 1, _, 0 + ; CHECK: %xmm16 = VCVTPH2PSZ128rr %xmm16 + %xmm16 = VCVTPH2PSZ128rr %xmm16 + ; CHECK: %xmm16 = VCVTDQ2PDZ128rm %rdi, %xmm16, 1, _, 0 + %xmm16 = VCVTDQ2PDZ128rm %rdi, %xmm16, 1, _, 0 + ; CHECK: %xmm16 = VCVTDQ2PDZ128rr %xmm16 + %xmm16 = VCVTDQ2PDZ128rr %xmm16 + ; CHECK: %xmm16 = VCVTDQ2PSZ128rm %rdi, %xmm16, 1, _, 0 + %xmm16 = VCVTDQ2PSZ128rm %rdi, %xmm16, 1, _, 0 + ; CHECK: %xmm16 = VCVTDQ2PSZ128rr %xmm16 + %xmm16 = VCVTDQ2PSZ128rr %xmm16 + ; CHECK: %xmm16 = VCVTPD2DQZ128rm %rdi, %xmm16, 1, _, 0 + %xmm16 = VCVTPD2DQZ128rm %rdi, %xmm16, 1, _, 0 + ; CHECK: %xmm16 = VCVTPD2DQZ128rr %xmm16 + %xmm16 = VCVTPD2DQZ128rr %xmm16 + ; CHECK: %xmm16 = VCVTPD2PSZ128rm %rdi, %xmm16, 1, _, 0 + %xmm16 = VCVTPD2PSZ128rm %rdi, %xmm16, 1, _, 0 + ; CHECK: %xmm16 = VCVTPD2PSZ128rr %xmm16 + %xmm16 = VCVTPD2PSZ128rr %xmm16 + ; CHECK: %xmm16 = 
VCVTPS2DQZ128rm %rdi, %xmm16, 1, _, 0 + %xmm16 = VCVTPS2DQZ128rm %rdi, %xmm16, 1, _, 0 + ; CHECK: %xmm16 = VCVTPS2DQZ128rr %xmm16 + %xmm16 = VCVTPS2DQZ128rr %xmm16 + ; CHECK: %xmm16 = VCVTPS2PDZ128rm %rdi, %xmm16, 1, _, 0 + %xmm16 = VCVTPS2PDZ128rm %rdi, %xmm16, 1, _, 0 + ; CHECK: %xmm16 = VCVTPS2PDZ128rr %xmm16 + %xmm16 = VCVTPS2PDZ128rr %xmm16 + ; CHECK: %xmm16 = VCVTTPD2DQZ128rm %rdi, %xmm16, 1, _, 0 + %xmm16 = VCVTTPD2DQZ128rm %rdi, %xmm16, 1, _, 0 + ; CHECK: %xmm16 = VCVTTPD2DQZ128rr %xmm16 + %xmm16 = VCVTTPD2DQZ128rr %xmm16 + ; CHECK: %xmm16 = VCVTTPS2DQZ128rm %rdi, %xmm16, 1, _, 0 + %xmm16 = VCVTTPS2DQZ128rm %rdi, %xmm16, 1, _, 0 + ; CHECK: %xmm16 = VCVTTPS2DQZ128rr %xmm16 + %xmm16 = VCVTTPS2DQZ128rr %xmm16 + ; CHECK: %xmm16 = VSQRTPDZ128m %rdi, _, _, _, _ + %xmm16 = VSQRTPDZ128m %rdi, _, _, _, _ + ; CHECK: %xmm16 = VSQRTPDZ128r %xmm16 + %xmm16 = VSQRTPDZ128r %xmm16 + ; CHECK: %xmm16 = VSQRTPSZ128m %rdi, _, _, _, _ + %xmm16 = VSQRTPSZ128m %rdi, _, _, _, _ + ; CHECK: %xmm16 = VSQRTPSZ128r %xmm16 + %xmm16 = VSQRTPSZ128r %xmm16 + ; CHECK: %xmm16 = VMOVDDUPZ128rm %rdi, 1, _, 0, _ + %xmm16 = VMOVDDUPZ128rm %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VMOVDDUPZ128rr %xmm16 + %xmm16 = VMOVDDUPZ128rr %xmm16 + ; CHECK: %xmm16 = VMOVSHDUPZ128rm %rdi, 1, _, 0, _ + %xmm16 = VMOVSHDUPZ128rm %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VMOVSHDUPZ128rr %xmm16 + %xmm16 = VMOVSHDUPZ128rr %xmm16 + ; CHECK: %xmm16 = VMOVSLDUPZ128rm %rdi, 1, _, 0, _ + %xmm16 = VMOVSLDUPZ128rm %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VMOVSLDUPZ128rr %xmm16 + %xmm16 = VMOVSLDUPZ128rr %xmm16 + ; CHECK: %xmm16 = VPSHUFBZ128rm %xmm16, _, _, _, _, _ + %xmm16 = VPSHUFBZ128rm %xmm16, _, _, _, _, _ + ; CHECK: %xmm16 = VPSHUFBZ128rr %xmm16, %xmm1 + %xmm16 = VPSHUFBZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VPSHUFDZ128mi %rdi, 1, _, 0, _, _ + %xmm16 = VPSHUFDZ128mi %rdi, 1, _, 0, _, _ + ; CHECK: %xmm16 = VPSHUFDZ128ri %xmm16, -24 + %xmm16 = VPSHUFDZ128ri %xmm16, -24 + ; CHECK: %xmm16 = VPSHUFHWZ128mi %rdi, 1, _, 0, _, _ + 
%xmm16 = VPSHUFHWZ128mi %rdi, 1, _, 0, _, _ + ; CHECK: %xmm16 = VPSHUFHWZ128ri %xmm16, -24 + %xmm16 = VPSHUFHWZ128ri %xmm16, -24 + ; CHECK: %xmm16 = VPSHUFLWZ128mi %rdi, 1, _, 0, _, _ + %xmm16 = VPSHUFLWZ128mi %rdi, 1, _, 0, _, _ + ; CHECK: %xmm16 = VPSHUFLWZ128ri %xmm16, -24 + %xmm16 = VPSHUFLWZ128ri %xmm16, -24 + ; CHECK: %xmm16 = VPSLLDQZ128rr %xmm16, %xmm1 + %xmm16 = VPSLLDQZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VSHUFPDZ128rmi %xmm16, _, _, _, _, _, _ + %xmm16 = VSHUFPDZ128rmi %xmm16, _, _, _, _, _, _ + ; CHECK: %xmm16 = VSHUFPDZ128rri %xmm16, _, _ + %xmm16 = VSHUFPDZ128rri %xmm16, _, _ + ; CHECK: %xmm16 = VSHUFPSZ128rmi %xmm16, _, _, _, _, _, _ + %xmm16 = VSHUFPSZ128rmi %xmm16, _, _, _, _, _, _ + ; CHECK: %xmm16 = VSHUFPSZ128rri %xmm16, _, _ + %xmm16 = VSHUFPSZ128rri %xmm16, _, _ + ; CHECK: %xmm16 = VPSADBWZ128rm %xmm16, 1, _, %rax, _, _ + %xmm16 = VPSADBWZ128rm %xmm16, 1, _, %rax, _, _ + ; CHECK: %xmm16 = VPSADBWZ128rr %xmm16, %xmm1 + %xmm16 = VPSADBWZ128rr %xmm16, %xmm1 + ; CHECK: %xmm16 = VBROADCASTSSZ128m %rip, _, _, _, _ + %xmm16 = VBROADCASTSSZ128m %rip, _, _, _, _ + ; CHECK: %xmm16 = VBROADCASTSSZ128r %xmm16 + %xmm16 = VBROADCASTSSZ128r %xmm16 + ; CHECK: %xmm16 = VBROADCASTSSZ128r_s %xmm16 + %xmm16 = VBROADCASTSSZ128r_s %xmm16 + ; CHECK: %xmm16 = VPBROADCASTBZ128m %rip, _, _, _, _ + %xmm16 = VPBROADCASTBZ128m %rip, _, _, _, _ + ; CHECK: %xmm16 = VPBROADCASTBZ128r %xmm16 + %xmm16 = VPBROADCASTBZ128r %xmm16 + ; CHECK: %xmm16 = VPBROADCASTDZ128m %rip, _, _, _, _ + %xmm16 = VPBROADCASTDZ128m %rip, _, _, _, _ + ; CHECK: %xmm16 = VPBROADCASTDZ128r %xmm16 + %xmm16 = VPBROADCASTDZ128r %xmm16 + ; CHECK: %xmm16 = VPBROADCASTQZ128m %rip, _, _, _, _ + %xmm16 = VPBROADCASTQZ128m %rip, _, _, _, _ + ; CHECK: %xmm16 = VPBROADCASTQZ128r %xmm16 + %xmm16 = VPBROADCASTQZ128r %xmm16 + ; CHECK: %xmm16 = VPBROADCASTWZ128m %rip, _, _, _, _ + %xmm16 = VPBROADCASTWZ128m %rip, _, _, _, _ + ; CHECK: %xmm16 = VPBROADCASTWZ128r %xmm16 + %xmm16 = VPBROADCASTWZ128r %xmm16 + ; CHECK: 
%xmm16 = VCVTPS2PHZ128rr %xmm16, 2 + %xmm16 = VCVTPS2PHZ128rr %xmm16, 2 + ; CHECK: VCVTPS2PHZ128mr %rdi, %xmm16, 1, _, 0, _, _ + VCVTPS2PHZ128mr %rdi, %xmm16, 1, _, 0, _, _ + ; CHECK: %xmm16 = VPABSBZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPABSBZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPABSBZ128rr %xmm16 + %xmm16 = VPABSBZ128rr %xmm16 + ; CHECK: %xmm16 = VPABSDZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPABSDZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPABSDZ128rr %xmm16 + %xmm16 = VPABSDZ128rr %xmm16 + ; CHECK: %xmm16 = VPABSWZ128rm %rip, 1, _, %rax, _ + %xmm16 = VPABSWZ128rm %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VPABSWZ128rr %xmm16 + %xmm16 = VPABSWZ128rr %xmm16 + ; CHECK: %xmm16 = VPALIGNRZ128rmi %xmm16, _, _, _, _, _, _ + %xmm16 = VPALIGNRZ128rmi %xmm16, _, _, _, _, _, _ + ; CHECK: %xmm16 = VPALIGNRZ128rri %xmm16, %xmm1, 15 + %xmm16 = VPALIGNRZ128rri %xmm16, %xmm1, 15 + + RET 0, %zmm0, %zmm1 +... +--- + # CHECK-LABEL: name: evex_scalar_to_evex_test + # CHECK: bb.0: + +name: evex_scalar_to_evex_test +body: | + bb.0: + ; CHECK: %xmm16 = VADDSDZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VADDSDZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VADDSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VADDSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VADDSDZrr %xmm16, %xmm1 + %xmm16 = VADDSDZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VADDSDZrr_Int %xmm16, %xmm1 + %xmm16 = VADDSDZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VADDSSZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VADDSSZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VADDSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VADDSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VADDSSZrr %xmm16, %xmm1 + %xmm16 = VADDSSZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VADDSSZrr_Int %xmm16, %xmm1 + %xmm16 = VADDSSZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VDIVSDZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VDIVSDZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VDIVSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = 
VDIVSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VDIVSDZrr %xmm16, %xmm1 + %xmm16 = VDIVSDZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VDIVSDZrr_Int %xmm16, %xmm1 + %xmm16 = VDIVSDZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VDIVSSZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VDIVSSZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VDIVSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VDIVSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VDIVSSZrr %xmm16, %xmm1 + %xmm16 = VDIVSSZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VDIVSSZrr_Int %xmm16, %xmm1 + %xmm16 = VDIVSSZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VMAXCSDZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMAXCSDZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMAXCSDZrr %xmm16, %xmm1 + %xmm16 = VMAXCSDZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMAXCSSZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMAXCSSZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMAXCSSZrr %xmm16, %xmm1 + %xmm16 = VMAXCSSZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMAXSDZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMAXSDZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMAXSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMAXSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMAXSDZrr %xmm16, %xmm1 + %xmm16 = VMAXSDZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMAXSDZrr_Int %xmm16, %xmm1 + %xmm16 = VMAXSDZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VMAXSSZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMAXSSZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMAXSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMAXSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMAXSSZrr %xmm16, %xmm1 + %xmm16 = VMAXSSZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMAXSSZrr_Int %xmm16, %xmm1 + %xmm16 = VMAXSSZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VMINCSDZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMINCSDZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMINCSDZrr %xmm16, %xmm1 + %xmm16 = VMINCSDZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMINCSSZrm %xmm16, %rip, 1, _, %rax, 
_ + %xmm16 = VMINCSSZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMINCSSZrr %xmm16, %xmm1 + %xmm16 = VMINCSSZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMINSDZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMINSDZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMINSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMINSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMINSDZrr %xmm16, %xmm1 + %xmm16 = VMINSDZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMINSDZrr_Int %xmm16, %xmm1 + %xmm16 = VMINSDZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VMINSSZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMINSSZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMINSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMINSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMINSSZrr %xmm16, %xmm1 + %xmm16 = VMINSSZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMINSSZrr_Int %xmm16, %xmm1 + %xmm16 = VMINSSZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VMULSDZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMULSDZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMULSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMULSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMULSDZrr %xmm16, %xmm1 + %xmm16 = VMULSDZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMULSDZrr_Int %xmm16, %xmm1 + %xmm16 = VMULSDZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VMULSSZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMULSSZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMULSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VMULSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VMULSSZrr %xmm16, %xmm1 + %xmm16 = VMULSSZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VMULSSZrr_Int %xmm16, %xmm1 + %xmm16 = VMULSSZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VSUBSDZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VSUBSDZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VSUBSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VSUBSDZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VSUBSDZrr %xmm16, %xmm1 + %xmm16 = VSUBSDZrr %xmm16, %xmm1 + ; CHECK: 
%xmm16 = VSUBSDZrr_Int %xmm16, %xmm1 + %xmm16 = VSUBSDZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VSUBSSZrm %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VSUBSSZrm %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VSUBSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + %xmm16 = VSUBSSZrm_Int %xmm16, %rip, 1, _, %rax, _ + ; CHECK: %xmm16 = VSUBSSZrr %xmm16, %xmm1 + %xmm16 = VSUBSSZrr %xmm16, %xmm1 + ; CHECK: %xmm16 = VSUBSSZrr_Int %xmm16, %xmm1 + %xmm16 = VSUBSSZrr_Int %xmm16, %xmm1 + ; CHECK: %xmm16 = VFMADD132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD132SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD132SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD132SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD132SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD132SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD132SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD132SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD132SSZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD213SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD213SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD213SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD213SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; 
CHECK: %xmm16 = VFMADD213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD213SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD213SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD213SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD213SSZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD231SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD231SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD231SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD231SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMADD231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMADD231SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD231SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMADD231SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMADD231SSZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB132SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB132SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB132SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB132SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, 
_ + ; CHECK: %xmm16 = VFMSUB132SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB132SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB132SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB132SSZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB213SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB213SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB213SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB213SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB213SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB213SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB213SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB213SSZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB231SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB231SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB231SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB231SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFMSUB231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFMSUB231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFMSUB231SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB231SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = 
VFMSUB231SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFMSUB231SSZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD132SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD132SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD132SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD132SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD132SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD132SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD132SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD132SSZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD213SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD213SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD213SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD213SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD213SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD213SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD213SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD213SSZr_Int %xmm16, %xmm1, %xmm2 + ; 
CHECK: %xmm16 = VFNMADD231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD231SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD231SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD231SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD231SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMADD231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMADD231SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD231SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMADD231SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMADD231SSZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB132SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB132SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB132SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB132SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB132SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB132SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB132SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB132SSZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB213SDZm 
%xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB213SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB213SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB213SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB213SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB213SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB213SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB213SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB213SSZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB231SDZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB231SDZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB231SDZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB231SDZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + %xmm16 = VFNMSUB231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _ + ; CHECK: %xmm16 = VFNMSUB231SSZr %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB231SSZr %xmm16, %xmm1, %xmm2 + ; CHECK: %xmm16 = VFNMSUB231SSZr_Int %xmm16, %xmm1, %xmm2 + %xmm16 = VFNMSUB231SSZr_Int %xmm16, %xmm1, %xmm2 + ; CHECK: VPEXTRBZmr %rdi, 1, _, 0, _, %xmm16, 3 + VPEXTRBZmr %rdi, 1, _, 0, _, %xmm16, 3 + ; CHECK: %eax = VPEXTRBZrr %xmm16, 1 + %eax = VPEXTRBZrr %xmm16, 1 + ; CHECK: VPEXTRDZmr %rdi, 
1, _, 0, _, %xmm16, 3 + VPEXTRDZmr %rdi, 1, _, 0, _, %xmm16, 3 + ; CHECK: %eax = VPEXTRDZrr %xmm16, 1 + %eax = VPEXTRDZrr %xmm16, 1 + ; CHECK: VPEXTRQZmr %rdi, 1, _, 0, _, %xmm16, 3 + VPEXTRQZmr %rdi, 1, _, 0, _, %xmm16, 3 + ; CHECK: %rax = VPEXTRQZrr %xmm16, 1 + %rax = VPEXTRQZrr %xmm16, 1 + ; CHECK: VPEXTRWZmr %rdi, 1, _, 0, _, %xmm16, 3 + VPEXTRWZmr %rdi, 1, _, 0, _, %xmm16, 3 + ; CHECK: %eax = VPEXTRWZrr %xmm16, 1 + %eax = VPEXTRWZrr %xmm16, 1 + ; CHECK: %xmm16 = VPINSRBZrm %xmm16, %rsi, 1, _, 0, _, 3 + %xmm16 = VPINSRBZrm %xmm16, %rsi, 1, _, 0, _, 3 + ; CHECK: %xmm16 = VPINSRBZrr %xmm16, %edi, 5 + %xmm16 = VPINSRBZrr %xmm16, %edi, 5 + ; CHECK: %xmm16 = VPINSRDZrm %xmm16, %rsi, 1, _, 0, _, 3 + %xmm16 = VPINSRDZrm %xmm16, %rsi, 1, _, 0, _, 3 + ; CHECK: %xmm16 = VPINSRDZrr %xmm16, %edi, 5 + %xmm16 = VPINSRDZrr %xmm16, %edi, 5 + ; CHECK: %xmm16 = VPINSRQZrm %xmm16, %rsi, 1, _, 0, _, 3 + %xmm16 = VPINSRQZrm %xmm16, %rsi, 1, _, 0, _, 3 + ; CHECK: %xmm16 = VPINSRQZrr %xmm16, %rdi, 5 + %xmm16 = VPINSRQZrr %xmm16, %rdi, 5 + ; CHECK: %xmm16 = VPINSRWZrm %xmm16, %rsi, 1, _, 0, _, 3 + %xmm16 = VPINSRWZrm %xmm16, %rsi, 1, _, 0, _, 3 + ; CHECK: %xmm16 = VPINSRWZrr %xmm16, %edi, 5 + %xmm16 = VPINSRWZrr %xmm16, %edi, 5 + ; CHECK: %xmm16 = VSQRTSDZm %xmm16, _, _, _, _, _ + %xmm16 = VSQRTSDZm %xmm16, _, _, _, _, _ + ; CHECK: %xmm16 = VSQRTSDZm_Int %xmm16, _, _, _, _, _ + %xmm16 = VSQRTSDZm_Int %xmm16, _, _, _, _, _ + ; CHECK: %xmm16 = VSQRTSDZr %xmm16, _ + %xmm16 = VSQRTSDZr %xmm16, _ + ; CHECK: %xmm16 = VSQRTSDZr_Int %xmm16, _ + %xmm16 = VSQRTSDZr_Int %xmm16, _ + ; CHECK: %xmm16 = VSQRTSSZm %xmm16, _, _, _, _, _ + %xmm16 = VSQRTSSZm %xmm16, _, _, _, _, _ + ; CHECK: %xmm16 = VSQRTSSZm_Int %xmm16, _, _, _, _, _ + %xmm16 = VSQRTSSZm_Int %xmm16, _, _, _, _, _ + ; CHECK: %xmm16 = VSQRTSSZr %xmm16, _ + %xmm16 = VSQRTSSZr %xmm16, _ + ; CHECK: %xmm16 = VSQRTSSZr_Int %xmm16, _ + %xmm16 = VSQRTSSZr_Int %xmm16, _ + ; CHECK: %rdi = VCVTSD2SI64Zrm %rdi, %xmm16, 1, _, 0 + %rdi = 
VCVTSD2SI64Zrm %rdi, %xmm16, 1, _, 0 + ; CHECK: %rdi = VCVTSD2SI64Zrr %xmm16 + %rdi = VCVTSD2SI64Zrr %xmm16 + ; CHECK: %edi = VCVTSD2SIZrm %rdi, %xmm16, 1, _, 0 + %edi = VCVTSD2SIZrm %rdi, %xmm16, 1, _, 0 + ; CHECK: %edi = VCVTSD2SIZrr %xmm16 + %edi = VCVTSD2SIZrr %xmm16 + ; CHECK: %xmm16 = VCVTSD2SSZrm %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VCVTSD2SSZrm %xmm16, %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VCVTSD2SSZrr %xmm16, _ + %xmm16 = VCVTSD2SSZrr %xmm16, _ + ; CHECK: %xmm16 = VCVTSI2SDZrm %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VCVTSI2SDZrm %xmm16, %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VCVTSI2SDZrm_Int %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VCVTSI2SDZrm_Int %xmm16, %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VCVTSI2SDZrr %xmm16, _ + %xmm16 = VCVTSI2SDZrr %xmm16, _ + ; CHECK: %xmm16 = VCVTSI2SDZrr_Int %xmm16, _ + %xmm16 = VCVTSI2SDZrr_Int %xmm16, _ + ; CHECK: %xmm16 = VCVTSI2SSZrm %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VCVTSI2SSZrm %xmm16, %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VCVTSI2SSZrm_Int %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VCVTSI2SSZrm_Int %xmm16, %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VCVTSI2SSZrr %xmm16, _ + %xmm16 = VCVTSI2SSZrr %xmm16, _ + ; CHECK: %xmm16 = VCVTSI2SSZrr_Int %xmm16, _ + %xmm16 = VCVTSI2SSZrr_Int %xmm16, _ + ; CHECK: %xmm16 = VCVTSS2SDZrm %xmm16, %rdi, 1, _, 0, _ + %xmm16 = VCVTSS2SDZrm %xmm16, %rdi, 1, _, 0, _ + ; CHECK: %xmm16 = VCVTSS2SDZrr %xmm16, _ + %xmm16 = VCVTSS2SDZrr %xmm16, _ + ; CHECK: %rdi = VCVTSS2SI64Zrm %rdi, %xmm16, 1, _, 0 + %rdi = VCVTSS2SI64Zrm %rdi, %xmm16, 1, _, 0 + ; CHECK: %rdi = VCVTSS2SI64Zrr %xmm16 + %rdi = VCVTSS2SI64Zrr %xmm16 + ; CHECK: %edi = VCVTSS2SIZrm %rdi, %xmm16, 1, _, 0 + %edi = VCVTSS2SIZrm %rdi, %xmm16, 1, _, 0 + ; CHECK: %edi = VCVTSS2SIZrr %xmm16 + %edi = VCVTSS2SIZrr %xmm16 + ; CHECK: %rdi = VCVTTSD2SI64Zrm %rdi, %xmm16, 1, _, 0 + %rdi = VCVTTSD2SI64Zrm %rdi, %xmm16, 1, _, 0 + ; CHECK: %rdi = VCVTTSD2SI64Zrm_Int %rdi, %xmm16, 1, _, 0 + %rdi = VCVTTSD2SI64Zrm_Int %rdi, %xmm16, 1, _, 0 + ; CHECK: %rdi = VCVTTSD2SI64Zrr %xmm16 
+ %rdi = VCVTTSD2SI64Zrr %xmm16 + ; CHECK: %rdi = VCVTTSD2SI64Zrr_Int %xmm16 + %rdi = VCVTTSD2SI64Zrr_Int %xmm16 + ; CHECK: %edi = VCVTTSD2SIZrm %rdi, %xmm16, 1, _, 0 + %edi = VCVTTSD2SIZrm %rdi, %xmm16, 1, _, 0 + ; CHECK: %edi = VCVTTSD2SIZrm_Int %rdi, %xmm16, 1, _, 0 + %edi = VCVTTSD2SIZrm_Int %rdi, %xmm16, 1, _, 0 + ; CHECK: %edi = VCVTTSD2SIZrr %xmm16 + %edi = VCVTTSD2SIZrr %xmm16 + ; CHECK: %edi = VCVTTSD2SIZrr_Int %xmm16 + %edi = VCVTTSD2SIZrr_Int %xmm16 + ; CHECK: %rdi = VCVTTSS2SI64Zrm %rdi, %xmm16, 1, _, 0 + %rdi = VCVTTSS2SI64Zrm %rdi, %xmm16, 1, _, 0 + ; CHECK: %rdi = VCVTTSS2SI64Zrm_Int %rdi, %xmm16, 1, _, 0 + %rdi = VCVTTSS2SI64Zrm_Int %rdi, %xmm16, 1, _, 0 + ; CHECK: %rdi = VCVTTSS2SI64Zrr %xmm16 + %rdi = VCVTTSS2SI64Zrr %xmm16 + ; CHECK: %rdi = VCVTTSS2SI64Zrr_Int %xmm16 + %rdi = VCVTTSS2SI64Zrr_Int %xmm16 + ; CHECK: %edi = VCVTTSS2SIZrm %rdi, %xmm16, 1, _, 0 + %edi = VCVTTSS2SIZrm %rdi, %xmm16, 1, _, 0 + ; CHECK: %edi = VCVTTSS2SIZrm_Int %rdi, %xmm16, 1, _, 0 + %edi = VCVTTSS2SIZrm_Int %rdi, %xmm16, 1, _, 0 + ; CHECK: %edi = VCVTTSS2SIZrr %xmm16 + %edi = VCVTTSS2SIZrr %xmm16 + ; CHECK: %edi = VCVTTSS2SIZrr_Int %xmm16 + %edi = VCVTTSS2SIZrr_Int %xmm16 + ; CHECK: %xmm16 = VMOV64toSDZrr %rdi + %xmm16 = VMOV64toSDZrr %rdi + ; CHECK: %xmm16 = VMOVDI2SSZrm %rip, _, _, _, _ + %xmm16 = VMOVDI2SSZrm %rip, _, _, _, _ + ; CHECK: %xmm16 = VMOVDI2SSZrr %eax + %xmm16 = VMOVDI2SSZrr %eax + ; CHECK: VMOVSDZmr %rdi, %xmm16, _, _, _, _ + VMOVSDZmr %rdi, %xmm16, _, _, _, _ + ; CHECK: %xmm16 = VMOVSDZrm %rip, _, _, _, _ + %xmm16 = VMOVSDZrm %rip, _, _, _, _ + ; CHECK: %xmm16 = VMOVSDZrr %xmm16, _ + %xmm16 = VMOVSDZrr %xmm16, _ + ; CHECK: VMOVSSZmr %rdi, %xmm16, _, _, _, _ + VMOVSSZmr %rdi, %xmm16, _, _, _, _ + ; CHECK: %xmm16 = VMOVSSZrm %rip, _, _, _, _ + %xmm16 = VMOVSSZrm %rip, _, _, _, _ + ; CHECK: %xmm16 = VMOVSSZrr %xmm16, _ + %xmm16 = VMOVSSZrr %xmm16, _ + ; CHECK: %xmm16 = VMOVSSZrr_REV %xmm16, _ + %xmm16 = VMOVSSZrr_REV %xmm16, _ + ; CHECK: %xmm16 = 
VMOV64toPQIZrr %rdi + %xmm16 = VMOV64toPQIZrr %rdi + ; CHECK: %xmm16 = VMOV64toSDZrr %rdi + %xmm16 = VMOV64toSDZrr %rdi + ; CHECK: %xmm16 = VMOVDI2PDIZrm %rip, _, _, _, _ + %xmm16 = VMOVDI2PDIZrm %rip, _, _, _, _ + ; CHECK: %xmm16 = VMOVDI2PDIZrr %edi + %xmm16 = VMOVDI2PDIZrr %edi + ; CHECK: %xmm16 = VMOVLHPSZrr %xmm16, _ + %xmm16 = VMOVLHPSZrr %xmm16, _ + ; CHECK: %xmm16 = VMOVHLPSZrr %xmm16, _ + %xmm16 = VMOVHLPSZrr %xmm16, _ + ; CHECK: VMOVPDI2DIZmr %rdi, %xmm16, _, _, _, _ + VMOVPDI2DIZmr %rdi, %xmm16, _, _, _, _ + ; CHECK: %edi = VMOVPDI2DIZrr %xmm16 + %edi = VMOVPDI2DIZrr %xmm16 + ; CHECK: VMOVPQI2QIZmr %rdi, %xmm16, _, _, _, _ + VMOVPQI2QIZmr %rdi, %xmm16, _, _, _, _ + ; CHECK: %rdi = VMOVPQIto64Zrr %xmm16 + %rdi = VMOVPQIto64Zrr %xmm16 + ; CHECK: %xmm16 = VMOVQI2PQIZrm %rip, _, _, _, _ + %xmm16 = VMOVQI2PQIZrm %rip, _, _, _, _ + ; CHECK: %xmm16 = VMOVZPQILo2PQIZrr %xmm16 + %xmm16 = VMOVZPQILo2PQIZrr %xmm16 + ; CHECK: Int_VCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + Int_VCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: Int_VCOMISDZrr %xmm16, %xmm1, implicit-def %eflags + Int_VCOMISDZrr %xmm16, %xmm1, implicit-def %eflags + ; CHECK: Int_VCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + Int_VCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: Int_VCOMISSZrr %xmm16, %xmm1, implicit-def %eflags + Int_VCOMISSZrr %xmm16, %xmm1, implicit-def %eflags + ; CHECK: Int_VUCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + Int_VUCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: Int_VUCOMISDZrr %xmm16, %xmm1, implicit-def %eflags + Int_VUCOMISDZrr %xmm16, %xmm1, implicit-def %eflags + ; CHECK: Int_VUCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + Int_VUCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: Int_VUCOMISSZrr %xmm16, %xmm1, implicit-def %eflags + Int_VUCOMISSZrr %xmm16, %xmm1, implicit-def %eflags + ; CHECK: VCOMISDZrm %xmm16, %rdi, _, _, _, _, 
implicit-def %eflags + VCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: VCOMISDZrr %xmm16, %xmm1, implicit-def %eflags + VCOMISDZrr %xmm16, %xmm1, implicit-def %eflags + ; CHECK: VCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + VCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: VCOMISSZrr %xmm16, %xmm1, implicit-def %eflags + VCOMISSZrr %xmm16, %xmm1, implicit-def %eflags + ; CHECK: VUCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + VUCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: VUCOMISDZrr %xmm16, %xmm1, implicit-def %eflags + VUCOMISDZrr %xmm16, %xmm1, implicit-def %eflags + ; CHECK: VUCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + VUCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags + ; CHECK: VUCOMISSZrr %xmm16, %xmm1, implicit-def %eflags + VUCOMISSZrr %xmm16, %xmm1, implicit-def %eflags + + RET 0, %zmm0, %zmm1 +... Index: test/CodeGen/X86/fast-isel-store.ll =================================================================== --- test/CodeGen/X86/fast-isel-store.ll +++ test/CodeGen/X86/fast-isel-store.ll @@ -80,7 +80,7 @@ ; SKX32-LABEL: test_store_4xi32: ; SKX32: # BB#0: ; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX32-NEXT: vmovdqu64 %xmm0, (%rdi) +; SKX32-NEXT: vmovdqu %xmm0, (%rdi) ; SKX32-NEXT: retq %foo = add <4 x i32> %value, %value2 ; to force integer type on store store <4 x i32> %foo, <4 x i32>* %addr, align 1 @@ -123,7 +123,7 @@ ; SKX32-LABEL: test_store_4xi32_aligned: ; SKX32: # BB#0: ; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; SKX32-NEXT: vmovdqa64 %xmm0, (%rdi) +; SKX32-NEXT: vmovdqa %xmm0, (%rdi) ; SKX32-NEXT: retq %foo = add <4 x i32> %value, %value2 ; to force integer type on store store <4 x i32> %foo, <4 x i32>* %addr, align 16 Index: test/CodeGen/X86/fp-logic-replace.ll =================================================================== --- test/CodeGen/X86/fp-logic-replace.ll +++ test/CodeGen/X86/fp-logic-replace.ll @@ -22,9 +22,8 @@ ; ; 
AVX512DQ-LABEL: FsANDPSrr: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x54,0xc1] +; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1] ; AVX512DQ-NEXT: retq # encoding: [0xc3] -; %bc1 = bitcast double %x to i64 %bc2 = bitcast double %y to i64 %and = and i64 %bc1, %bc2 @@ -46,9 +45,8 @@ ; ; AVX512DQ-LABEL: FsANDNPSrr: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vandnps %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf1,0x74,0x08,0x55,0xc0] +; AVX512DQ-NEXT: vandnps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x55,0xc0] ; AVX512DQ-NEXT: retq # encoding: [0xc3] -; %bc1 = bitcast double %x to i64 %bc2 = bitcast double %y to i64 %not = xor i64 %bc2, -1 @@ -70,9 +68,8 @@ ; ; AVX512DQ-LABEL: FsORPSrr: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x56,0xc1] +; AVX512DQ-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] ; AVX512DQ-NEXT: retq # encoding: [0xc3] -; %bc1 = bitcast double %x to i64 %bc2 = bitcast double %y to i64 %or = or i64 %bc1, %bc2 @@ -93,9 +90,8 @@ ; ; AVX512DQ-LABEL: FsXORPSrr: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x57,0xc1] +; AVX512DQ-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1] ; AVX512DQ-NEXT: retq # encoding: [0xc3] -; %bc1 = bitcast double %x to i64 %bc2 = bitcast double %y to i64 %xor = xor i64 %bc1, %bc2 Index: test/CodeGen/X86/masked_gather_scatter.ll =================================================================== --- test/CodeGen/X86/masked_gather_scatter.ll +++ test/CodeGen/X86/masked_gather_scatter.ll @@ -310,7 +310,7 @@ ; SKX-NEXT: kxnorw %k0, %k0, %k2 ; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} ; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} -; SKX-NEXT: vmovdqa64 %ymm2, %ymm0 +; SKX-NEXT: vmovdqa %ymm2, %ymm0 ; SKX-NEXT: retq ; ; 
SKX_32-LABEL: test6: @@ -319,7 +319,7 @@ ; SKX_32-NEXT: kxnorw %k0, %k0, %k2 ; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm2 {%k2} ; SKX_32-NEXT: vpscatterdd %ymm0, (,%ymm1) {%k1} -; SKX_32-NEXT: vmovdqa64 %ymm2, %ymm0 +; SKX_32-NEXT: vmovdqa %ymm2, %ymm0 ; SKX_32-NEXT: retl %a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> , <8 x i32> undef) @@ -359,7 +359,7 @@ ; SKX-NEXT: kmovb %esi, %k1 ; SKX-NEXT: kmovw %k1, %k2 ; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm1 {%k2} -; SKX-NEXT: vmovdqa64 %ymm1, %ymm2 +; SKX-NEXT: vmovdqa %ymm1, %ymm2 ; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm2 {%k1} ; SKX-NEXT: vpaddd %ymm2, %ymm1, %ymm0 ; SKX-NEXT: retq @@ -370,7 +370,7 @@ ; SKX_32-NEXT: kmovb {{[0-9]+}}(%esp), %k1 ; SKX_32-NEXT: kmovw %k1, %k2 ; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm1 {%k2} -; SKX_32-NEXT: vmovdqa64 %ymm1, %ymm2 +; SKX_32-NEXT: vmovdqa %ymm1, %ymm2 ; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm2 {%k1} ; SKX_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0 ; SKX_32-NEXT: retl @@ -1233,7 +1233,7 @@ ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 ; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1} -; SKX-NEXT: vmovdqa64 %xmm2, %xmm0 +; SKX-NEXT: vmovdqa %xmm2, %xmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test23: @@ -1242,7 +1242,7 @@ ; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1} -; SKX_32-NEXT: vmovdqa64 %xmm2, %xmm0 +; SKX_32-NEXT: vmovdqa %xmm2, %xmm0 ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind @@ -1276,7 +1276,7 @@ ; SKX: # BB#0: ; SKX-NEXT: kxnorw %k0, %k0, %k1 ; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1} -; SKX-NEXT: vmovdqa64 %xmm1, %xmm0 +; SKX-NEXT: vmovdqa %xmm1, %xmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test24: @@ -1284,7 +1284,7 @@ ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: kxnorw %k0, %k0, %k1 ; SKX_32-NEXT: vpgatherqq 
(%eax,%xmm0,8), %xmm1 {%k1} -; SKX_32-NEXT: vmovdqa64 %xmm1, %xmm0 +; SKX_32-NEXT: vmovdqa %xmm1, %xmm0 ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind @@ -1324,7 +1324,7 @@ ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 ; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1} -; SKX-NEXT: vmovdqa64 %xmm2, %xmm0 +; SKX-NEXT: vmovdqa %xmm2, %xmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test25: @@ -1333,7 +1333,7 @@ ; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1} -; SKX_32-NEXT: vmovdqa64 %xmm2, %xmm0 +; SKX_32-NEXT: vmovdqa %xmm2, %xmm0 ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind @@ -1370,7 +1370,7 @@ ; SKX: # BB#0: ; SKX-NEXT: kxnorw %k0, %k0, %k1 ; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1} -; SKX-NEXT: vmovdqa64 %xmm1, %xmm0 +; SKX-NEXT: vmovdqa %xmm1, %xmm0 ; SKX-NEXT: retq ; ; SKX_32-LABEL: test26: @@ -1378,7 +1378,7 @@ ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: kxnorw %k0, %k0, %k1 ; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1} -; SKX_32-NEXT: vmovdqa64 %xmm1, %xmm0 +; SKX_32-NEXT: vmovdqa %xmm1, %xmm0 ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind Index: test/CodeGen/X86/masked_memop.ll =================================================================== --- test/CodeGen/X86/masked_memop.ll +++ test/CodeGen/X86/masked_memop.ll @@ -27,7 +27,7 @@ ; ; SKX-LABEL: test6: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1 ; SKX-NEXT: vmovupd (%rdi), %xmm1 {%k1} ; SKX-NEXT: vmovapd %xmm1, %xmm0 @@ -56,7 +56,7 @@ ; ; SKX-LABEL: test7: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: 
vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1 ; SKX-NEXT: vmovups (%rdi), %xmm1 {%k1} ; SKX-NEXT: vmovaps %xmm1, %xmm0 @@ -93,10 +93,10 @@ ; ; SKX-LABEL: test8: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1 ; SKX-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1} -; SKX-NEXT: vmovdqa64 %xmm1, %xmm0 +; SKX-NEXT: vmovdqa %xmm1, %xmm0 ; SKX-NEXT: retq %mask = icmp eq <4 x i32> %trigger, zeroinitializer %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst) @@ -127,7 +127,7 @@ ; ; SKX-LABEL: test9: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1 ; SKX-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1} ; SKX-NEXT: retq @@ -169,7 +169,7 @@ ; ; SKX-LABEL: test10: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1 ; SKX-NEXT: vmovapd (%rdi), %ymm1 {%k1} ; SKX-NEXT: vmovapd %ymm1, %ymm0 @@ -209,7 +209,7 @@ ; ; SKX-LABEL: test10b: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 ; SKX-NEXT: vmovapd (%rdi), %ymm0 {%k1} {z} ; SKX-NEXT: retq @@ -252,7 +252,7 @@ ; ; SKX-LABEL: test11a: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %ymm2, %ymm2, %ymm2 +; SKX-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1 ; SKX-NEXT: vmovaps (%rdi), %ymm1 {%k1} ; SKX-NEXT: vmovaps %ymm1, %ymm0 @@ -302,7 +302,7 @@ ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 ; SKX-NEXT: vpmovw2m %xmm0, %k1 ; SKX-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1} -; SKX-NEXT: vmovdqa64 %ymm1, %ymm0 +; SKX-NEXT: vmovdqa %ymm1, %ymm0 ; SKX-NEXT: retq %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst) ret <8 x i32> %res @@ -425,7 +425,7 @@ ; ; SKX-LABEL: test12: ; SKX: ## BB#0: -; SKX-NEXT: 
vpxord %ymm2, %ymm2, %ymm2 +; SKX-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1 ; SKX-NEXT: vmovdqu32 %ymm1, (%rdi) {%k1} ; SKX-NEXT: retq @@ -464,7 +464,7 @@ ; ; SKX-LABEL: test14: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] ; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0 ; SKX-NEXT: kshiftlw $14, %k0, %k0 @@ -509,7 +509,7 @@ ; ; SKX-LABEL: test15: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] ; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1 ; SKX-NEXT: vpmovqd %xmm1, (%rdi) {%k1} @@ -552,7 +552,7 @@ ; ; SKX-LABEL: test16: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] ; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0 ; SKX-NEXT: kshiftlw $14, %k0, %k0 @@ -604,7 +604,7 @@ ; ; SKX-LABEL: test17: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2 +; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] ; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0 ; SKX-NEXT: kshiftlw $14, %k0, %k0 @@ -648,7 +648,7 @@ ; ; SKX-LABEL: test18: ; SKX: ## BB#0: -; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ; SKX-NEXT: kshiftlw $14, %k0, %k0 Index: test/CodeGen/X86/nontemporal-2.ll =================================================================== --- test/CodeGen/X86/nontemporal-2.ll +++ test/CodeGen/X86/nontemporal-2.ll @@ -116,7 +116,7 @@ ; ; VLX-LABEL: test_zero_v4f32: ; VLX: # BB#0: -; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; VLX-NEXT: vmovntdq %xmm0, (%rdi) ; VLX-NEXT: retq store <4 x float> zeroinitializer, <4 x 
float>* %dst, align 16, !nontemporal !1 @@ -138,7 +138,7 @@ ; ; VLX-LABEL: test_zero_v4i32: ; VLX: # BB#0: -; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; VLX-NEXT: vmovntdq %xmm0, (%rdi) ; VLX-NEXT: retq store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1 @@ -161,7 +161,7 @@ ; ; VLX-LABEL: test_zero_v2f64: ; VLX: # BB#0: -; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; VLX-NEXT: vmovntdq %xmm0, (%rdi) ; VLX-NEXT: retq store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1 @@ -183,7 +183,7 @@ ; ; VLX-LABEL: test_zero_v2i64: ; VLX: # BB#0: -; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; VLX-NEXT: vmovntdq %xmm0, (%rdi) ; VLX-NEXT: retq store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1 @@ -205,7 +205,7 @@ ; ; VLX-LABEL: test_zero_v8i16: ; VLX: # BB#0: -; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; VLX-NEXT: vmovntdq %xmm0, (%rdi) ; VLX-NEXT: retq store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1 @@ -227,7 +227,7 @@ ; ; VLX-LABEL: test_zero_v16i8: ; VLX: # BB#0: -; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; VLX-NEXT: vmovntdq %xmm0, (%rdi) ; VLX-NEXT: retq store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1 @@ -253,7 +253,7 @@ ; ; VLX-LABEL: test_zero_v8f32: ; VLX: # BB#0: -; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0 +; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0 ; VLX-NEXT: vmovntdq %ymm0, (%rdi) ; VLX-NEXT: retq store <8 x float> zeroinitializer, <8 x float>* %dst, align 32, !nontemporal !1 @@ -277,7 +277,7 @@ ; ; VLX-LABEL: test_zero_v8i32: ; VLX: # BB#0: -; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0 +; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0 ; VLX-NEXT: vmovntdq %ymm0, (%rdi) ; VLX-NEXT: retq store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 32, !nontemporal !1 @@ -301,7 +301,7 @@ ; ; 
VLX-LABEL: test_zero_v4f64: ; VLX: # BB#0: -; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0 +; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0 ; VLX-NEXT: vmovntdq %ymm0, (%rdi) ; VLX-NEXT: retq store <4 x double> zeroinitializer, <4 x double>* %dst, align 32, !nontemporal !1 @@ -325,7 +325,7 @@ ; ; VLX-LABEL: test_zero_v4i64: ; VLX: # BB#0: -; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0 +; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0 ; VLX-NEXT: vmovntdq %ymm0, (%rdi) ; VLX-NEXT: retq store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 32, !nontemporal !1 @@ -349,7 +349,7 @@ ; ; VLX-LABEL: test_zero_v16i16: ; VLX: # BB#0: -; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0 +; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0 ; VLX-NEXT: vmovntdq %ymm0, (%rdi) ; VLX-NEXT: retq store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 32, !nontemporal !1 @@ -373,7 +373,7 @@ ; ; VLX-LABEL: test_zero_v32i8: ; VLX: # BB#0: -; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0 +; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0 ; VLX-NEXT: vmovntdq %ymm0, (%rdi) ; VLX-NEXT: retq store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 32, !nontemporal !1 Index: test/CodeGen/X86/sse-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/sse-intrinsics-x86.ll +++ test/CodeGen/X86/sse-intrinsics-x86.ll @@ -57,7 +57,7 @@ ; ; SKX-LABEL: test_x86_sse_comieq_ss: ; SKX: ## BB#0: -; SKX-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1] +; SKX-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] ; SKX-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0] ; SKX-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1] ; SKX-NEXT: andb %al, %cl ## encoding: [0x20,0xc1] @@ -87,7 +87,7 @@ ; SKX-LABEL: test_x86_sse_comige_ss: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1] +; SKX-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] ; SKX-NEXT: setae %al 
## encoding: [0x0f,0x93,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -114,7 +114,7 @@ ; SKX-LABEL: test_x86_sse_comigt_ss: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1] +; SKX-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] ; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -141,7 +141,7 @@ ; SKX-LABEL: test_x86_sse_comile_ss: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vcomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc8] +; SKX-NEXT: vcomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8] ; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -168,7 +168,7 @@ ; SKX-LABEL: test_x86_sse_comilt_ss: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vcomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc8] +; SKX-NEXT: vcomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8] ; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -198,7 +198,7 @@ ; ; SKX-LABEL: test_x86_sse_comineq_ss: ; SKX: ## BB#0: -; SKX-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1] +; SKX-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1] ; SKX-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0] ; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] ; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] @@ -226,7 +226,7 @@ ; 
SKX-LABEL: test_x86_sse_cvtsi2ss: ; SKX: ## BB#0: ; SKX-NEXT: movl $7, %eax ## encoding: [0xb8,0x07,0x00,0x00,0x00] -; SKX-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x2a,0xc0] +; SKX-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -247,7 +247,7 @@ ; ; SKX-LABEL: test_x86_sse_cvtss2si: ; SKX: ## BB#0: -; SKX-NEXT: vcvtss2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7e,0x08,0x2d,0xc0] +; SKX-NEXT: vcvtss2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; [#uses=1] ret i32 %res @@ -268,7 +268,7 @@ ; ; SKX-LABEL: test_x86_sse_cvttss2si: ; SKX: ## BB#0: -; SKX-NEXT: vcvttss2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7e,0x08,0x2c,0xc0] +; SKX-NEXT: vcvttss2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; [#uses=1] ret i32 %res @@ -308,7 +308,7 @@ ; ; SKX-LABEL: test_x86_sse_max_ps: ; SKX: ## BB#0: -; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5f,0xc1] +; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -345,7 +345,7 @@ ; ; SKX-LABEL: test_x86_sse_min_ps: ; SKX: ## BB#0: -; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5d,0xc1] +; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x 
float>> [#uses=1] ret <4 x float> %res @@ -531,7 +531,7 @@ ; ; SKX-LABEL: test_x86_sse_ucomieq_ss: ; SKX: ## BB#0: -; SKX-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1] +; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] ; SKX-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0] ; SKX-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1] ; SKX-NEXT: andb %al, %cl ## encoding: [0x20,0xc1] @@ -561,7 +561,7 @@ ; SKX-LABEL: test_x86_sse_ucomige_ss: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1] +; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] ; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -588,7 +588,7 @@ ; SKX-LABEL: test_x86_sse_ucomigt_ss: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1] +; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] ; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -615,7 +615,7 @@ ; SKX-LABEL: test_x86_sse_ucomile_ss: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vucomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc8] +; SKX-NEXT: vucomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] ; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -642,7 +642,7 @@ ; SKX-LABEL: test_x86_sse_ucomilt_ss: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] 
-; SKX-NEXT: vucomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc8] +; SKX-NEXT: vucomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] ; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] @@ -672,7 +672,7 @@ ; ; SKX-LABEL: test_x86_sse_ucomineq_ss: ; SKX: ## BB#0: -; SKX-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1] +; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] ; SKX-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0] ; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] ; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] Index: test/CodeGen/X86/sse2-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/sse2-intrinsics-x86.ll +++ test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -56,7 +56,7 @@ ; ; SKX-LABEL: test_x86_sse2_comieq_sd: ; SKX: ## BB#0: -; SKX-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1] +; SKX-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] ; SKX-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0] ; SKX-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1] ; SKX-NEXT: andb %al, %cl ## encoding: [0x20,0xc1] @@ -86,7 +86,7 @@ ; SKX-LABEL: test_x86_sse2_comige_sd: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1] +; SKX-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] ; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -113,7 +113,7 @@ ; SKX-LABEL: test_x86_sse2_comigt_sd: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vcomisd %xmm1, %xmm0 ## 
encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1] +; SKX-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] ; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -140,7 +140,7 @@ ; SKX-LABEL: test_x86_sse2_comile_sd: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vcomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc8] +; SKX-NEXT: vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8] ; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -167,7 +167,7 @@ ; SKX-LABEL: test_x86_sse2_comilt_sd: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vcomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc8] +; SKX-NEXT: vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8] ; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -197,7 +197,7 @@ ; ; SKX-LABEL: test_x86_sse2_comineq_sd: ; SKX: ## BB#0: -; SKX-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1] +; SKX-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] ; SKX-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0] ; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] ; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] @@ -222,7 +222,7 @@ ; ; SKX-LABEL: test_x86_sse2_cvtdq2ps: ; SKX: ## BB#0: -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5b,0xc0] +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> 
@llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -243,7 +243,7 @@ ; ; SKX-LABEL: test_x86_sse2_cvtpd2dq: ; SKX: ## BB#0: -; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0] +; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -264,7 +264,7 @@ ; ; SKX-LABEL: test_mm_cvtpd_epi32_zext: ; SKX: ## BB#0: -; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0] +; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> @@ -286,7 +286,7 @@ ; ; SKX-LABEL: test_x86_sse2_cvtpd2ps: ; SKX: ## BB#0: -; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0] +; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -306,7 +306,7 @@ ; ; SKX-LABEL: test_x86_sse2_cvtpd2ps_zext: ; SKX: ## BB#0: -; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0] +; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> @@ -342,7 +342,7 @@ ; ; SKX-LABEL: test_x86_sse2_cvtsd2si: ; SKX: ## BB#0: -; SKX-NEXT: vcvtsd2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7f,0x08,0x2d,0xc0] +; SKX-NEXT: vcvtsd2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0] ; 
SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; [#uses=1] ret i32 %res @@ -415,7 +415,7 @@ ; ; SKX-LABEL: test_x86_sse2_cvtsi2sd: ; SKX: ## BB#0: -; SKX-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x2a,0x44,0x24,0x01] +; SKX-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res @@ -488,7 +488,7 @@ ; ; SKX-LABEL: test_x86_sse2_cvttpd2dq: ; SKX: ## BB#0: -; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0] +; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -509,7 +509,7 @@ ; ; SKX-LABEL: test_mm_cvttpd_epi32_zext: ; SKX: ## BB#0: -; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0] +; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> @@ -531,7 +531,7 @@ ; ; SKX-LABEL: test_x86_sse2_cvttps2dq: ; SKX: ## BB#0: -; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5b,0xc0] +; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -552,7 +552,7 @@ ; ; SKX-LABEL: test_x86_sse2_cvttsd2si: ; SKX: ## BB#0: -; SKX-NEXT: vcvttsd2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7f,0x08,0x2c,0xc0] +; SKX-NEXT: vcvttsd2si 
%xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; [#uses=1] ret i32 %res @@ -573,7 +573,7 @@ ; ; SKX-LABEL: test_x86_sse2_max_pd: ; SKX: ## BB#0: -; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5f,0xc1] +; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res @@ -610,7 +610,7 @@ ; ; SKX-LABEL: test_x86_sse2_min_pd: ; SKX: ## BB#0: -; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5d,0xc1] +; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res @@ -665,7 +665,7 @@ ; ; SKX-LABEL: test_x86_sse2_packssdw_128: ; SKX: ## BB#0: -; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0xc1] +; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -686,7 +686,7 @@ ; ; SKX-LABEL: test_x86_sse2_packsswb_128: ; SKX: ## BB#0: -; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x63,0xc1] +; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -707,7 +707,7 @@ ; ; SKX-LABEL: test_x86_sse2_packuswb_128: ; SKX: ## BB#0: -; SKX-NEXT: 
vpackuswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x67,0xc1] +; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -728,7 +728,7 @@ ; ; SKX-LABEL: test_x86_sse2_padds_b: ; SKX: ## BB#0: -; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xec,0xc1] +; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -749,7 +749,7 @@ ; ; SKX-LABEL: test_x86_sse2_padds_w: ; SKX: ## BB#0: -; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xed,0xc1] +; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -770,7 +770,7 @@ ; ; SKX-LABEL: test_x86_sse2_paddus_b: ; SKX: ## BB#0: -; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdc,0xc1] +; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -791,7 +791,7 @@ ; ; SKX-LABEL: test_x86_sse2_paddus_w: ; SKX: ## BB#0: -; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdd,0xc1] +; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> 
%res @@ -812,7 +812,7 @@ ; ; SKX-LABEL: test_x86_sse2_pavg_b: ; SKX: ## BB#0: -; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe0,0xc1] +; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -833,7 +833,7 @@ ; ; SKX-LABEL: test_x86_sse2_pavg_w: ; SKX: ## BB#0: -; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe3,0xc1] +; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -854,7 +854,7 @@ ; ; SKX-LABEL: test_x86_sse2_pmadd_wd: ; SKX: ## BB#0: -; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf5,0xc1] +; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -875,7 +875,7 @@ ; ; SKX-LABEL: test_x86_sse2_pmaxs_w: ; SKX: ## BB#0: -; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xee,0xc1] +; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -896,7 +896,7 @@ ; ; SKX-LABEL: test_x86_sse2_pmaxu_b: ; SKX: ## BB#0: -; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xde,0xc1] +; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> 
@llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -917,7 +917,7 @@ ; ; SKX-LABEL: test_x86_sse2_pmins_w: ; SKX: ## BB#0: -; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xea,0xc1] +; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -938,7 +938,7 @@ ; ; SKX-LABEL: test_x86_sse2_pminu_b: ; SKX: ## BB#0: -; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xda,0xc1] +; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -975,7 +975,7 @@ ; ; SKX-LABEL: test_x86_sse2_pmulh_w: ; SKX: ## BB#0: -; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe5,0xc1] +; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -996,7 +996,7 @@ ; ; SKX-LABEL: test_x86_sse2_pmulhu_w: ; SKX: ## BB#0: -; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe4,0xc1] +; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1017,7 +1017,7 @@ ; ; SKX-LABEL: test_x86_sse2_pmulu_dq: ; SKX: ## BB#0: -; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf4,0xc1] +; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression 
encoding: [0xc5,0xf9,0xf4,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -1038,7 +1038,7 @@ ; ; SKX-LABEL: test_x86_sse2_psad_bw: ; SKX: ## BB#0: -; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf6,0xc1] +; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -1059,7 +1059,7 @@ ; ; SKX-LABEL: test_x86_sse2_psll_d: ; SKX: ## BB#0: -; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf2,0xc1] +; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1080,7 +1080,7 @@ ; ; SKX-LABEL: test_x86_sse2_psll_q: ; SKX: ## BB#0: -; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf3,0xc1] +; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -1101,7 +1101,7 @@ ; ; SKX-LABEL: test_x86_sse2_psll_w: ; SKX: ## BB#0: -; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf1,0xc1] +; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1122,7 +1122,7 @@ ; ; SKX-LABEL: test_x86_sse2_pslli_d: ; SKX: ## BB#0: -; SKX-NEXT: vpslld $7, %xmm0, %xmm0 ## encoding: 
[0x62,0xf1,0x7d,0x08,0x72,0xf0,0x07] +; SKX-NEXT: vpslld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x07] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1143,7 +1143,7 @@ ; ; SKX-LABEL: test_x86_sse2_pslli_q: ; SKX: ## BB#0: -; SKX-NEXT: vpsllq $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x73,0xf0,0x07] +; SKX-NEXT: vpsllq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x07] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -1164,7 +1164,7 @@ ; ; SKX-LABEL: test_x86_sse2_pslli_w: ; SKX: ## BB#0: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xf0,0x07] +; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x07] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1185,7 +1185,7 @@ ; ; SKX-LABEL: test_x86_sse2_psra_d: ; SKX: ## BB#0: -; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe2,0xc1] +; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1206,7 +1206,7 @@ ; ; SKX-LABEL: test_x86_sse2_psra_w: ; SKX: ## BB#0: -; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe1,0xc1] +; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1227,7 +1227,7 @@ ; ; SKX-LABEL: test_x86_sse2_psrai_d: ; SKX: 
## BB#0: -; SKX-NEXT: vpsrad $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xe0,0x07] +; SKX-NEXT: vpsrad $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x07] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1248,7 +1248,7 @@ ; ; SKX-LABEL: test_x86_sse2_psrai_w: ; SKX: ## BB#0: -; SKX-NEXT: vpsraw $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xe0,0x07] +; SKX-NEXT: vpsraw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x07] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1269,7 +1269,7 @@ ; ; SKX-LABEL: test_x86_sse2_psrl_d: ; SKX: ## BB#0: -; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd2,0xc1] +; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1290,7 +1290,7 @@ ; ; SKX-LABEL: test_x86_sse2_psrl_q: ; SKX: ## BB#0: -; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd3,0xc1] +; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -1311,7 +1311,7 @@ ; ; SKX-LABEL: test_x86_sse2_psrl_w: ; SKX: ## BB#0: -; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd1,0xc1] +; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ 
-1332,7 +1332,7 @@ ; ; SKX-LABEL: test_x86_sse2_psrli_d: ; SKX: ## BB#0: -; SKX-NEXT: vpsrld $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xd0,0x07] +; SKX-NEXT: vpsrld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x07] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -1353,7 +1353,7 @@ ; ; SKX-LABEL: test_x86_sse2_psrli_q: ; SKX: ## BB#0: -; SKX-NEXT: vpsrlq $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x73,0xd0,0x07] +; SKX-NEXT: vpsrlq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x07] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res @@ -1374,7 +1374,7 @@ ; ; SKX-LABEL: test_x86_sse2_psrli_w: ; SKX: ## BB#0: -; SKX-NEXT: vpsrlw $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xd0,0x07] +; SKX-NEXT: vpsrlw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x07] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1395,7 +1395,7 @@ ; ; SKX-LABEL: test_x86_sse2_psubs_b: ; SKX: ## BB#0: -; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe8,0xc1] +; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -1416,7 +1416,7 @@ ; ; SKX-LABEL: test_x86_sse2_psubs_w: ; SKX: ## BB#0: -; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe9,0xc1] +; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x 
i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1437,7 +1437,7 @@ ; ; SKX-LABEL: test_x86_sse2_psubus_b: ; SKX: ## BB#0: -; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd8,0xc1] +; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -1458,7 +1458,7 @@ ; ; SKX-LABEL: test_x86_sse2_psubus_w: ; SKX: ## BB#0: -; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd9,0xc1] +; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -1516,7 +1516,7 @@ ; SKX-LABEL: test_x86_sse2_sqrt_sd_vec_load: ; SKX: ## BB#0: ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; SKX-NEXT: vmovaps (%eax), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x00] +; SKX-NEXT: vmovaps (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00] ; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %a1 = load <2 x double>, <2 x double>* %a0, align 16 @@ -1546,7 +1546,7 @@ ; ; SKX-LABEL: test_x86_sse2_ucomieq_sd: ; SKX: ## BB#0: -; SKX-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc1] +; SKX-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1] ; SKX-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0] ; SKX-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1] ; SKX-NEXT: andb %al, %cl ## encoding: [0x20,0xc1] @@ -1576,7 +1576,7 @@ ; SKX-LABEL: test_x86_sse2_ucomige_sd: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vucomisd %xmm1, %xmm0 ## encoding: 
[0x62,0xf1,0xfd,0x08,0x2e,0xc1] +; SKX-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1] ; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -1603,7 +1603,7 @@ ; SKX-LABEL: test_x86_sse2_ucomigt_sd: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc1] +; SKX-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1] ; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -1630,7 +1630,7 @@ ; SKX-LABEL: test_x86_sse2_ucomile_sd: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vucomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc8] +; SKX-NEXT: vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8] ; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -1657,7 +1657,7 @@ ; SKX-LABEL: test_x86_sse2_ucomilt_sd: ; SKX: ## BB#0: ; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] -; SKX-NEXT: vucomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc8] +; SKX-NEXT: vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8] ; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] @@ -1687,7 +1687,7 @@ ; ; SKX-LABEL: test_x86_sse2_ucomineq_sd: ; SKX: ## BB#0: -; SKX-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc1] +; SKX-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xf9,0x2e,0xc1] ; SKX-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0] ; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1] ; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1] Index: test/CodeGen/X86/sse41-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/sse41-intrinsics-x86.ll +++ test/CodeGen/X86/sse41-intrinsics-x86.ll @@ -127,7 +127,7 @@ ; ; SKX-LABEL: test_x86_sse41_packusdw: ; SKX: ## BB#0: -; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2b,0xc1] +; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -183,7 +183,7 @@ ; ; SKX-LABEL: test_x86_sse41_pmaxsb: ; SKX: ## BB#0: -; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3c,0xc1] +; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3c,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -204,7 +204,7 @@ ; ; SKX-LABEL: test_x86_sse41_pmaxsd: ; SKX: ## BB#0: -; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3d,0xc1] +; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3d,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -225,7 +225,7 @@ ; ; SKX-LABEL: test_x86_sse41_pmaxud: ; SKX: ## BB#0: -; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3f,0xc1] +; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3f,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 
x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -246,7 +246,7 @@ ; ; SKX-LABEL: test_x86_sse41_pmaxuw: ; SKX: ## BB#0: -; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3e,0xc1] +; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -267,7 +267,7 @@ ; ; SKX-LABEL: test_x86_sse41_pminsb: ; SKX: ## BB#0: -; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x38,0xc1] +; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x38,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -288,7 +288,7 @@ ; ; SKX-LABEL: test_x86_sse41_pminsd: ; SKX: ## BB#0: -; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x39,0xc1] +; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x39,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -309,7 +309,7 @@ ; ; SKX-LABEL: test_x86_sse41_pminud: ; SKX: ## BB#0: -; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3b,0xc1] +; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3b,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -330,7 +330,7 @@ ; ; SKX-LABEL: test_x86_sse41_pminuw: ; SKX: ## BB#0: -; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3a,0xc1] +; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: 
[0xc4,0xe2,0x79,0x3a,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -351,7 +351,7 @@ ; ; SKX-LABEL: test_x86_sse41_pmuldq: ; SKX: ## BB#0: -; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1] +; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res Index: test/CodeGen/X86/sse42-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/sse42-intrinsics-x86.ll +++ test/CodeGen/X86/sse42-intrinsics-x86.ll @@ -52,7 +52,7 @@ ; SKX: ## BB#0: ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; SKX-NEXT: vmovdqu8 (%eax), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x00] +; SKX-NEXT: vmovdqu (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x00] ; SKX-NEXT: movl $7, %eax ## encoding: [0xb8,0x07,0x00,0x00,0x00] ; SKX-NEXT: movl $7, %edx ## encoding: [0xba,0x07,0x00,0x00,0x00] ; SKX-NEXT: vpcmpestri $7, (%ecx), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x61,0x01,0x07] @@ -292,7 +292,7 @@ ; SKX: ## BB#0: ; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08] ; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04] -; SKX-NEXT: vmovdqu8 (%ecx), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x01] +; SKX-NEXT: vmovdqu (%ecx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] ; SKX-NEXT: vpcmpistri $7, (%eax), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x63,0x00,0x07] ; SKX-NEXT: movl %ecx, %eax ## encoding: [0x89,0xc8] ; SKX-NEXT: retl ## encoding: [0xc3] Index: test/CodeGen/X86/ssse3-intrinsics-x86.ll 
=================================================================== --- test/CodeGen/X86/ssse3-intrinsics-x86.ll +++ test/CodeGen/X86/ssse3-intrinsics-x86.ll @@ -16,7 +16,7 @@ ; ; SKX-LABEL: test_x86_ssse3_pabs_b_128: ; SKX: ## BB#0: -; SKX-NEXT: vpabsb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1c,0xc0] +; SKX-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res @@ -37,7 +37,7 @@ ; ; SKX-LABEL: test_x86_ssse3_pabs_d_128: ; SKX: ## BB#0: -; SKX-NEXT: vpabsd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1e,0xc0] +; SKX-NEXT: vpabsd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res @@ -58,7 +58,7 @@ ; ; SKX-LABEL: test_x86_ssse3_pabs_w_128: ; SKX: ## BB#0: -; SKX-NEXT: vpabsw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1d,0xc0] +; SKX-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -175,7 +175,7 @@ ; ; SKX-LABEL: test_x86_ssse3_pmadd_ub_sw_128: ; SKX: ## BB#0: -; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x04,0xc1] +; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -196,7 +196,7 @@ ; ; SKX-LABEL: test_x86_ssse3_pmul_hr_sw_128: ; SKX: ## BB#0: -; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x0b,0xc1] +; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 
## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -217,7 +217,7 @@ ; ; SKX-LABEL: test_x86_ssse3_pshuf_b_128: ; SKX: ## BB#0: -; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x00,0xc1] +; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1] ; SKX-NEXT: retl ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res Index: test/CodeGen/X86/subvector-broadcast.ll =================================================================== --- test/CodeGen/X86/subvector-broadcast.ll +++ test/CodeGen/X86/subvector-broadcast.ll @@ -558,7 +558,7 @@ ; X32-AVX512F: ## BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3] -; X32-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1 +; X32-AVX512F-NEXT: vmovdqa %ymm0, %ymm1 ; X32-AVX512F-NEXT: retl ; ; X32-AVX512BW-LABEL: test_broadcast_8i16_32i16: @@ -571,7 +571,7 @@ ; X32-AVX512DQ: ## BB#0: ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3] -; X32-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1 +; X32-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1 ; X32-AVX512DQ-NEXT: retl ; ; X64-AVX1-LABEL: test_broadcast_8i16_32i16: @@ -589,7 +589,7 @@ ; X64-AVX512F-LABEL: test_broadcast_8i16_32i16: ; X64-AVX512F: ## BB#0: ; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3] -; X64-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1 +; X64-AVX512F-NEXT: vmovdqa %ymm0, %ymm1 ; X64-AVX512F-NEXT: retq ; ; X64-AVX512BW-LABEL: test_broadcast_8i16_32i16: @@ -600,7 +600,7 @@ ; X64-AVX512DQ-LABEL: test_broadcast_8i16_32i16: ; X64-AVX512DQ: ## BB#0: ; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = 
mem[0,1,2,3,0,1,2,3] -; X64-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1 +; X64-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1 ; X64-AVX512DQ-NEXT: retq %1 = load <8 x i16>, <8 x i16> *%p %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <32 x i32> @@ -708,7 +708,7 @@ ; X32-AVX512F: ## BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3] -; X32-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1 +; X32-AVX512F-NEXT: vmovdqa %ymm0, %ymm1 ; X32-AVX512F-NEXT: retl ; ; X32-AVX512BW-LABEL: test_broadcast_16i8_64i8: @@ -721,7 +721,7 @@ ; X32-AVX512DQ: ## BB#0: ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3] -; X32-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1 +; X32-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1 ; X32-AVX512DQ-NEXT: retl ; ; X64-AVX1-LABEL: test_broadcast_16i8_64i8: @@ -739,7 +739,7 @@ ; X64-AVX512F-LABEL: test_broadcast_16i8_64i8: ; X64-AVX512F: ## BB#0: ; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3] -; X64-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1 +; X64-AVX512F-NEXT: vmovdqa %ymm0, %ymm1 ; X64-AVX512F-NEXT: retq ; ; X64-AVX512BW-LABEL: test_broadcast_16i8_64i8: @@ -750,7 +750,7 @@ ; X64-AVX512DQ-LABEL: test_broadcast_16i8_64i8: ; X64-AVX512DQ: ## BB#0: ; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3] -; X64-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1 +; X64-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1 ; X64-AVX512DQ-NEXT: retq %1 = load <16 x i8>, <16 x i8> *%p %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <64 x i32> @@ -900,8 +900,8 @@ ; X32-AVX512F: ## BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512F-NEXT: vmovdqa64 (%ecx), %xmm0 -; X32-AVX512F-NEXT: vmovdqa64 %xmm0, (%eax) +; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax) ; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512F-NEXT: retl ; @@ -909,8 
+909,8 @@ ; X32-AVX512BW: ## BB#0: ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512BW-NEXT: vmovdqa64 (%ecx), %xmm0 -; X32-AVX512BW-NEXT: vmovdqa64 %xmm0, (%eax) +; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512BW-NEXT: vmovdqa %xmm0, (%eax) ; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512BW-NEXT: retl ; @@ -918,8 +918,8 @@ ; X32-AVX512DQ: ## BB#0: ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512DQ-NEXT: vmovdqa64 (%ecx), %xmm0 -; X32-AVX512DQ-NEXT: vmovdqa64 %xmm0, (%eax) +; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax) ; X32-AVX512DQ-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512DQ-NEXT: retl ; @@ -932,22 +932,22 @@ ; ; X64-AVX512F-LABEL: test_broadcast_2i64_4i64_reuse: ; X64-AVX512F: ## BB#0: -; X64-AVX512F-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512F-NEXT: vmovdqa64 %xmm0, (%rsi) +; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi) ; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512F-NEXT: retq ; ; X64-AVX512BW-LABEL: test_broadcast_2i64_4i64_reuse: ; X64-AVX512BW: ## BB#0: -; X64-AVX512BW-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512BW-NEXT: vmovdqa64 %xmm0, (%rsi) +; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) ; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512BW-NEXT: retq ; ; X64-AVX512DQ-LABEL: test_broadcast_2i64_4i64_reuse: ; X64-AVX512DQ: ## BB#0: -; X64-AVX512DQ-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512DQ-NEXT: vmovdqa64 %xmm0, (%rsi) +; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi) ; X64-AVX512DQ-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512DQ-NEXT: retq %1 = load <2 x i64>, <2 x i64>* %p0 @@ -1008,8 +1008,8 @@ ; X32-AVX512: ## BB#0: ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ; 
X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512-NEXT: vmovdqa32 (%ecx), %xmm0 -; X32-AVX512-NEXT: vmovdqa32 %xmm0, (%eax) +; X32-AVX512-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512-NEXT: vmovdqa %xmm0, (%eax) ; X32-AVX512-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512-NEXT: retl ; @@ -1022,8 +1022,8 @@ ; ; X64-AVX512-LABEL: test_broadcast_4i32_8i32_reuse: ; X64-AVX512: ## BB#0: -; X64-AVX512-NEXT: vmovdqa32 (%rdi), %xmm0 -; X64-AVX512-NEXT: vmovdqa32 %xmm0, (%rsi) +; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512-NEXT: vmovdqa %xmm0, (%rsi) ; X64-AVX512-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512-NEXT: retq %1 = load <4 x i32>, <4 x i32>* %p0 @@ -1046,8 +1046,8 @@ ; X32-AVX512F: ## BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512F-NEXT: vmovdqa64 (%ecx), %xmm0 -; X32-AVX512F-NEXT: vmovdqa32 %xmm0, (%eax) +; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax) ; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512F-NEXT: retl ; @@ -1055,8 +1055,8 @@ ; X32-AVX512BW: ## BB#0: ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512BW-NEXT: vmovdqu16 (%ecx), %xmm0 -; X32-AVX512BW-NEXT: vmovdqu16 %xmm0, (%eax) +; X32-AVX512BW-NEXT: vmovdqu (%ecx), %xmm0 +; X32-AVX512BW-NEXT: vmovdqu %xmm0, (%eax) ; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512BW-NEXT: retl ; @@ -1064,8 +1064,8 @@ ; X32-AVX512DQ: ## BB#0: ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512DQ-NEXT: vmovdqa64 (%ecx), %xmm0 -; X32-AVX512DQ-NEXT: vmovdqa32 %xmm0, (%eax) +; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax) ; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512DQ-NEXT: retl ; @@ -1078,22 +1078,22 @@ ; ; X64-AVX512F-LABEL: test_broadcast_8i16_16i16_reuse: ; 
X64-AVX512F: ## BB#0: -; X64-AVX512F-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512F-NEXT: vmovdqa32 %xmm0, (%rsi) +; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi) ; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512F-NEXT: retq ; ; X64-AVX512BW-LABEL: test_broadcast_8i16_16i16_reuse: ; X64-AVX512BW: ## BB#0: -; X64-AVX512BW-NEXT: vmovdqu16 (%rdi), %xmm0 -; X64-AVX512BW-NEXT: vmovdqu16 %xmm0, (%rsi) +; X64-AVX512BW-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX512BW-NEXT: vmovdqu %xmm0, (%rsi) ; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512BW-NEXT: retq ; ; X64-AVX512DQ-LABEL: test_broadcast_8i16_16i16_reuse: ; X64-AVX512DQ: ## BB#0: -; X64-AVX512DQ-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512DQ-NEXT: vmovdqa32 %xmm0, (%rsi) +; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi) ; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512DQ-NEXT: retq %1 = load <8 x i16>, <8 x i16> *%p0 @@ -1116,8 +1116,8 @@ ; X32-AVX512F: ## BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512F-NEXT: vmovdqa64 (%ecx), %xmm0 -; X32-AVX512F-NEXT: vmovdqa32 %xmm0, (%eax) +; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax) ; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512F-NEXT: retl ; @@ -1125,8 +1125,8 @@ ; X32-AVX512BW: ## BB#0: ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512BW-NEXT: vmovdqu8 (%ecx), %xmm0 -; X32-AVX512BW-NEXT: vmovdqu8 %xmm0, (%eax) +; X32-AVX512BW-NEXT: vmovdqu (%ecx), %xmm0 +; X32-AVX512BW-NEXT: vmovdqu %xmm0, (%eax) ; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512BW-NEXT: retl ; @@ -1134,8 +1134,8 @@ ; X32-AVX512DQ: ## BB#0: ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512DQ-NEXT: vmovdqa64 
(%ecx), %xmm0 -; X32-AVX512DQ-NEXT: vmovdqa32 %xmm0, (%eax) +; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax) ; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512DQ-NEXT: retl ; @@ -1148,22 +1148,22 @@ ; ; X64-AVX512F-LABEL: test_broadcast_16i8_32i8_reuse: ; X64-AVX512F: ## BB#0: -; X64-AVX512F-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512F-NEXT: vmovdqa32 %xmm0, (%rsi) +; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi) ; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512F-NEXT: retq ; ; X64-AVX512BW-LABEL: test_broadcast_16i8_32i8_reuse: ; X64-AVX512BW: ## BB#0: -; X64-AVX512BW-NEXT: vmovdqu8 (%rdi), %xmm0 -; X64-AVX512BW-NEXT: vmovdqu8 %xmm0, (%rsi) +; X64-AVX512BW-NEXT: vmovdqu (%rdi), %xmm0 +; X64-AVX512BW-NEXT: vmovdqu %xmm0, (%rsi) ; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512BW-NEXT: retq ; ; X64-AVX512DQ-LABEL: test_broadcast_16i8_32i8_reuse: ; X64-AVX512DQ: ## BB#0: -; X64-AVX512DQ-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512DQ-NEXT: vmovdqa32 %xmm0, (%rsi) +; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi) ; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512DQ-NEXT: retq %1 = load <16 x i8>, <16 x i8> *%p0 @@ -1191,9 +1191,9 @@ ; X32-AVX512F: ## BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512F-NEXT: vmovdqa64 (%ecx), %xmm0 -; X32-AVX512F-NEXT: vpxord %xmm1, %xmm1, %xmm1 -; X32-AVX512F-NEXT: vmovdqa32 %xmm1, (%eax) +; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax) ; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512F-NEXT: retl ; @@ -1201,9 +1201,9 @@ ; X32-AVX512BW: ## BB#0: ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512BW-NEXT: vmovdqa64 
(%ecx), %xmm0 -; X32-AVX512BW-NEXT: vpxord %xmm1, %xmm1, %xmm1 -; X32-AVX512BW-NEXT: vmovdqa32 %xmm1, (%eax) +; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax) ; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512BW-NEXT: retl ; @@ -1211,7 +1211,7 @@ ; X32-AVX512DQ: ## BB#0: ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512DQ-NEXT: vmovdqa64 (%ecx), %xmm0 +; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0 ; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax) ; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 @@ -1227,23 +1227,23 @@ ; ; X64-AVX512F-LABEL: test_broadcast_4i32_8i32_chain: ; X64-AVX512F: ## BB#0: -; X64-AVX512F-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512F-NEXT: vpxord %xmm1, %xmm1, %xmm1 -; X64-AVX512F-NEXT: vmovdqa32 %xmm1, (%rsi) +; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi) ; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512F-NEXT: retq ; ; X64-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain: ; X64-AVX512BW: ## BB#0: -; X64-AVX512BW-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512BW-NEXT: vpxord %xmm1, %xmm1, %xmm1 -; X64-AVX512BW-NEXT: vmovdqa32 %xmm1, (%rsi) +; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) ; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512BW-NEXT: retq ; ; X64-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain: ; X64-AVX512DQ: ## BB#0: -; X64-AVX512DQ-NEXT: vmovdqa64 (%rdi), %xmm0 +; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 ; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi) ; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 @@ -1270,9 +1270,9 @@ ; X32-AVX512F: ## BB#0: ; X32-AVX512F-NEXT: 
movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512F-NEXT: vmovdqa64 (%ecx), %xmm0 -; X32-AVX512F-NEXT: vpxord %xmm1, %xmm1, %xmm1 -; X32-AVX512F-NEXT: vmovdqa32 %xmm1, (%eax) +; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax) ; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0 ; X32-AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; X32-AVX512F-NEXT: retl @@ -1281,9 +1281,9 @@ ; X32-AVX512BW: ## BB#0: ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512BW-NEXT: vmovdqa64 (%ecx), %xmm0 -; X32-AVX512BW-NEXT: vpxord %xmm1, %xmm1, %xmm1 -; X32-AVX512BW-NEXT: vmovdqa32 %xmm1, (%eax) +; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax) ; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0 ; X32-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; X32-AVX512BW-NEXT: retl @@ -1292,7 +1292,7 @@ ; X32-AVX512DQ: ## BB#0: ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512DQ-NEXT: vmovdqa64 (%ecx), %xmm0 +; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0 ; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax) ; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0 @@ -1310,25 +1310,25 @@ ; ; X64-AVX512F-LABEL: test_broadcast_4i32_16i32_chain: ; X64-AVX512F: ## BB#0: -; X64-AVX512F-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512F-NEXT: vpxord %xmm1, %xmm1, %xmm1 -; X64-AVX512F-NEXT: vmovdqa32 %xmm1, (%rsi) +; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi) ; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0 ; X64-AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; X64-AVX512F-NEXT: retq ; ; X64-AVX512BW-LABEL: 
test_broadcast_4i32_16i32_chain: ; X64-AVX512BW: ## BB#0: -; X64-AVX512BW-NEXT: vmovdqa64 (%rdi), %xmm0 -; X64-AVX512BW-NEXT: vpxord %xmm1, %xmm1, %xmm1 -; X64-AVX512BW-NEXT: vmovdqa32 %xmm1, (%rsi) +; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) ; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0 ; X64-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; X64-AVX512BW-NEXT: retq ; ; X64-AVX512DQ-LABEL: test_broadcast_4i32_16i32_chain: ; X64-AVX512DQ: ## BB#0: -; X64-AVX512DQ-NEXT: vmovdqa64 (%rdi), %xmm0 +; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 ; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi) ; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0 @@ -1355,18 +1355,18 @@ ; X32-AVX512-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,2,0,3,0,4,0,1,0,2,0,3,0,4,0] ; X32-AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm1 ; X32-AVX512-NEXT: vpandq %zmm2, %zmm1, %zmm1 -; X32-AVX512-NEXT: vmovdqu64 %ymm0, _ga4 +; X32-AVX512-NEXT: vmovdqu %ymm0, _ga4 ; X32-AVX512-NEXT: vmovdqu64 %zmm1, _gb4 ; X32-AVX512-NEXT: retl ; ; X64-AVX512-LABEL: fallback_broadcast_v4i64_to_v8i64: ; X64-AVX512: ## BB#0: ## %entry -; X64-AVX512-NEXT: vmovdqa64 {{.*#+}} ymm2 = [1,2,3,4] +; X64-AVX512-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,3,4] ; X64-AVX512-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ; X64-AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 ; X64-AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm1 ; X64-AVX512-NEXT: vpandq %zmm2, %zmm1, %zmm1 -; X64-AVX512-NEXT: vmovdqu64 %ymm0, {{.*}}(%rip) +; X64-AVX512-NEXT: vmovdqu %ymm0, {{.*}}(%rip) ; X64-AVX512-NEXT: vmovdqu64 %zmm1, {{.*}}(%rip) ; X64-AVX512-NEXT: retq entry: Index: test/CodeGen/X86/vec-copysign-avx512.ll =================================================================== --- test/CodeGen/X86/vec-copysign-avx512.ll +++ test/CodeGen/X86/vec-copysign-avx512.ll @@ -7,7 +7,7 @@ ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, 
%xmm1, %xmm1 ; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512VL-NEXT: vporq %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512VLDQ-LABEL: v4f32: @@ -25,7 +25,7 @@ ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; AVX512VL-NEXT: vporq %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512VLDQ-LABEL: v8f32: @@ -59,9 +59,9 @@ define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind { ; AVX512VL-LABEL: v2f64: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vporq %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512VLDQ-LABEL: v2f64: @@ -79,7 +79,7 @@ ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0 -; AVX512VL-NEXT: vporq %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512VLDQ-LABEL: v4f64: Index: test/CodeGen/X86/vec_fabs.ll =================================================================== --- test/CodeGen/X86/vec_fabs.ll +++ test/CodeGen/X86/vec_fabs.ll @@ -17,7 +17,7 @@ ; ; X32_AVX512VL-LABEL: fabs_v2f64: ; X32_AVX512VL: # BB#0: -; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}, %xmm0, %xmm0 +; X32_AVX512VL-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0 ; X32_AVX512VL-NEXT: retl ; ; X32_AVX512VLDQ-LABEL: fabs_v2f64: @@ -32,7 +32,7 @@ ; ; X64_AVX512VL-LABEL: fabs_v2f64: ; X64_AVX512VL: # BB#0: -; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0 +; X64_AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; X64_AVX512VL-NEXT: retq ; ; X64_AVX512VLDQ-LABEL: fabs_v2f64: Index: test/CodeGen/X86/vec_fp_to_int.ll 
=================================================================== --- test/CodeGen/X86/vec_fp_to_int.ll +++ test/CodeGen/X86/vec_fp_to_int.ll @@ -2468,7 +2468,7 @@ ; AVX512VL-NEXT: movq %rcx, %rsi ; AVX512VL-NEXT: callq __fixtfdi ; AVX512VL-NEXT: vmovq %rax, %xmm0 -; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill ; AVX512VL-NEXT: movq %rbx, %rdi ; AVX512VL-NEXT: movq %r14, %rsi ; AVX512VL-NEXT: callq __fixtfdi @@ -2516,7 +2516,7 @@ ; AVX512VLDQ-NEXT: movq %rcx, %rsi ; AVX512VLDQ-NEXT: callq __fixtfdi ; AVX512VLDQ-NEXT: vmovq %rax, %xmm0 -; AVX512VLDQ-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill +; AVX512VLDQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill ; AVX512VLDQ-NEXT: movq %rbx, %rdi ; AVX512VLDQ-NEXT: movq %r14, %rsi ; AVX512VLDQ-NEXT: callq __fixtfdi Index: test/CodeGen/X86/vec_fpext.ll =================================================================== --- test/CodeGen/X86/vec_fpext.ll +++ test/CodeGen/X86/vec_fpext.ll @@ -28,8 +28,8 @@ ; X32-AVX512VL: # BB#0: # %entry ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] -; X32-AVX512VL-NEXT: vcvtps2pd (%ecx), %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x5a,0x01] -; X32-AVX512VL-NEXT: vmovups %xmm0, (%eax) # encoding: [0x62,0xf1,0x7c,0x08,0x11,0x00] +; X32-AVX512VL-NEXT: vcvtps2pd (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x01] +; X32-AVX512VL-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: fpext_frommem: @@ -46,8 +46,8 @@ ; ; X64-AVX512VL-LABEL: fpext_frommem: ; X64-AVX512VL: # BB#0: # %entry -; X64-AVX512VL-NEXT: vcvtps2pd (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x5a,0x07] -; X64-AVX512VL-NEXT: vmovups %xmm0, (%rsi) # encoding: [0x62,0xf1,0x7c,0x08,0x11,0x06] +; X64-AVX512VL-NEXT: vcvtps2pd (%rdi), %xmm0 
# EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x07] +; X64-AVX512VL-NEXT: vmovups %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x06] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] entry: %0 = load <2 x float>, <2 x float>* %in, align 8 @@ -80,8 +80,8 @@ ; X32-AVX512VL: # BB#0: # %entry ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] -; X32-AVX512VL-NEXT: vcvtps2pd (%ecx), %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x5a,0x01] -; X32-AVX512VL-NEXT: vmovups %ymm0, (%eax) # encoding: [0x62,0xf1,0x7c,0x28,0x11,0x00] +; X32-AVX512VL-NEXT: vcvtps2pd (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0x01] +; X32-AVX512VL-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00] ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: fpext_frommem4: @@ -101,8 +101,8 @@ ; ; X64-AVX512VL-LABEL: fpext_frommem4: ; X64-AVX512VL: # BB#0: # %entry -; X64-AVX512VL-NEXT: vcvtps2pd (%rdi), %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x5a,0x07] -; X64-AVX512VL-NEXT: vmovups %ymm0, (%rsi) # encoding: [0x62,0xf1,0x7c,0x28,0x11,0x06] +; X64-AVX512VL-NEXT: vcvtps2pd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0x07] +; X64-AVX512VL-NEXT: vmovups %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x06] ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x float>, <4 x float>* %in @@ -195,9 +195,9 @@ ; ; X32-AVX512VL-LABEL: fpext_fromconst: ; X32-AVX512VL: # BB#0: # %entry -; X32-AVX512VL-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00] -; X32-AVX512VL-NEXT: # encoding: [0x62,0xf1,0x7c,0x08,0x28,0x05,A,A,A,A] -; X32-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X32-AVX512VL-NEXT: vmovaps {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [1.000000e+00,-2.000000e+00] +; X32-AVX512VL-NEXT: # encoding: 
[0xc5,0xf8,0x28,0x05,A,A,A,A] +; X32-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 ; X32-AVX512VL-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: fpext_fromconst: @@ -216,9 +216,9 @@ ; ; X64-AVX512VL-LABEL: fpext_fromconst: ; X64-AVX512VL: # BB#0: # %entry -; X64-AVX512VL-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00] -; X64-AVX512VL-NEXT: # encoding: [0x62,0xf1,0x7c,0x08,0x28,0x05,A,A,A,A] -; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [1.000000e+00,-2.000000e+00] +; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte ; X64-AVX512VL-NEXT: retq # encoding: [0xc3] entry: %0 = insertelement <2 x float> undef, float 1.0, i32 0 Index: test/CodeGen/X86/vec_int_to_fp.ll =================================================================== --- test/CodeGen/X86/vec_int_to_fp.ll +++ test/CodeGen/X86/vec_int_to_fp.ll @@ -2594,7 +2594,7 @@ ; ; AVX512VL-LABEL: sitofp_load_2i64_to_2f64: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0 +; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax @@ -2774,7 +2774,7 @@ ; ; AVX512VL-LABEL: sitofp_load_4i64_to_4f64: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm0 +; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 ; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 @@ -2913,7 +2913,7 @@ ; ; AVX512VL-LABEL: uitofp_load_2i64_to_2f64: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0 +; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax @@ -3021,7 
+3021,7 @@ ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7] ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -3037,7 +3037,7 @@ ; AVX512VLDQ: # BB#0: ; AVX512VLDQ-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero ; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VLDQ-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VLDQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7] ; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX512VLDQ-NEXT: retq @@ -3189,7 +3189,7 @@ ; ; AVX512VL-LABEL: uitofp_load_4i64_to_4f64: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm0 +; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 ; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2 @@ -3420,7 +3420,7 @@ ; ; AVX512VL-LABEL: sitofp_load_4i64_to_4f32: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm0 +; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax @@ -4007,7 +4007,7 @@ ; ; AVX512VL-LABEL: uitofp_load_4i64_to_4f32: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm0 +; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax Index: test/CodeGen/X86/vector-half-conversions.ll =================================================================== --- test/CodeGen/X86/vector-half-conversions.ll +++ test/CodeGen/X86/vector-half-conversions.ll @@ -3010,7 +3010,7 @@ ; AVX512VL-NEXT: vpshuflw 
{{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; AVX512VL-NEXT: retq %1 = fptrunc <4 x float> %a0 to <4 x half> @@ -3713,7 +3713,7 @@ ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VL-NEXT: vmovdqa32 %xmm0, (%rdi) +; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi) ; AVX512VL-NEXT: retq %1 = fptrunc <4 x float> %a0 to <4 x half> %2 = bitcast <4 x half> %1 to <4 x i16> @@ -3827,9 +3827,9 @@ ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] -; AVX512VL-NEXT: vmovdqa32 %xmm0, (%rdi) +; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi) ; AVX512VL-NEXT: retq %1 = fptrunc <4 x float> %a0 to <4 x half> %2 = bitcast <4 x half> %1 to <4 x i16> @@ -4742,7 +4742,7 @@ ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; AVX512VL-NEXT: addq $40, %rsp ; AVX512VL-NEXT: popq %rbx @@ -5373,7 +5373,7 @@ ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14) +; AVX512VL-NEXT: vmovdqa %xmm0, 
(%r14) ; AVX512VL-NEXT: addq $32, %rsp ; AVX512VL-NEXT: popq %rbx ; AVX512VL-NEXT: popq %r14 @@ -5544,9 +5544,9 @@ ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] ; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] -; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14) +; AVX512VL-NEXT: vmovdqa %xmm0, (%r14) ; AVX512VL-NEXT: addq $32, %rsp ; AVX512VL-NEXT: popq %rbx ; AVX512VL-NEXT: popq %r14 Index: test/CodeGen/X86/vector-lzcnt-256.ll =================================================================== --- test/CodeGen/X86/vector-lzcnt-256.ll +++ test/CodeGen/X86/vector-lzcnt-256.ll @@ -716,7 +716,7 @@ ; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1 ; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24] +; AVX512VLCD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24] ; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1 ; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; 
AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0 @@ -805,7 +805,7 @@ ; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1 ; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24] +; AVX512VLCD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24] ; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1 ; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0 Index: test/CodeGen/X86/vector-shuffle-128-v16.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v16.ll +++ test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -423,7 +423,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] ; AVX512VL-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -462,7 +462,7 @@ ; ; AVX512VL-LABEL: 
shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0] ; AVX512VL-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -482,7 +482,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> ret <16 x i8> %shuffle @@ -520,7 +520,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0] ; AVX512VL-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -560,7 +560,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0] ; AVX512VL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -710,7 +710,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 ; AVX512VL-NEXT: retq %a = insertelement <16 x i8> 
undef, i8 %i, i32 0 @@ -747,7 +747,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 ; AVX512VL-NEXT: retq %a = insertelement <16 x i8> undef, i8 %i, i32 0 @@ -784,7 +784,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0 ; AVX512VL-NEXT: retq %a = insertelement <16 x i8> undef, i8 %i, i32 3 @@ -1233,7 +1233,7 @@ ; AVX512VL: # BB#0: # %entry ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0] -; AVX512VL-NEXT: vporq %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq entry: %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -1270,9 +1270,9 @@ ; ; AVX512VL-LABEL: constant_gets_selected: ; AVX512VL: # BB#0: # %entry -; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vmovdqa32 %xmm0, (%rdi) -; AVX512VL-NEXT: vmovdqa32 %xmm0, (%rsi) +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi) +; AVX512VL-NEXT: vmovdqa %xmm0, (%rsi) ; AVX512VL-NEXT: retq entry: %weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8> @@ -1420,7 +1420,7 @@ ; ; AVX512VL-LABEL: PR12412: ; AVX512VL: # BB#0: # %entry -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -1781,7 
+1781,7 @@ ; ; AVX512VL-LABEL: PR31364: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpinsrb $0, (%rdi), %xmm0, %xmm0 ; AVX512VL-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm0 ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0] Index: test/CodeGen/X86/vector-shuffle-128-v2.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v2.ll +++ test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -782,7 +782,7 @@ ; ; AVX512VL-LABEL: shuffle_v2i64_z1: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; AVX512VL-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> @@ -824,7 +824,7 @@ ; ; AVX512VL-LABEL: shuffle_v2f64_1z: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; AVX512VL-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> @@ -853,7 +853,7 @@ ; ; AVX512VL-LABEL: shuffle_v2f64_z0: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> @@ -899,7 +899,7 @@ ; ; AVX512VL-LABEL: shuffle_v2f64_z1: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; AVX512VL-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> @@ -927,7 +927,7 @@ ; ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; 
AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] ; AVX512VL-NEXT: retq %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> @@ -973,7 +973,7 @@ ; ; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; AVX512VL-NEXT: retq %bitcast32 = bitcast <2 x i64> %x to <4 x float> Index: test/CodeGen/X86/vector-shuffle-128-v4.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v4.ll +++ test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -1364,7 +1364,7 @@ ; AVX512VL-LABEL: shuffle_v4i32_z6zz: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> @@ -1691,7 +1691,7 @@ ; ; AVX512VL-LABEL: shuffle_v4i32_0z23: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> @@ -1734,7 +1734,7 @@ ; ; AVX512VL-LABEL: shuffle_v4i32_01z3: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> @@ -1777,7 +1777,7 @@ ; ; AVX512VL-LABEL: shuffle_v4i32_012z: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = 
xmm0[0,1,2],xmm1[3] ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> @@ -1820,7 +1820,7 @@ ; ; AVX512VL-LABEL: shuffle_v4i32_0zz3: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> Index: test/CodeGen/X86/vector-shuffle-128-v8.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v8.ll +++ test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -1423,7 +1423,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i16_z8zzzzzz: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 ; AVX512VL-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 0 @@ -1446,7 +1446,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i16_zzzzz8zz: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0 ; AVX512VL-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 0 @@ -1469,7 +1469,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i16_zuuzuuz8: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ; AVX512VL-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 0 @@ -1492,7 +1492,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i16_zzBzzzzz: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0 ; AVX512VL-NEXT: retq %a = insertelement <8 x i16> undef, i16 %i, i32 3 @@ -2110,7 +2110,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i16_0z234567: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7] ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> @@ -2142,7 +2142,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i16_0zzzz5z7: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7] ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> @@ -2174,7 +2174,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i16_0123456z: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7] ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> Index: test/CodeGen/X86/vector-shuffle-256-v16.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-256-v16.ll +++ test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -170,7 +170,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0] ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -198,7 +198,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0] ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -225,7 +225,7 @@ ; ; AVX512VL-LABEL: 
shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0] ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -252,7 +252,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0] ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -278,7 +278,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -304,7 +304,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -330,7 +330,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0] ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x 
i16> %b, <16 x i32> @@ -717,7 +717,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0] ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -741,7 +741,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -765,7 +765,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255] ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -789,7 +789,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = 
[0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0] ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -849,7 +849,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,0,16,0,16,0,16,8,24,8,24,8,24,8,24] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,0,16,0,16,0,16,8,24,8,24,8,24,8,24] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -876,9 +876,9 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,0,0,0,20,21,22,23,8,8,8,8,28,29,30,31] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,0,0,20,21,22,23,8,8,8,8,28,29,30,31] ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -909,9 +909,9 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [3,2,1,0,23,22,21,20,11,10,9,8,31,30,29,28] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [3,2,1,0,23,22,21,20,11,10,9,8,31,30,29,28] ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -940,9 +940,9 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08: ; AVX512VL: # BB#0: 
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [3,2,1,0,19,18,17,16,11,10,9,8,27,26,25,24] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [3,2,1,0,19,18,17,16,11,10,9,8,27,26,25,24] ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -1129,7 +1129,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,12,28,13,29,14,30,15,31] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,12,28,13,29,14,30,15,31] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -1155,7 +1155,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [4,20,5,21,6,22,7,23,8,24,9,25,10,26,11,27] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [4,20,5,21,6,22,7,23,8,24,9,25,10,26,11,27] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -1408,7 +1408,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,16,16,16,16,20,20,20,20] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,16,16,16,16,20,20,20,20] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -1435,7 +1435,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = 
[8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -1463,7 +1463,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,24,24,24,24,28,28,28,28] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,24,24,24,24,28,28,28,28] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -1490,7 +1490,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,24,24,24,24,28,28,28,28] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,24,24,24,24,28,28,28,28] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -1514,7 +1514,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -1748,7 +1748,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [1,2,3,4,5,6,7,0,17,18,19,20,21,22,23,16] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [1,2,3,4,5,6,7,0,17,18,19,20,21,22,23,16] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -1771,7 +1771,7 @@ ; ; AVX512VL-LABEL: 
shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [7,0,1,2,3,4,5,6,23,16,17,18,19,20,21,22] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [7,0,1,2,3,4,5,6,23,16,17,18,19,20,21,22] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -1867,7 +1867,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [4,5,6,7,16,17,18,27,12,13,14,15,24,25,26,27] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [4,5,6,7,16,17,18,27,12,13,14,15,24,25,26,27] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -2211,7 +2211,7 @@ ; AVX512VL-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] +; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 @@ -2415,7 +2415,7 @@ ; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] +; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 @@ -2449,7 +2449,7 @@ ; AVX512VL-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] +; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 @@ -2483,7 +2483,7 @@ ; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] +; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 @@ -2517,7 +2517,7 @@ ; AVX512VL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] +; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 @@ -2997,7 +2997,7 @@ ; AVX512VL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] +; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 @@ -3033,7 +3033,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 
= [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3066,7 +3066,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,20,1,21,2,22,3,31,8,28,9,29,10,30,11,31] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,20,1,21,2,22,3,31,8,28,9,29,10,30,11,31] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3099,7 +3099,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3132,7 +3132,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [4,16,5,17,6,18,7,27,12,24,13,25,14,26,15,27] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [4,16,5,17,6,18,7,27,12,24,13,25,14,26,15,27] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3172,7 +3172,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,6,22,7,31,8,24,9,25,14,30,15,31] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,1,17,6,22,7,31,8,24,9,25,14,30,15,31] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, 
<16 x i32> @@ -3209,7 +3209,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,20,1,21,6,16,7,25,8,28,9,29,14,24,15,25] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,20,1,21,6,16,7,25,8,28,9,29,14,24,15,25] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3245,7 +3245,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [1,0,17,16,3,2,19,26,9,8,25,24,11,10,27,26] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [1,0,17,16,3,2,19,26,9,8,25,24,11,10,27,26] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3278,9 +3278,9 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27] ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3312,9 +3312,9 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31] ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3350,7 +3350,7 @@ ; ; 
AVX512VL-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,2,1,3,20,22,21,31,8,10,9,11,28,30,29,31] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,2,1,3,20,22,21,31,8,10,9,11,28,30,29,31] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3380,7 +3380,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <4,4,3,18,u,u,u,u,12,12,11,26,u,u,u,u> +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <4,4,3,18,u,u,u,u,12,12,11,26,u,u,u,u> ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3408,7 +3408,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <0,3,2,21,u,u,u,u,8,11,10,29,u,u,u,u> +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <0,3,2,21,u,u,u,u,8,11,10,29,u,u,u,u> ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3449,7 +3449,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <0,1,2,21,u,u,u,u,8,9,10,29,u,u,u,u> +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <0,1,2,21,u,u,u,u,8,9,10,29,u,u,u,u> ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3475,9 +3475,9 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa 
%ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3503,9 +3503,9 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <4,5,6,19,u,u,u,u,12,13,14,27,u,u,u,u> +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <4,5,6,19,u,u,u,u,12,13,14,27,u,u,u,u> ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3537,7 +3537,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,1,2,21,20,21,22,11,8,9,10,29,28,29,30,11] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,1,2,21,20,21,22,11,8,9,10,29,28,29,30,11] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3563,7 +3563,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,17,2,3,20,21,22,15,8,25,10,11,28,29,30,15] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,17,2,3,20,21,22,15,8,25,10,11,28,29,30,15] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3596,7 +3596,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3627,9 +3627,9 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu: ; AVX512VL: # BB#0: 
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3658,7 +3658,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3773,9 +3773,9 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26] ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3889,7 +3889,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -3937,9 +3937,9 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = 
[21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12] ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -3985,9 +3985,9 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <7,u,19,u,4,4,21,u,15,u,27,u,12,12,29,u> +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <7,u,19,u,4,4,21,u,15,u,27,u,12,12,29,u> ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle @@ -4129,7 +4129,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25] ; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> @@ -4166,7 +4166,7 @@ ; ; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25] ; AVX512VL-NEXT: vpermi2w %ymm1, %ymm0, %ymm2 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm2[0,2,1,3] ; AVX512VL-NEXT: retq @@ -4256,9 +4256,9 @@ ; ; AVX512VL-LABEL: PR24935: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [11,10,17,13,10,7,27,0,17,25,0,12,29,20,16,8] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [11,10,17,13,10,7,27,0,17,25,0,12,29,20,16,8] ; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, 
%ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle Index: test/CodeGen/X86/vector-shuffle-256-v32.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-256-v32.ll +++ test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -317,10 +317,10 @@ ; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VL-NEXT: vpxord %ymm2, %ymm2, %ymm2 +; AVX512VL-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: vpbroadcastb %xmm0, %xmm0 -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -349,7 +349,7 @@ ; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = <0,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <0,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: retq @@ -379,7 +379,7 @@ ; AVX512VL-LABEL: 
shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u> ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: retq @@ -409,7 +409,7 @@ ; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1] -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u> ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: retq @@ -749,7 +749,7 @@ ; ; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; AVX512VL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1037,7 +1037,7 @@ ; ; AVX512VL-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; 
AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1061,7 +1061,7 @@ ; ; AVX512VL-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1076,7 +1076,7 @@ ; ; AVX512VL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpandq {{.*}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> ret <32 x i8> %shuffle @@ -1142,11 +1142,11 @@ ; ; AVX512VL-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %ymm2, %ymm2, %ymm2 +; AVX512VL-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15] ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5] -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = 
[255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1176,7 +1176,7 @@ ; ; AVX512VL-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpxord %ymm2, %ymm2, %ymm2 +; AVX512VL-NEXT: vpxor %ymm2, %ymm2, %ymm2 ; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7] ; AVX512VL-NEXT: retq @@ -1399,7 +1399,7 @@ ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u] ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31] -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1428,7 +1428,7 @@ ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u] ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23] -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq 
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> @@ -1684,13 +1684,13 @@ ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1] ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23] ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u] -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm3 = <255,255,u,u,255,255,0,255,u,u,u,255,255,u,0,0,u,u,255,u,255,255,0,0,255,0,255,u,0,0,0,0> +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm3 = <255,255,u,u,255,255,0,255,u,u,u,255,255,u,0,0,u,u,255,u,255,255,0,0,255,0,255,u,0,0,0,0> ; AVX512VL-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3,0,1] ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u] ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2],ymm0[3,4,5],ymm2[6],ymm0[7] -; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] +; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255] ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> Index: test/CodeGen/X86/vector-shuffle-256-v8.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-256-v8.ll +++ test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -1049,7 +1049,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_00040000: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; 
AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1072,7 +1072,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_00500000: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1095,7 +1095,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_06000000: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1150,7 +1150,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_00112233: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1296,7 +1296,7 @@ ; AVX512VL-LABEL: shuffle_v8i32_08192a3b: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11] ; AVX512VL-NEXT: vpermi2d %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1325,7 +1325,7 @@ ; AVX512VL-LABEL: shuffle_v8i32_08991abb: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3] ; AVX512VL-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1564,7 +1564,7 @@ ; ; AVX512VL-LABEL: 
shuffle_v8i32_00015444: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1585,7 +1585,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_00204644: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1606,7 +1606,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_03004474: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1627,7 +1627,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_10004444: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1648,7 +1648,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_22006446: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1669,7 +1669,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_33307474: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1690,7 +1690,7 
@@ ; ; AVX512VL-LABEL: shuffle_v8i32_32104567: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1711,7 +1711,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_00236744: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1732,7 +1732,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_00226644: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1753,7 +1753,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_10324567: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1774,7 +1774,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_11334567: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1795,7 +1795,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_01235467: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x 
i32> @@ -1816,7 +1816,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_01235466: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6] ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1837,7 +1837,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_002u6u44: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4> +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4> ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1858,7 +1858,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_00uu66uu: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u> +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u> ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1879,7 +1879,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_103245uu: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u> +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u> ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1900,7 +1900,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_1133uu67: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7> +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7> ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1921,7 +1921,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_0uu354uu: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u> +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u> ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> 
%a, <8 x i32> %b, <8 x i32> @@ -1942,7 +1942,7 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_uuu3uu66: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = ; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -1969,9 +1969,9 @@ ; ; AVX512VL-LABEL: shuffle_v8i32_6caa87e5: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm2 = [14,4,2,2,0,15,6,13] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [14,4,2,2,0,15,6,13] ; AVX512VL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0 +; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle Index: test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll +++ test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll @@ -22,18 +22,18 @@ ; X32-LABEL: combine_vpermt2var_16i16_identity_mask: ; X32: # BB#0: ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; X32-NEXT: vmovdqu16 {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] +; X32-NEXT: vmovdqu {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; X32-NEXT: vpermi2w %ymm1, %ymm0, %ymm2 {%k1} {z} -; X32-NEXT: vmovdqu16 {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] +; X32-NEXT: vmovdqu {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] ; X32-NEXT: vpermi2w %ymm2, %ymm2, %ymm0 {%k1} {z} ; X32-NEXT: retl ; ; X64-LABEL: combine_vpermt2var_16i16_identity_mask: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovdqu16 {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] +; X64-NEXT: vmovdqu {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; X64-NEXT: vpermi2w %ymm1, %ymm0, %ymm2 {%k1} {z} -; X64-NEXT: vmovdqu16 {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] +; X64-NEXT: 
vmovdqu {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] ; X64-NEXT: vpermi2w %ymm2, %ymm2, %ymm0 {%k1} {z} ; X64-NEXT: retq %res0 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> , <16 x i16> %x0, <16 x i16> %x1, i16 %m) @@ -44,13 +44,13 @@ define <16 x i16> @combine_vpermi2var_16i16_as_permw(<16 x i16> %x0, <16 x i16> %x1) { ; X32-LABEL: combine_vpermi2var_16i16_as_permw: ; X32: # BB#0: -; X32-NEXT: vmovdqu16 {{.*#+}} ymm1 = [15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7] +; X32-NEXT: vmovdqu {{.*#+}} ymm1 = [15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7] ; X32-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: combine_vpermi2var_16i16_as_permw: ; X64: # BB#0: -; X64-NEXT: vmovdqu16 {{.*#+}} ymm1 = [15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7] +; X64-NEXT: vmovdqu {{.*#+}} ymm1 = [15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7] ; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> , <16 x i16> %x1, i16 -1) @@ -61,13 +61,13 @@ define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_vperm2(<16 x i16> %x0, <16 x i16> %x1) { ; X32-LABEL: combine_vpermt2var_vpermi2var_16i16_as_vperm2: ; X32: # BB#0: -; X32-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19] +; X32-NEXT: vmovdqu {{.*#+}} ymm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19] ; X32-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: combine_vpermt2var_vpermi2var_16i16_as_vperm2: ; X64: # BB#0: -; X64-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19] +; X64-NEXT: vmovdqu {{.*#+}} ymm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19] ; X64-NEXT: vpermt2w %ymm1, %ymm2, %ymm0 ; X64-NEXT: retq %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> , <16 x i16> %x1, i16 -1) Index: test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll =================================================================== --- 
test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll +++ test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll @@ -37,18 +37,18 @@ ; X32-LABEL: combine_vpermt2var_16i8_identity_mask: ; X32: # BB#0: ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; X32-NEXT: vmovdqu8 {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] +; X32-NEXT: vmovdqu {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; X32-NEXT: vpermi2b %xmm1, %xmm0, %xmm2 {%k1} {z} -; X32-NEXT: vmovdqu8 {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] +; X32-NEXT: vmovdqu {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] ; X32-NEXT: vpermi2b %xmm2, %xmm2, %xmm0 {%k1} {z} ; X32-NEXT: retl ; ; X64-LABEL: combine_vpermt2var_16i8_identity_mask: ; X64: # BB#0: ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vmovdqu8 {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] +; X64-NEXT: vmovdqu {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] ; X64-NEXT: vpermi2b %xmm1, %xmm0, %xmm2 {%k1} {z} -; X64-NEXT: vmovdqu8 {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] +; X64-NEXT: vmovdqu {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16] ; X64-NEXT: vpermi2b %xmm2, %xmm2, %xmm0 {%k1} {z} ; X64-NEXT: retq %res0 = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> , <16 x i8> %x0, <16 x i8> %x1, i16 %m) @@ -73,13 +73,13 @@ define <32 x i8> @combine_vpermi2var_32i8_as_vpermb(<32 x i8> %x0, <32 x i8> %x1) { ; X32-LABEL: combine_vpermi2var_32i8_as_vpermb: ; X32: # BB#0: -; X32-NEXT: vmovdqu8 {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19] +; X32-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19] ; X32-NEXT: vpermb %ymm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: combine_vpermi2var_32i8_as_vpermb: ; X64: # BB#0: -; X64-NEXT: vmovdqu8 {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19] +; X64-NEXT: vmovdqu 
{{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19] ; X64-NEXT: vpermb %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq %res0 = shufflevector <32 x i8> %x0, <32 x i8> %x1, <32 x i32> @@ -106,17 +106,17 @@ define <16 x i8> @combine_vpermt2var_vpermi2var_16i8_as_vperm2(<16 x i8> %x0, <16 x i8> %x1) { ; X32-LABEL: combine_vpermt2var_vpermi2var_16i8_as_vperm2: ; X32: # BB#0: -; X32-NEXT: vmovdqu8 {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17] +; X32-NEXT: vmovdqu {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17] ; X32-NEXT: vpermi2b %xmm1, %xmm0, %xmm2 -; X32-NEXT: vmovdqu8 {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29] +; X32-NEXT: vmovdqu {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29] ; X32-NEXT: vpermi2b %xmm2, %xmm2, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: combine_vpermt2var_vpermi2var_16i8_as_vperm2: ; X64: # BB#0: -; X64-NEXT: vmovdqu8 {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17] +; X64-NEXT: vmovdqu {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17] ; X64-NEXT: vpermi2b %xmm1, %xmm0, %xmm2 -; X64-NEXT: vmovdqu8 {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29] +; X64-NEXT: vmovdqu {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29] ; X64-NEXT: vpermi2b %xmm2, %xmm2, %xmm0 ; X64-NEXT: retq %res0 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> , <16 x i8> %x1, i16 -1) @@ -126,13 +126,13 @@ define <32 x i8> @combine_vpermi2var_32i8_as_vperm2(<32 x i8> %x0, <32 x i8> %x1) { ; X32-LABEL: combine_vpermi2var_32i8_as_vperm2: ; X32: # BB#0: -; X32-NEXT: vmovdqu8 {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19] +; X32-NEXT: vmovdqu {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19] ; X32-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: combine_vpermi2var_32i8_as_vperm2: ; X64: # BB#0: -; X64-NEXT: 
vmovdqu8 {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19] +; X64-NEXT: vmovdqu {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19] ; X64-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 ; X64-NEXT: retq %res0 = shufflevector <32 x i8> %x0, <32 x i8> %x1, <32 x i32> Index: test/CodeGen/X86/vector-shuffle-masked.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-masked.ll +++ test/CodeGen/X86/vector-shuffle-masked.ll @@ -6,7 +6,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[1,2,3],xmm1[0] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> %mask.cast = bitcast i8 %mask to <8 x i1> @@ -33,7 +33,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[2,3],xmm1[0,1] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> %mask.cast = bitcast i8 %mask to <8 x i1> @@ -60,7 +60,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: valignq {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0] -; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0 +; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> %mask.cast = bitcast i8 %mask to <8 x i1> @@ -87,7 +87,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: valignq {{.*#+}} ymm2 {%k1} = ymm0[1,2,3],ymm1[0] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> %mask.cast = bitcast i8 %mask to <8 x i1> @@ -114,7 +114,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,0] -; CHECK-NEXT: 
vmovdqa64 %ymm1, %ymm0 +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> %mask.cast = bitcast i8 %mask to <8 x i1> @@ -141,7 +141,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[1,2,3,4,5,6,7],ymm1[0] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> %mask.cast = bitcast i8 %mask to <8 x i1> @@ -166,7 +166,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[2,3,4,5,6,7],ymm1[0,1] -; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 +; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> %mask.cast = bitcast i8 %mask to <8 x i1> @@ -191,7 +191,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: valignd {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,4,5,6,7,0] -; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 +; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> %mask.cast = bitcast i8 %mask to <8 x i1> Index: test/CodeGen/X86/vector-trunc.ll =================================================================== --- test/CodeGen/X86/vector-trunc.ll +++ test/CodeGen/X86/vector-trunc.ll @@ -535,7 +535,7 @@ ; AVX512VL: # BB#0: # %entry ; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VL-NEXT: vmovdqu32 %xmm0, (%rax) +; AVX512VL-NEXT: vmovdqu %xmm0, (%rax) ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc16i16_16i8: @@ -644,7 +644,7 @@ ; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1 ; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vmovdqu32 %ymm0, (%rax) +; AVX512VL-NEXT: vmovdqu %ymm0, (%rax) ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc32i16_32i8: @@ -1100,7 +1100,7 @@ ; ; AVX512VL-LABEL: trunc2x8i16_16i8: ; 
AVX512VL: # BB#0: # %entry -; AVX512VL-NEXT: vmovdqa64 {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -1116,7 +1116,7 @@ ; ; AVX512BWVL-LABEL: trunc2x8i16_16i8: ; AVX512BWVL: # BB#0: # %entry -; AVX512BWVL-NEXT: vmovdqu8 {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; AVX512BWVL-NEXT: vmovdqu {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> ; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 ; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -1202,7 +1202,7 @@ ; ; AVX512VL-LABEL: trunc16i64_16i8_const: ; AVX512VL: # BB#0: # %entry -; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc16i64_16i8_const: @@ -1212,7 +1212,7 @@ ; ; AVX512BWVL-LABEL: trunc16i64_16i8_const: ; AVX512BWVL: # BB#0: # %entry -; AVX512BWVL-NEXT: vpxord %xmm0, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq entry: Index: test/CodeGen/X86/vector-tzcnt-128.ll =================================================================== --- test/CodeGen/X86/vector-tzcnt-128.ll +++ test/CodeGen/X86/vector-tzcnt-128.ll @@ -136,16 +136,16 @@ ; ; AVX512CDVL-LABEL: testv2i64: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDVL-NEXT: vpsubq %xmm0, %xmm1, %xmm2 -; AVX512CDVL-NEXT: vpandq %xmm2, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpsubq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %xmm2, %xmm0, %xmm3 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] 
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm3 +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512CDVL-NEXT: vpandq %xmm2, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm4, %xmm0 ; AVX512CDVL-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 @@ -316,11 +316,11 @@ ; ; AVX512CDVL-LABEL: testv2i64u: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDVL-NEXT: vpsubq %xmm0, %xmm1, %xmm1 -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vplzcntq %xmm0, %xmm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm1 = [63,63] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63] ; AVX512CDVL-NEXT: vpsubq %xmm0, %xmm1, %xmm0 ; AVX512CDVL-NEXT: retq ; @@ -510,16 +510,16 @@ ; ; AVX512CDVL-LABEL: testv4i32: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDVL-NEXT: vpsubd %xmm0, %xmm1, %xmm2 -; AVX512CDVL-NEXT: vpandd %xmm2, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpsubd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %xmm2, %xmm0, %xmm3 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm3 +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %xmm3, %xmm4, %xmm3 ; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512CDVL-NEXT: vpandq 
%xmm2, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm4, %xmm0 ; AVX512CDVL-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] @@ -731,9 +731,9 @@ ; ; AVX512CDVL-LABEL: testv4i32u: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDVL-NEXT: vpsubd %xmm0, %xmm1, %xmm1 -; AVX512CDVL-NEXT: vpandd %xmm1, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vplzcntd %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 ; AVX512CDVL-NEXT: vpsubd %xmm0, %xmm1, %xmm0 @@ -913,16 +913,16 @@ ; ; AVX512CDVL-LABEL: testv8i16: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDVL-NEXT: vpsubw %xmm0, %xmm1, %xmm1 -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm2 +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %xmm2, %xmm3, %xmm2 ; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX512CDVL-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpsllw $8, %xmm0, %xmm1 @@ -1111,16 +1111,16 @@ ; ; AVX512CDVL-LABEL: testv8i16u: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDVL-NEXT: 
vpsubw %xmm0, %xmm1, %xmm1 -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm2 +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %xmm2, %xmm3, %xmm2 ; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX512CDVL-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpsllw $8, %xmm0, %xmm1 @@ -1287,16 +1287,16 @@ ; ; AVX512CDVL-LABEL: testv16i8: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1 -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm2 +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %xmm2, %xmm3, %xmm2 ; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX512CDVL-NEXT: vpaddb 
%xmm2, %xmm0, %xmm0 ; AVX512CDVL-NEXT: retq @@ -1453,16 +1453,16 @@ ; ; AVX512CDVL-LABEL: testv16i8u: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1 +; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1 -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm2 +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %xmm2, %xmm3, %xmm2 ; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0 +; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0 ; AVX512CDVL-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX512CDVL-NEXT: retq Index: test/CodeGen/X86/vector-tzcnt-256.ll =================================================================== --- test/CodeGen/X86/vector-tzcnt-256.ll +++ test/CodeGen/X86/vector-tzcnt-256.ll @@ -59,16 +59,16 @@ ; ; AVX512CDVL-LABEL: testv4i64: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX512CDVL-NEXT: vpsubq %ymm0, %ymm1, %ymm2 -; AVX512CDVL-NEXT: vpandq %ymm2, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpsubq {{.*}}(%rip){1to4}, %ymm0, %ymm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %ymm2, %ymm0, %ymm3 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm4 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm3 +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %ymm3, %ymm4, %ymm3 ; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512CDVL-NEXT: vpandq %ymm2, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm4, %ymm0 ; AVX512CDVL-NEXT: vpaddb %ymm3, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 @@ -164,9 +164,9 @@ ; ; AVX512CDVL-LABEL: testv4i64u: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX512CDVL-NEXT: vpsubq %ymm0, %ymm1, %ymm1 -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vplzcntq %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1 ; AVX512CDVL-NEXT: vpsubq %ymm0, %ymm1, %ymm0 @@ -266,16 +266,16 @@ ; ; AVX512CDVL-LABEL: testv8i32: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX512CDVL-NEXT: vpsubd %ymm0, %ymm1, %ymm2 -; AVX512CDVL-NEXT: vpandd %ymm2, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpsubd {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %ymm2, %ymm0, %ymm3 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm3 +; 
AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %ymm3, %ymm4, %ymm3 ; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512CDVL-NEXT: vpandq %ymm2, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm4, %ymm0 ; AVX512CDVL-NEXT: vpaddb %ymm3, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] @@ -396,9 +396,9 @@ ; ; AVX512CDVL-LABEL: testv8i32u: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX512CDVL-NEXT: vpsubd %ymm0, %ymm1, %ymm1 -; AVX512CDVL-NEXT: vpandd %ymm1, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vplzcntd %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 ; AVX512CDVL-NEXT: vpsubd %ymm0, %ymm1, %ymm0 @@ -496,16 +496,16 @@ ; ; AVX512CDVL-LABEL: testv16i16: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX512CDVL-NEXT: vpsubw %ymm0, %ymm1, %ymm1 -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm2 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2 +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2 ; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; 
AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpsllw $8, %ymm0, %ymm1 @@ -611,16 +611,16 @@ ; ; AVX512CDVL-LABEL: testv16i16u: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX512CDVL-NEXT: vpsubw %ymm0, %ymm1, %ymm1 -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm2 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2 +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2 ; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpsllw $8, %ymm0, %ymm1 @@ -717,16 +717,16 @@ ; ; AVX512CDVL-LABEL: testv32i8: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX512CDVL-NEXT: vpsubb %ymm0, %ymm1, %ymm1 -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %ymm0, %ymm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm2 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2 +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2 ; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512CDVL-NEXT: retq @@ -814,16 +814,16 @@ ; ; AVX512CDVL-LABEL: testv32i8u: ; AVX512CDVL: # BB#0: -; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1 +; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX512CDVL-NEXT: vpsubb %ymm0, %ymm1, %ymm1 -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %ymm0, %ymm0 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm2 -; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2 +; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2 ; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0 +; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, 
%ymm0 ; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0 ; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512CDVL-NEXT: retq Index: test/CodeGen/X86/viabs.ll =================================================================== --- test/CodeGen/X86/viabs.ll +++ test/CodeGen/X86/viabs.ll @@ -452,7 +452,7 @@ ; AVX512: # BB#0: ; AVX512-NEXT: vpsraq $63, %xmm0, %xmm1 ; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpxorq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %tmp1neg = sub <2 x i64> zeroinitializer, %a %b = icmp sge <2 x i64> %a, zeroinitializer @@ -501,7 +501,7 @@ ; AVX512: # BB#0: ; AVX512-NEXT: vpsraq $63, %ymm0, %ymm1 ; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpxorq %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %tmp1neg = sub <4 x i64> zeroinitializer, %a %b = icmp sgt <4 x i64> %a,