diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h --- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h +++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h @@ -323,6 +323,7 @@ ENUM_ENTRY(MODRM_SPLITRM) \ ENUM_ENTRY(MODRM_SPLITMISC) \ ENUM_ENTRY(MODRM_SPLITREG) \ + ENUM_ENTRY(MODRM_SPLITREGM) \ ENUM_ENTRY(MODRM_FULL) #define ENUM_ENTRY(n) n, @@ -361,6 +362,7 @@ ENUM_ENTRY(ENCODING_RM_CD16,"R/M operand with CDisp scaling of 16") \ ENUM_ENTRY(ENCODING_RM_CD32,"R/M operand with CDisp scaling of 32") \ ENUM_ENTRY(ENCODING_RM_CD64,"R/M operand with CDisp scaling of 64") \ + ENUM_ENTRY(ENCODING_SIB, "Force SIB operand in ModR/M byte.") \ ENUM_ENTRY(ENCODING_VSIB, "VSIB operand in ModR/M byte.") \ ENUM_ENTRY(ENCODING_VSIB_CD2, "VSIB operand with CDisp scaling of 2") \ ENUM_ENTRY(ENCODING_VSIB_CD4, "VSIB operand with CDisp scaling of 4") \ @@ -411,6 +413,7 @@ ENUM_ENTRY(TYPE_IMM, "immediate operand") \ ENUM_ENTRY(TYPE_UIMM8, "1-byte unsigned immediate operand") \ ENUM_ENTRY(TYPE_M, "Memory operand") \ + ENUM_ENTRY(TYPE_MSIB, "Memory operand force sib encoding") \ ENUM_ENTRY(TYPE_MVSIBX, "Memory operand using XMM index") \ ENUM_ENTRY(TYPE_MVSIBY, "Memory operand using YMM index") \ ENUM_ENTRY(TYPE_MVSIBZ, "Memory operand using ZMM index") \ @@ -424,6 +427,7 @@ ENUM_ENTRY(TYPE_ZMM, "64-byte") \ ENUM_ENTRY(TYPE_VK, "mask register") \ ENUM_ENTRY(TYPE_VK_PAIR, "mask register pair") \ + ENUM_ENTRY(TYPE_TMM, "tile") \ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \ diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1392,8 +1392,8 @@ // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context // 
switch, then we have full AVX support. - bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && - !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); + bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); + bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6); #if defined(__APPLE__) // Darwin lazily saves the AVX512 context on first use: trust that the OS will // save the AVX512 context if we use AVX512 instructions, even the bit is not @@ -1403,6 +1403,9 @@ // AVX512 requires additional context to be saved by the OS. bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); #endif + // AMX requires additional context to be saved by the OS. + const unsigned AMXBits = (1 << 17) | (1 << 18); + bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); Features["avx"] = HasAVXSave; Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; @@ -1493,6 +1496,9 @@ // detecting features using the "-march=native" flag. // For more info, see X86 ISA docs. Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); + Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave; + Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave; + Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; bool HasLeaf7Subleaf1 = MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h --- a/llvm/lib/Target/X86/AsmParser/X86Operand.h +++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h @@ -315,6 +315,11 @@ bool isMem512() const { return Kind == Memory && (!Mem.Size || Mem.Size == 512); } + + bool isSibMem() const { + return isMem() && Mem.BaseReg != X86::RIP && Mem.BaseReg != X86::EIP; + } + bool isMemIndexReg(unsigned LowR, unsigned HighR) const { assert(Kind == Memory && "Invalid access!"); return Mem.IndexReg >= LowR && Mem.IndexReg <= HighR; diff 
--git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -166,6 +166,9 @@ if (modFromModRM(modRM) == 0x3) return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8]; return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; + case MODRM_SPLITREGM: + assert(modFromModRM(modRM) == 0x3); + return modRMTable[dec->instructionIDs+(modRM & 0x7)]; case MODRM_SPLITMISC: if (modFromModRM(modRM) == 0x3) return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8]; @@ -776,6 +779,10 @@ return prefix##_YMM0 + index; \ case TYPE_XMM: \ return prefix##_XMM0 + index; \ + case TYPE_TMM: \ + if (index > 7) \ + *valid = 0; \ + return prefix##_TMM0 + index; \ case TYPE_VK: \ index &= 0xf; \ if (index > 7) \ @@ -849,6 +856,7 @@ if (!valid) return -1; break; + case ENCODING_SIB: CASE_ENCODING_RM: if (insn->eaBase >= insn->eaRegBase) { insn->eaBase = (EABase)fixupRMValue( @@ -1533,6 +1541,15 @@ if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB); break; + case ENCODING_SIB: + if (readModRM(insn)) + return -1; + // Reject if SIB wasn't used (eaBase is only valid once ModRM is read). + if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64) + return -1; + if (fixupReg(insn, &Op)) + return -1; + break; case ENCODING_REG: CASE_ENCODING_RM: if (readModRM(insn)) @@ -2006,9 +2023,11 @@ /// @param mcInst - The MCInst to append to. /// @param insn - The instruction to extract Mod, R/M, and SIB fields /// from. +/// @param ForceSIB - The instruction must use SIB. /// @return - 0 on success; nonzero otherwise static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, - const MCDisassembler *Dis) { + const MCDisassembler *Dis, + bool ForceSIB = false) { // Addresses in an MCInst are represented as five operands: // 1. 
basereg (register) The R/M base, or (if there is a SIB) the // SIB base @@ -2067,11 +2086,12 @@ // -Any base register used other than ESP/RSP/R12D/R12. Using these as a // base always requires a SIB byte. // -A scale other than 1 is used. - if (insn.sibScale != 1 || - (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) || - (insn.sibBase != SIB_BASE_NONE && - insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP && - insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12)) { + if (!ForceSIB && + (insn.sibScale != 1 || + (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) || + (insn.sibBase != SIB_BASE_NONE && + insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP && + insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) { indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ : X86::RIZ); } else @@ -2182,6 +2202,7 @@ case TYPE_XMM: case TYPE_YMM: case TYPE_ZMM: + case TYPE_TMM: case TYPE_VK_PAIR: case TYPE_VK: case TYPE_DEBUGREG: @@ -2193,6 +2214,8 @@ case TYPE_MVSIBY: case TYPE_MVSIBZ: return translateRMMemory(mcInst, insn, Dis); + case TYPE_MSIB: + return translateRMMemory(mcInst, insn, Dis, true); } } @@ -2242,6 +2265,7 @@ return false; case ENCODING_WRITEMASK: return translateMaskRegister(mcInst, insn.writemask); + case ENCODING_SIB: CASE_ENCODING_RM: CASE_ENCODING_VSIB: return translateRM(mcInst, operand, insn, Dis); diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -380,6 +380,17 @@ ENTRY(BND2) \ ENTRY(BND3) +#undef REGS_TMM +#define REGS_TMM \ + ENTRY(TMM0) \ + ENTRY(TMM1) \ + ENTRY(TMM2) \ + ENTRY(TMM3) \ + ENTRY(TMM4) \ + ENTRY(TMM5) \ + ENTRY(TMM6) \ + ENTRY(TMM7) + #define ALL_EA_BASES \ EA_BASES_16BIT \ EA_BASES_32BIT \ @@ -404,6 +415,7 @@ REGS_DEBUG \ REGS_CONTROL \ REGS_BOUND \ + 
REGS_TMM \ ENTRY(RIP) /// All possible values of the base field for effective-address diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -623,6 +623,15 @@ /// information. In the intel manual these are represented as /0, /1, ... /// + /// MRMr0 - Register is encoded in the ModRM Reg/Opcode field; r/m is 0. + MRMr0 = 21, + + /// MRMSrcMemFSIB - Like MRMSrcMem, but always encoded with a SIB byte. + MRMSrcMemFSIB = 22, + + /// MRMDestMemFSIB - Like MRMDestMem, but always encoded with a SIB byte. + MRMDestMemFSIB = 23, + /// MRMDestMem - This form is used for instructions that use the Mod/RM byte /// to specify a destination, which in this case is memory. /// @@ -1082,8 +1091,10 @@ case X86II::PrefixByte: return -1; case X86II::MRMDestMem: + case X86II::MRMDestMemFSIB: return 0; case X86II::MRMSrcMem: + case X86II::MRMSrcMemFSIB: // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a // mask register. 
return 1 + HasVEX_4V + HasEVEX_K; @@ -1103,6 +1114,7 @@ case X86II::MRMSrcRegOp4: case X86II::MRMSrcRegCC: case X86II::MRMXrCC: + case X86II::MRMr0: case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -78,7 +78,8 @@ void emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, uint64_t TSFlags, bool HasREX, uint64_t StartByte, raw_ostream &OS, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI, + bool ForceSIB = false) const; bool emitPrefixImpl(unsigned &CurOp, const MCInst &MI, const MCSubtargetInfo &STI, raw_ostream &OS) const; @@ -382,7 +383,8 @@ uint64_t TSFlags, bool HasREX, uint64_t StartByte, raw_ostream &OS, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { + const MCSubtargetInfo &STI, + bool ForceSIB) const { const MCOperand &Disp = MI.getOperand(Op + X86::AddrDisp); const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg); const MCOperand &Scale = MI.getOperand(Op + X86::AddrScaleAmt); @@ -395,7 +397,8 @@ BaseReg == X86::EIP) { // [disp32+rIP] in X86-64 mode assert(STI.hasFeature(X86::Mode64Bit) && "Rip-relative addressing requires 64-bit mode"); - assert(IndexReg.getReg() == 0 && "Invalid rip-relative address"); + assert(IndexReg.getReg() == 0 && !ForceSIB && + "Invalid rip-relative address"); emitByte(modRMByte(0, RegOpcodeField, 5), OS); unsigned Opcode = MI.getOpcode(); @@ -510,7 +513,7 @@ // 2-7) and absolute references. if ( // The SIB byte must be used if there is an index register. 
- IndexReg.getReg() == 0 && + !ForceSIB && IndexReg.getReg() == 0 && // The SIB byte must be used if the base is ESP/RSP/R12, all of which // encode to an R/M value of 4, which indicates that a SIB byte is // present. @@ -883,9 +886,11 @@ switch (TSFlags & X86II::FormMask) { default: llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!"); + case X86II::MRM_C0: case X86II::RawFrm: case X86II::PrefixByte: break; + case X86II::MRMDestMemFSIB: case X86II::MRMDestMem: { // MRMDestMem instructions forms: // MemAddr, src1(ModR/M) @@ -916,6 +921,7 @@ EVEX_R2 = ~(RegEnc >> 4) & 1; break; } + case X86II::MRMSrcMemFSIB: case X86II::MRMSrcMem: { // MRMSrcMem instructions forms: // src1(ModR/M), MemAddr @@ -1097,6 +1103,15 @@ EncodeRC = true; break; } + case X86II::MRMr0: { + // MRMr0 instructions forms: + // 11:rrr:000 + // dst(ModR/M) + unsigned RegEnc = getX86RegEncoding(MI, CurOp++); + VEX_R = ~(RegEnc >> 3) & 1; + EVEX_R2 = ~(RegEnc >> 4) & 1; + break; + } case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: @@ -1267,6 +1282,11 @@ case X86II::MRM7r: REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B break; + case X86II::MRMr0: + REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R + break; + case X86II::MRMDestMemFSIB: + llvm_unreachable("FSIB format never need REX prefix!"); } if (REX && UsesHighByteReg) report_fatal_error( @@ -1481,6 +1501,7 @@ CurOp = SrcRegNum + 1; break; } + case X86II::MRMDestMemFSIB: case X86II::MRMDestMem: { emitByte(BaseOpcode, OS); unsigned SrcRegNum = CurOp + X86::AddrNumOperands; @@ -1491,8 +1512,9 @@ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) ++SrcRegNum; + bool ForceSIB = (Form == X86II::MRMDestMemFSIB); emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags, - HasREX, StartByte, OS, Fixups, STI); + HasREX, StartByte, OS, Fixups, STI, ForceSIB); CurOp = SrcRegNum + 1; break; } @@ -1553,6 +1575,7 @@ getX86RegNum(MI.getOperand(FirstOp)), OS); break; } + case X86II::MRMSrcMemFSIB: case 
X86II::MRMSrcMem: { unsigned FirstMemOp = CurOp + 1; @@ -1564,8 +1587,9 @@ emitByte(BaseOpcode, OS); + bool ForceSIB = (Form == X86II::MRMSrcMemFSIB); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, HasREX, StartByte, OS, Fixups, STI); + TSFlags, HasREX, StartByte, OS, Fixups, STI, ForceSIB); CurOp = FirstMemOp + X86::AddrNumOperands; if (HasVEX_I8Reg) I8RegNum = getX86RegEncoding(MI, CurOp++); @@ -1637,6 +1661,10 @@ emitRegModRMByte(MI.getOperand(CurOp++), (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, OS); break; + case X86II::MRMr0: + emitByte(BaseOpcode, OS); + emitByte(modRMByte(3, getX86RegNum(MI.getOperand(CurOp++)),0), OS); + break; case X86II::MRMXmCC: { unsigned FirstMemOp = CurOp; diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -249,6 +249,14 @@ // target-feature attribute. def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false", "Deprecated. Support MPX instructions">; +def FeatureAMXTILE : SubtargetFeature<"amx-tile", "HasAMXTILE", "true", + "Support AMX-TILE instructions">; +def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true", + "Support AMX-INT8 instructions", + [FeatureAMXTILE]>; +def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true", + "Support AMX-BF16 instructions", + [FeatureAMXTILE]>; def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/X86/X86InstrAMX.td @@ -0,0 +1,76 @@ +//===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the instructions that make up the Intel AMX instruction +// set. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// AMX instructions + +let Predicates = [HasAMXTILE, In64BitMode] in { + let SchedRW = [WriteSystem] in { + let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in + def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src), + "ldtilecfg\t$src", []>, VEX, T8PS; + def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src), + "sttilecfg\t$src", []>, VEX, T8PD; + def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), + (ins sibmem:$src), + "tileloadd\t{$src, $dst|$dst, $src}", []>, + VEX, T8XD; + def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), + (ins sibmem:$src), + "tileloaddt1\t{$src, $dst|$dst, $src}", []>, + VEX, T8PD; + let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in + def TILERELEASE : I<0x49, MRM_C0, (outs), (ins), + "tilerelease", []>, VEX, T8PS; + def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs), + (ins sibmem:$dst, TILE:$src), + "tilestored\t{$src, $dst|$dst, $src}", []>, + VEX, T8XS; + def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins), + "tilezero\t$dst", []>, + VEX, T8XD; + } // SchedRW +} // HasAMXTILE + +let Predicates = [HasAMXINT8, In64BitMode] in { + let SchedRW = [WriteSystem] in { + let Constraints = "$src1 = $dst" in { + def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst), + (ins TILE:$src1, TILE:$src2, TILE:$src3), + "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, + VEX_4V, T8XD; + def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst), + (ins TILE:$src1, TILE:$src2, TILE:$src3), + "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, + VEX_4V, T8XS; + def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs 
TILE:$dst), + (ins TILE:$src1, TILE:$src2, TILE:$src3), + "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, + VEX_4V, T8PD; + def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst), + (ins TILE:$src1, TILE:$src2, TILE:$src3), + "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, + VEX_4V, T8PS; + } + } +} // HasAMXINT8 + +let Predicates = [HasAMXBF16, In64BitMode] in { + let SchedRW = [WriteSystem] in { + let Constraints = "$src1 = $dst" in + def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst), + (ins TILE:$src1, TILE:$src2, TILE:$src3), + "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}", + []>, VEX_4V, T8XS; + } +} // HasAMXBF16 diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -28,6 +28,9 @@ def RawFrmImm16 : Format<8>; def AddCCFrm : Format<9>; def PrefixByte : Format<10>; +def MRMr0 : Format<21>; +def MRMSrcMemFSIB : Format<22>; +def MRMDestMemFSIB : Format<23>; def MRMDestMem : Format<24>; def MRMSrcMem : Format<25>; def MRMSrcMem4VOp3 : Format<26>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -361,6 +361,8 @@ def X86Mem512_RC256XOperand : AsmOperandClass { let Name = "Mem512_RC256X"; } def X86Mem256_RC512Operand : AsmOperandClass { let Name = "Mem256_RC512"; } def X86Mem512_RC512Operand : AsmOperandClass { let Name = "Mem512_RC512"; } + + def X86SibMemOperand : AsmOperandClass { let Name = "SibMem"; } } def X86AbsMemAsmOperand : AsmOperandClass { @@ -392,6 +394,8 @@ // restrict to only unsized memory. 
def opaquemem : X86MemOperand<"printMemReference">; +def sibmem: X86MemOperand<"printMemReference", X86SibMemOperand>; + def i8mem : X86MemOperand<"printbytemem", X86Mem8AsmOperand>; def i16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>; def i32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>; @@ -955,6 +959,9 @@ def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">; def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">; def HasTSXLDTRK : Predicate<"Subtarget->hasTSXLDTRK()">; +def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">; +def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">; +def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">; def In64BitMode : Predicate<"Subtarget->is64Bit()">, @@ -3070,6 +3077,9 @@ include "X86InstrTSX.td" include "X86InstrSGX.td" +// AMX instructions +include "X86InstrAMX.td" + // System instructions. include "X86InstrSystem.td" diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -265,6 +265,16 @@ } } +// Tile "registers". +def TMM0: X86Reg<"tmm0", 0>; +def TMM1: X86Reg<"tmm1", 1>; +def TMM2: X86Reg<"tmm2", 2>; +def TMM3: X86Reg<"tmm3", 3>; +def TMM4: X86Reg<"tmm4", 4>; +def TMM5: X86Reg<"tmm5", 5>; +def TMM6: X86Reg<"tmm6", 6>; +def TMM7: X86Reg<"tmm7", 7>; + // Mask Registers, used by AVX-512 instructions. 
def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, 93, 93]>; def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, 94, 94]>; @@ -621,3 +631,8 @@ // Bound registers def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>; + +// Tiles +let isAllocatable = 0 in +def TILE : RegisterClass<"X86", [untyped], 0, + (sequence "TMM%u", 0, 7)> {let Size = 8192;} diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -403,6 +403,11 @@ /// Processor supports TSXLDTRK instruction bool HasTSXLDTRK = false; + /// Processor has AMX support + bool HasAMXTILE = false; + bool HasAMXBF16 = false; + bool HasAMXINT8 = false; + /// Processor has a single uop BEXTR implementation. bool HasFastBEXTR = false; @@ -735,6 +740,9 @@ bool useRetpolineIndirectBranches() const { return UseRetpolineIndirectBranches; } + bool hasAMXTILE() const { return HasAMXTILE; } + bool hasAMXBF16() const { return HasAMXBF16; } + bool hasAMXINT8() const { return HasAMXINT8; } bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; } // These are generic getters that OR together all of the thunk types diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll --- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll +++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll @@ -3,7 +3,7 @@ target triple = "x86_64-unknown-unknown" declare void @bar1() define preserve_allcc void @foo()#0 { -; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $xmm16 $xmm17 
$xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh +; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw $fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh call void @bar1() call void @bar2() ret void diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-att.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-att.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-att.txt @@ -0,0 +1,25 @@ +# RUN: llvm-mc --disassemble %s 
-triple=x86_64-apple-darwin9 | FileCheck %s + +# CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5a,0x5c,0xf5 + +# CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x72,0x5c,0xda + +# CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5a,0x5c,0xf5 + +# CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x72,0x5c,0xda + +# CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5a,0x5c,0xf5 + +# CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x72,0x5c,0xda + +# CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5a,0x5c,0xf5 + +# CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x72,0x5c,0xda diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-intel.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-intel.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-bf16-intel.txt @@ -0,0 +1,25 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s + +# CHECK: tdpbf16ps tmm6, tmm5, tmm4 +0xc4,0xe2,0x5a,0x5c,0xf5 + +# CHECK: tdpbf16ps tmm3, tmm2, tmm1 +0xc4,0xe2,0x72,0x5c,0xda + +# CHECK: tdpbf16ps tmm6, tmm5, tmm4 +0xc4,0xe2,0x5a,0x5c,0xf5 + +# CHECK: tdpbf16ps tmm3, tmm2, tmm1 +0xc4,0xe2,0x72,0x5c,0xda + +# CHECK: tdpbf16ps tmm6, tmm5, tmm4 +0xc4,0xe2,0x5a,0x5c,0xf5 + +# CHECK: tdpbf16ps tmm3, tmm2, tmm1 +0xc4,0xe2,0x72,0x5c,0xda + +# CHECK: tdpbf16ps tmm6, tmm5, tmm4 +0xc4,0xe2,0x5a,0x5c,0xf5 + +# CHECK: tdpbf16ps tmm3, tmm2, tmm1 +0xc4,0xe2,0x72,0x5c,0xda diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-error.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-error.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-error.txt @@ -0,0 +1,4 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 2>&1 | FileCheck %s + +# CHECK: invalid instruction encoding +0xc4,0xe2,0x1a,0x5c,0xf5 diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-att.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-att.txt new file mode 100644 --- 
/dev/null +++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-att.txt @@ -0,0 +1,97 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s + +# CHECK: tdpbssd %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5b,0x5e,0xf5 + +# CHECK: tdpbssd %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x73,0x5e,0xda + +# CHECK: tdpbsud %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5a,0x5e,0xf5 + +# CHECK: tdpbsud %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x72,0x5e,0xda + +# CHECK: tdpbusd %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x59,0x5e,0xf5 + +# CHECK: tdpbusd %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x71,0x5e,0xda + +# CHECK: tdpbuud %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x58,0x5e,0xf5 + +# CHECK: tdpbuud %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x70,0x5e,0xda + +# CHECK: tdpbssd %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5b,0x5e,0xf5 + +# CHECK: tdpbssd %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x73,0x5e,0xda + +# CHECK: tdpbsud %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5a,0x5e,0xf5 + +# CHECK: tdpbsud %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x72,0x5e,0xda + +# CHECK: tdpbusd %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x59,0x5e,0xf5 + +# CHECK: tdpbusd %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x71,0x5e,0xda + +# CHECK: tdpbuud %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x58,0x5e,0xf5 + +# CHECK: tdpbuud %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x70,0x5e,0xda + +# CHECK: tdpbssd %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5b,0x5e,0xf5 + +# CHECK: tdpbssd %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x73,0x5e,0xda + +# CHECK: tdpbsud %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5a,0x5e,0xf5 + +# CHECK: tdpbsud %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x72,0x5e,0xda + +# CHECK: tdpbusd %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x59,0x5e,0xf5 + +# CHECK: tdpbusd %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x71,0x5e,0xda + +# CHECK: tdpbuud %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x58,0x5e,0xf5 + +# CHECK: tdpbuud %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x70,0x5e,0xda + +# CHECK: tdpbssd %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5b,0x5e,0xf5 + +# CHECK: tdpbssd %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x73,0x5e,0xda + +# CHECK: tdpbsud %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x5a,0x5e,0xf5 + +# CHECK: tdpbsud %tmm1, %tmm2, %tmm3 
+0xc4,0xe2,0x72,0x5e,0xda + +# CHECK: tdpbusd %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x59,0x5e,0xf5 + +# CHECK: tdpbusd %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x71,0x5e,0xda + +# CHECK: tdpbuud %tmm4, %tmm5, %tmm6 +0xc4,0xe2,0x58,0x5e,0xf5 + +# CHECK: tdpbuud %tmm1, %tmm2, %tmm3 +0xc4,0xe2,0x70,0x5e,0xda diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-intel.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-intel.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-int8-intel.txt @@ -0,0 +1,97 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s + +# CHECK: tdpbssd tmm6, tmm5, tmm4 +0xc4,0xe2,0x5b,0x5e,0xf5 + +# CHECK: tdpbssd tmm3, tmm2, tmm1 +0xc4,0xe2,0x73,0x5e,0xda + +# CHECK: tdpbsud tmm6, tmm5, tmm4 +0xc4,0xe2,0x5a,0x5e,0xf5 + +# CHECK: tdpbsud tmm3, tmm2, tmm1 +0xc4,0xe2,0x72,0x5e,0xda + +# CHECK: tdpbusd tmm6, tmm5, tmm4 +0xc4,0xe2,0x59,0x5e,0xf5 + +# CHECK: tdpbusd tmm3, tmm2, tmm1 +0xc4,0xe2,0x71,0x5e,0xda + +# CHECK: tdpbuud tmm6, tmm5, tmm4 +0xc4,0xe2,0x58,0x5e,0xf5 + +# CHECK: tdpbuud tmm3, tmm2, tmm1 +0xc4,0xe2,0x70,0x5e,0xda + +# CHECK: tdpbssd tmm6, tmm5, tmm4 +0xc4,0xe2,0x5b,0x5e,0xf5 + +# CHECK: tdpbssd tmm3, tmm2, tmm1 +0xc4,0xe2,0x73,0x5e,0xda + +# CHECK: tdpbsud tmm6, tmm5, tmm4 +0xc4,0xe2,0x5a,0x5e,0xf5 + +# CHECK: tdpbsud tmm3, tmm2, tmm1 +0xc4,0xe2,0x72,0x5e,0xda + +# CHECK: tdpbusd tmm6, tmm5, tmm4 +0xc4,0xe2,0x59,0x5e,0xf5 + +# CHECK: tdpbusd tmm3, tmm2, tmm1 +0xc4,0xe2,0x71,0x5e,0xda + +# CHECK: tdpbuud tmm6, tmm5, tmm4 +0xc4,0xe2,0x58,0x5e,0xf5 + +# CHECK: tdpbuud tmm3, tmm2, tmm1 +0xc4,0xe2,0x70,0x5e,0xda + +# CHECK: tdpbssd tmm6, tmm5, tmm4 +0xc4,0xe2,0x5b,0x5e,0xf5 + +# CHECK: tdpbssd tmm3, tmm2, tmm1 +0xc4,0xe2,0x73,0x5e,0xda + +# CHECK: tdpbsud tmm6, tmm5, tmm4 +0xc4,0xe2,0x5a,0x5e,0xf5 + +# CHECK: tdpbsud tmm3, tmm2, tmm1 +0xc4,0xe2,0x72,0x5e,0xda + +# CHECK: tdpbusd tmm6, tmm5, tmm4 +0xc4,0xe2,0x59,0x5e,0xf5 + +# CHECK: tdpbusd tmm3, 
tmm2, tmm1 +0xc4,0xe2,0x71,0x5e,0xda + +# CHECK: tdpbuud tmm6, tmm5, tmm4 +0xc4,0xe2,0x58,0x5e,0xf5 + +# CHECK: tdpbuud tmm3, tmm2, tmm1 +0xc4,0xe2,0x70,0x5e,0xda + +# CHECK: tdpbssd tmm6, tmm5, tmm4 +0xc4,0xe2,0x5b,0x5e,0xf5 + +# CHECK: tdpbssd tmm3, tmm2, tmm1 +0xc4,0xe2,0x73,0x5e,0xda + +# CHECK: tdpbsud tmm6, tmm5, tmm4 +0xc4,0xe2,0x5a,0x5e,0xf5 + +# CHECK: tdpbsud tmm3, tmm2, tmm1 +0xc4,0xe2,0x72,0x5e,0xda + +# CHECK: tdpbusd tmm6, tmm5, tmm4 +0xc4,0xe2,0x59,0x5e,0xf5 + +# CHECK: tdpbusd tmm3, tmm2, tmm1 +0xc4,0xe2,0x71,0x5e,0xda + +# CHECK: tdpbuud tmm6, tmm5, tmm4 +0xc4,0xe2,0x58,0x5e,0xf5 + +# CHECK: tdpbuud tmm3, tmm2, tmm1 +0xc4,0xe2,0x70,0x5e,0xda diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-att.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-att.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-att.txt @@ -0,0 +1,145 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s + +# CHECK: tilerelease +0xc4,0xe2,0x78,0x49,0xc0 + +# CHECK: tilezero %tmm6 +0xc4,0xe2,0x7b,0x49,0xf0 + +# CHECK: tilezero %tmm3 +0xc4,0xe2,0x7b,0x49,0xd8 + +# CHECK: tilezero %tmm6 +0xc4,0xe2,0x7b,0x49,0xf0 + +# CHECK: tilezero %tmm3 +0xc4,0xe2,0x7b,0x49,0xd8 + +# CHECK: ldtilecfg 268435456(%rbp,%r14,8) +0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: ldtilecfg 291(%r8,%rax,4) +0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00 + +# CHECK: ldtilecfg (%rip) +0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00 + +# CHECK: ldtilecfg -2048(,%rbp,2) +0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: sttilecfg 268435456(%rbp,%r14,8) +0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: sttilecfg 291(%r8,%rax,4) +0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00 + +# CHECK: sttilecfg (%rip) +0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00 + +# CHECK: sttilecfg -2048(,%rbp,2) +0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: tileloadd 268435456(%rbp,%r14,8), 
%tmm6 +0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tileloadd 291(%r8,%rax,4), %tmm3 +0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tileloadd -32(,%rbp,2), %tmm3 +0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tileloadd 64(%rbx), %tmm4 +0xc4,0xe2,0x7b,0x4b,0x64,0x23,0x40 + +# CHECK: tileloaddt1 268435456(%rbp,%r14,8), %tmm6 +0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tileloaddt1 291(%r8,%rax,4), %tmm3 +0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tileloaddt1 -32(,%rbp,2), %tmm3 +0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tileloaddt1 16(%rbp), %tmm6 +0xc4,0xe2,0x79,0x4b,0x74,0x25,0x10 + +# CHECK: tilerelease +0xc4,0xe2,0x78,0x49,0xc0 + +# CHECK: tilestored %tmm6, 268435456(%rbp,%r14,8) +0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tilestored %tmm3, 291(%r8,%rax,4) +0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tilestored %tmm3, -32(,%rbp,2) +0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tilezero %tmm6 +0xc4,0xe2,0x7b,0x49,0xf0 + +# CHECK: tilezero %tmm3 +0xc4,0xe2,0x7b,0x49,0xd8 + +# CHECK: ldtilecfg 268435456(%rbp,%r14,8) +0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: ldtilecfg 291(%r8,%rax,4) +0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00 + +# CHECK: ldtilecfg (%rip) +0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00 + +# CHECK: ldtilecfg -2048(,%rbp,2) +0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: sttilecfg 268435456(%rbp,%r14,8) +0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: sttilecfg 291(%r8,%rax,4) +0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00 + +# CHECK: sttilecfg (%rip) +0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00 + +# CHECK: sttilecfg -2048(,%rbp,2) +0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: tileloadd 268435456(%rbp,%r14,8), %tmm6 +0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tileloadd 291(%r8,%rax,4), %tmm3 
+0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tileloadd -32(,%rbp,2), %tmm3 +0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tileloaddt1 268435456(%rbp,%r14,8), %tmm6 +0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tileloaddt1 291(%r8,%rax,4), %tmm3 +0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tileloaddt1 -32(,%rbp,2), %tmm3 +0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tilerelease +0xc4,0xe2,0x78,0x49,0xc0 + +# CHECK: tilestored %tmm6, 268435456(%rbp,%r14,8) +0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tilestored %tmm3, 291(%r8,%rax,4) +0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tilestored %tmm3, -32(,%rbp,2) +0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tilestored %tmm3, (%r8) +0xc4,0xc2,0x7a,0x4b,0x1c,0x20 + +# CHECK: tilezero %tmm6 +0xc4,0xe2,0x7b,0x49,0xf0 + +# CHECK: tilezero %tmm3 +0xc4,0xe2,0x7b,0x49,0xd8 diff --git a/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-intel.txt b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-intel.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-intel.txt @@ -0,0 +1,148 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s + +# CHECK: tilerelease +0xc4,0xe2,0x78,0x49,0xc0 + +# CHECK: tilezero tmm6 +0xc4,0xe2,0x7b,0x49,0xf0 + +# CHECK: tilezero tmm3 +0xc4,0xe2,0x7b,0x49,0xd8 + +# CHECK: tilerelease +0xc4,0xe2,0x78,0x49,0xc0 + +# CHECK: tilezero tmm6 +0xc4,0xe2,0x7b,0x49,0xf0 + +# CHECK: tilezero tmm3 +0xc4,0xe2,0x7b,0x49,0xd8 + +# CHECK: ldtilecfg [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: ldtilecfg [r8 + 4*rax + 291] +0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00 + +# CHECK: ldtilecfg [rip] +0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00 + +# CHECK: ldtilecfg [2*rbp - 2048] +0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: 
sttilecfg [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: sttilecfg [r8 + 4*rax + 291] +0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00 + +# CHECK: sttilecfg [rip] +0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00 + +# CHECK: sttilecfg [2*rbp - 2048] +0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: tileloadd tmm6, [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tileloadd tmm3, [r8 + 4*rax + 291] +0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tileloadd tmm3, [2*rbp - 32] +0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tileloadd tmm4, [rbx + 64] +0xc4,0xe2,0x7b,0x4b,0x64,0x23,0x40 + +# CHECK: tileloaddt1 tmm6, [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tileloaddt1 tmm3, [r8 + 4*rax + 291] +0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tileloaddt1 tmm3, [2*rbp - 32] +0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tileloaddt1 tmm6, [rbp + 16] +0xc4,0xe2,0x79,0x4b,0x74,0x25,0x10 + +# CHECK: tilerelease +0xc4,0xe2,0x78,0x49,0xc0 + +# CHECK: tilestored [rbp + 8*r14 + 268435456], tmm6 +0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tilestored [r8 + 4*rax + 291], tmm3 +0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tilestored [2*rbp - 32], tmm3 +0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tilestored [r8], tmm3 +0xc4,0xc2,0x7a,0x4b,0x1c,0x20 + +# CHECK: tilezero tmm6 +0xc4,0xe2,0x7b,0x49,0xf0 + +# CHECK: tilezero tmm3 +0xc4,0xe2,0x7b,0x49,0xd8 + +# CHECK: ldtilecfg [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: ldtilecfg [r8 + 4*rax + 291] +0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00 + +# CHECK: ldtilecfg [rip] +0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00 + +# CHECK: ldtilecfg [2*rbp - 2048] +0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: sttilecfg [rbp + 8*r14 + 
268435456] +0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: sttilecfg [r8 + 4*rax + 291] +0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00 + +# CHECK: sttilecfg [rip] +0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00 + +# CHECK: sttilecfg [2*rbp - 2048] +0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff + +# CHECK: tileloadd tmm6, [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tileloadd tmm3, [r8 + 4*rax + 291] +0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tileloadd tmm3, [2*rbp - 32] +0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tileloaddt1 tmm6, [rbp + 8*r14 + 268435456] +0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tileloaddt1 tmm3, [r8 + 4*rax + 291] +0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tileloaddt1 tmm3, [2*rbp - 32] +0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tilerelease +0xc4,0xe2,0x78,0x49,0xc0 + +# CHECK: tilestored [rbp + 8*r14 + 268435456], tmm6 +0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# CHECK: tilestored [r8 + 4*rax + 291], tmm3 +0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00 + +# CHECK: tilestored [2*rbp - 32], tmm3 +0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff + +# CHECK: tilezero tmm6 +0xc4,0xe2,0x7b,0x49,0xf0 + +# CHECK: tilezero tmm3 +0xc4,0xe2,0x7b,0x49,0xd8 diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-bf16-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-bf16-att.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/AMX/x86-64-amx-bf16-att.s @@ -0,0 +1,34 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -show-encoding %s | FileCheck %s +// some AMX instruction must use SIB. 
+ +// CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5] + tdpbf16ps %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda] + tdpbf16ps %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5] + tdpbf16ps %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda] + tdpbf16ps %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5] + tdpbf16ps %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda] + tdpbf16ps %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbf16ps %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5] + tdpbf16ps %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbf16ps %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda] + tdpbf16ps %tmm1, %tmm2, %tmm3 diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-bf16-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-bf16-intel.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/AMX/x86-64-amx-bf16-intel.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: tdpbf16ps tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5] + tdpbf16ps tmm6, tmm5, tmm4 + +// CHECK: tdpbf16ps tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda] + tdpbf16ps tmm3, tmm2, tmm1 + +// CHECK: tdpbf16ps tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5] + tdpbf16ps tmm6, tmm5, tmm4 + +// CHECK: tdpbf16ps tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda] + tdpbf16ps tmm3, tmm2, tmm1 + +// CHECK: tdpbf16ps tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5] + tdpbf16ps tmm6, tmm5, tmm4 + +// CHECK: tdpbf16ps tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda] + 
tdpbf16ps tmm3, tmm2, tmm1 + +// CHECK: tdpbf16ps tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5c,0xf5] + tdpbf16ps tmm6, tmm5, tmm4 + +// CHECK: tdpbf16ps tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5c,0xda] + tdpbf16ps tmm3, tmm2, tmm1 diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-error.s b/llvm/test/MC/X86/AMX/x86-64-amx-error.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/AMX/x86-64-amx-error.s @@ -0,0 +1,10 @@ +// RUN: not llvm-mc -triple x86_64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s + +// CHECK: invalid operand for instruction +tileloadd (%rip), %tmm0 + +// CHECK: invalid operand for instruction +tileloaddt1 1(%rip), %tmm1 + +// CHECK: invalid operand for instruction +tilestored %tmm2, (%rip) diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-int8-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-int8-att.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/AMX/x86-64-amx-int8-att.s @@ -0,0 +1,130 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -show-encoding %s | FileCheck %s +// some AMX instruction must use SIB. 
+ +// CHECK: tdpbssd %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5] + tdpbssd %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbssd %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda] + tdpbssd %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbsud %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5] + tdpbsud %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbsud %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda] + tdpbsud %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbusd %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5] + tdpbusd %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbusd %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda] + tdpbusd %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbuud %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5] + tdpbuud %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbuud %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda] + tdpbuud %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbssd %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5] + tdpbssd %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbssd %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda] + tdpbssd %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbsud %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5] + tdpbsud %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbsud %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda] + tdpbsud %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbusd %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5] + tdpbusd %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbusd %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda] + tdpbusd %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbuud %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5] + tdpbuud %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbuud %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda] + tdpbuud %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbssd %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5] + 
tdpbssd %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbssd %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda] + tdpbssd %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbsud %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5] + tdpbsud %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbsud %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda] + tdpbsud %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbusd %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5] + tdpbusd %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbusd %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda] + tdpbusd %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbuud %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5] + tdpbuud %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbuud %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda] + tdpbuud %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbssd %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5] + tdpbssd %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbssd %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda] + tdpbssd %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbsud %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5] + tdpbsud %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbsud %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda] + tdpbsud %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbusd %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5] + tdpbusd %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbusd %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda] + tdpbusd %tmm1, %tmm2, %tmm3 + +// CHECK: tdpbuud %tmm4, %tmm5, %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5] + tdpbuud %tmm4, %tmm5, %tmm6 + +// CHECK: tdpbuud %tmm1, %tmm2, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda] + tdpbuud %tmm1, %tmm2, %tmm3 diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-int8-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-int8-intel.s new file mode 100644 --- /dev/null +++ 
b/llvm/test/MC/X86/AMX/x86-64-amx-int8-intel.s @@ -0,0 +1,129 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: tdpbssd tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5] + tdpbssd tmm6, tmm5, tmm4 + +// CHECK: tdpbssd tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda] + tdpbssd tmm3, tmm2, tmm1 + +// CHECK: tdpbsud tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5] + tdpbsud tmm6, tmm5, tmm4 + +// CHECK: tdpbsud tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda] + tdpbsud tmm3, tmm2, tmm1 + +// CHECK: tdpbusd tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5] + tdpbusd tmm6, tmm5, tmm4 + +// CHECK: tdpbusd tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda] + tdpbusd tmm3, tmm2, tmm1 + +// CHECK: tdpbuud tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5] + tdpbuud tmm6, tmm5, tmm4 + +// CHECK: tdpbuud tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda] + tdpbuud tmm3, tmm2, tmm1 + +// CHECK: tdpbssd tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5] + tdpbssd tmm6, tmm5, tmm4 + +// CHECK: tdpbssd tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda] + tdpbssd tmm3, tmm2, tmm1 + +// CHECK: tdpbsud tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5] + tdpbsud tmm6, tmm5, tmm4 + +// CHECK: tdpbsud tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda] + tdpbsud tmm3, tmm2, tmm1 + +// CHECK: tdpbusd tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5] + tdpbusd tmm6, tmm5, tmm4 + +// CHECK: tdpbusd tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda] + tdpbusd tmm3, tmm2, tmm1 + +// CHECK: tdpbuud tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5] + tdpbuud tmm6, tmm5, tmm4 + +// CHECK: tdpbuud tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda] + tdpbuud tmm3, tmm2, tmm1 
+ +// CHECK: tdpbssd tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5] + tdpbssd tmm6, tmm5, tmm4 + +// CHECK: tdpbssd tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda] + tdpbssd tmm3, tmm2, tmm1 + +// CHECK: tdpbsud tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5] + tdpbsud tmm6, tmm5, tmm4 + +// CHECK: tdpbsud tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda] + tdpbsud tmm3, tmm2, tmm1 + +// CHECK: tdpbusd tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5] + tdpbusd tmm6, tmm5, tmm4 + +// CHECK: tdpbusd tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda] + tdpbusd tmm3, tmm2, tmm1 + +// CHECK: tdpbuud tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5] + tdpbuud tmm6, tmm5, tmm4 + +// CHECK: tdpbuud tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda] + tdpbuud tmm3, tmm2, tmm1 + +// CHECK: tdpbssd tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5b,0x5e,0xf5] + tdpbssd tmm6, tmm5, tmm4 + +// CHECK: tdpbssd tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x73,0x5e,0xda] + tdpbssd tmm3, tmm2, tmm1 + +// CHECK: tdpbsud tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x5a,0x5e,0xf5] + tdpbsud tmm6, tmm5, tmm4 + +// CHECK: tdpbsud tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x72,0x5e,0xda] + tdpbsud tmm3, tmm2, tmm1 + +// CHECK: tdpbusd tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x59,0x5e,0xf5] + tdpbusd tmm6, tmm5, tmm4 + +// CHECK: tdpbusd tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x71,0x5e,0xda] + tdpbusd tmm3, tmm2, tmm1 + +// CHECK: tdpbuud tmm6, tmm5, tmm4 +// CHECK: encoding: [0xc4,0xe2,0x58,0x5e,0xf5] + tdpbuud tmm6, tmm5, tmm4 + +// CHECK: tdpbuud tmm3, tmm2, tmm1 +// CHECK: encoding: [0xc4,0xe2,0x70,0x5e,0xda] + tdpbuud tmm3, tmm2, tmm1 diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-tile-att.s b/llvm/test/MC/X86/AMX/x86-64-amx-tile-att.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/AMX/x86-64-amx-tile-att.s @@ 
-0,0 +1,198 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -show-encoding %s | FileCheck %s +// some AMX instruction must use SIB. + +// CHECK: tilerelease +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0] + tilerelease + +// CHECK: tilezero %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0] + tilezero %tmm6 + +// CHECK: tilezero %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8] + tilezero %tmm3 + +// CHECK: tilerelease +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0] + tilerelease + +// CHECK: tilezero %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0] + tilezero %tmm6 + +// CHECK: tilezero %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8] + tilezero %tmm3 + +// CHECK: ldtilecfg 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10] + ldtilecfg 268435456(%rbp,%r14,8) + +// CHECK: ldtilecfg 291(%r8,%rax,4) +// CHECK: encoding: [0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00] + ldtilecfg 291(%r8,%rax,4) + +// CHECK: ldtilecfg (%rip) +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00] + ldtilecfg (%rip) + +// CHECK: ldtilecfg -2048(,%rbp,2) +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff] + ldtilecfg -2048(,%rbp,2) + +// CHECK: sttilecfg 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10] + sttilecfg 268435456(%rbp,%r14,8) + +// CHECK: sttilecfg 291(%r8,%rax,4) +// CHECK: encoding: [0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00] + sttilecfg 291(%r8,%rax,4) + +// CHECK: sttilecfg (%rip) +// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00] + sttilecfg (%rip) + +// CHECK: sttilecfg -2048(,%rbp,2) +// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff] + sttilecfg -2048(,%rbp,2) + +// CHECK: tileloadd 268435456(%rbp,%r14,8), %tmm6 +// CHECK: encoding: [0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + tileloadd 268435456(%rbp,%r14,8), %tmm6 + +// CHECK: tileloadd 291(%r8,%rax,4), %tmm3 +// 
CHECK: encoding: [0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tileloadd 291(%r8,%rax,4), %tmm3 + +// CHECK: tileloadd 64(%rbx), %tmm4 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x64,0x23,0x40] + tileloadd 64(%rbx), %tmm4 + +// CHECK: tileloadd -32(,%rbp,2), %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tileloadd -32(,%rbp,2), %tmm3 + +// CHECK: tileloaddt1 268435456(%rbp,%r14,8), %tmm6 +// CHECK: encoding: [0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + tileloaddt1 268435456(%rbp,%r14,8), %tmm6 + +// CHECK: tileloaddt1 291(%r8,%rax,4), %tmm3 +// CHECK: encoding: [0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tileloaddt1 291(%r8,%rax,4), %tmm3 + +// CHECK: tileloaddt1 -32(,%rbp,2), %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tileloaddt1 -32(,%rbp,2), %tmm3 + +// CHECK: tileloaddt1 16(%rbp), %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x74,0x25,0x10] + tileloaddt1 16(%rbp), %tmm6 + +// CHECK: tilerelease +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0] + tilerelease + +// CHECK: tilestored %tmm6, 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + tilestored %tmm6, 268435456(%rbp,%r14,8) + +// CHECK: tilestored %tmm3, 291(%r8,%rax,4) +// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tilestored %tmm3, 291(%r8,%rax,4) + +// CHECK: tilestored %tmm3, -32(,%rbp,2) +// CHECK: encoding: [0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tilestored %tmm3, -32(,%rbp,2) + +// CHECK: tilestored %tmm3, (%r8) +// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x1c,0x20] + tilestored %tmm3, (%r8) + +// CHECK: tilezero %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0] + tilezero %tmm6 + +// CHECK: tilezero %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8] + tilezero %tmm3 + +// CHECK: ldtilecfg 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10] + ldtilecfg 
268435456(%rbp,%r14,8) + +// CHECK: ldtilecfg 291(%r8,%rax,4) +// CHECK: encoding: [0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00] + ldtilecfg 291(%r8,%rax,4) + +// CHECK: ldtilecfg (%rip) +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00] + ldtilecfg (%rip) + +// CHECK: ldtilecfg -2048(,%rbp,2) +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff] + ldtilecfg -2048(,%rbp,2) + +// CHECK: sttilecfg 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10] + sttilecfg 268435456(%rbp,%r14,8) + +// CHECK: sttilecfg 291(%r8,%rax,4) +// CHECK: encoding: [0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00] + sttilecfg 291(%r8,%rax,4) + +// CHECK: sttilecfg (%rip) +// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00] + sttilecfg (%rip) + +// CHECK: sttilecfg -2048(,%rbp,2) +// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff] + sttilecfg -2048(,%rbp,2) + +// CHECK: tileloadd 268435456(%rbp,%r14,8), %tmm6 +// CHECK: encoding: [0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + tileloadd 268435456(%rbp,%r14,8), %tmm6 + +// CHECK: tileloadd 291(%r8,%rax,4), %tmm3 +// CHECK: encoding: [0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tileloadd 291(%r8,%rax,4), %tmm3 + +// CHECK: tileloadd -32(,%rbp,2), %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tileloadd -32(,%rbp,2), %tmm3 + +// CHECK: tileloaddt1 268435456(%rbp,%r14,8), %tmm6 +// CHECK: encoding: [0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + tileloaddt1 268435456(%rbp,%r14,8), %tmm6 + +// CHECK: tileloaddt1 291(%r8,%rax,4), %tmm3 +// CHECK: encoding: [0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tileloaddt1 291(%r8,%rax,4), %tmm3 + +// CHECK: tileloaddt1 -32(,%rbp,2), %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tileloaddt1 -32(,%rbp,2), %tmm3 + +// CHECK: tilerelease +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0] + 
tilerelease + +// CHECK: tilestored %tmm6, 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + tilestored %tmm6, 268435456(%rbp,%r14,8) + +// CHECK: tilestored %tmm3, 291(%r8,%rax,4) +// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tilestored %tmm3, 291(%r8,%rax,4) + +// CHECK: tilestored %tmm3, -32(,%rbp,2) +// CHECK: encoding: [0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tilestored %tmm3, -32(,%rbp,2) + +// CHECK: tilezero %tmm6 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0] + tilezero %tmm6 + +// CHECK: tilezero %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8] + tilezero %tmm3 diff --git a/llvm/test/MC/X86/AMX/x86-64-amx-tile-intel.s b/llvm/test/MC/X86/AMX/x86-64-amx-tile-intel.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/AMX/x86-64-amx-tile-intel.s @@ -0,0 +1,197 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: tilerelease +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0] + tilerelease + +// CHECK: tilezero tmm6 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0] + tilezero tmm6 + +// CHECK: tilezero tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8] + tilezero tmm3 + +// CHECK: tilerelease +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0] + tilerelease + +// CHECK: tilezero tmm6 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0] + tilezero tmm6 + +// CHECK: tilezero tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8] + tilezero tmm3 + +// CHECK: ldtilecfg [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10] + ldtilecfg [rbp + 8*r14 + 268435456] + +// CHECK: ldtilecfg [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00] + ldtilecfg [r8 + 4*rax + 291] + +// CHECK: ldtilecfg [rip] +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00] + ldtilecfg [rip] + +// CHECK: ldtilecfg [2*rbp - 
2048] +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff] + ldtilecfg [2*rbp - 2048] + +// CHECK: sttilecfg [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10] + sttilecfg [rbp + 8*r14 + 268435456] + +// CHECK: sttilecfg [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00] + sttilecfg [r8 + 4*rax + 291] + +// CHECK: sttilecfg [rip] +// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00] + sttilecfg [rip] + +// CHECK: sttilecfg [2*rbp - 2048] +// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff] + sttilecfg [2*rbp - 2048] + +// CHECK: tileloadd tmm6, [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + tileloadd tmm6, [rbp + 8*r14 + 268435456] + +// CHECK: tileloadd tmm3, [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tileloadd tmm3, [r8 + 4*rax + 291] + +// CHECK: tileloadd tmm3, [2*rbp - 32] +// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tileloadd tmm3, [2*rbp - 32] + +// CHECK: tileloadd tmm4, [rbx + 64] +// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x64,0x23,0x40] + tileloadd tmm4, [rbx + 64] + +// CHECK: tileloaddt1 tmm6, [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + tileloaddt1 tmm6, [rbp + 8*r14 + 268435456] + +// CHECK: tileloaddt1 tmm3, [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tileloaddt1 tmm3, [r8 + 4*rax + 291] + +// CHECK: tileloaddt1 tmm3, [2*rbp - 32] +// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tileloaddt1 tmm3, [2*rbp - 32] + +// CHECK: tileloaddt1 tmm6, [rbp + 16] +// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x74,0x25,0x10] + tileloaddt1 tmm6, [rbp + 16] + +// CHECK: tilerelease +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0] + tilerelease + +// CHECK: tilestored 
[rbp + 8*r14 + 268435456], tmm6 +// CHECK: encoding: [0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + tilestored [rbp + 8*r14 + 268435456], tmm6 + +// CHECK: tilestored [r8 + 4*rax + 291], tmm3 +// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tilestored [r8 + 4*rax + 291], tmm3 + +// CHECK: tilestored [2*rbp - 32], tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tilestored [2*rbp - 32], tmm3 + +// CHECK: tilestored [r8], tmm3 +// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x1c,0x20] + tilestored [r8], tmm3 + +// CHECK: tilezero tmm6 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0] + tilezero tmm6 + +// CHECK: tilezero tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8] + tilezero tmm3 + +// CHECK: ldtilecfg [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x78,0x49,0x84,0xf5,0x00,0x00,0x00,0x10] + ldtilecfg [rbp + 8*r14 + 268435456] + +// CHECK: ldtilecfg [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x78,0x49,0x84,0x80,0x23,0x01,0x00,0x00] + ldtilecfg [r8 + 4*rax + 291] + +// CHECK: ldtilecfg [rip] +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x05,0x00,0x00,0x00,0x00] + ldtilecfg [rip] + +// CHECK: ldtilecfg [2*rbp - 2048] +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff] + ldtilecfg [2*rbp - 2048] + +// CHECK: sttilecfg [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x79,0x49,0x84,0xf5,0x00,0x00,0x00,0x10] + sttilecfg [rbp + 8*r14 + 268435456] + +// CHECK: sttilecfg [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x79,0x49,0x84,0x80,0x23,0x01,0x00,0x00] + sttilecfg [r8 + 4*rax + 291] + +// CHECK: sttilecfg [rip] +// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x05,0x00,0x00,0x00,0x00] + sttilecfg [rip] + +// CHECK: sttilecfg [2*rbp - 2048] +// CHECK: encoding: [0xc4,0xe2,0x79,0x49,0x04,0x6d,0x00,0xf8,0xff,0xff] + sttilecfg [2*rbp - 2048] + +// CHECK: tileloadd tmm6, [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x7b,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + 
tileloadd tmm6, [rbp + 8*r14 + 268435456] + +// CHECK: tileloadd tmm3, [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x7b,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tileloadd tmm3, [r8 + 4*rax + 291] + +// CHECK: tileloadd tmm3, [2*rbp - 32] +// CHECK: encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tileloadd tmm3, [2*rbp - 32] + +// CHECK: tileloaddt1 tmm6, [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0xc4,0xa2,0x79,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + tileloaddt1 tmm6, [rbp + 8*r14 + 268435456] + +// CHECK: tileloaddt1 tmm3, [r8 + 4*rax + 291] +// CHECK: encoding: [0xc4,0xc2,0x79,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tileloaddt1 tmm3, [r8 + 4*rax + 291] + +// CHECK: tileloaddt1 tmm3, [2*rbp - 32] +// CHECK: encoding: [0xc4,0xe2,0x79,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tileloaddt1 tmm3, [2*rbp - 32] + +// CHECK: tilerelease +// CHECK: encoding: [0xc4,0xe2,0x78,0x49,0xc0] + tilerelease + +// CHECK: tilestored [rbp + 8*r14 + 268435456], tmm6 +// CHECK: encoding: [0xc4,0xa2,0x7a,0x4b,0xb4,0xf5,0x00,0x00,0x00,0x10] + tilestored [rbp + 8*r14 + 268435456], tmm6 + +// CHECK: tilestored [r8 + 4*rax + 291], tmm3 +// CHECK: encoding: [0xc4,0xc2,0x7a,0x4b,0x9c,0x80,0x23,0x01,0x00,0x00] + tilestored [r8 + 4*rax + 291], tmm3 + +// CHECK: tilestored [2*rbp - 32], tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7a,0x4b,0x1c,0x6d,0xe0,0xff,0xff,0xff] + tilestored [2*rbp - 32], tmm3 + +// CHECK: tilezero tmm6 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xf0] + tilezero tmm6 + +// CHECK: tilezero tmm3 +// CHECK: encoding: [0xc4,0xe2,0x7b,0x49,0xd8] + tilezero tmm3 diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -40,6 +40,7 @@ case X86II::MRMSrcRegOp4: case X86II::MRMSrcRegCC: case X86II::MRMXrCC: + case X86II::MRMr0: case X86II::MRMXr: case X86II::MRM0r: case X86II::MRM1r: diff --git 
a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp --- a/llvm/utils/TableGen/X86DisassemblerTables.cpp +++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp @@ -595,6 +595,7 @@ bool satisfiesOneEntry = true; bool satisfiesSplitRM = true; bool satisfiesSplitReg = true; + bool satisfiesSplitRegM = true; bool satisfiesSplitMisc = true; for (unsigned index = 0; index < 256; ++index) { @@ -616,6 +617,10 @@ if (((index & 0xc0) != 0xc0) && (decision.instructionIDs[index] != decision.instructionIDs[index&0x38])) satisfiesSplitMisc = false; + + if (((index & 0xc0) == 0xc0) && + (decision.instructionIDs[index] != decision.instructionIDs[index&0xc7])) + satisfiesSplitRegM = false; } if (satisfiesOneEntry) @@ -627,6 +632,9 @@ if (satisfiesSplitReg && satisfiesSplitMisc) return MODRM_SPLITREG; + if (satisfiesSplitRegM) + return MODRM_SPLITREGM; + if (satisfiesSplitMisc) return MODRM_SPLITMISC; @@ -691,6 +699,10 @@ for (unsigned index = 0xc0; index < 256; index += 8) ModRMDecision.push_back(decision.instructionIDs[index]); break; + case MODRM_SPLITREGM: + for (unsigned index = 0xc0; index < 256; index += 8) + ModRMDecision.push_back(decision.instructionIDs[index]); + break; case MODRM_SPLITMISC: for (unsigned index = 0; index < 64; index += 8) ModRMDecision.push_back(decision.instructionIDs[index]); @@ -732,6 +744,9 @@ case MODRM_SPLITREG: sEntryNumber += 16; break; + case MODRM_SPLITREGM: + sEntryNumber += 8; + break; case MODRM_SPLITMISC: sEntryNumber += 8 + 64; break; diff --git a/llvm/utils/TableGen/X86ModRMFilters.h b/llvm/utils/TableGen/X86ModRMFilters.h --- a/llvm/utils/TableGen/X86ModRMFilters.h +++ b/llvm/utils/TableGen/X86ModRMFilters.h @@ -108,6 +108,29 @@ } }; +/// ExtendedRMFilter - Extended opcodes are classified based on the value of the +/// mod field [bits 7-6] and the value of the nnn field [bits 2-0]. 
+class ExtendedRMFilter : public ModRMFilter { + void anchor() override; + bool R; + uint8_t NNN; +public: + /// Constructor + /// + /// \param r True if the mod field must be set to 11; false if it must + /// not be 11. The name is explained at ModFilter. + /// \param nnn The required value of the nnn field [bits 2-0]. + ExtendedRMFilter(bool r, uint8_t nnn) : ModRMFilter(), R(r), NNN(nnn) {} + + /// Accepts a ModR/M byte only when its mod field agrees with R (mod is 11 + /// exactly when R is true) and its nnn field [bits 2-0] equals NNN. + bool accepts(uint8_t modRM) const override { + // Compare against R so that r == false selects mod != 11 instead of + // rejecting every byte. + const bool modIsReg = (modRM & 0xc0) == 0xc0; + return (R == modIsReg) && ((modRM & 0x7) == NNN); + } +}; /// ExactFilter - The occasional extended opcode (such as VMCALL or MONITOR) /// requires the ModR/M byte to have a specific value. class ExactFilter : public ModRMFilter { diff --git a/llvm/utils/TableGen/X86ModRMFilters.cpp b/llvm/utils/TableGen/X86ModRMFilters.cpp --- a/llvm/utils/TableGen/X86ModRMFilters.cpp +++ b/llvm/utils/TableGen/X86ModRMFilters.cpp @@ -18,4 +18,6 @@ void ExtendedFilter::anchor() { } +void ExtendedRMFilter::anchor() { } + void ExactFilter::anchor() { } diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h --- a/llvm/utils/TableGen/X86RecognizableInstr.h +++ b/llvm/utils/TableGen/X86RecognizableInstr.h @@ -103,6 +103,9 @@ RawFrmImm16 = 8, AddCCFrm = 9, PrefixByte = 10, + MRMr0 = 21, + MRMSrcMemFSIB = 22, + MRMDestMemFSIB = 23, MRMDestMem = 24, MRMSrcMem = 25, MRMSrcMem4VOp3 = 26, diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp --- a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -352,10 +352,13 @@ // The scaling factor for AVX512 compressed displacement encoding is an // instruction attribute. Adjust the ModRM encoding type to include the // scale for compressed displacement.
- if ((encoding != ENCODING_RM && encoding != ENCODING_VSIB) ||CD8_Scale == 0) + if ((encoding != ENCODING_RM && + encoding != ENCODING_VSIB && + encoding != ENCODING_SIB) ||CD8_Scale == 0) return; encoding = (OperandEncoding)(encoding + Log2_32(CD8_Scale)); assert(((encoding >= ENCODING_RM && encoding <= ENCODING_RM_CD64) || + (encoding == ENCODING_SIB) || (encoding >= ENCODING_VSIB && encoding <= ENCODING_VSIB_CD64)) && "Invalid CDisp scaling"); } @@ -519,6 +522,7 @@ HANDLE_OPTIONAL(immediate) break; case X86Local::MRMDestMem: + case X86Local::MRMDestMemFSIB: // Operand 1 is a memory operand (possibly SIB-extended) // Operand 2 is a register operand in the Reg/Opcode field. // - In AVX, there is a register operand in the VEX.vvvv field here - @@ -589,6 +593,7 @@ HANDLE_OPERAND(opcodeModifier) break; case X86Local::MRMSrcMem: + case X86Local::MRMSrcMemFSIB: // Operand 1 is a register operand in the Reg/Opcode field. // Operand 2 is a memory operand (possibly SIB-extended) // - In AVX, there is a register operand in the VEX.vvvv field here - @@ -641,6 +646,10 @@ HANDLE_OPERAND(rmRegister) HANDLE_OPERAND(opcodeModifier) break; + case X86Local::MRMr0: + // Operand 1 is a register operand in the Reg/Opcode field.
+ HANDLE_OPERAND(roRegister) + break; case X86Local::MRMXr: case X86Local::MRM0r: case X86Local::MRM1r: @@ -772,7 +781,9 @@ filter = std::make_unique(true); break; case X86Local::MRMDestMem: + case X86Local::MRMDestMemFSIB: case X86Local::MRMSrcMem: + case X86Local::MRMSrcMemFSIB: case X86Local::MRMSrcMem4VOp3: case X86Local::MRMSrcMemOp4: case X86Local::MRMSrcMemCC: @@ -792,6 +803,9 @@ case X86Local::MRM6X: case X86Local::MRM7X: filter = std::make_unique(true, Form - X86Local::MRM0X); break; + case X86Local::MRMr0: + filter = std::make_unique(true, Form - X86Local::MRMr0); + break; case X86Local::MRM0m: case X86Local::MRM1m: case X86Local::MRM2m: case X86Local::MRM3m: case X86Local::MRM4m: case X86Local::MRM5m: @@ -911,6 +925,7 @@ TYPE("i64imm", TYPE_IMM) TYPE("anymem", TYPE_M) TYPE("opaquemem", TYPE_M) + TYPE("sibmem", TYPE_MSIB) TYPE("SEGMENT_REG", TYPE_SEGMENTREG) TYPE("DEBUG_REG", TYPE_DEBUGREG) TYPE("CONTROL_REG", TYPE_CONTROLREG) @@ -969,6 +984,7 @@ TYPE("vz256mem", TYPE_MVSIBZ) TYPE("vz512mem", TYPE_MVSIBZ) TYPE("BNDR", TYPE_BNDR) + TYPE("TILE", TYPE_TMM) errs() << "Unhandled type string " << s << "\n"; llvm_unreachable("Unhandled type string"); } @@ -1008,6 +1024,7 @@ ENCODING("VR128X", ENCODING_IB) ENCODING("VR256X", ENCODING_IB) ENCODING("VR512", ENCODING_IB) + ENCODING("TILE", ENCODING_IB) errs() << "Unhandled immediate encoding " << s << "\n"; llvm_unreachable("Unhandled immediate encoding"); } @@ -1046,6 +1063,7 @@ ENCODING("VK8PAIR", ENCODING_RM) ENCODING("VK16PAIR", ENCODING_RM) ENCODING("BNDR", ENCODING_RM) + ENCODING("TILE", ENCODING_RM) errs() << "Unhandled R/M register encoding " << s << "\n"; llvm_unreachable("Unhandled R/M register encoding"); } @@ -1092,6 +1110,7 @@ ENCODING("VK32WM", ENCODING_REG) ENCODING("VK64WM", ENCODING_REG) ENCODING("BNDR", ENCODING_REG) + ENCODING("TILE", ENCODING_REG) errs() << "Unhandled reg/opcode register encoding " << s << "\n"; llvm_unreachable("Unhandled reg/opcode register encoding"); } @@ -1123,6 +1142,7 @@ 
ENCODING("VK4PAIR", ENCODING_VVVV) ENCODING("VK8PAIR", ENCODING_VVVV) ENCODING("VK16PAIR", ENCODING_VVVV) + ENCODING("TILE", ENCODING_VVVV) errs() << "Unhandled VEX.vvvv register encoding " << s << "\n"; llvm_unreachable("Unhandled VEX.vvvv register encoding"); } @@ -1163,6 +1183,7 @@ ENCODING("lea64mem", ENCODING_RM) ENCODING("anymem", ENCODING_RM) ENCODING("opaquemem", ENCODING_RM) + ENCODING("sibmem", ENCODING_SIB) ENCODING("vx64mem", ENCODING_VSIB) ENCODING("vx128mem", ENCODING_VSIB) ENCODING("vx256mem", ENCODING_VSIB)