diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -142,7 +142,8 @@ MCOperand decodeOperand_AV_32(unsigned Val) const; MCOperand decodeOperand_AV_64(unsigned Val) const; MCOperand decodeOperand_AV_128(unsigned Val) const; - MCOperand decodeOperand_AV_512(unsigned Val) const; + MCOperand decodeOperand_AVDst_128(unsigned Val) const; + MCOperand decodeOperand_AVDst_512(unsigned Val) const; enum OpWidthTy { OPW32, diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -148,7 +148,8 @@ DECODE_OPERAND_REG(AV_32) DECODE_OPERAND_REG(AV_64) DECODE_OPERAND_REG(AV_128) -DECODE_OPERAND_REG(AV_512) +DECODE_OPERAND_REG(AVDst_128) +DECODE_OPERAND_REG(AVDst_512) static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm, uint64_t Addr, @@ -972,8 +973,16 @@ return decodeSrcOp(OPW128, Val); } -MCOperand AMDGPUDisassembler::decodeOperand_AV_512(unsigned Val) const { - return decodeSrcOp(OPW512, Val); +MCOperand AMDGPUDisassembler::decodeOperand_AVDst_128(unsigned Val) const { + using namespace AMDGPU::EncValues; + assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1. + return decodeSrcOp(OPW128, Val | IS_VGPR); +} + +MCOperand AMDGPUDisassembler::decodeOperand_AVDst_512(unsigned Val) const { + using namespace AMDGPU::EncValues; + assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1. + return decodeSrcOp(OPW512, Val | IS_VGPR); } MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const { diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -280,7 +280,8 @@ INLINE_FLOATING_C_MAX = 248, LITERAL_CONST = 255, VGPR_MIN = 256, - VGPR_MAX = 511 + VGPR_MAX = 511, + IS_VGPR = 256 // Indicates VGPR or AGPR }; } // namespace EncValues diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1097,7 +1097,7 @@ defm VISrc_1024 : RegInlineOperandAC<"VReg", "VISrc_1024", "_1024">; //===----------------------------------------------------------------------===// -// AVSrc_* Operands with an AGPR or VGPR +// AVSrc_*, AVDst_*, AVLdSt_* Operands with an AGPR or VGPR //===----------------------------------------------------------------------===// def AVSrc_32 : RegisterOperand { @@ -1115,8 +1115,13 @@ let EncoderMethod = "getAVOperandEncoding"; } -def AVSrc_512 : RegisterOperand { - let DecoderMethod = "DecodeAV_512RegisterClass"; +def AVDst_128 : RegisterOperand { + let DecoderMethod = "DecodeAVDst_128RegisterClass"; + let EncoderMethod = "getAVOperandEncoding"; +} + +def AVDst_512 : RegisterOperand { + let DecoderMethod = "DecodeAVDst_512RegisterClass"; let EncoderMethod = "getAVOperandEncoding"; } diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -449,12 +449,12 @@ def VOPProfileMAI_F32_V2F32_X16_VCD : VOPProfileMAI; def VOPProfileMAI_F32_V2F32_X32_VCD : VOPProfileMAI; -def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC; -def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC; -def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC; -def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC; -def VOPProfileSMFMAC_I32_16X16X64_I8 : VOPProfileSMFMAC; -def VOPProfileSMFMAC_I32_32X32X32_I8 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_I32_16X16X64_I8 : VOPProfileSMFMAC; +def VOPProfileSMFMAC_I32_32X32X32_I8 : VOPProfileSMFMAC; class MFMATable { bit IsMac = is_mac; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -373,7 +373,7 @@ } class VOP3Pe_SMFMAC op> : Enc64 { - bits<10> vdst; + bits<10> vdst; // VGPR or AGPR, but not SGPR. vdst{8} is not encoded in the instruction. bits<10> src0; bits<10> src1; bits<9> idx; @@ -381,7 +381,6 @@ bits<3> cbsz; bits<4> abid; - let vdst{8} = 1; // VGPR or AGPR, but not SGPR let blgp = 0; let Inst{7-0} = vdst{7-0}; diff --git a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt --- a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt @@ -69,18 +69,126 @@ # GFX940: v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 ; encoding: [0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14] 0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14 +# GFX940: v_smfmac_f32_16x16x32_f16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe2,0xd3,0x02,0x09,0x0e,0x04] +0xfc,0x00,0xe2,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_f16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe2,0xd3,0x02,0x09,0x0e,0x04] +0xfc,0x80,0xe2,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x04] +0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x0c] +0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x0c + +# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x04] +0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x04 + +# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x14] +0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x14 + +# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0x09,0xfe,0x07] +0x0a,0x00,0xe2,0xd3,0x02,0x09,0xfe,0x07 + +# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe2,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x02,0xe2,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe2,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x07,0xe2,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe2,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x08,0xe2,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe2,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x38,0xe2,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe2,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x78,0xe2,0xd3,0x02,0x09,0x0e,0x04 + # GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c] 0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c # GFX940: v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 ; encoding: [0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14] 0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14 +# GFX940: v_smfmac_f32_32x32x16_f16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe4,0xd3,0x02,0x09,0x0e,0x04] +0xf0,0x00,0xe4,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_f16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x04] +0xf0,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x04] +0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x0c] +0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x0c + +# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x04] +0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x04 + +# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x14] +0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x14 + +# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0x09,0xfe,0x07] +0x0a,0x00,0xe4,0xd3,0x02,0x09,0xfe,0x07 + +# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe4,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x02,0xe4,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe4,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x07,0xe4,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe4,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x08,0xe4,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe4,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x38,0xe4,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe4,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x78,0xe4,0xd3,0x02,0x09,0x0e,0x04 + # GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c] 0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c # GFX940: v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 ; encoding: [0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14] 0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14 +# GFX940: v_smfmac_f32_16x16x32_bf16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe6,0xd3,0x02,0x09,0x0e,0x04] +0xfc,0x00,0xe6,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_bf16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe6,0xd3,0x02,0x09,0x0e,0x04] +0xfc,0x80,0xe6,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x04] +0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x0c] +0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x0c + +# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x04] +0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x04 + +# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x14] +0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x14 + +# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0x09,0xfe,0x07] +0x0a,0x00,0xe6,0xd3,0x02,0x09,0xfe,0x07 + +# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe6,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x02,0xe6,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe6,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x07,0xe6,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe6,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x08,0xe6,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe6,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x38,0xe6,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe6,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x78,0xe6,0xd3,0x02,0x09,0x0e,0x04 + # GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c] 0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c @@ -93,14 +201,122 @@ # GFX940: v_smfmac_f32_32x32x16_bf16 a[10:25], v[2:3], a[4:7], v9 ; encoding: [0x0a,0x80,0xe8,0xd3,0x02,0x09,0x26,0x14] 0x0a,0x80,0xe8,0xd3,0x02,0x09,0x26,0x14 +# GFX940: v_smfmac_f32_32x32x16_bf16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe8,0xd3,0x02,0x09,0x0e,0x04] +0xf0,0x00,0xe8,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_bf16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe8,0xd3,0x02,0x09,0x0e,0x04] +0xf0,0x80,0xe8,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x04] +0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x0c] +0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x0c + +# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x04] +0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x04 + +# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x14] +0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x14 + +# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0x09,0xfe,0x07] +0x0a,0x00,0xe8,0xd3,0x02,0x09,0xfe,0x07 + +# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe8,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x02,0xe8,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe8,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x07,0xe8,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe8,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x08,0xe8,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe8,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x38,0xe8,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe8,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x78,0xe8,0xd3,0x02,0x09,0x0e,0x04 + # GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v10 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xea,0xd3,0x02,0x09,0x2a,0x0c] 0x0a,0x0b,0xea,0xd3,0x02,0x09,0x2a,0x0c # GFX940: v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v11 ; encoding: [0x0a,0x80,0xea,0xd3,0x02,0x09,0x2e,0x14] 0x0a,0x80,0xea,0xd3,0x02,0x09,0x2e,0x14 +# GFX940: v_smfmac_i32_16x16x64_i8 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xea,0xd3,0x02,0x09,0x0e,0x04] +0xfc,0x00,0xea,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_16x16x64_i8 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xea,0xd3,0x02,0x09,0x0e,0x04] +0xfc,0x80,0xea,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x04] +0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x0c] +0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x0c + +# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x04] +0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x04 + +# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x14] +0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x14 + +# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0x09,0xfe,0x07] +0x0a,0x00,0xea,0xd3,0x02,0x09,0xfe,0x07 + +# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xea,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x02,0xea,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xea,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x07,0xea,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xea,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x08,0xea,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xea,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x38,0xea,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xea,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x78,0xea,0xd3,0x02,0x09,0x0e,0x04 + # GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v12 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xec,0xd3,0x02,0x09,0x32,0x0c] 0x0a,0x0b,0xec,0xd3,0x02,0x09,0x32,0x0c # GFX940: v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v13 ; encoding: [0x0a,0x80,0xec,0xd3,0x02,0x09,0x36,0x14] 0x0a,0x80,0xec,0xd3,0x02,0x09,0x36,0x14 + +# GFX940: v_smfmac_i32_32x32x32_i8 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xec,0xd3,0x02,0x09,0x0e,0x04] +0xf0,0x00,0xec,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_32x32x32_i8 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xec,0xd3,0x02,0x09,0x0e,0x04] +0xf0,0x80,0xec,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x04] +0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x0c] +0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x0c + +# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x04] +0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x04 + +# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x14] +0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x14 + +# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0x09,0xfe,0x07] +0x0a,0x00,0xec,0xd3,0x02,0x09,0xfe,0x07 + +# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xec,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x02,0xec,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xec,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x07,0xec,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xec,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x08,0xec,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xec,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x38,0xec,0xd3,0x02,0x09,0x0e,0x04 + +# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xec,0xd3,0x02,0x09,0x0e,0x04] +0x0a,0x78,0xec,0xd3,0x02,0x09,0x0e,0x04