diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -560,6 +560,12 @@ if (Bytes.size() < 4) break; const uint64_t QW = ((uint64_t)eatBytes(Bytes) << 32) | DW; + if (STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts]) { + Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address); + if (Res) + break; + } + if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) { Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address); if (Res) diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -1004,14 +1004,14 @@ VOP3_Pseudo PS_ACD = !cast(NAME # "_e64"), VOP3_Pseudo PS_VCD = !cast(NAME # "_vgprcd" # "_e64")> { let SubtargetPredicate = isGFX940Plus, - AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9", + AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940", AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in { def _gfx940_acd : VOP3P_Real, VOP3Pe_MAI ; def _gfx940_vcd : VOP3P_Real, VOP3Pe_MAI ; - } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" + } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940" defm : VOP3P_Real_MFMA_gfx940_aliases; diff --git a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt --- a/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt @@ -440,3 +440,159 @@ # GFX940: v_smfmac_f32_32x32x32_fp8_fp8 a[0:15], v[2:3], a[4:7], v1 ; encoding: [0x00,0x80,0xff,0xd3,0x02,0x09,0x06,0x14] 0x00,0x80,0xff,0xd3,0x02,0x09,0x06,0x14 + +# GFX940: v_mfma_f32_16x16x16_f16 v[10:13], v[2:3], v[4:5], v[6:9] ; encoding: [0x0a,0x00,0xcd,0xd3,0x02,0x09,0x1a,0x04] +0x0a,0x00,0xcd,0xd3,0x02,0x09,0x1a,0x04 + +# GFX940: v_mfma_f32_16x16x16_f16 v[252:255], a[254:255], v[254:255], v[252:255] ; encoding: [0xfc,0x00,0xcd,0xd3,0xfe,0xfd,0xf3,0x0f] +0xfc,0x00,0xcd,0xd3,0xfe,0xfd,0xf3,0x0f + +# GFX940: v_mfma_f32_16x16x16_f16 v[252:255], v[254:255], a[254:255], v[252:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xfc,0x3a,0xcd,0xd3,0xfe,0xfd,0xf3,0x77] +0xfc,0x3a,0xcd,0xd3,0xfe,0xfd,0xf3,0x77 + +# GFX940: v_mfma_f32_16x16x16_f16 a[252:255], a[254:255], a[254:255], a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xcd,0xd3,0xfe,0xfd,0xf3,0xff] +0xfc,0xff,0xcd,0xd3,0xfe,0xfd,0xf3,0xff + +# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], v1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x07] +0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x07 + +# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], a1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x0f] +0xf0,0x00,0xc1,0xd3,0x01,0x05,0xc2,0x0f + +# GFX940: v_mfma_f32_16x16x1_4b_f32 v[240:255], v1, a2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc1,0xd3,0x01,0x05,0xc2,0x77] +0xf0,0x3a,0xc1,0xd3,0x01,0x05,0xc2,0x77 + +# GFX940: v_mfma_f32_16x16x1_4b_f32 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc1,0xd3,0xff,0xff,0xc3,0xff] +0xf0,0xff,0xc1,0xd3,0xff,0xff,0xc3,0xff + +# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], v[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x07] +0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x07 + +# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], a[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x0f] +0xf0,0x00,0xc9,0xd3,0x02,0x09,0xc2,0x0f + +# GFX940: v_mfma_f32_16x16x4_4b_f16 v[240:255], v[2:3], a[4:5], v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc9,0xd3,0x02,0x09,0xc2,0x77] +0xf0,0x3a,0xc9,0xd3,0x02,0x09,0xc2,0x77 + +# GFX940: v_mfma_f32_16x16x4_4b_f16 a[240:255], a[254:255], a[254:255], a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc9,0xd3,0xfe,0xfd,0xc3,0xff] +0xf0,0xff,0xc9,0xd3,0xfe,0xfd,0xc3,0xff + +# GFX940: v_mfma_f32_16x16x4_f32 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xc5,0xd3,0x01,0x05,0x1a,0x04] +0x0a,0x00,0xc5,0xd3,0x01,0x05,0x1a,0x04 + +# GFX940: v_mfma_f32_16x16x4_f32 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xc5,0xd3,0xff,0xff,0xf3,0x0f] +0xfc,0x00,0xc5,0xd3,0xff,0xff,0xf3,0x0f + +# GFX940: v_mfma_f32_16x16x4_f32 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xc5,0xd3,0x01,0x05,0x1a,0x74] +0x0a,0xba,0xc5,0xd3,0x01,0x05,0x1a,0x74 + +# GFX940: v_mfma_f32_16x16x4_f32 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xc5,0xd3,0xff,0xff,0xf3,0xff] +0xfc,0xff,0xc5,0xd3,0xff,0xff,0xf3,0xff + +# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], v1, v2, v[224:255] ; encoding: [0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x07] +0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x07 + +# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], a1, v2, v[224:255] ; encoding: [0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x0f] +0xe0,0x00,0xc0,0xd3,0x01,0x05,0x82,0x0f + +# GFX940: v_mfma_f32_32x32x1_2b_f32 v[224:255], v1, a2, v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xc0,0xd3,0x01,0x05,0x82,0x77] +0xe0,0x3a,0xc0,0xd3,0x01,0x05,0x82,0x77 + +# GFX940: v_mfma_f32_32x32x1_2b_f32 a[224:255], a255, a255, a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xc0,0xd3,0xff,0xff,0x83,0xff] +0xe0,0xff,0xc0,0xd3,0xff,0xff,0x83,0xff + +# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], v1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x07] +0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x07 + +# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], a1, v2, v[240:255] ; encoding: [0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x0f] +0xf0,0x00,0xc4,0xd3,0x01,0x05,0xc2,0x0f + +# GFX940: v_mfma_f32_32x32x2_f32 v[240:255], v1, a2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xc4,0xd3,0x01,0x05,0xc2,0x77] +0xf0,0x3a,0xc4,0xd3,0x01,0x05,0xc2,0x77 + +# GFX940: v_mfma_f32_32x32x2_f32 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xc4,0xd3,0xff,0xff,0xc3,0xff] +0xf0,0xff,0xc4,0xd3,0xff,0xff,0xc3,0xff + +# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], v[2:3], v[4:5], v[224:255] ; encoding: [0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x07] +0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x07 + +# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], a[2:3], v[4:5], v[224:255] ; encoding: [0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x0f] +0xe0,0x00,0xc8,0xd3,0x02,0x09,0x82,0x0f + +# GFX940: v_mfma_f32_32x32x4_2b_f16 v[224:255], v[2:3], a[4:5], v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xc8,0xd3,0x02,0x09,0x82,0x77] +0xe0,0x3a,0xc8,0xd3,0x02,0x09,0x82,0x77 + +# GFX940: v_mfma_f32_32x32x4_2b_f16 a[224:255], a[254:255], a[254:255], a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xc8,0xd3,0xfe,0xfd,0x83,0xff] +0xe0,0xff,0xc8,0xd3,0xfe,0xfd,0x83,0xff + +# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], v[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x07] +0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x07 + +# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], a[2:3], v[4:5], v[240:255] ; encoding: [0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x0f] +0xf0,0x00,0xcc,0xd3,0x02,0x09,0xc2,0x0f + +# GFX940: v_mfma_f32_32x32x8_f16 v[240:255], v[2:3], a[4:5], v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xcc,0xd3,0x02,0x09,0xc2,0x77] +0xf0,0x3a,0xcc,0xd3,0x02,0x09,0xc2,0x77 + +# GFX940: v_mfma_f32_32x32x8_f16 a[240:255], a[254:255], a[254:255], a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xcc,0xd3,0xfe,0xfd,0xc3,0xff] +0xf0,0xff,0xcc,0xd3,0xfe,0xfd,0xc3,0xff + +# GFX940: v_mfma_f32_4x4x1_16b_f32 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xc2,0xd3,0x01,0x05,0x1a,0x04] +0x0a,0x00,0xc2,0xd3,0x01,0x05,0x1a,0x04 + +# GFX940: v_mfma_f32_4x4x1_16b_f32 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xc2,0xd3,0xff,0xff,0xf3,0x0f] +0xfc,0x00,0xc2,0xd3,0xff,0xff,0xf3,0x0f + +# GFX940: v_mfma_f32_4x4x1_16b_f32 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xc2,0xd3,0x01,0x05,0x1a,0x74] +0x0a,0xba,0xc2,0xd3,0x01,0x05,0x1a,0x74 + +# GFX940: v_mfma_f32_4x4x1_16b_f32 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xc2,0xd3,0xff,0xff,0xf3,0xff] +0xfc,0xff,0xc2,0xd3,0xff,0xff,0xf3,0xff + +# GFX940: v_mfma_f32_4x4x4_16b_f16 v[10:13], v[2:3], v[4:5], v[6:9] ; encoding: [0x0a,0x00,0xca,0xd3,0x02,0x09,0x1a,0x04] +0x0a,0x00,0xca,0xd3,0x02,0x09,0x1a,0x04 + +# GFX940: v_mfma_f32_4x4x4_16b_f16 v[252:255], a[254:255], v[254:255], v[252:255] ; encoding: [0xfc,0x00,0xca,0xd3,0xfe,0xfd,0xf3,0x0f] +0xfc,0x00,0xca,0xd3,0xfe,0xfd,0xf3,0x0f + +# GFX940: v_mfma_f32_4x4x4_16b_f16 a[10:13], v[2:3], a[4:5], a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xca,0xd3,0x02,0x09,0x1a,0x74] +0x0a,0xba,0xca,0xd3,0x02,0x09,0x1a,0x74 + +# GFX940: v_mfma_f32_4x4x4_16b_f16 a[252:255], a[254:255], a[254:255], a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xca,0xd3,0xfe,0xfd,0xf3,0xff] +0xfc,0xff,0xca,0xd3,0xfe,0xfd,0xf3,0xff + +# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], a1, a2, v[240:255] ; encoding: [0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x1f] +0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x1f + +# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], v1, a2, v[240:255] ; encoding: [0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x17] +0xf0,0x00,0xd1,0xd3,0x01,0x05,0xc2,0x17 + +# GFX940: v_mfma_i32_16x16x4_4b_i8 v[240:255], a1, v2, v[240:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xf0,0x3a,0xd1,0xd3,0x01,0x05,0xc2,0x6f] +0xf0,0x3a,0xd1,0xd3,0x01,0x05,0xc2,0x6f + +# GFX940: v_mfma_i32_16x16x4_4b_i8 a[240:255], a255, a255, a[240:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xf0,0xff,0xd1,0xd3,0xff,0xff,0xc3,0xff] +0xf0,0xff,0xd1,0xd3,0xff,0xff,0xc3,0xff + +# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], v1, v2, v[224:255] ; encoding: [0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x07] +0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x07 + +# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], v1, a2, v[224:255] ; encoding: [0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x17] +0xe0,0x00,0xd0,0xd3,0x01,0x05,0x82,0x17 + +# GFX940: v_mfma_i32_32x32x4_2b_i8 v[224:255], a1, v2, v[224:255] cbsz:2 abid:7 blgp:3 ; encoding: [0xe0,0x3a,0xd0,0xd3,0x01,0x05,0x82,0x6f] +0xe0,0x3a,0xd0,0xd3,0x01,0x05,0x82,0x6f + +# GFX940: v_mfma_i32_32x32x4_2b_i8 a[224:255], a255, a255, a[224:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xe0,0xff,0xd0,0xd3,0xff,0xff,0x83,0xff] +0xe0,0xff,0xd0,0xd3,0xff,0xff,0x83,0xff + +# GFX940: v_mfma_i32_4x4x4_16b_i8 v[10:13], v1, v2, v[6:9] ; encoding: [0x0a,0x00,0xd2,0xd3,0x01,0x05,0x1a,0x04] +0x0a,0x00,0xd2,0xd3,0x01,0x05,0x1a,0x04 + +# GFX940: v_mfma_i32_4x4x4_16b_i8 v[252:255], a255, v255, v[252:255] ; encoding: [0xfc,0x00,0xd2,0xd3,0xff,0xff,0xf3,0x0f] +0xfc,0x00,0xd2,0xd3,0xff,0xff,0xf3,0x0f + +# GFX940: v_mfma_i32_4x4x4_16b_i8 a[10:13], v1, a2, a[6:9] cbsz:2 abid:7 blgp:3 ; encoding: [0x0a,0xba,0xd2,0xd3,0x01,0x05,0x1a,0x74] +0x0a,0xba,0xd2,0xd3,0x01,0x05,0x1a,0x74 + +# GFX940: v_mfma_i32_4x4x4_16b_i8 a[252:255], a255, a255, a[252:255] cbsz:7 abid:15 blgp:7 ; encoding: [0xfc,0xff,0xd2,0xd3,0xff,0xff,0xf3,0xff] +0xfc,0xff,0xd2,0xd3,0xff,0xff,0xf3,0xff