Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
===================================================================
--- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -83,10 +83,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-static inline uint32_t eatB32(ArrayRef<uint8_t>& Bytes) {
-  assert(Bytes.size() >= sizeof eatB32(Bytes));
-  const auto Res = support::endian::read32le(Bytes.data());
-  Bytes = Bytes.slice(sizeof Res);
+/// Read a little-endian value of type T from the front of \p Bytes and drop
+/// the consumed bytes from \p Bytes. The caller must check that at least
+/// sizeof(T) bytes remain. T is not deducible and must be given explicitly.
+template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
+  assert(Bytes.size() >= sizeof(T));
+  const auto Res = support::endian::read<T, support::little,
+                                         support::unaligned>(Bytes.data());
+  Bytes = Bytes.slice(sizeof(T));
   return Res;
 }
 
@@ -123,8 +127,20 @@
   do {
     // ToDo: better to switch encoding length using some bit predicate
     // but it is unknown yet, so try all we can
+
+    // Try to decode DPP first to solve conflict with VOP1 and VOP2 encodings
+    if (Bytes.size() >= 8) {
+      const uint64_t QW = eatBytes<uint64_t>(Bytes);
+      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
+      if (Res) break;
+    }
+
+    // Reinitialize Bytes as DPP64 could have eaten too much
+    Bytes = Bytes_.slice(0, MaxInstBytesNum);
+
+    // Try to decode a 32-bit instruction
     if (Bytes.size() < 4) break;
-    const uint32_t DW = eatB32(Bytes);
+    const uint32_t DW = eatBytes<uint32_t>(Bytes);
     Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
     if (Res) break;
 
@@ -132,7 +148,7 @@
     if (Res) break;
 
     if (Bytes.size() < 4) break;
-    const uint64_t QW = ((uint64_t)eatB32(Bytes) << 32) | DW;
+    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
     Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
     if (Res) break;
 
@@ -261,7 +277,7 @@
   if (Bytes.size() < 4)
     return errOperand(0, "cannot read literal, inst bytes left " +
                          Twine(Bytes.size()));
-  return MCOperand::createImm(eatB32(Bytes));
+  return MCOperand::createImm(eatBytes<uint32_t>(Bytes));
 }
 
 MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
Index: lib/Target/AMDGPU/SIInstrInfo.td
=================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -1705,6 +1705,8 @@ VOP1_DPPe , VOP_DPP { let AssemblerPredicates = [isVI]; + let DecoderNamespace = "DPP"; + let DisableDecoder = DisableVIDecoder; let src0_modifiers = !if(p.HasModifiers, ?, 0); let src1_modifiers = 0; } @@ -1767,6 +1769,8 @@ VOP2_DPPe , VOP_DPP { let AssemblerPredicates = [isVI]; + let DecoderNamespace = "DPP"; + let DisableDecoder = DisableVIDecoder; let src0_modifiers = !if(p.HasModifiers, ?, 0); let src1_modifiers = !if(p.HasModifiers, ?, 0); } Index: test/MC/Disassembler/AMDGPU/dpp_vi.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AMDGPU/dpp_vi.txt @@ -0,0 +1,89 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s -check-prefix=VI + +# VI: v_mov_b32_dpp v0, v0 quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x58,0x00,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x58 0x00 0xff + +# VI: v_mov_b32_dpp v0, v0 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x01,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x01 0x01 0xff + +# VI: v_mov_b32_dpp v0, v0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x1f,0x01,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x1f 0x01 0xff + +# VI: v_mov_b32_dpp v0, v0 row_ror:12 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x2c,0x01,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x2c 0x01 0xff + +# VI: v_mov_b32_dpp v0, v0 wave_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x30,0x01,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x30 0x01 0xff + +# VI: v_mov_b32_dpp v0, v0 wave_rol:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x34,0x01,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x34 0x01 0xff + +# VI: v_mov_b32_dpp v0, v0 wave_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x02,0x00,0x7e,0x00,0x38,0x01,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x38 0x01 0xff + +# VI: v_mov_b32_dpp v0, v0 wave_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x3c,0x01,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x3c 0x01 0xff + +# VI: v_mov_b32_dpp v0, v0 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x40,0x01,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x40 0x01 0xff + +# VI: v_mov_b32_dpp v0, v0 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x41,0x01,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x41 0x01 0xff + +# VI: v_mov_b32_dpp v0, v0 row_bcast:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x42,0x01,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x42 0x01 0xff + +# VI: v_mov_b32_dpp v0, v0 row_bcast:31 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x43,0x01,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x43 0x01 0xff + +# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xa1] +0xfa 0x02 0x00 0x7e 0x00 0x4d 0x08 0xa1 + +# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xaf] +0xfa 0x02 0x00 0x7e 0x00 0x4d 0x00 0xaf + +# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xf1] +0xfa 0x02 0x00 0x7e 0x00 0x4d 0x00 0xf1 + +# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xff] +0xfa 0x02 0x00 0x7e 0x00 0x4d 0x08 0xff + +# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x00,0xa1] +0xfa 0x02 0x00 0x7e 0x00 0x4d 0x00 0xa1 + +# VI: v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0xf bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xaf] +0xfa 0x02 0x00 0x7e 0x00 0x4d 0x08 0xaf + +# VI: 
v_mov_b32_dpp v0, v0 quad_perm:[1,3,0,1] row_mask:0xf bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x4d,0x08,0xf1] +0xfa 0x02 0x00 0x7e 0x00 0x4d 0x08 0xf1 + +# VI: v_cvt_u32_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x0e,0x00,0x7e,0x00,0x01,0x09,0xa1] +0xfa 0x0e 0x00 0x7e 0x00 0x01 0x09 0xa1 + +# VI: v_fract_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x36,0x00,0x7e,0x00,0x01,0x09,0xa1] +0xfa 0x36 0x00 0x7e 0x00 0x01 0x09 0xa1 + +# VI: v_sin_f32_dpp v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x52,0x00,0x7e,0x00,0x01,0x09,0xa1] +0xfa 0x52 0x00 0x7e 0x00 0x01 0x09 0xa1 + +# VI: v_add_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x09,0xa1] +0xfa 0x00 0x00 0x02 0x00 0x01 0x09 0xa1 + +# VI: v_min_f32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x14,0x00,0x01,0x09,0xa1] +0xfa 0x00 0x00 0x14 0x00 0x01 0x09 0xa1 + +# VI: v_and_b32_dpp v0, v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x26,0x00,0x01,0x09,0xa1] +0xfa 0x00 0x00 0x26 0x00 0x01 0x09 0xa1 + +# VI: v_add_f32_dpp v0, -v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x19,0xa1] +0xfa 0x00 0x00 0x02 0x00 0x01 0x19 0xa1 + +# VI: v_add_f32_dpp v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x89,0xa1] +0xfa 0x00 0x00 0x02 0x00 0x01 0x89 0xa1 + +# VI: v_add_f32_dpp v0, -v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x99,0xa1] +0xfa 0x00 0x00 0x02 0x00 0x01 0x99 0xa1 + +# VI: v_add_f32_dpp v0, |v0|, -v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x00,0x00,0x02,0x00,0x01,0x69,0xa1] + +0xfa 0x00 0x00 0x02 0x00 0x01 0x69 0xa1 \ No newline at end of file