Index: llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -755,6 +755,12 @@ } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) || AMDGPU::isVOPC64DPP(Opc)) { convertVOPCDPPInst(MI); + } else if(MCII->get(Opc).TSFlags & SIInstrFlags::VOP3) { + // op_sel is not the last operand, so it must be added manually to keep + // the remaining operands, dpp8 and fi, at the correct operand index. + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) + insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel); } else { // Insert dummy unused src modifiers. if (MI.getNumOperands() < DescNumOps && Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -699,10 +699,12 @@ } class VOP3_DOT_Profile : VOP3_Profile { - // FIXME VOP3 DPP versions are unsupported - let HasExtVOP3DPP = 0; let HasClamp = 0; let HasOMod = 0; + // Type calculations workaround for bf16. 
+ let HasSrc0Mods = 1; + let HasSrc1Mods = 1; + let HasSrc2Mods = 1; let InsVOP3OpSel = getInsVOP3OpSel.ret, FPVRegInputMods, IntOpSelMods), @@ -848,9 +850,8 @@ defm V_MINMAX_U32 : VOP3_Realtriple_gfx11<0x263>; defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11<0x264>; defm V_MINMAX_I32 : VOP3_Realtriple_gfx11<0x265>; -// FIXME VOP3 DPP Dot instructions are unsupported -defm V_DOT2_F16_F16 : VOP3_Real_Base_gfx11<0x266>; -defm V_DOT2_BF16_BF16 : VOP3_Real_Base_gfx11<0x267>; +defm V_DOT2_F16_F16 : VOP3_Realtriple_gfx11<0x266>; +defm V_DOT2_BF16_BF16 : VOP3_Realtriple_gfx11<0x267>; defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">; defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">; defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">; Index: llvm/test/MC/AMDGPU/gfx11_asm_dpp.s =================================================================== --- llvm/test/MC/AMDGPU/gfx11_asm_dpp.s +++ llvm/test/MC/AMDGPU/gfx11_asm_dpp.s @@ -70,6 +70,12 @@ v_fma_f32_e64_dpp v80, v81, abs(v82), v81 dpp8:[0,1,6,3,4,5,6,7] // GFX11: encoding: [0x50,0x02,0x13,0xd6,0xe9,0xa4,0x46,0x05,0x51,0x88,0xc7,0xfa] +v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX11: encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + +v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] +// GFX11: encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] + v_max3_f32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: encoding: [0x05,0x00,0x1c,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] Index: llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt =================================================================== --- llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt +++ llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt @@ -47740,6 +47740,12 @@ # GFX11: v_fma_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0x21,0xcc,0x01,0x05,0x0e,0x44] 
0x00,0x05,0x21,0xcc,0x01,0x05,0x0e,0x44 +# GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 + +# GFX11: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] +0x00,0x00,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92 + # GFX11: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x00,0x05,0x13,0xcc,0xe9,0x04,0x0e,0xc4,0x01,0x77,0x39,0x05] 0x00,0x05,0x13,0xcc,0xe9,0x04,0x0e,0xc4,0x01,0x77,0x39,0x05