diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -4242,8 +4242,8 @@ } // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). - if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) && - !(TSFlags & SIInstrFlags::VOP3P)) { + if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && + (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); if (OpSel & 3) @@ -8228,17 +8228,7 @@ // it has src2 register operand that is tied to dst operand // we don't allow modifiers for this operand in assembler so src2_modifiers // should be 0. - if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || - Opc == AMDGPU::V_MAC_F32_e64_gfx10 || Opc == AMDGPU::V_MAC_F32_e64_vi || - Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || - Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || - Opc == AMDGPU::V_MAC_F16_e64_vi || Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || - Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || - Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || Opc == AMDGPU::V_FMAC_F32_e64_vi || - Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || - Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || - Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || - Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11) { + if (isMAC(Opc)) { auto it = Inst.begin(); std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -599,20 +599,7 @@ Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address); } while (false); - if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi || - MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || - MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 || - MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || - MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || - MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi || - MI.getOpcode() == AMDGPU::V_FMAC_F64_e64_gfx90a || - MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi || - MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 || - MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx11 || - MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || - MI.getOpcode() == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || - MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10 || - MI.getOpcode() == AMDGPU::V_FMAC_F16_t16_e64_gfx11)) { + if (Res && AMDGPU::isMAC(MI.getOpcode())) { // Insert dummy unused src2_modifiers. insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src2_modifiers); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -509,6 +509,9 @@ LLVM_READONLY bool isVOPD(unsigned Opc); +LLVM_READNONE +bool isMAC(unsigned Opc); + namespace VOPD { enum Component : unsigned { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -434,6 +434,27 @@ return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X); } +bool isMAC(unsigned Opc) { + return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || + Opc == AMDGPU::V_MAC_F32_e64_gfx10 || + Opc == AMDGPU::V_MAC_F32_e64_vi || + Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || + Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || + Opc == AMDGPU::V_MAC_F16_e64_vi || + Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || + Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || + Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || + Opc == AMDGPU::V_FMAC_F32_e64_vi || + Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || + Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || + Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || + Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 || + Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi || + Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi || + Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi || + Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi; +} + bool isTrue16Inst(unsigned Opc) { const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc); return Info ? Info->IsTrue16 : false; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -509,12 +509,22 @@ def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC { let Src0ModDPP = FPVRegInputMods; let Src1ModDPP = FPVRegInputMods; + let HasClamp = 1; } def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC { let HasExtVOP3DPP = 0; let HasSrc0Mods = 1; let HasSrc1Mods = 1; + let HasClamp = 1; + + let Src0Mod = Int32InputMods; + let Src1Mod = Int32InputMods; + let Ins64 = getIns64.ret, + 3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/, + 1 /*HasSrc2Mods*/, HasOMod, + Src0Mod, Src1Mod, Src2Mod>.ret; + let Asm64 = "$vdst, $src0, $src1$clamp"; } // Write out to vcc or arbitrary SGPR. @@ -2281,7 +2291,7 @@ defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>; } -multiclass VOP2_Real_DOT_ACC_gfx9 op> : VOP2_Real_e32_vi { +multiclass VOP2_Real_DOT_ACC_gfx9 op> : Base_VOP2_Real_e32e64_vi { def _dpp_vi : VOP2_DPP(NAME#"_dpp")>; } diff --git a/llvm/test/MC/AMDGPU/xdl-insts-err.s b/llvm/test/MC/AMDGPU/xdl-insts-err.s --- a/llvm/test/MC/AMDGPU/xdl-insts-err.s +++ b/llvm/test/MC/AMDGPU/xdl-insts-err.s @@ -5,7 +5,6 @@ v_dot2c_f32_f16 v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU -// GFX908-ERR: error: e64 variant of this instruction is not supported v_dot2c_f32_f16_e64 v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU @@ -16,7 +15,6 @@ v_dot2c_i32_i16 v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU -// GFX908-ERR: error: e64 variant of this instruction is not supported v_dot2c_i32_i16_e64 v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU @@ -27,7 +25,6 @@ v_dot4c_i32_i8 v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU -// GFX908-ERR: error: e64 variant of this instruction is not supported v_dot4c_i32_i8_e64 v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU @@ -38,7 +35,6 @@ v_dot8c_i32_i4 v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU -// GFX908-ERR: error: e64 variant of this instruction is not supported v_dot8c_i32_i4_e64 v0, v1, v2 // GFX906-ERR: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s b/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s --- a/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s +++ b/llvm/test/MC/AMDGPU/xdl-insts-gfx908.s @@ -1,4 +1,6 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx90a -show-encoding %s | FileCheck %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck %s // CHECK: encoding: [0x01,0x05,0x0a,0x6e] v_dot2c_f32_f16 v5, v1, v2 @@ -102,6 +104,27 @@ // CHECK: encoding: [0xfa,0x04,0x0a,0x6e,0x01,0xe4,0x80,0x00] v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK: encoding: [0x05,0x00,0x37,0xd1,0x01,0xfb,0x01,0x00] +v_dot2c_f32_f16_e64 v5, v1, src_scc + +// CHECK: encoding: [0x05,0x00,0x37,0xd1,0xff,0xf9,0x01,0x00] +v_dot2c_f32_f16_e64 v5, v255, src_execz + +// CHECK: encoding: [0x05,0x00,0x37,0xd1,0x65,0xca,0x00,0x00] +v_dot2c_f32_f16_e64 v5, s101, s101 + +// CHECK: encoding: [0x05,0x00,0x37,0xd1,0xc1,0xcc,0x00,0x00] +v_dot2c_f32_f16_e64 v5, -1, flat_scratch_lo + +// CHECK: encoding: [0x05,0x02,0x37,0xd1,0xf0,0xce,0x00,0x40] +v_dot2c_f32_f16_e64 v5, 0.5, -|flat_scratch_hi| + +// CHECK: encoding: [0x05,0x00,0x37,0xd1,0xfc,0xe0,0x01,0x10] +v_dot2c_f32_f16_e64 v5, src_execz, 0.5 mul:4 + +// CHECK: encoding: [0xff,0x81,0x37,0xd1,0xfd,0x82,0x01,0x38] +v_dot2c_f32_f16_e64 v255, -|src_scc|, -1 clamp div:2 + // CHECK: encoding: [0x01,0x05,0x0a,0x70] v_dot2c_i32_i16 v5, v1, v2 @@ -192,6 +215,27 @@ // CHECK: encoding: [0xfa,0x04,0x0a,0x70,0x01,0xe4,0x08,0x00] v_dot2c_i32_i16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// CHECK: encoding: [0x05,0x00,0x38,0xd1,0x01,0xfb,0x01,0x00] +v_dot2c_i32_i16_e64 v5, v1, src_scc + +// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xff,0xf9,0x01,0x00] +v_dot2c_i32_i16_e64 v5, v255, src_execz + +// CHECK: encoding: [0x05,0x00,0x38,0xd1,0x65,0xca,0x00,0x00] +v_dot2c_i32_i16_e64 v5, s101, s101 + +// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xc1,0xcc,0x00,0x00] +v_dot2c_i32_i16_e64 v5, -1, flat_scratch_lo + +// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xf0,0xce,0x00,0x00] +v_dot2c_i32_i16_e64 v5, 0.5, flat_scratch_hi + +// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xfc,0xe0,0x01,0x00] +v_dot2c_i32_i16_e64 v5, src_execz, 0.5 + +// CHECK: encoding: [0xff,0x80,0x38,0xd1,0xfd,0x82,0x01,0x00] +v_dot2c_i32_i16_e64 v255, src_scc, -1 clamp + // CHECK: encoding: [0x01,0x05,0x0a,0x72] v_dot4c_i32_i8 v5, v1, v2 @@ -282,6 +326,27 @@ // CHECK: encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x08,0x00] v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// CHECK: encoding: [0x05,0x00,0x39,0xd1,0x01,0xfb,0x01,0x00] +v_dot4c_i32_i8_e64 v5, v1, src_scc + +// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xff,0xf9,0x01,0x00] +v_dot4c_i32_i8_e64 v5, v255, src_execz + +// CHECK: encoding: [0x05,0x00,0x39,0xd1,0x65,0xca,0x00,0x00] +v_dot4c_i32_i8_e64 v5, s101, s101 + +// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xc1,0xcc,0x00,0x00] +v_dot4c_i32_i8_e64 v5, -1, flat_scratch_lo + +// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xf0,0xce,0x00,0x00] +v_dot4c_i32_i8_e64 v5, 0.5, flat_scratch_hi + +// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xfc,0xe0,0x01,0x00] +v_dot4c_i32_i8_e64 v5, src_execz, 0.5 + +// CHECK: encoding: [0xff,0x80,0x39,0xd1,0xfd,0x82,0x01,0x00] +v_dot4c_i32_i8_e64 v255, src_scc, -1 clamp + // CHECK: encoding: [0x01,0x05,0x0a,0x74] v_dot8c_i32_i4 v5, v1, v2 @@ -372,6 +437,27 @@ // CHECK: encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x08,0x00] v_dot8c_i32_i4_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0x01,0xfb,0x01,0x00] +v_dot8c_i32_i4_e64 v5, v1, src_scc + +// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xff,0xf9,0x01,0x00] +v_dot8c_i32_i4_e64 v5, v255, src_execz + +// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0x65,0xca,0x00,0x00] +v_dot8c_i32_i4_e64 v5, s101, s101 + +// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xc1,0xcc,0x00,0x00] +v_dot8c_i32_i4_e64 v5, -1, flat_scratch_lo + +// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xf0,0xce,0x00,0x00] +v_dot8c_i32_i4_e64 v5, 0.5, flat_scratch_hi + +// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xfc,0xe0,0x01,0x00] +v_dot8c_i32_i4_e64 v5, src_execz, 0.5 + +// CHECK: encoding: [0xff,0x80,0x3a,0xd1,0xfd,0x82,0x01,0x00] +v_dot8c_i32_i4_e64 v255, src_scc, -1 clamp + // CHECK: encoding: [0x01,0x05,0x0a,0x78] v_pk_fmac_f16 v5, v1, v2 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt @@ -1,4 +1,6 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -disassemble -show-encoding < %s | FileCheck %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx90a -disassemble -show-encoding < %s | FileCheck %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -disassemble -show-encoding < %s | FileCheck %s # CHECK: v_dot2c_f32_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6e] 0x01,0x05,0x0a,0x6e @@ -96,6 +98,27 @@ # CHECK: v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x6e,0x01,0xe4,0x80,0x00] 0xfa,0x04,0x0a,0x6e,0x01,0xe4,0x80,0x00 +# CHECK: v_dot2c_f32_f16_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x37,0xd1,0x01,0xfb,0x01,0x00] +0x05,0x00,0x37,0xd1,0x01,0xfb,0x01,0x00 + +# CHECK: v_dot2c_f32_f16_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x37,0xd1,0xff,0xf9,0x01,0x00] +0x05,0x00,0x37,0xd1,0xff,0xf9,0x01,0x00 + +# CHECK: v_dot2c_f32_f16_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x37,0xd1,0x65,0xca,0x00,0x00] +0x05,0x00,0x37,0xd1,0x65,0xca,0x00,0x00 + +# CHECK: v_dot2c_f32_f16_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x37,0xd1,0xc1,0xcc,0x00,0x00] +0x05,0x00,0x37,0xd1,0xc1,0xcc,0x00,0x00 + +# CHECK: v_dot2c_f32_f16_e64 v5, 0.5, -|flat_scratch_hi| ; encoding: [0x05,0x02,0x37,0xd1,0xf0,0xce,0x00,0x40] +0x05,0x02,0x37,0xd1,0xf0,0xce,0x00,0x40 + +# CHECK: v_dot2c_f32_f16_e64 v5, src_execz, 0.5 mul:4 ; encoding: [0x05,0x00,0x37,0xd1,0xfc,0xe0,0x01,0x10] +0x05,0x00,0x37,0xd1,0xfc,0xe0,0x01,0x10 + +# CHECK: v_dot2c_f32_f16_e64 v255, -|src_scc|, -1 clamp div:2 ; encoding: [0xff,0x81,0x37,0xd1,0xfd,0x82,0x01,0x38] +0xff,0x81,0x37,0xd1,0xfd,0x82,0x01,0x38 + # CHECK: v_dot2c_i32_i16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x70] 0x01,0x05,0x0a,0x70 @@ -180,6 +203,27 @@ # CHECK: v_dot2c_i32_i16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x70,0x01,0xe4,0x08,0x00] 0xfa,0x04,0x0a,0x70,0x01,0xe4,0x08,0x00 +# CHECK: v_dot2c_i32_i16_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x38,0xd1,0x01,0xfb,0x01,0x00] +0x05,0x00,0x38,0xd1,0x01,0xfb,0x01,0x00 + +# CHECK: v_dot2c_i32_i16_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x38,0xd1,0xff,0xf9,0x01,0x00] +0x05,0x00,0x38,0xd1,0xff,0xf9,0x01,0x00 + +# CHECK: v_dot2c_i32_i16_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x38,0xd1,0x65,0xca,0x00,0x00] +0x05,0x00,0x38,0xd1,0x65,0xca,0x00,0x00 + +# CHECK: v_dot2c_i32_i16_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x38,0xd1,0xc1,0xcc,0x00,0x00] +0x05,0x00,0x38,0xd1,0xc1,0xcc,0x00,0x00 + +# CHECK: v_dot2c_i32_i16_e64 v5, 0.5, flat_scratch_hi ; encoding: [0x05,0x00,0x38,0xd1,0xf0,0xce,0x00,0x00] +0x05,0x00,0x38,0xd1,0xf0,0xce,0x00,0x00 + +# CHECK: v_dot2c_i32_i16_e64 v5, src_execz, 0.5 ; encoding: [0x05,0x00,0x38,0xd1,0xfc,0xe0,0x01,0x00] +0x05,0x00,0x38,0xd1,0xfc,0xe0,0x01,0x00 + +# CHECK: v_dot2c_i32_i16_e64 v255, src_scc, -1 clamp ; encoding: [0xff,0x80,0x38,0xd1,0xfd,0x82,0x01,0x00] +0xff,0x80,0x38,0xd1,0xfd,0x82,0x01,0x00 + # CHECK: v_dot4c_i32_i8_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x72] 0x01,0x05,0x0a,0x72 @@ -264,6 +308,27 @@ # CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x08,0x00] 0xfa,0x04,0x0a,0x72,0x01,0xe4,0x08,0x00 +# CHECK: v_dot4c_i32_i8_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x39,0xd1,0x01,0xfb,0x01,0x00] +0x05,0x00,0x39,0xd1,0x01,0xfb,0x01,0x00 + +# CHECK: v_dot4c_i32_i8_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x39,0xd1,0xff,0xf9,0x01,0x00] +0x05,0x00,0x39,0xd1,0xff,0xf9,0x01,0x00 + +# CHECK: v_dot4c_i32_i8_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x39,0xd1,0x65,0xca,0x00,0x00] +0x05,0x00,0x39,0xd1,0x65,0xca,0x00,0x00 + +# CHECK: v_dot4c_i32_i8_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x39,0xd1,0xc1,0xcc,0x00,0x00] +0x05,0x00,0x39,0xd1,0xc1,0xcc,0x00,0x00 + +# CHECK: v_dot4c_i32_i8_e64 v5, 0.5, flat_scratch_hi ; encoding: [0x05,0x00,0x39,0xd1,0xf0,0xce,0x00,0x00] +0x05,0x00,0x39,0xd1,0xf0,0xce,0x00,0x00 + +# CHECK: v_dot4c_i32_i8_e64 v5, src_execz, 0.5 ; encoding: [0x05,0x00,0x39,0xd1,0xfc,0xe0,0x01,0x00] +0x05,0x00,0x39,0xd1,0xfc,0xe0,0x01,0x00 + +# CHECK: v_dot4c_i32_i8_e64 v255, src_scc, -1 clamp ; encoding: [0xff,0x80,0x39,0xd1,0xfd,0x82,0x01,0x00] +0xff,0x80,0x39,0xd1,0xfd,0x82,0x01,0x00 + # CHECK: v_dot8c_i32_i4_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x74] 0x01,0x05,0x0a,0x74 @@ -348,6 +413,27 @@ # CHECK: v_dot8c_i32_i4_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x08,0x00] 0xfa,0x04,0x0a,0x74,0x01,0xe4,0x08,0x00 +# CHECK: v_dot8c_i32_i4_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x3a,0xd1,0x01,0xfb,0x01,0x00] +0x05,0x00,0x3a,0xd1,0x01,0xfb,0x01,0x00 + +# CHECK: v_dot8c_i32_i4_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x3a,0xd1,0xff,0xf9,0x01,0x00] +0x05,0x00,0x3a,0xd1,0xff,0xf9,0x01,0x00 + +# CHECK: v_dot8c_i32_i4_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x3a,0xd1,0x65,0xca,0x00,0x00] +0x05,0x00,0x3a,0xd1,0x65,0xca,0x00,0x00 + +# CHECK: v_dot8c_i32_i4_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x3a,0xd1,0xc1,0xcc,0x00,0x00] +0x05,0x00,0x3a,0xd1,0xc1,0xcc,0x00,0x00 + +# CHECK: v_dot8c_i32_i4_e64 v5, 0.5, flat_scratch_hi ; encoding: [0x05,0x00,0x3a,0xd1,0xf0,0xce,0x00,0x00] +0x05,0x00,0x3a,0xd1,0xf0,0xce,0x00,0x00 + +# CHECK: v_dot8c_i32_i4_e64 v5, src_execz, 0.5 ; encoding: [0x05,0x00,0x3a,0xd1,0xfc,0xe0,0x01,0x00] +0x05,0x00,0x3a,0xd1,0xfc,0xe0,0x01,0x00 + +# CHECK: v_dot8c_i32_i4_e64 v255, src_scc, -1 clamp ; encoding: [0xff,0x80,0x3a,0xd1,0xfd,0x82,0x01,0x00] +0xff,0x80,0x3a,0xd1,0xfd,0x82,0x01,0x00 + # CHECK: v_pk_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] 0x01,0x05,0x0a,0x78