Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2188,8 +2188,9 @@ const MCOperand &Src = Inst.getOperand(SrcIdx); if (Src.isReg()) { const unsigned SrcReg = mc2PseudoReg(Src.getReg()); - if (isRegIntersect(DstReg, SrcReg, TRI)) { - return false; + // Make sure dst and src are different (and do not intersect) + for (MCRegAliasIterator R(DstReg, TRI, true); R.isValid(); ++R) { + if (*R == SrcReg) return false; } } } Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -271,9 +271,6 @@ /// \brief Is Reg - scalar register bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); -/// \brief Is there any intersection between registers -bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI); - /// If \p Reg is a pseudo reg, return the correct hardware register given /// \p STI otherwise return \p Reg. unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -538,27 +538,6 @@ Reg == AMDGPU::SCC; } -bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { - - if (Reg0 == Reg1) { - return true; - } - - unsigned SubReg0 = TRI->getSubReg(Reg0, 1); - if (SubReg0 == 0) { - return TRI->getSubRegIndex(Reg1, Reg0) > 0; - } - - for (unsigned Idx = 2; SubReg0 > 0; ++Idx) { - if (isRegIntersect(Reg1, SubReg0, TRI)) { - return true; - } - SubReg0 = TRI->getSubReg(Reg0, Idx); - } - - return false; -} - unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { switch(Reg) { Index: lib/Target/AMDGPU/VOP3PInstructions.td =================================================================== --- lib/Target/AMDGPU/VOP3PInstructions.td +++ lib/Target/AMDGPU/VOP3PInstructions.td @@ -25,6 +25,9 @@ let isCommutable = 1 in { def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile, fma>; +def V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile>; +def V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile>; + def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile, fadd>; def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile, fmul>; def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile, fmaxnum>; @@ -32,7 +35,6 @@ def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile, add>; def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile>; -def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile, sub>; def V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile, mul>; def V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile, smin>; @@ -41,6 +43,9 @@ def V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile, umax>; } +def V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3_Profile>; +def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile, sub>; + def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile, lshl_rev>; def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile, ashr_rev>; def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile, lshr_rev>; @@ -59,6 +64,7 @@ } } +defm V_PK_MAD_I16 : VOP3P_Real_vi <0x380>; defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>; defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>; defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>; @@ -67,8 +73,10 @@ defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>; defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>; defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>; +defm V_PK_MAD_U16 : VOP3P_Real_vi <0x389>; defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>; +defm V_PK_SUB_U16 : VOP3P_Real_vi <0x38b>; defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>; defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>; defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>; Index: test/MC/AMDGPU/gfx9_asm_all.s =================================================================== --- test/MC/AMDGPU/gfx9_asm_all.s +++ test/MC/AMDGPU/gfx9_asm_all.s @@ -104933,3 +104933,354 @@ v_cmpx_t_u32_sdwa s[6:7], v1, sext(v2) src0_sel:DWORD src1_sel:DWORD // CHECK: [0xf9,0x04,0xbe,0x7d,0x01,0x86,0x06,0x0e] + +v_pk_mad_i16 v5, v1, v2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_i16 v255, v1, v2, v3 +// CHECK: [0xff,0x40,0x80,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_i16 v5, v255, v2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0xff,0x05,0x0e,0x1c] + +v_pk_mad_i16 v5, s1, v2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x04,0x0e,0x1c] + +v_pk_mad_i16 v5, s101, v2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x65,0x04,0x0e,0x1c] + +v_pk_mad_i16 v5, flat_scratch_lo, v2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x66,0x04,0x0e,0x1c] + +v_pk_mad_i16 v5, flat_scratch_hi, v2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x67,0x04,0x0e,0x1c] + +v_pk_mad_i16 v5, vcc_lo, v2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x6a,0x04,0x0e,0x1c] + +v_pk_mad_i16 v5, vcc_hi, v2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x6b,0x04,0x0e,0x1c] + +v_pk_mad_i16 v5, m0, v2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x7c,0x04,0x0e,0x1c] + +v_pk_mad_i16 v5, exec_lo, v2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x7e,0x04,0x0e,0x1c] + +v_pk_mad_i16 v5, exec_hi, v2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x7f,0x04,0x0e,0x1c] + +v_pk_mad_i16 v5, v1, v255, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0xff,0x0f,0x1c] + +v_pk_mad_i16 v5, v1, s2, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0x0c,0x1c] + +v_pk_mad_i16 v5, v1, s101, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0xcb,0x0c,0x1c] + +v_pk_mad_i16 v5, v1, flat_scratch_lo, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0xcd,0x0c,0x1c] + +v_pk_mad_i16 v5, v1, flat_scratch_hi, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0xcf,0x0c,0x1c] + +v_pk_mad_i16 v5, v1, vcc_lo, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0xd5,0x0c,0x1c] + +v_pk_mad_i16 v5, v1, vcc_hi, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0xd7,0x0c,0x1c] + +v_pk_mad_i16 v5, v1, m0, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0xf9,0x0c,0x1c] + +v_pk_mad_i16 v5, v1, exec_lo, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0xfd,0x0c,0x1c] + +v_pk_mad_i16 v5, v1, exec_hi, v3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0xff,0x0c,0x1c] + +v_pk_mad_i16 v5, v1, v2, v255 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x1f] + +v_pk_mad_i16 v5, v1, v2, s3 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0x0e,0x18] + +v_pk_mad_i16 v5, v1, v2, s101 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0x96,0x19] + +v_pk_mad_i16 v5, v1, v2, flat_scratch_lo +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0x9a,0x19] + +v_pk_mad_i16 v5, v1, v2, flat_scratch_hi +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0x9e,0x19] + +v_pk_mad_i16 v5, v1, v2, vcc_lo +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0xaa,0x19] + +v_pk_mad_i16 v5, v1, v2, vcc_hi +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0xae,0x19] + +v_pk_mad_i16 v5, v1, v2, m0 +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0xf2,0x19] + +v_pk_mad_i16 v5, v1, v2, exec_lo +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfa,0x19] + +v_pk_mad_i16 v5, v1, v2, exec_hi +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x19] + +v_pk_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0] +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_i16 v5, v1, v2, v3 op_sel:[1,0,0] +// CHECK: [0x05,0x48,0x80,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_i16 v5, v1, v2, v3 op_sel:[0,1,0] +// CHECK: [0x05,0x50,0x80,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_i16 v5, v1, v2, v3 op_sel:[0,0,1] +// CHECK: [0x05,0x60,0x80,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1] +// CHECK: [0x05,0x78,0x80,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_i16 v5, v1, v2, v3 op_sel_hi:[1,1,1] +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_i16 v5, v1, v2, v3 op_sel_hi:[0,0,0] +// CHECK: [0x05,0x00,0x80,0xd3,0x01,0x05,0x0e,0x04] + +v_pk_mad_i16 v5, v1, v2, v3 op_sel_hi:[1,0,0] +// CHECK: [0x05,0x00,0x80,0xd3,0x01,0x05,0x0e,0x0c] + +v_pk_mad_i16 v5, v1, v2, v3 op_sel_hi:[0,1,0] +// CHECK: [0x05,0x00,0x80,0xd3,0x01,0x05,0x0e,0x14] + +v_pk_mad_i16 v5, v1, v2, v3 op_sel_hi:[0,0,1] +// CHECK: [0x05,0x40,0x80,0xd3,0x01,0x05,0x0e,0x04] + +v_pk_mad_i16 v5, v1, v2, v3 clamp +// CHECK: [0x05,0xc0,0x80,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_u16 v5, v1, v2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_u16 v255, v1, v2, v3 +// CHECK: [0xff,0x40,0x89,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_u16 v5, v255, v2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0xff,0x05,0x0e,0x1c] + +v_pk_mad_u16 v5, s1, v2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x04,0x0e,0x1c] + +v_pk_mad_u16 v5, s101, v2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x65,0x04,0x0e,0x1c] + +v_pk_mad_u16 v5, flat_scratch_lo, v2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x66,0x04,0x0e,0x1c] + +v_pk_mad_u16 v5, flat_scratch_hi, v2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x67,0x04,0x0e,0x1c] + +v_pk_mad_u16 v5, vcc_lo, v2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x6a,0x04,0x0e,0x1c] + +v_pk_mad_u16 v5, vcc_hi, v2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x6b,0x04,0x0e,0x1c] + +v_pk_mad_u16 v5, m0, v2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x7c,0x04,0x0e,0x1c] + +v_pk_mad_u16 v5, exec_lo, v2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x7e,0x04,0x0e,0x1c] + +v_pk_mad_u16 v5, exec_hi, v2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x7f,0x04,0x0e,0x1c] + +v_pk_mad_u16 v5, v1, v255, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0xff,0x0f,0x1c] + +v_pk_mad_u16 v5, v1, s2, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0c,0x1c] + +v_pk_mad_u16 v5, v1, s101, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0xcb,0x0c,0x1c] + +v_pk_mad_u16 v5, v1, flat_scratch_lo, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0xcd,0x0c,0x1c] + +v_pk_mad_u16 v5, v1, flat_scratch_hi, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0xcf,0x0c,0x1c] + +v_pk_mad_u16 v5, v1, vcc_lo, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0xd5,0x0c,0x1c] + +v_pk_mad_u16 v5, v1, vcc_hi, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0xd7,0x0c,0x1c] + +v_pk_mad_u16 v5, v1, m0, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0xf9,0x0c,0x1c] + +v_pk_mad_u16 v5, v1, exec_lo, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0xfd,0x0c,0x1c] + +v_pk_mad_u16 v5, v1, exec_hi, v3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0xff,0x0c,0x1c] + +v_pk_mad_u16 v5, v1, v2, v255 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x1f] + +v_pk_mad_u16 v5, v1, v2, s3 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x18] + +v_pk_mad_u16 v5, v1, v2, s101 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0x96,0x19] + +v_pk_mad_u16 v5, v1, v2, flat_scratch_lo +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0x9a,0x19] + +v_pk_mad_u16 v5, v1, v2, flat_scratch_hi +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0x9e,0x19] + +v_pk_mad_u16 v5, v1, v2, vcc_lo +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0xaa,0x19] + +v_pk_mad_u16 v5, v1, v2, vcc_hi +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0xae,0x19] + +v_pk_mad_u16 v5, v1, v2, m0 +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0xf2,0x19] + +v_pk_mad_u16 v5, v1, v2, exec_lo +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfa,0x19] + +v_pk_mad_u16 v5, v1, v2, exec_hi +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x19] + +v_pk_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0] +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0] +// CHECK: [0x05,0x48,0x89,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_u16 v5, v1, v2, v3 op_sel:[0,1,0] +// CHECK: [0x05,0x50,0x89,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_u16 v5, v1, v2, v3 op_sel:[0,0,1] +// CHECK: [0x05,0x60,0x89,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1] +// CHECK: [0x05,0x78,0x89,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_u16 v5, v1, v2, v3 op_sel_hi:[1,1,1] +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_mad_u16 v5, v1, v2, v3 op_sel_hi:[0,0,0] +// CHECK: [0x05,0x00,0x89,0xd3,0x01,0x05,0x0e,0x04] + +v_pk_mad_u16 v5, v1, v2, v3 op_sel_hi:[1,0,0] +// CHECK: [0x05,0x00,0x89,0xd3,0x01,0x05,0x0e,0x0c] + +v_pk_mad_u16 v5, v1, v2, v3 op_sel_hi:[0,1,0] +// CHECK: [0x05,0x00,0x89,0xd3,0x01,0x05,0x0e,0x14] + +v_pk_mad_u16 v5, v1, v2, v3 op_sel_hi:[0,0,1] +// CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x04] + +v_pk_mad_u16 v5, v1, v2, v3 clamp +// CHECK: [0x05,0xc0,0x89,0xd3,0x01,0x05,0x0e,0x1c] + +v_pk_sub_u16 v5, v1, v2 +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] + +v_pk_sub_u16 v255, v1, v2 +// CHECK: [0xff,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] + +v_pk_sub_u16 v5, v255, v2 +// CHECK: [0x05,0x00,0x8b,0xd3,0xff,0x05,0x02,0x18] + +v_pk_sub_u16 v5, s1, v2 +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x04,0x02,0x18] + +v_pk_sub_u16 v5, s101, v2 +// CHECK: [0x05,0x00,0x8b,0xd3,0x65,0x04,0x02,0x18] + +v_pk_sub_u16 v5, flat_scratch_lo, v2 +// CHECK: [0x05,0x00,0x8b,0xd3,0x66,0x04,0x02,0x18] + +v_pk_sub_u16 v5, flat_scratch_hi, v2 +// CHECK: [0x05,0x00,0x8b,0xd3,0x67,0x04,0x02,0x18] + +v_pk_sub_u16 v5, vcc_lo, v2 +// CHECK: [0x05,0x00,0x8b,0xd3,0x6a,0x04,0x02,0x18] + +v_pk_sub_u16 v5, vcc_hi, v2 +// CHECK: [0x05,0x00,0x8b,0xd3,0x6b,0x04,0x02,0x18] + +v_pk_sub_u16 v5, m0, v2 +// CHECK: [0x05,0x00,0x8b,0xd3,0x7c,0x04,0x02,0x18] + +v_pk_sub_u16 v5, exec_lo, v2 +// CHECK: [0x05,0x00,0x8b,0xd3,0x7e,0x04,0x02,0x18] + +v_pk_sub_u16 v5, exec_hi, v2 +// CHECK: [0x05,0x00,0x8b,0xd3,0x7f,0x04,0x02,0x18] + +v_pk_sub_u16 v5, v1, v255 +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xff,0x03,0x18] + +v_pk_sub_u16 v5, v1, s2 +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x00,0x18] + +v_pk_sub_u16 v5, v1, s101 +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xcb,0x00,0x18] + +v_pk_sub_u16 v5, v1, flat_scratch_lo +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xcd,0x00,0x18] + +v_pk_sub_u16 v5, v1, flat_scratch_hi +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xcf,0x00,0x18] + +v_pk_sub_u16 v5, v1, vcc_lo +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xd5,0x00,0x18] + +v_pk_sub_u16 v5, v1, vcc_hi +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xd7,0x00,0x18] + +v_pk_sub_u16 v5, v1, m0 +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xf9,0x00,0x18] + +v_pk_sub_u16 v5, v1, exec_lo +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xfd,0x00,0x18] + +v_pk_sub_u16 v5, v1, exec_hi +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xff,0x00,0x18] + +v_pk_sub_u16 v5, v1, v2 op_sel:[0,0] +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] + +v_pk_sub_u16 v5, v1, v2 op_sel:[1,0] +// CHECK: [0x05,0x08,0x8b,0xd3,0x01,0x05,0x02,0x18] + +v_pk_sub_u16 v5, v1, v2 op_sel:[0,1] +// CHECK: [0x05,0x10,0x8b,0xd3,0x01,0x05,0x02,0x18] + +v_pk_sub_u16 v5, v1, v2 op_sel:[1,1] +// CHECK: [0x05,0x18,0x8b,0xd3,0x01,0x05,0x02,0x18] + +v_pk_sub_u16 v5, v1, v2 op_sel_hi:[1,1] +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] + +v_pk_sub_u16 v5, v1, v2 op_sel_hi:[0,0] +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x00] + +v_pk_sub_u16 v5, v1, v2 op_sel_hi:[1,0] +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x08] + +v_pk_sub_u16 v5, v1, v2 op_sel_hi:[0,1] +// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x10] + +v_pk_sub_u16 v5, v1, v2 clamp +// CHECK: [0x05,0x80,0x8b,0xd3,0x01,0x05,0x02,0x18] Index: test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt =================================================================== --- test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt +++ test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt @@ -88934,3 +88934,336 @@ # CHECK: v_pk_sub_i16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x83,0xd3,0x01,0x05,0x02,0x18] 0x05,0x80,0x83,0xd3,0x01,0x05,0x02,0x18 + +# CHECK: v_pk_mad_i16 v5, v1, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0x40,0x80,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v255, v1, v2, v3 ; encoding: [0xff,0x40,0x80,0xd3,0x01,0x05,0x0e,0x1c] +0xff,0x40,0x80,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, v255, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0xff,0x05,0x0e,0x1c] +0x05,0x40,0x80,0xd3,0xff,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, s1, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x04,0x0e,0x1c] +0x05,0x40,0x80,0xd3,0x01,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, s101, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x65,0x04,0x0e,0x1c] +0x05,0x40,0x80,0xd3,0x65,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, flat_scratch_lo, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x66,0x04,0x0e,0x1c] +0x05,0x40,0x80,0xd3,0x66,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, flat_scratch_hi, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x67,0x04,0x0e,0x1c] +0x05,0x40,0x80,0xd3,0x67,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x6a,0x04,0x0e,0x1c] +0x05,0x40,0x80,0xd3,0x6a,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x6b,0x04,0x0e,0x1c] +0x05,0x40,0x80,0xd3,0x6b,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, m0, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x7c,0x04,0x0e,0x1c] +0x05,0x40,0x80,0xd3,0x7c,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x7e,0x04,0x0e,0x1c] +0x05,0x40,0x80,0xd3,0x7e,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x7f,0x04,0x0e,0x1c] +0x05,0x40,0x80,0xd3,0x7f,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, v255, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xff,0x0f,0x1c] +0x05,0x40,0x80,0xd3,0x01,0xff,0x0f,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, s2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0x0c,0x1c] +0x05,0x40,0x80,0xd3,0x01,0x05,0x0c,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, s101, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xcb,0x0c,0x1c] +0x05,0x40,0x80,0xd3,0x01,0xcb,0x0c,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, flat_scratch_lo, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xcd,0x0c,0x1c] +0x05,0x40,0x80,0xd3,0x01,0xcd,0x0c,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, flat_scratch_hi, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xcf,0x0c,0x1c] +0x05,0x40,0x80,0xd3,0x01,0xcf,0x0c,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xd5,0x0c,0x1c] +0x05,0x40,0x80,0xd3,0x01,0xd5,0x0c,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xd7,0x0c,0x1c] +0x05,0x40,0x80,0xd3,0x01,0xd7,0x0c,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, m0, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xf9,0x0c,0x1c] +0x05,0x40,0x80,0xd3,0x01,0xf9,0x0c,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xfd,0x0c,0x1c] +0x05,0x40,0x80,0xd3,0x01,0xfd,0x0c,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xff,0x0c,0x1c] +0x05,0x40,0x80,0xd3,0x01,0xff,0x0c,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, v2, v255 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x1f] +0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x1f + +# CHECK: v_pk_mad_i16 v5, v1, v2, s3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0x0e,0x18] +0x05,0x40,0x80,0xd3,0x01,0x05,0x0e,0x18 + +# CHECK: v_pk_mad_i16 v5, v1, v2, s101 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0x96,0x19] +0x05,0x40,0x80,0xd3,0x01,0x05,0x96,0x19 + +# CHECK: v_pk_mad_i16 v5, v1, v2, flat_scratch_lo ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0x9a,0x19] +0x05,0x40,0x80,0xd3,0x01,0x05,0x9a,0x19 + +# CHECK: v_pk_mad_i16 v5, v1, v2, flat_scratch_hi ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0x9e,0x19] +0x05,0x40,0x80,0xd3,0x01,0x05,0x9e,0x19 + +# CHECK: v_pk_mad_i16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xaa,0x19] +0x05,0x40,0x80,0xd3,0x01,0x05,0xaa,0x19 + +# CHECK: v_pk_mad_i16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xae,0x19] +0x05,0x40,0x80,0xd3,0x01,0x05,0xae,0x19 + +# CHECK: v_pk_mad_i16 v5, v1, v2, m0 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xf2,0x19] +0x05,0x40,0x80,0xd3,0x01,0x05,0xf2,0x19 + +# CHECK: v_pk_mad_i16 v5, v1, v2, exec_lo ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfa,0x19] +0x05,0x40,0x80,0xd3,0x01,0x05,0xfa,0x19 + +# CHECK: v_pk_mad_i16 v5, v1, v2, exec_hi ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x19] +0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x19 + +# CHECK: v_pk_mad_i16 v5, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x05,0x48,0x80,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0x48,0x80,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x05,0x50,0x80,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0x50,0x80,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x05,0x60,0x80,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0x60,0x80,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x05,0x78,0x80,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0x78,0x80,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_i16 v5, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x80,0xd3,0x01,0x05,0x0e,0x04] +0x05,0x00,0x80,0xd3,0x01,0x05,0x0e,0x04 + +# CHECK: v_pk_mad_i16 v5, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x05,0x00,0x80,0xd3,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x80,0xd3,0x01,0x05,0x0e,0x0c + +# CHECK: v_pk_mad_i16 v5, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x05,0x00,0x80,0xd3,0x01,0x05,0x0e,0x14] +0x05,0x00,0x80,0xd3,0x01,0x05,0x0e,0x14 + +# CHECK: v_pk_mad_i16 v5, v1, v2, v3 op_sel_hi:[0,0,1] ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0x0e,0x04] +0x05,0x40,0x80,0xd3,0x01,0x05,0x0e,0x04 + +# CHECK: v_pk_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0xc0,0x80,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0xc0,0x80,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v255, v1, v2, v3 ; encoding: [0xff,0x40,0x89,0xd3,0x01,0x05,0x0e,0x1c] +0xff,0x40,0x89,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, v255, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0xff,0x05,0x0e,0x1c] +0x05,0x40,0x89,0xd3,0xff,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, s1, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x04,0x0e,0x1c] +0x05,0x40,0x89,0xd3,0x01,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, s101, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x65,0x04,0x0e,0x1c] +0x05,0x40,0x89,0xd3,0x65,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, flat_scratch_lo, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x66,0x04,0x0e,0x1c] +0x05,0x40,0x89,0xd3,0x66,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, flat_scratch_hi, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x67,0x04,0x0e,0x1c] +0x05,0x40,0x89,0xd3,0x67,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x6a,0x04,0x0e,0x1c] +0x05,0x40,0x89,0xd3,0x6a,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, vcc_hi, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x6b,0x04,0x0e,0x1c] +0x05,0x40,0x89,0xd3,0x6b,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, m0, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x7c,0x04,0x0e,0x1c] +0x05,0x40,0x89,0xd3,0x7c,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, exec_lo, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x7e,0x04,0x0e,0x1c] +0x05,0x40,0x89,0xd3,0x7e,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, exec_hi, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x7f,0x04,0x0e,0x1c] +0x05,0x40,0x89,0xd3,0x7f,0x04,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, v255, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xff,0x0f,0x1c] +0x05,0x40,0x89,0xd3,0x01,0xff,0x0f,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, s2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0c,0x1c] +0x05,0x40,0x89,0xd3,0x01,0x05,0x0c,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, s101, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xcb,0x0c,0x1c] +0x05,0x40,0x89,0xd3,0x01,0xcb,0x0c,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, flat_scratch_lo, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xcd,0x0c,0x1c] +0x05,0x40,0x89,0xd3,0x01,0xcd,0x0c,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, flat_scratch_hi, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xcf,0x0c,0x1c] +0x05,0x40,0x89,0xd3,0x01,0xcf,0x0c,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, vcc_lo, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xd5,0x0c,0x1c] +0x05,0x40,0x89,0xd3,0x01,0xd5,0x0c,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, vcc_hi, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xd7,0x0c,0x1c] +0x05,0x40,0x89,0xd3,0x01,0xd7,0x0c,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, m0, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xf9,0x0c,0x1c] +0x05,0x40,0x89,0xd3,0x01,0xf9,0x0c,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, exec_lo, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xfd,0x0c,0x1c] +0x05,0x40,0x89,0xd3,0x01,0xfd,0x0c,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, exec_hi, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xff,0x0c,0x1c] +0x05,0x40,0x89,0xd3,0x01,0xff,0x0c,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, v2, v255 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x1f] +0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x1f + +# CHECK: v_pk_mad_u16 v5, v1, v2, s3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x18] +0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x18 + +# CHECK: v_pk_mad_u16 v5, v1, v2, s101 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0x96,0x19] +0x05,0x40,0x89,0xd3,0x01,0x05,0x96,0x19 + +# CHECK: v_pk_mad_u16 v5, v1, v2, flat_scratch_lo ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0x9a,0x19] +0x05,0x40,0x89,0xd3,0x01,0x05,0x9a,0x19 + +# CHECK: v_pk_mad_u16 v5, v1, v2, flat_scratch_hi ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0x9e,0x19] +0x05,0x40,0x89,0xd3,0x01,0x05,0x9e,0x19 + +# CHECK: v_pk_mad_u16 v5, v1, v2, vcc_lo ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xaa,0x19] +0x05,0x40,0x89,0xd3,0x01,0x05,0xaa,0x19 + +# CHECK: v_pk_mad_u16 v5, v1, v2, vcc_hi ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xae,0x19] +0x05,0x40,0x89,0xd3,0x01,0x05,0xae,0x19 + +# CHECK: v_pk_mad_u16 v5, v1, v2, m0 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xf2,0x19] +0x05,0x40,0x89,0xd3,0x01,0x05,0xf2,0x19 + +# CHECK: v_pk_mad_u16 v5, v1, v2, exec_lo ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfa,0x19] +0x05,0x40,0x89,0xd3,0x01,0x05,0xfa,0x19 + +# CHECK: v_pk_mad_u16 v5, v1, v2, exec_hi ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x19] +0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x19 + +# CHECK: v_pk_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x05,0x48,0x89,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0x48,0x89,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x05,0x50,0x89,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0x50,0x89,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x05,0x60,0x89,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0x60,0x89,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x05,0x78,0x89,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0x78,0x89,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_mad_u16 v5, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x89,0xd3,0x01,0x05,0x0e,0x04] +0x05,0x00,0x89,0xd3,0x01,0x05,0x0e,0x04 + +# CHECK: v_pk_mad_u16 v5, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x05,0x00,0x89,0xd3,0x01,0x05,0x0e,0x0c] +0x05,0x00,0x89,0xd3,0x01,0x05,0x0e,0x0c + +# CHECK: v_pk_mad_u16 v5, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x05,0x00,0x89,0xd3,0x01,0x05,0x0e,0x14] +0x05,0x00,0x89,0xd3,0x01,0x05,0x0e,0x14 + +# CHECK: v_pk_mad_u16 v5, v1, v2, v3 op_sel_hi:[0,0,1] ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x04] +0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x04 + +# CHECK: v_pk_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0xc0,0x89,0xd3,0x01,0x05,0x0e,0x1c] +0x05,0xc0,0x89,0xd3,0x01,0x05,0x0e,0x1c + +# CHECK: v_pk_sub_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] +0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18 + +# CHECK: v_pk_sub_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] +0xff,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0xff,0x05,0x02,0x18] +0x05,0x00,0x8b,0xd3,0xff,0x05,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x04,0x02,0x18] +0x05,0x00,0x8b,0xd3,0x01,0x04,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x65,0x04,0x02,0x18] +0x05,0x00,0x8b,0xd3,0x65,0x04,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x66,0x04,0x02,0x18] +0x05,0x00,0x8b,0xd3,0x66,0x04,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x67,0x04,0x02,0x18] +0x05,0x00,0x8b,0xd3,0x67,0x04,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x6a,0x04,0x02,0x18] +0x05,0x00,0x8b,0xd3,0x6a,0x04,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x6b,0x04,0x02,0x18] +0x05,0x00,0x8b,0xd3,0x6b,0x04,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x7c,0x04,0x02,0x18] +0x05,0x00,0x8b,0xd3,0x7c,0x04,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x7e,0x04,0x02,0x18] +0x05,0x00,0x8b,0xd3,0x7e,0x04,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x7f,0x04,0x02,0x18] +0x05,0x00,0x8b,0xd3,0x7f,0x04,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xff,0x03,0x18] +0x05,0x00,0x8b,0xd3,0x01,0xff,0x03,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x00,0x18] +0x05,0x00,0x8b,0xd3,0x01,0x05,0x00,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xcb,0x00,0x18] +0x05,0x00,0x8b,0xd3,0x01,0xcb,0x00,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xcd,0x00,0x18] +0x05,0x00,0x8b,0xd3,0x01,0xcd,0x00,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xcf,0x00,0x18] +0x05,0x00,0x8b,0xd3,0x01,0xcf,0x00,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xd5,0x00,0x18] +0x05,0x00,0x8b,0xd3,0x01,0xd5,0x00,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xd7,0x00,0x18] +0x05,0x00,0x8b,0xd3,0x01,0xd7,0x00,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xf9,0x00,0x18] +0x05,0x00,0x8b,0xd3,0x01,0xf9,0x00,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xfd,0x00,0x18] +0x05,0x00,0x8b,0xd3,0x01,0xfd,0x00,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xff,0x00,0x18] +0x05,0x00,0x8b,0xd3,0x01,0xff,0x00,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd3,0x01,0x05,0x02,0x18] +0x05,0x08,0x8b,0xd3,0x01,0x05,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x8b,0xd3,0x01,0x05,0x02,0x18] +0x05,0x10,0x8b,0xd3,0x01,0x05,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x8b,0xd3,0x01,0x05,0x02,0x18] +0x05,0x18,0x8b,0xd3,0x01,0x05,0x02,0x18 + +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x00] +0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x00 + +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x08] +0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x08 + +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x10] +0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x10 + +# CHECK: v_pk_sub_u16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x8b,0xd3,0x01,0x05,0x02,0x18] +0x05,0x80,0x8b,0xd3,0x01,0x05,0x02,0x18