Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -104,6 +104,11 @@ MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const; MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; + + MCOperand decodeSDWA9Src(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeSDWA9Src16(unsigned Val) const; + MCOperand decodeSDWA9Src32(unsigned Val) const; + MCOperand decodeSDWA9VopcDst(unsigned Val) const; }; //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -62,32 +62,33 @@ return addOperand(Inst, MCOperand::createImm(Imm)); } -#define DECODE_OPERAND2(RegClass, DecName) \ -static DecodeStatus Decode##RegClass##RegisterClass(MCInst &Inst, \ - unsigned Imm, \ - uint64_t /*Addr*/, \ - const void *Decoder) { \ +#define DECODE_OPERAND(StaticDecoderName, DecoderName) \ +static DecodeStatus StaticDecoderName(MCInst &Inst, \ + unsigned Imm, \ + uint64_t /*Addr*/, \ + const void *Decoder) { \ auto DAsm = static_cast(Decoder); \ - return addOperand(Inst, DAsm->decodeOperand_##DecName(Imm)); \ + return addOperand(Inst, DAsm->DecoderName(Imm)); \ } -#define DECODE_OPERAND(RegClass) DECODE_OPERAND2(RegClass, RegClass) +#define DECODE_OPERAND_REG(RegClass) \ +DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) -DECODE_OPERAND(VGPR_32) -DECODE_OPERAND(VS_32) -DECODE_OPERAND(VS_64) +DECODE_OPERAND_REG(VGPR_32) +DECODE_OPERAND_REG(VS_32) +DECODE_OPERAND_REG(VS_64) -DECODE_OPERAND(VReg_64) -DECODE_OPERAND(VReg_96) -DECODE_OPERAND(VReg_128) +DECODE_OPERAND_REG(VReg_64) +DECODE_OPERAND_REG(VReg_96) +DECODE_OPERAND_REG(VReg_128) -DECODE_OPERAND(SReg_32) -DECODE_OPERAND(SReg_32_XM0_XEXEC) -DECODE_OPERAND(SReg_64) -DECODE_OPERAND(SReg_64_XEXEC) -DECODE_OPERAND(SReg_128) -DECODE_OPERAND(SReg_256) -DECODE_OPERAND(SReg_512) +DECODE_OPERAND_REG(SReg_32) +DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) +DECODE_OPERAND_REG(SReg_64) +DECODE_OPERAND_REG(SReg_64_XEXEC) +DECODE_OPERAND_REG(SReg_128) +DECODE_OPERAND_REG(SReg_256) +DECODE_OPERAND_REG(SReg_512) static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, @@ -106,6 +107,13 @@ return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm)); } +#define DECODE_SDWA9(DecName) \ +DECODE_OPERAND(decodeSDWA9##DecName, decodeSDWA9##DecName) + +DECODE_SDWA9(Src32) +DECODE_SDWA9(Src16) +DECODE_SDWA9(VopcDst) + #include "AMDGPUGenDisassemblerTables.inc" //===----------------------------------------------------------------------===// @@ -164,6 +172,9 @@ Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address); if (Res) break; + + Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address); + if (Res) break; } // Reinitialize Bytes as DPP64 could have eaten too much @@ -582,6 +593,48 @@ return errOperand(Val, "unknown operand encoding " + Twine(Val)); } +MCOperand AMDGPUDisassembler::decodeSDWA9Src(const OpWidthTy Width, + unsigned Val) const { + using namespace AMDGPU::SDWA; + + if (SDWA9EncValues::SRC_VGPR_MIN <= Val && + Val <= SDWA9EncValues::SRC_VGPR_MAX) { + return createRegOperand(getVgprClassId(Width), + Val - SDWA9EncValues::SRC_VGPR_MIN); + } + if (SDWA9EncValues::SRC_SGPR_MIN <= Val && + Val <= SDWA9EncValues::SRC_SGPR_MAX) { + return createSRegOperand(getSgprClassId(Width), + Val - SDWA9EncValues::SRC_SGPR_MIN); + } + + return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN); +} + +MCOperand AMDGPUDisassembler::decodeSDWA9Src16(unsigned Val) const { + return decodeSDWA9Src(OPW16, Val); +} + +MCOperand AMDGPUDisassembler::decodeSDWA9Src32(unsigned Val) const { + return decodeSDWA9Src(OPW32, Val); +} + + +MCOperand AMDGPUDisassembler::decodeSDWA9VopcDst(unsigned Val) const { + using namespace AMDGPU::SDWA; + + if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) { + Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK; + if (Val > AMDGPU::EncValues::SGPR_MAX) { + return decodeSpecialReg64(Val); + } else { + return createSRegOperand(getSgprClassId(OPW64), Val); + } + } else { + return createRegOperand(AMDGPU::VCC); + } +} + //===----------------------------------------------------------------------===// // AMDGPUSymbolizer //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -331,15 +331,17 @@ SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { + using namespace AMDGPU::SDWA; + uint64_t RegEnc = 0; const MCOperand &MO = MI.getOperand(OpNo); unsigned Reg = MO.getReg(); RegEnc |= MRI.getEncodingValue(Reg); - RegEnc &= SDWA9_SRC_REG_MASK; + RegEnc &= SDWA9EncValues::SRC_VGPR_MASK; if (AMDGPU::isSGPR(Reg, &MRI)) { - RegEnc |= SDWA9_SRC_SGPR_MASK; + RegEnc |= SDWA9EncValues::SRC_SGPR_MASK; } return RegEnc; } @@ -348,6 +350,8 @@ SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { + using namespace AMDGPU::SDWA; + uint64_t RegEnc = 0; const MCOperand &MO = MI.getOperand(OpNo); @@ -355,8 +359,8 @@ unsigned Reg = MO.getReg(); if (Reg != AMDGPU::VCC) { RegEnc |= MRI.getEncodingValue(Reg); - RegEnc &= SDWA9_VOPC_DST_REG_MASK; - RegEnc |= SDWA9_VOPC_DST_VCC_MASK; + RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK; + RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK; } return RegEnc; } Index: lib/Target/AMDGPU/SIDefines.h =================================================================== --- lib/Target/AMDGPU/SIDefines.h +++ lib/Target/AMDGPU/SIDefines.h @@ -299,10 +299,17 @@ UNUSED_PRESERVE = 2, }; -#define SDWA9_SRC_SGPR_MASK 0x100 -#define SDWA9_SRC_REG_MASK 0xFF -#define SDWA9_VOPC_DST_VCC_MASK 0x80 -#define SDWA9_VOPC_DST_REG_MASK 0x7F +enum SDWA9EncValues{ + SRC_SGPR_MASK = 0x100, + SRC_VGPR_MASK = 0xFF, + VOPC_DST_VCC_MASK = 0x80, + VOPC_DST_SGPR_MASK = 0x7F, + + SRC_VGPR_MIN = 0, + SRC_VGPR_MAX = 255, + SRC_SGPR_MIN = 256, + SRC_SGPR_MAX = 357, +}; } // namespace SDWA } // namespace AMDGPU Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -439,16 +439,25 @@ let ParserMatchClass = VReg32OrOffClass; } -def SDWA9Src : RegisterOperand { +class SDWA9Src : RegisterOperand { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_SDWA9_SRC"; let EncoderMethod = "getSDWA9SrcEncoding"; } +def SDWA9Src32 : SDWA9Src { + let DecoderMethod = "decodeSDWA9Src32"; +} + +def SDWA9Src16 : SDWA9Src { + let DecoderMethod = "decodeSDWA9Src16"; +} + def SDWA9VopcDst : VOPDstOperand { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_SDWA9_VOPC_DST"; let EncoderMethod = "getSDWA9VopcDstEncoding"; + let DecoderMethod = "decodeSDWA9VopcDst"; } class NamedMatchClass : AsmOperandClass { @@ -864,6 +873,10 @@ !if(!eq(VT.Size, 64), VReg_64, VGPR_32)); } +class getSDWA9SrcForVT { + RegisterOperand ret = !if(!eq(VT.Size, 16), SDWA9Src16, SDWA9Src32); +} + // Returns the register class to use for sources of VOP3 instructions for the // given VT. class getVOP3SrcForVT { @@ -1360,8 +1373,8 @@ field RegisterClass Src1DPP = getVregSrcForVT.ret; field RegisterClass Src0SDWA = getVregSrcForVT.ret; field RegisterClass Src1SDWA = getVregSrcForVT.ret; - field RegisterOperand Src0SDWA9 = SDWA9Src; - field RegisterOperand Src1SDWA9 = SDWA9Src; + field RegisterOperand Src0SDWA9 = getSDWA9SrcForVT.ret; + field RegisterOperand Src1SDWA9 = getSDWA9SrcForVT.ret; field Operand Src0Mod = getSrcMod.ret; field Operand Src1Mod = getSrcMod.ret; field Operand Src2Mod = getSrcMod.ret; Index: test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt @@ -0,0 +1,477 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX9 + +#-----------------------------------------------------------------------------# +# Input modifiers +#-----------------------------------------------------------------------------# + +# GFX9: v_fract_f32_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x36,0x00,0x7e,0x00,0x06,0x25,0x06] +0xf9 0x36 0x00 0x7e 0x00 0x06 0x25 0x06 + +# GFX9: v_sin_f32_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x52,0x00,0x7e,0x00,0x06,0x35,0x06] +0xf9 0x52 0x00 0x7e 0x00 0x06 0x35 0x06 + +# GFX9: v_add_f32_sdwa v0, -|v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x35,0x12] +0xf9 0x00 0x00 0x02 0x00 0x06 0x35 0x12 + +# GFX9: v_min_f32_sdwa v0, |v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x14,0x00,0x06,0x25,0x12] +0xf9 0x00 0x00 0x14 0x00 0x06 0x25 0x12 + +#-----------------------------------------------------------------------------# +# VOP1 +#-----------------------------------------------------------------------------# + +# GFX9: v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x02,0x10,0x06,0x06] +0xf9 0x02 0x02 0x7e 0x02 0x10 0x06 0x06 + +# GFX9: v_mov_b32_sdwa v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x06,0x7e,0x04,0x11,0x05,0x06] +0xf9 0x02 0x06 0x7e 0x04 0x11 0x05 0x06 + +# GFX9: v_mov_b32_sdwa v15, v99 dst_sel:BYTE_2 dst_unused:UNUSED_SEXT src0_sel:WORD_0 ; encoding: [0xf9,0x02,0x1e,0x7e,0x63,0x0a,0x04,0x06] +0xf9 0x02 0x1e 0x7e 0x63 0x0a 0x04 0x06 + +# GFX9: v_min_u32_sdwa v194, v13, v1 dst_sel:BYTE_3 dst_unused:UNUSED_SEXT src0_sel:BYTE_3 src1_sel:BYTE_2 ; encoding: [0xf9,0x02,0x84,0x1d,0x0d,0x0b,0x03,0x02] +0xf9 0x02 0x84 0x1d 0x0d 0x0b 0x03 0x02 + +# GFX9: v_min_u32_sdwa v255, v4, v1 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:WORD_1 ; encoding: [0xf9,0x02,0xfe,0x1d,0x04,0x04,0x02,0x05] +0xf9 0x02 0xfe 0x1d 0x04 0x04 0x02 0x05 + +# GFX9: v_min_u32_sdwa v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x90,0x1d,0xc8,0x05,0x01,0x06] +0xf9 0x02 0x90 0x1d 0xc8 0x05 0x01 0x06 + +# GFX9: v_min_u32_sdwa v1, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x1c,0x01,0x06,0x00,0x06] +0xf9 0x02 0x02 0x1c 0x01 0x06 0x00 0x06 + +# GFX9: v_cvt_u32_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x0e,0x00,0x7e,0x00,0x16,0x06,0x06] +0xf9 0x0e 0x00 0x7e 0x00 0x16 0x06 0x06 + +# GFX9: v_fract_f32_sdwa v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x36,0x00,0x7e,0x00,0x26,0x06,0x06] +0xf9 0x36 0x00 0x7e 0x00 0x26 0x06 0x06 + +# GFX9: v_sin_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x52,0x00,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x52 0x00 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_mov_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x16,0x05,0x06] +0xf9 0x02 0x02 0x7e 0x00 0x16 0x05 0x06 + +# GFX9: v_trunc_f32_sdwa v1, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 ; encoding: [0xf9,0x38,0x02,0x7e,0x00,0x36,0x05,0x06] +0xf9 0x38 0x02 0x7e 0x00 0x36 0x05 0x06 + +# GFX9: v_mov_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x16,0x06,0x06] +0xf9 0x02 0x02 0x7e 0x00 0x16 0x06 0x06 + +# GFX9: v_nop ; encoding: [0xf9,0x00,0x00,0x7e,0x00,0x16,0x06,0x06] +0xf9 0x00 0x00 0x7e 0x00 0x16 0x06 0x06 + +# GFX9: v_cvt_u32_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x0e,0x00,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x0e 0x00 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_fract_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x36,0x00,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x36 0x00 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_sin_f32_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x52,0x00,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x52 0x00 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_mov_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x02 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_f32_i32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x0a,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x0a 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_f32_u32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x0c,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x0c 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x10,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x10 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_f16_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x14,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x14 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x16,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x16 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_rpi_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x18,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x18 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_flr_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x1a,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x1a 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_off_f32_i4_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x1c,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x1c 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_f32_ubyte0_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x22,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x22 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_f32_ubyte1_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x24,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x24 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_f32_ubyte2_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x26,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x26 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_f32_ubyte3_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x28,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x28 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_trunc_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x38,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x38 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_ceil_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x3a,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x3a 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_rndne_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x3c,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x3c 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_floor_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x3e,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x3e 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_exp_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x40,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x40 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_log_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x42,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x42 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_rcp_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x44,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x44 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_rcp_iflag_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x46,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x46 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_rsq_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x48,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x48 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_sqrt_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x4e,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x4e 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cos_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x54,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x54 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_not_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x56,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x56 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_bfrev_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x58,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x58 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_ffbh_u32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x5a,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x5a 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_ffbl_b32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x5c,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x5c 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_ffbh_i32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x5e,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x5e 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_frexp_exp_i32_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x66,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x66 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_frexp_mant_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x68,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x68 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_log_legacy_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x98,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x98 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_exp_legacy_f32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x96,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x96 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_f16_u16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x72,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x72 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_f16_i16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x74,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x74 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_u16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x76,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x76 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cvt_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x78,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x78 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_rcp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7a,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x7a 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_sqrt_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7c,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x7c 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_rsq_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x7e,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x7e 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x80,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x80 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_exp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x82,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x82 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_frexp_mant_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x84,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x84 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_frexp_exp_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x86,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x86 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_floor_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x88,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x88 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_ceil_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8a,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x8a 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_trunc_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8c,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x8c 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x8e,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x8e 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_fract_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x90,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x90 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_sin_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x92,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x92 0x02 0x7e 0x00 0x06 0x05 0x06 + +# GFX9: v_cos_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x94,0x02,0x7e,0x00,0x06,0x05,0x06] +0xf9 0x94 0x02 0x7e 0x00 0x06 0x05 0x06 + +#-----------------------------------------------------------------------------# +# VOP2 +#-----------------------------------------------------------------------------# + +# GFX9: v_add_f32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x05,0x06] +0xf9 0x00 0x00 0x02 0x00 0x06 0x05 0x06 + +# GFX9: v_min_f32_sdwa v0, v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x14,0x00,0x36,0x06,0x02] +0xf9 0x00 0x00 0x14 0x00 0x36 0x06 0x02 + +# GFX9: v_and_b32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x26,0x00,0x06,0x06,0x02] +0xf9 0x00 0x00 0x26 0x00 0x06 0x06 0x02 + +# GFX9: v_mul_i32_i24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x06,0x02,0x0c,0x02,0x16,0x06,0x06] +0xf9 0x06 0x02 0x0c 0x02 0x16 0x06 0x06 + +# GFX9: v_add_f32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x05,0x02] +0xf9 0x00 0x00 0x02 0x00 0x06 0x05 0x02 + +# GFX9: v_min_f32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x14,0x00,0x06,0x05,0x02] +0xf9 0x00 0x00 0x14 0x00 0x06 0x05 0x02 + +# GFX9: v_and_b32_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x26,0x00,0x06,0x05,0x02] +0xf9 0x00 0x00 0x26 0x00 0x06 0x05 0x02 + +# GFX9: v_mul_i32_i24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x0c,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x0c 0x02 0x06 0x05 0x02 + +# GFX9: v_sub_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x04,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x04 0x02 0x06 0x05 0x02 + +# GFX9: v_subrev_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x06,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x06 0x02 0x06 0x05 0x02 + +# GFX9: v_mul_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x0a,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x0a 0x02 0x06 0x05 0x02 + +# GFX9: v_mul_hi_i32_i24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x0e,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x0e 0x02 0x06 0x05 0x02 + +# GFX9: v_mul_u32_u24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x10,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x10 0x02 0x06 0x05 0x02 + +# GFX9: v_mul_hi_u32_u24_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x12,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x12 0x02 0x06 0x05 0x02 + +# GFX9: v_max_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x16,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x16 0x02 0x06 0x05 0x02 + +# GFX9: v_min_i32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x18,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x18 0x02 0x06 0x05 0x02 + +# GFX9: v_max_i32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x1a,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x1a 0x02 0x06 0x05 0x02 + +# GFX9: v_min_u32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x1c,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x1c 0x02 0x06 0x05 0x02 + +# GFX9: v_max_u32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x1e,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x1e 0x02 0x06 0x05 0x02 + +# GFX9: v_lshrrev_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x20,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x20 0x02 0x06 0x05 0x02 + +# GFX9: v_ashrrev_i32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x22,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x22 0x02 0x06 0x05 0x02 + +# GFX9: v_lshlrev_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x24,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x24 0x02 0x06 0x05 0x02 + +# GFX9: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x28,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x28 0x02 0x06 0x05 0x02 + +# GFX9: v_xor_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x2a,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x2a 0x02 0x06 0x05 0x02 + +# GFX9: v_add_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3e,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x3e 0x02 0x06 0x05 0x02 + +# GFX9: v_sub_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x40,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x40 0x02 0x06 0x05 0x02 + +# GFX9: v_subrev_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x42,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x42 0x02 0x06 0x05 0x02 + +# GFX9: v_mul_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x44,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x44 0x02 0x06 0x05 0x02 + +# GFX9: v_add_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x4c,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x4c 0x02 0x06 0x05 0x02 + +# GFX9: v_sub_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x4e,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x4e 0x02 0x06 0x05 0x02 + +# GFX9: v_subrev_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x50,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x50 0x02 0x06 0x05 0x02 + +# GFX9: v_mul_lo_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x52,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x52 0x02 0x06 0x05 0x02 + +# GFX9: v_lshlrev_b16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x54,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x54 0x02 0x06 0x05 0x02 + +# GFX9: v_lshrrev_b16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x56,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x56 0x02 0x06 0x05 0x02 + +# GFX9: v_ashrrev_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x58,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x58 0x02 0x06 0x05 0x02 + +# GFX9: v_max_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5a,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x5a 0x02 0x06 0x05 0x02 + +# GFX9: v_min_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5c,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x5c 0x02 0x06 0x05 0x02 + +# GFX9: v_max_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x5e,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x5e 0x02 0x06 0x05 0x02 + +# GFX9: v_max_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x60,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x60 0x02 0x06 0x05 0x02 + +# GFX9: v_min_u16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x62,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x62 0x02 0x06 0x05 0x02 + +# GFX9: v_min_i16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x64,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x64 0x02 0x06 0x05 0x02 + +# GFX9: v_ldexp_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x66,0x02,0x06,0x05,0x02] +0xf9 0x06 0x02 0x66 0x02 0x06 0x05 0x02 + +#-----------------------------------------------------------------------------# +# VOPC +#-----------------------------------------------------------------------------# + +# GFX9: v_cmp_eq_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x00,0x02,0x04] +0xf9 0x04 0x84 0x7c 0x01 0x00 0x02 0x04 + +# GFX9: v_cmp_nle_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7c,0x01,0x00,0x02,0x04] +0xf9 0x04 0x98 0x7c 0x01 0x00 0x02 0x04 + +# GFX9: v_cmpx_gt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa8,0x7c,0x01,0x00,0x02,0x04] +0xf9 0x04 0xa8 0x7c 0x01 0x00 0x02 0x04 + +# GFX9: v_cmpx_nlt_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xbc,0x7c,0x01,0x00,0x02,0x04] +0xf9 0x04 0xbc 0x7c 0x01 0x00 0x02 0x04 + +# GFX9: v_cmp_lt_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x82,0x7d,0x01,0x00,0x02,0x04] +0xf9 0x04 0x82 0x7d 0x01 0x00 0x02 0x04 + +# GFX9: v_cmp_t_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x8e,0x7d,0x01,0x00,0x02,0x04] +0xf9 0x04 0x8e 0x7d 0x01 0x00 0x02 0x04 + +# GFX9: v_cmpx_eq_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa4,0x7d,0x01,0x00,0x02,0x04] +0xf9 0x04 0xa4 0x7d 0x01 0x00 0x02 0x04 + +# GFX9: v_cmpx_ne_i32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xaa,0x7d,0x01,0x00,0x02,0x04] +0xf9 0x04 0xaa 0x7d 0x01 0x00 0x02 0x04 + +# GFX9: v_cmp_f_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x90,0x7d,0x01,0x00,0x02,0x04] +0xf9 0x04 0x90 0x7d 0x01 0x00 0x02 0x04 + +# GFX9: v_cmp_gt_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7d,0x01,0x00,0x02,0x04] +0xf9 0x04 0x98 0x7d 0x01 0x00 0x02 0x04 + +# GFX9: v_cmpx_le_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xb6,0x7d,0x01,0x00,0x02,0x04] +0xf9 0x04 0xb6 0x7d 0x01 0x00 0x02 0x04 + +# GFX9: v_cmpx_ne_u32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xba,0x7d,0x01,0x00,0x02,0x04] +0xf9 0x04 0xba 0x7d 0x01 0x00 0x02 0x04 + +# GFX9: v_cmp_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x02,0x04] +0xf9 0x04 0x20 0x7c 0x01 0x00 0x02 0x04 + +# GFX9: v_cmpx_class_f32_sdwa vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x22,0x7c,0x01,0x00,0x02,0x04] +0xf9 0x04 0x22 0x7c 0x01 0x00 0x02 0x04 + +#-----------------------------------------------------------------------------# +# Modifiers +#-----------------------------------------------------------------------------# + +# GFX9: v_fract_f32_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x36,0x00,0x7e,0x00,0x06,0x25,0x06] +0xf9 0x36 0x00 0x7e 0x00 0x06 0x25 0x06 + +# GFX9: v_sin_f32_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; encoding: [0xf9,0x52,0x00,0x7e,0x00,0x06,0x35,0x06] +0xf9 0x52 0x00 0x7e 0x00 0x06 0x35 0x06 + +# GFX9: v_add_f32_sdwa v0, -|v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x35,0x12] +0xf9 0x00 0x00 0x02 0x00 0x06 0x35 0x12 + +# GFX9: v_min_f32_sdwa v0, |v0|, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x14,0x00,0x06,0x25,0x12] +0xf9 0x00 0x00 0x14 0x00 0x06 0x25 0x12 + +# GFX9: v_mov_b32_sdwa v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x00,0x16,0x0e,0x06] +0xf9 0x02 0x02 0x7e 0x00 0x16 0x0e 0x06 + +# GFX9: v_and_b32_sdwa v0, sext(v0), sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x26,0x00,0x06,0x0e,0x0a] +0xf9 0x00 0x00 0x26 0x00 0x06 0x0e 0x0a + +# GFX9: v_cmp_class_f32_sdwa vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x00,0x12,0x0c] +0xf9 0x04 0x20 0x7c 0x01 0x00 0x12 0x0c + +#===------------------------------------------------------------------------===# +# Scalar registers are allowed +#===------------------------------------------------------------------------===# + +# GFX9: v_mov_b32_sdwa v1, s2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x02,0x10,0x86,0x06] +0xf9 0x02 0x02 0x7e 0x02 0x10 0x86 0x06 + +# GFX9: v_mov_b32_sdwa v1, exec_lo dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x02,0x02,0x7e,0x7e,0x10,0x86,0x06] +0xf9 0x02 0x02 0x7e 0x7e 0x10 0x86 0x06 + +# GFX9: v_add_f32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x06,0x85,0x02] +0xf9 0x00 0x00 0x02 0x00 0x06 0x85 0x02 + +# GFX9: v_add_f32_sdwa v0, v0, s22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x2c,0x00,0x02,0x00,0x06,0x05,0x82] +0xf9 0x2c 0x00 0x02 0x00 0x06 0x05 0x82 + +# GFX9: v_cmp_eq_f32_sdwa vcc, s1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x00,0x85,0x02] +0xf9 0x04 0x84 0x7c 0x01 0x00 0x85 0x02 + +# GFX9: v_cmp_eq_f32_sdwa vcc, v1, s22 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x2c,0x84,0x7c,0x01,0x00,0x05,0x82] +0xf9 0x2c 0x84 0x7c 0x01 0x00 0x05 0x82 + +#===------------------------------------------------------------------------===# +# VOPC with arbitrary SGPR destination +#===------------------------------------------------------------------------===# + +# GFX9: v_cmp_eq_f32_sdwa s[2:3], v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x82,0x05,0x02] +0xf9 0x04 0x84 0x7c 0x01 0x82 0x05 0x02 + +# GFX9: v_cmp_eq_f32_sdwa exec, v1, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0xfe,0x05,0x02] +0xf9 0x04 0x84 0x7c 0x01 0xfe 0x05 0x02 + +# GFX9: v_cmp_eq_f32_sdwa exec, s2, v2 src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x04,0x84,0x7c,0x02,0xfe,0x85,0x02] +0xf9 0x04 0x84 0x7c 0x02 0xfe 0x85 0x02 + +#===------------------------------------------------------------------------===# +# OMod output modifier allowed +#===------------------------------------------------------------------------===# + +# GFX9: v_trunc_f32_sdwa v1, v2 mul:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x38,0x02,0x7e,0x02,0x50,0x06,0x06] +0xf9 0x38 0x02 0x7e 0x02 0x50 0x06 0x06 + +# GFX9: v_trunc_f32_sdwa v1, v2 clamp div:2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x38,0x02,0x7e,0x02,0xf0,0x06,0x06] +0xf9 0x38 0x02 0x7e 0x02 0xf0 0x06 0x06 + +# GFX9: v_add_f32_sdwa v0, v0, v0 mul:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0x46,0x05,0x02] +0xf9 0x00 0x00 0x02 0x00 0x46 0x05 0x02 + +# GFX9: v_add_f32_sdwa v0, v0, v0 clamp div:2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x00,0x00,0x02,0x00,0xe6,0x05,0x02] +0xf9 0x00 0x00 0x02 0x00 0xe6 0x05 0x02