Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -70,7 +70,8 @@ SI = 0, VI = 1, SDWA = 2, - SDWA9 = 3 + SDWA9 = 3, + GFX9 = 4 }; // Wrapper for Tablegen'd function. enum Subtarget is not defined in any @@ -110,6 +111,10 @@ Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9 : SIEncodingFamily::SDWA; + if ((get(Opcode).TSFlags & SIInstrFlags::F16_ZFILL) != 0 && + ST.getGeneration() >= AMDGPUSubtarget::GFX9) + Gen = SIEncodingFamily::GFX9; + int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); // -1 means that Opcode is already a native instruction. Index: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -208,6 +208,9 @@ if (Res) break; Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address); + if (Res) break; + + Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address); } while (false); if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi || Index: llvm/trunk/lib/Target/AMDGPU/SIDefines.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIDefines.h +++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h @@ -69,7 +69,8 @@ VOPAsmPrefer32Bit = UINT64_C(1) << 41, HasFPClamp = UINT64_C(1) << 42, VOP3_OPSEL = UINT64_C(1) << 43, - maybeAtomic = UINT64_C(1) << 44 + maybeAtomic = UINT64_C(1) << 44, + F16_ZFILL = UINT64_C(1) << 45 }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. 
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td @@ -90,6 +90,10 @@ // Is it possible for this instruction to be atomic? field bit maybeAtomic = 0; + // This bit indicates that this is a 16-bit instruction which zero-fills + // unused bits in dst. Note that new GFX9 opcodes preserve unused bits. + field bit F16_ZFILL = 0; + // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = SALU; let TSFlags{1} = VALU; @@ -137,6 +141,7 @@ let TSFlags{43} = VOP3_OPSEL; let TSFlags{44} = maybeAtomic; + let TSFlags{45} = F16_ZFILL; let SchedRW = [Write32Bit]; Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -11,6 +11,9 @@ def isCIOnly : Predicate<"Subtarget->getGeneration() ==" "SISubtarget::SEA_ISLANDS">, AssemblerPredicate <"FeatureSeaIslands">; +def isVIOnly : Predicate<"Subtarget->getGeneration() ==" + "SISubtarget::VOLCANIC_ISLANDS">, + AssemblerPredicate <"FeatureVolcanicIslands">; def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">; @@ -22,6 +25,7 @@ int VI = 1; int SDWA = 2; int SDWA9 = 3; + int GFX9 = 4; } //===----------------------------------------------------------------------===// @@ -1762,7 +1766,8 @@ let ValueCols = [[!cast<string>(SIEncodingFamily.SI)], [!cast<string>(SIEncodingFamily.VI)], [!cast<string>(SIEncodingFamily.SDWA)], - [!cast<string>(SIEncodingFamily.SDWA9)]]; + [!cast<string>(SIEncodingFamily.SDWA9)], + [!cast<string>(SIEncodingFamily.GFX9)]]; } // Get equivalent SOPK instruction. 
Index: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td @@ -372,21 +372,33 @@ let SubtargetPredicate = Has16BitInsts in { -def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup>; +let F16_ZFILL = 1 in { +def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup>; +} +let SubtargetPredicate = isGFX9 in { +def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9", VOP3_Profile>; +} let isCommutable = 1 in { -def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile, fma>; +let F16_ZFILL = 1 in { +def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, fmad>; +def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; +def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; +def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile, fma>; +} + +let SubtargetPredicate = isGFX9 in { +def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile>; +def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile>; +def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile>; +def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile>; +} // End SubtargetPredicate = isGFX9 def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>>; def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>; def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>; -def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, fmad>; - -def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; -def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; - } // End isCommutable = 1 } // End SubtargetPredicate = Has16BitInsts @@ -587,6 +599,27 @@ } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" +let AssemblerPredicates = [isVIOnly], DecoderNamespace = "VI" in { + +multiclass 
VOP3_F16_Real_vi<bits<10> op> { + def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>, + VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>; +} + +} // End AssemblerPredicates = [isVIOnly], DecoderNamespace = "VI" + +let AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" in { + +multiclass VOP3_F16_Real_gfx9<bits<10> op, string OpName, string AsmName> { + def _vi : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>, + VOP3e_vi <op, !cast<VOP3_Pseudo>(OpName).Pfl> { + VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName); + let AsmString = AsmName # ps.AsmOperands; + } +} + +} // End AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" + defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>; defm V_MAD_I64_I32 : VOP3be_Real_vi <0x1E9>; @@ -631,14 +664,25 @@ defm V_MQSAD_PK_U16_U8 : VOP3_Real_vi <0x1e6>; defm V_MQSAD_U32_U8 : VOP3_Real_vi <0x1e7>; -defm V_MAD_F16 : VOP3_Real_vi <0x1ea>; -defm V_MAD_U16 : VOP3_Real_vi <0x1eb>; -defm V_MAD_I16 : VOP3_Real_vi <0x1ec>; - defm V_PERM_B32 : VOP3_Real_vi <0x1ed>; -defm V_FMA_F16 : VOP3_Real_vi <0x1ee>; -defm V_DIV_FIXUP_F16 : VOP3_Real_vi <0x1ef>; +defm V_MAD_F16 : VOP3_F16_Real_vi <0x1ea>; +defm V_MAD_U16 : VOP3_F16_Real_vi <0x1eb>; +defm V_MAD_I16 : VOP3_F16_Real_vi <0x1ec>; +defm V_FMA_F16 : VOP3_F16_Real_vi <0x1ee>; +defm V_DIV_FIXUP_F16 : VOP3_F16_Real_vi <0x1ef>; + +defm V_MAD_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ea, "V_MAD_F16", "v_mad_legacy_f16">; +defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">; +defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">; +defm V_FMA_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ee, "V_FMA_F16", "v_fma_legacy_f16">; +defm V_DIV_FIXUP_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ef, "V_DIV_FIXUP_F16", "v_div_fixup_legacy_f16">; + +defm V_MAD_F16_gfx9 : VOP3_F16_Real_gfx9 <0x203, "V_MAD_F16_gfx9", "v_mad_f16">; +defm V_MAD_U16_gfx9 : VOP3_F16_Real_gfx9 <0x204, "V_MAD_U16_gfx9", "v_mad_u16">; +defm V_MAD_I16_gfx9 : VOP3_F16_Real_gfx9 <0x205, "V_MAD_I16_gfx9", "v_mad_i16">; +defm V_FMA_F16_gfx9 : VOP3_F16_Real_gfx9 <0x206, "V_FMA_F16_gfx9", 
"v_fma_f16">; +defm V_DIV_FIXUP_F16_gfx9 : VOP3_F16_Real_gfx9 <0x207, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">; defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_vi <0x270>; defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_vi <0x271>; Index: llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s +++ llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s @@ -190,3 +190,135 @@ v_sub_i16 v5, v1, v2 clamp // GFX9: v_sub_i16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x9f,0xd2,0x01,0x05,0x02,0x00] + +v_fma_f16_e64 v5, v1, v2, v3 +// GFX9: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x04] + +v_fma_f16 v5, v1, -v2, v3 +// GFX9: v_fma_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x44] + +v_fma_f16 v5, v1, v2, |v3| +// GFX9: v_fma_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x06,0xd2,0x01,0x05,0x0e,0x04] + +v_fma_f16 v5, v1, v2, v3 clamp +// GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] + +v_fma_legacy_f16_e64 v5, v1, v2, v3 +// GFX9: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] + +v_fma_legacy_f16 v5, -v1, v2, v3 +// GFX9: v_fma_legacy_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x24] + +v_fma_legacy_f16 v5, v1, |v2|, v3 +// GFX9: v_fma_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xee,0xd1,0x01,0x05,0x0e,0x04] + +v_fma_legacy_f16 v5, v1, v2, v3 clamp +// GFX9: v_fma_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] + +v_div_fixup_f16_e64 v5, 0.5, v2, v3 +// GFX9: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04] + +v_div_fixup_f16 v5, v1, 0.5, v3 +// GFX9: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04] + +v_div_fixup_f16 v5, v1, v2, 0.5 +// GFX9: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03] + 
+v_div_fixup_f16 v5, -v1, v2, v3 +// GFX9: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0x24] + +v_div_fixup_f16 v5, |v1|, v2, v3 +// GFX9: v_div_fixup_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0x07,0xd2,0x01,0x05,0x0e,0x04] + +v_div_fixup_f16 v5, v1, v2, v3 clamp +// GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] + +v_div_fixup_legacy_f16_e64 v5, 0.5, v2, v3 +// GFX9: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] + +v_div_fixup_legacy_f16 v5, v1, 0.5, v3 +// GFX9: v_div_fixup_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04] + +v_div_fixup_legacy_f16 v5, v1, v2, 0.5 +// GFX9: v_div_fixup_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03] + +v_div_fixup_legacy_f16 v5, -v1, v2, v3 +// GFX9: v_div_fixup_legacy_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x24] + +v_div_fixup_legacy_f16 v5, v1, |v2|, v3 +// GFX9: v_div_fixup_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xef,0xd1,0x01,0x05,0x0e,0x04] + +v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp +// GFX9: v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] + +v_mad_f16_e64 v5, 0.5, v2, v3 +// GFX9: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04] + +v_mad_f16 v5, v1, 0.5, v3 +// GFX9: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04] + +v_mad_f16 v5, v1, v2, 0.5 +// GFX9: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03] + +v_mad_f16 v5, v1, v2, -v3 +// GFX9: v_mad_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x84] + +v_mad_f16 v5, v1, v2, |v3| +// GFX9: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 clamp +// GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: 
[0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_i16_e64 v5, 0, v2, v3 +// GFX9: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04] + +v_mad_i16 v5, v1, -1, v3 +// GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04] + +v_mad_i16 v5, v1, v2, -4.0 +// GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03] + +v_mad_legacy_f16_e64 v5, 0.5, v2, v3 +// GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] + +v_mad_legacy_f16 v5, v1, 0.5, v3 +// GFX9: v_mad_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04] + +v_mad_legacy_f16 v5, v1, v2, 0.5 +// GFX9: v_mad_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03] + +v_mad_legacy_f16 v5, v1, -v2, v3 +// GFX9: v_mad_legacy_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x44] + +v_mad_legacy_f16 v5, v1, |v2|, v3 +// GFX9: v_mad_legacy_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xea,0xd1,0x01,0x05,0x0e,0x04] + +v_mad_legacy_f16 v5, v1, v2, v3 clamp +// GFX9: v_mad_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] + +v_mad_legacy_i16_e64 v5, 0, v2, v3 +// GFX9: v_mad_legacy_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04] + +v_mad_legacy_i16 v5, v1, -1, v3 +// GFX9: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04] + +v_mad_legacy_i16 v5, v1, v2, -4.0 +// GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03] + +v_mad_legacy_u16_e64 v5, 0, v2, v3 +// GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] + +v_mad_legacy_u16 v5, v1, -1, v3 +// GFX9: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04] + +v_mad_legacy_u16 v5, v1, v2, -4.0 +// GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: 
[0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03] + +v_mad_u16_e64 v5, 0, v2, v3 +// GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04] + +v_mad_u16 v5, v1, -1, v3 +// GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04] + +v_mad_u16 v5, v1, v2, -4.0 +// GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03] Index: llvm/trunk/test/MC/AMDGPU/vop3.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop3.s +++ llvm/trunk/test/MC/AMDGPU/vop3.s @@ -436,6 +436,88 @@ // SICI: v_cubeid_f32 v0, |-1|, |-1.0|, |1.0| ; encoding: [0x00,0x07,0x88,0xd2,0xc1,0xe6,0xc9,0x03] // VI: v_cubeid_f32 v0, |-1|, |-1.0|, |1.0| ; encoding: [0x00,0x07,0xc4,0xd1,0xc1,0xe6,0xc9,0x03] +///===---------------------------------------------------------------------===// +// VOP3 Legacy +///===---------------------------------------------------------------------===// + +v_fma_f16_e64 v5, v1, v2, v3 +// VI: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] + +v_fma_f16 v5, v1, v2, 0.5 +// VI: v_fma_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0xc2,0x03] + +v_fma_f16 v5, -v1, -v2, -v3 +// VI: v_fma_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0xe4] + +v_fma_f16 v5, |v1|, |v2|, |v3| +// VI: v_fma_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xee,0xd1,0x01,0x05,0x0e,0x04] + +v_fma_f16 v5, v1, v2, v3 clamp +// VI: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] + +v_div_fixup_f16_e64 v5, v1, v2, v3 +// VI: v_div_fixup_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x04] + +v_div_fixup_f16 v5, 0.5, v2, v3 +// VI: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] + +v_div_fixup_f16 v5, v1, 0.5, v3 +// VI: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04] + +v_div_fixup_f16 
v5, v1, v2, 0.5 +// VI: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03] + +v_div_fixup_f16 v5, v1, v2, -4.0 +// VI: v_div_fixup_f16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xde,0x03] + +v_div_fixup_f16 v5, -v1, v2, v3 +// VI: v_div_fixup_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0x24] + +v_div_fixup_f16 v5, v1, |v2|, v3 +// VI: v_div_fixup_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0xef,0xd1,0x01,0x05,0x0e,0x04] + +v_div_fixup_f16 v5, v1, v2, v3 clamp +// VI: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] + +v_mad_f16_e64 v5, v1, v2, v3 +// VI: v_mad_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, 0.5, v2, v3 +// VI: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] + +v_mad_f16 v5, v1, 0.5, v3 +// VI: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04] + +v_mad_f16 v5, v1, v2, 0.5 +// VI: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03] + +v_mad_f16 v5, v1, -v2, v3 +// VI: v_mad_f16 v5, v1, -v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0x44] + +v_mad_f16 v5, v1, v2, |v3| +// VI: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0xea,0xd1,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 clamp +// VI: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] + +v_mad_i16_e64 v5, -1, v2, v3 +// VI: v_mad_i16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0xc1,0x04,0x0e,0x04] + +v_mad_i16 v5, v1, -4.0, v3 +// VI: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04] + +v_mad_i16 v5, v1, v2, 0 +// VI: v_mad_i16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0x02,0x02] + +v_mad_u16_e64 v5, -1, v2, v3 +// VI: v_mad_u16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0xc1,0x04,0x0e,0x04] + +v_mad_u16 v5, v1, 0, v3 +// VI: v_mad_u16 v5, v1, 0, 
v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x01,0x0d,0x04] + +v_mad_u16 v5, v1, v2, -4.0 +// VI: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03] + // // v_interp* // Index: llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt @@ -0,0 +1,133 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx901 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX9 + +# GFX9: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_fma_f16 v5, -v1, v2, v3 ; encoding: [0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x24] +0x05,0x00,0x06,0xd2,0x01,0x05,0x0e,0x24 + +# GFX9: v_fma_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0x06,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x02,0x06,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04 + +# GFX9: v_fma_legacy_f16 v5, v1, v2, -v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x84] +0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x84 + +# GFX9: v_fma_legacy_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0xee,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x01,0xee,0xd1,0x01,0x05,0x0e,0x04 + +# GFX9: v_fma_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04 + +# GFX9: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x07,0xd2,0xf0,0x04,0x0e,0x04 + +# GFX9: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x07,0xd2,0x01,0xe1,0x0d,0x04 + +# GFX9: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: 
[0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03] +0x05,0x00,0x07,0xd2,0x01,0x05,0xc2,0x03 + +# GFX9: v_div_fixup_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0xe4 + +# GFX9: v_div_fixup_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] +0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04 + +# GFX9: v_div_fixup_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04] +0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04 + +# GFX9: v_div_fixup_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03] +0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03 + +# GFX9: v_div_fixup_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4] +0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4 + +# GFX9: v_div_fixup_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04 + +# GFX9: v_div_fixup_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04] +0x05,0x00,0x03,0xd2,0xf0,0x04,0x0e,0x04 + +# GFX9: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04] +0x05,0x00,0x03,0xd2,0x01,0xe1,0x0d,0x04 + +# GFX9: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03] +0x05,0x00,0x03,0xd2,0x01,0x05,0xc2,0x03 + +# GFX9: v_mad_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0xe4] +0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0xe4 + +# GFX9: v_mad_f16 v5, |v1|, |v2|, |v3| ; encoding: 
[0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04] +0x05,0x00,0x05,0xd2,0x80,0x04,0x0e,0x04 + +# GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04] +0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04 + +# GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03] +0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03 + +# GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] +0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04 + +# GFX9: v_mad_legacy_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04] +0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04 + +# GFX9: v_mad_legacy_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03] +0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03 + +# GFX9: v_mad_legacy_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4] +0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4 + +# GFX9: v_mad_legacy_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_legacy_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_legacy_i16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04] +0x05,0x00,0xec,0xd1,0x80,0x04,0x0e,0x04 + +# GFX9: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04] +0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04 + +# GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03] +0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03 + +# GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] 
+0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04 + +# GFX9: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04] +0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04 + +# GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03] +0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03 + +# GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04] +0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04 + +# GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04] +0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04 + +# GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03] +0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03 Index: llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_vi.txt @@ -240,6 +240,72 @@ # VI: v_ceil_f32_e64 v0, neg(-1.0) ; encoding: [0x00,0x00,0x5d,0xd1,0xf3,0x00,0x00,0x20] 0x00,0x00,0x5d,0xd1,0xf3,0x00,0x00,0x20 +# VI: v_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_fma_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0xf0,0x04,0x0e,0x04] +0x05,0x00,0xee,0xd1,0xf0,0x04,0x0e,0x04 + +# VI: v_fma_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0xee,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x04,0xee,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xee,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_div_fixup_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] +0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04 + +# VI: v_div_fixup_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04] +0x05,0x00,0xef,0xd1,0x01,0xe1,0x0d,0x04 + +# VI: v_div_fixup_f16 v5, v1, v2, 0.5 ; encoding: 
[0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03] +0x05,0x00,0xef,0xd1,0x01,0x05,0xc2,0x03 + +# VI: v_div_fixup_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4] +0x05,0x00,0xef,0xd1,0x01,0x05,0x0e,0xe4 + +# VI: v_div_fixup_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x07,0xef,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xef,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_mad_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] +0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04 + +# VI: v_mad_f16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04] +0x05,0x00,0xea,0xd1,0x01,0xe1,0x0d,0x04 + +# VI: v_mad_f16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03] +0x05,0x00,0xea,0xd1,0x01,0x05,0xc2,0x03 + +# VI: v_mad_f16 v5, -v1, -v2, -v3 ; encoding: [0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4] +0x05,0x00,0xea,0xd1,0x01,0x05,0x0e,0xe4 + +# VI: v_mad_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x07,0xea,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04] +0x05,0x80,0xea,0xd1,0x01,0x05,0x0e,0x04 + +# VI: v_mad_i16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0xf0,0x04,0x0e,0x04] +0x05,0x00,0xec,0xd1,0xf0,0x04,0x0e,0x04 + +# VI: v_mad_i16 v5, v1, 0.5, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xe1,0x0d,0x04] +0x05,0x00,0xec,0xd1,0x01,0xe1,0x0d,0x04 + +# VI: v_mad_i16 v5, v1, v2, 0.5 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xc2,0x03] +0x05,0x00,0xec,0xd1,0x01,0x05,0xc2,0x03 + +# VI: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] +0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04 + +# VI: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04] +0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04 + +# VI: 
v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03] +0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03 + # VI: v_interp_mov_f32_e64 v5, p10, attr0.x ; encoding: [0x05,0x00,0x72,0xd2,0x00,0x00,0x00,0x00] 0x05,0x00,0x72,0xd2,0x00,0x00,0x00,0x00