Index: lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- lib/Target/AMDGPU/VOP3Instructions.td +++ lib/Target/AMDGPU/VOP3Instructions.td @@ -376,7 +376,7 @@ def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup>; } let SubtargetPredicate = isGFX9 in { -def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9", VOP3_Profile>; +def V_DIV_FIXUP_F16_gfx9 : VOP3OpSelInst <"v_div_fixup_f16_gfx9", VOP3OpSel_Profile>; } let isCommutable = 1 in { @@ -389,10 +389,10 @@ } let SubtargetPredicate = isGFX9 in { -def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile>; -def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile>; -def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile>; -def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile>; +def V_MAD_F16_gfx9 : VOP3OpSelInst <"v_mad_f16_gfx9", VOP3OpSel_Profile>; +def V_MAD_U16_gfx9 : VOP3OpSelInst <"v_mad_u16_gfx9", VOP3OpSel_Profile>; +def V_MAD_I16_gfx9 : VOP3OpSelInst <"v_mad_i16_gfx9", VOP3OpSel_Profile>; +def V_FMA_F16_gfx9 : VOP3OpSelInst <"v_fma_f16_gfx9", VOP3OpSel_Profile>; } // End SubtargetPredicate = isGFX9 def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>>; @@ -618,6 +618,14 @@ } } +multiclass VOP3OpSel_F16_Real_gfx9 op, string AsmName> { + def _vi : VOP3_Real(NAME), SIEncodingFamily.GFX9>, + VOP3OpSel_gfx9 (NAME).Pfl> { + VOP3_Pseudo ps = !cast(NAME); + let AsmString = AsmName # ps.AsmOperands; + } +} + } // End AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>; @@ -678,11 +686,11 @@ defm V_FMA_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ee, "V_FMA_F16", "v_fma_legacy_f16">; defm V_DIV_FIXUP_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ef, "V_DIV_FIXUP_F16", "v_div_fixup_legacy_f16">; -defm V_MAD_F16_gfx9 : VOP3_F16_Real_gfx9 <0x203, "V_MAD_F16_gfx9", "v_mad_f16">; -defm V_MAD_U16_gfx9 : VOP3_F16_Real_gfx9 <0x204, "V_MAD_U16_gfx9", "v_mad_u16">; -defm V_MAD_I16_gfx9 : VOP3_F16_Real_gfx9 <0x205, "V_MAD_I16_gfx9", "v_mad_i16">; -defm V_FMA_F16_gfx9 : VOP3_F16_Real_gfx9 <0x206, "V_FMA_F16_gfx9", "v_fma_f16">; -defm V_DIV_FIXUP_F16_gfx9 : VOP3_F16_Real_gfx9 <0x207, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">; +defm V_MAD_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">; +defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">; +defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">; +defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">; +defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">; defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_vi <0x270>; defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_vi <0x271>; Index: test/MC/AMDGPU/vop3-gfx9.s =================================================================== --- test/MC/AMDGPU/vop3-gfx9.s +++ test/MC/AMDGPU/vop3-gfx9.s @@ -203,6 +203,15 @@ v_fma_f16 v5, v1, v2, v3 clamp // GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] +v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] +// GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x06,0xd2,0x01,0x05,0x0e,0x04] + +v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] +// GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x06,0xd2,0x01,0x05,0x0e,0x04] + +v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] +// GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x06,0xd2,0x01,0x05,0x0e,0x04] + v_fma_legacy_f16_e64 v5, v1, v2, v3 // GFX9: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] @@ -230,6 +239,15 @@ v_div_fixup_f16 v5, |v1|, v2, v3 // GFX9: v_div_fixup_f16 v5, |v1|, v2, v3 ; encoding: [0x05,0x01,0x07,0xd2,0x01,0x05,0x0e,0x04] +v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] +// GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x07,0xd2,0x01,0x05,0x0e,0x04] + +v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] +// GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x07,0xd2,0x01,0x05,0x0e,0x04] + +v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] +// GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x07,0xd2,0x01,0x05,0x0e,0x04] + v_div_fixup_f16 v5, v1, v2, v3 clamp // GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] @@ -266,6 +284,24 @@ v_mad_f16 v5, v1, v2, |v3| // GFX9: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x03,0xd2,0x01,0x05,0x0e,0x04] +v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,0] +// GFX9: v_mad_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] +// GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] +// GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] +// GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] +// GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] +// GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x03,0xd2,0x01,0x05,0x0e,0x04] + v_mad_f16 v5, v1, v2, v3 clamp // GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] @@ -278,6 +314,12 @@ v_mad_i16 v5, v1, v2, -4.0 // GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03] +v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] +// GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x05,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] +// GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x05,0xd2,0x01,0x05,0x0e,0x04] + v_mad_legacy_f16_e64 v5, 0.5, v2, v3 // GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] @@ -322,3 +364,12 @@ v_mad_u16 v5, v1, v2, -4.0 // GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03] + +v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] +// GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x04,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] +// GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x04,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] +// GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x04,0xd2,0x01,0x05,0x0e,0x04] Index: test/MC/Disassembler/AMDGPU/vop3_gfx9.txt =================================================================== --- test/MC/Disassembler/AMDGPU/vop3_gfx9.txt +++ test/MC/Disassembler/AMDGPU/vop3_gfx9.txt @@ -9,6 +9,12 @@ # GFX9: v_fma_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0x06,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x02,0x06,0xd2,0x01,0x05,0x0e,0x04 +# GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x06,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x08,0x06,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x06,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x78,0x06,0xd2,0x01,0x05,0x0e,0x04 + # GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04