Index: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td @@ -113,23 +113,24 @@ class VOP3Inst : VOP3_Pseudo.ret, - !if(P.HasIntClamp, - getVOP3ClampPat.ret, - getVOP3Pat.ret)), - VOP3Only> { - let IntClamp = P.HasIntClamp; -} + !if(P.HasOpSel, + !if(P.HasModifiers, + getVOP3OpSelModPat.ret, + getVOP3OpSelPat.ret), + !if(P.HasModifiers, + getVOP3ModPat.ret, + !if(P.HasIntClamp, + getVOP3ClampPat.ret, + getVOP3Pat.ret))), + VOP3Only, 0, P.HasOpSel> { -class VOP3OpSelInst : - VOP3_Pseudo.ret, - getVOP3OpSelModPat.ret, - getVOP3OpSelPat.ret), - 1, 0, 1> { - - let AsmMatchConverter = "cvtVOP3OpSel"; + let IntClamp = P.HasIntClamp; + let AsmMatchConverter = + !if(P.HasOpSel, + "cvtVOP3OpSel", + !if(!or(P.HasModifiers, !or(P.HasOMod, P.HasIntClamp)), + "cvtVOP3", + "")); } // Special case for v_div_fmas_{f32|f64}, since it seems to be the @@ -152,23 +153,33 @@ (i1 VCC)))]; } -class VOP3_Profile : VOPProfile { - // FIXME: Hack to stop printing _e64 - let Outs64 = (outs DstRC.RegClass:$vdst); - let Asm64 = " " # P.Asm64; +class VOP3Features { + bit HasClamp = Clamp; + bit HasOpSel = OpSel; } -class VOP3Clamp_Profile : VOPProfile { - let HasClamp = 1; +def VOP3_REGULAR : VOP3Features<0, 0>; +def VOP3_CLAMP : VOP3Features<1, 0>; +def VOP3_OPSEL : VOP3Features<1, 1>; + +class VOP3_Profile : VOPProfile { + + let HasClamp = !if(Features.HasClamp, 1, P.HasClamp); + let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); // FIXME: Hack to stop printing _e64 let Outs64 = (outs DstRC.RegClass:$vdst); - let Asm64 = " " # getAsm64.ret; -} - -class VOP3OpSel_Profile : VOP3_Profile

{ - let HasClamp = 1; - let HasOpSel = 1; + let Asm64 = + " " # !if(Features.HasOpSel, + getAsmVOP3OpSel.ret, + !if(Features.HasClamp, + getAsm64.ret, + P.Asm64)); } class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { @@ -269,8 +280,8 @@ def V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile>; def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile, fmad>; -def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3Clamp_Profile>; -def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3Clamp_Profile>; +def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile>; +def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile>; def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile, fma>; def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile, fma>; def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile, int_amdgcn_lerp>; @@ -330,10 +341,10 @@ def V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile, AMDGPUfmed3>; def V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile, AMDGPUsmed3>; def V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile, AMDGPUumed3>; -def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3Clamp_Profile>; -def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3Clamp_Profile>; -def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3Clamp_Profile>; -def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3Clamp_Profile>; +def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile>; +def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile>; +def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile>; +def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile>; def V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile, int_amdgcn_cvt_pk_u8_f32>; def V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile, AMDGPUdiv_fixup>; @@ -355,10 +366,10 @@ let AsmMatchConverter = ""; } -def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3Clamp_Profile>; +def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile>; let Constraints = "@earlyclobber $vdst" in { -def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3Clamp_Profile>; +def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile>; } // End Constraints = "@earlyclobber $vdst" def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile, AMDGPUtrig_preop> { @@ -383,8 +394,8 @@ let SubtargetPredicate = isCIVI in { let Constraints = "@earlyclobber $vdst" in { -def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3Clamp_Profile>; -def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3Clamp_Profile>; +def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile>; +def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile>; } // End Constraints = "@earlyclobber $vdst" let isCommutable = 1 in { @@ -401,23 +412,23 @@ def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup>; } let SubtargetPredicate = isGFX9 in { -def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9", VOP3_Profile>; +def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9", VOP3_Profile>; } let isCommutable = 1 in { let F16_ZFILL = 1 in { def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, fmad>; -def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3Clamp_Profile>; -def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3Clamp_Profile>; +def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; +def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile, fma>; } let SubtargetPredicate = isGFX9 in { -def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile>; -def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3Clamp_Profile>; -def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3Clamp_Profile>; -def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile>; +def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile>; +def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile>; +def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile>; +def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile>; } // End SubtargetPredicate = isGFX9 def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>>; @@ -463,7 +474,7 @@ } // End Predicates = [Has16BitInsts] let SubtargetPredicate = isGFX9 in { -def V_PACK_B32_F16 : VOP3OpSelInst <"v_pack_b32_f16", VOP3OpSel_Profile>; +def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile>; def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile>; def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile>; def V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile>; @@ -473,26 +484,26 @@ def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile>; -def V_MED3_F16 : VOP3OpSelInst <"v_med3_f16", VOP3OpSel_Profile, AMDGPUfmed3>; -def V_MED3_I16 : VOP3OpSelInst <"v_med3_i16", VOP3OpSel_Profile, AMDGPUsmed3>; -def V_MED3_U16 : VOP3OpSelInst <"v_med3_u16", VOP3OpSel_Profile, AMDGPUumed3>; - -def V_MIN3_F16 : VOP3OpSelInst <"v_min3_f16", VOP3OpSel_Profile, AMDGPUfmin3>; -def V_MIN3_I16 : VOP3OpSelInst <"v_min3_i16", VOP3OpSel_Profile, AMDGPUsmin3>; -def V_MIN3_U16 : VOP3OpSelInst <"v_min3_u16", VOP3OpSel_Profile, AMDGPUumin3>; - -def V_MAX3_F16 : VOP3OpSelInst <"v_max3_f16", VOP3OpSel_Profile, AMDGPUfmax3>; -def V_MAX3_I16 : VOP3OpSelInst <"v_max3_i16", VOP3OpSel_Profile, AMDGPUsmax3>; -def V_MAX3_U16 : VOP3OpSelInst <"v_max3_u16", VOP3OpSel_Profile, AMDGPUumax3>; +def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile, AMDGPUfmed3>; +def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile, AMDGPUsmed3>; +def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile, AMDGPUumed3>; + +def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile, AMDGPUfmin3>; +def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile, AMDGPUsmin3>; +def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile, AMDGPUumin3>; + +def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile, AMDGPUfmax3>; +def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile, AMDGPUsmax3>; +def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile, AMDGPUumax3>; -def V_ADD_I16 : VOP3OpSelInst <"v_add_i16", VOP3OpSel_Profile>; -def V_SUB_I16 : VOP3OpSelInst <"v_sub_i16", VOP3OpSel_Profile>; +def V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile>; +def V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile>; -def V_MAD_U32_U16 : VOP3OpSelInst <"v_mad_u32_u16", VOP3OpSel_Profile>; -def V_MAD_I32_I16 : VOP3OpSelInst <"v_mad_i32_i16", VOP3OpSel_Profile>; +def V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile>; +def V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile>; -def V_CVT_PKNORM_I16_F16 : VOP3OpSelInst <"v_cvt_pknorm_i16_f16", VOP3OpSel_Profile>; -def V_CVT_PKNORM_U16_F16 : VOP3OpSelInst <"v_cvt_pknorm_u16_f16", VOP3OpSel_Profile>; +def V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile>; +def V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile>; } // End SubtargetPredicate = isGFX9 //===----------------------------------------------------------------------===// @@ -682,6 +693,14 @@ } } +multiclass VOP3OpSel_F16_Real_gfx9 op, string AsmName> { + def _vi : VOP3_Real(NAME), SIEncodingFamily.GFX9>, + VOP3OpSel_gfx9 (NAME).Pfl> { + VOP3_Pseudo ps = !cast(NAME); + let AsmString = AsmName # ps.AsmOperands; + } +} + } // End AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>; @@ -742,11 +761,11 @@ defm V_FMA_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ee, "V_FMA_F16", "v_fma_legacy_f16">; defm V_DIV_FIXUP_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ef, "V_DIV_FIXUP_F16", "v_div_fixup_legacy_f16">; -defm V_MAD_F16_gfx9 : VOP3_F16_Real_gfx9 <0x203, "V_MAD_F16_gfx9", "v_mad_f16">; -defm V_MAD_U16_gfx9 : VOP3_F16_Real_gfx9 <0x204, "V_MAD_U16_gfx9", "v_mad_u16">; -defm V_MAD_I16_gfx9 : VOP3_F16_Real_gfx9 <0x205, "V_MAD_I16_gfx9", "v_mad_i16">; -defm V_FMA_F16_gfx9 : VOP3_F16_Real_gfx9 <0x206, "V_FMA_F16_gfx9", "v_fma_f16">; -defm V_DIV_FIXUP_F16_gfx9 : VOP3_F16_Real_gfx9 <0x207, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">; +defm V_MAD_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">; +defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">; +defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">; +defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">; +defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">; defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_vi <0x270>; defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_vi <0x271>; Index: llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s +++ llvm/trunk/test/MC/AMDGPU/vop3-gfx9.s @@ -203,6 +203,15 @@ v_fma_f16 v5, v1, v2, v3 clamp // GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] +v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] +// GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x06,0xd2,0x01,0x05,0x0e,0x04] + +v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] +// GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x06,0xd2,0x01,0x05,0x0e,0x04] + +v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] +// GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x06,0xd2,0x01,0x05,0x0e,0x04] + v_fma_legacy_f16_e64 v5, v1, v2, v3 // GFX9: v_fma_legacy_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04] @@ -233,6 +242,15 @@ v_div_fixup_f16 v5, v1, v2, v3 clamp // GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] +v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] +// GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x07,0xd2,0x01,0x05,0x0e,0x04] + +v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] +// GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x07,0xd2,0x01,0x05,0x0e,0x04] + +v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] +// GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x07,0xd2,0x01,0x05,0x0e,0x04] + v_div_fixup_legacy_f16_e64 v5, 0.5, v2, v3 // GFX9: v_div_fixup_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xef,0xd1,0xf0,0x04,0x0e,0x04] @@ -266,6 +284,24 @@ v_mad_f16 v5, v1, v2, |v3| // GFX9: v_mad_f16 v5, v1, v2, |v3| ; encoding: [0x05,0x04,0x03,0xd2,0x01,0x05,0x0e,0x04] +v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,0] +// GFX9: v_mad_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] +// GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] +// GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] +// GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] +// GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x03,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] +// GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x03,0xd2,0x01,0x05,0x0e,0x04] + v_mad_f16 v5, v1, v2, v3 clamp // GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] @@ -281,6 +317,12 @@ v_mad_i16 v5, v1, v2, v3 clamp // GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04] +v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] +// GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x05,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] +// GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x05,0xd2,0x01,0x05,0x0e,0x04] + v_mad_legacy_f16_e64 v5, 0.5, v2, v3 // GFX9: v_mad_legacy_f16 v5, 0.5, v2, v3 ; encoding: [0x05,0x00,0xea,0xd1,0xf0,0x04,0x0e,0x04] @@ -334,3 +376,12 @@ v_mad_u16 v5, v1, v2, v3 clamp // GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] +// GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x04,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] +// GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x04,0xd2,0x01,0x05,0x0e,0x04] + +v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] +// GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x04,0xd2,0x01,0x05,0x0e,0x04] Index: llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt @@ -9,6 +9,12 @@ # GFX9: v_fma_f16 v5, v1, |v2|, v3 ; encoding: [0x05,0x02,0x06,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x02,0x06,0xd2,0x01,0x05,0x0e,0x04 +# GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x06,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x08,0x06,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x06,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x78,0x06,0xd2,0x01,0x05,0x0e,0x04 + # GFX9: v_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04 @@ -39,6 +45,15 @@ # GFX9: v_div_fixup_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x07,0x07,0xd2,0x01,0x05,0x0e,0x04 +# GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x07,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x08,0x07,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x07,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x20,0x07,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x07,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x40,0x07,0xd2,0x01,0x05,0x0e,0x04 + # GFX9: v_div_fixup_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04 @@ -75,6 +90,21 @@ # GFX9: v_mad_f16 v5, |v1|, |v2|, |v3| ; encoding: [0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x07,0x03,0xd2,0x01,0x05,0x0e,0x04 +# GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x03,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x08,0x03,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x03,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x10,0x03,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x03,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x20,0x03,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x03,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x40,0x03,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x03,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x78,0x03,0xd2,0x01,0x05,0x0e,0x04 + # GFX9: v_mad_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04 @@ -87,6 +117,12 @@ # GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03] 0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03 +# GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x05,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x40,0x05,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x05,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x78,0x05,0xd2,0x01,0x05,0x0e,0x04 + # GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04 @@ -141,5 +177,14 @@ # GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03] 0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03 +# GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x04,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x08,0x04,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x04,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x40,0x04,0xd2,0x01,0x05,0x0e,0x04 + +# GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x04,0xd2,0x01,0x05,0x0e,0x04] +0x05,0x78,0x04,0xd2,0x01,0x05,0x0e,0x04 + # GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04] 0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04