Index: llvm/trunk/lib/Target/AMDGPU/CIInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/CIInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/CIInstructions.td @@ -44,29 +44,4 @@ >; } // End SchedRW = [WriteQuarterRate32] -//===----------------------------------------------------------------------===// -// VOP3 Instructions -//===----------------------------------------------------------------------===// - -defm V_MQSAD_U16_U8 : VOP3Inst , "v_mqsad_u16_u8", - VOP_I32_I32_I32 ->; - -defm V_QSAD_PK_U16_U8 : VOP3Inst , "v_qsad_pk_u16_u8", - VOP_I64_I64_I32_I64, int_amdgcn_qsad_pk_u16_u8>; - -defm V_MQSAD_U32_U8 : VOP3Inst , "v_mqsad_u32_u8", - VOP_V4I32_I64_I32_V4I32, int_amdgcn_mqsad_u32_u8>; - -let isCommutable = 1 in { -defm V_MAD_U64_U32 : VOP3Inst , "v_mad_u64_u32", - VOP_I64_I32_I32_I64 ->; - -// XXX - Does this set VCC? -defm V_MAD_I64_I32 : VOP3Inst , "v_mad_i64_i32", - VOP_I64_I32_I32_I64 ->; -} // End isCommutable = 1 - } // End SubtargetPredicate = isCIVI Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -1072,21 +1072,6 @@ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); } -class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { - let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); - let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod"; -} - -def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile { - // FIXME: Hack to stop printing _e64 - let DstRC = RegisterOperand; -} - -def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile { - // FIXME: Hack to stop printing _e64 - let DstRC = RegisterOperand; -} - def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; @@ -1400,21 +1385,6 @@ let 
DisableDecoder = DisableVIDecoder; } -multiclass VOP3_m pattern, - string opName, int NumSrcArgs, bit HasMods = 1, bit VOP3Only = 0> { - - def "" : VOP3_Pseudo ; - - def _si : VOP3_Real_si , - VOP3DisableFields; - def _vi : VOP3_Real_vi , - VOP3DisableFields; -} - multiclass VOP3_1_m pattern, string opName, bit HasMods = 1> { @@ -1697,66 +1667,6 @@ } // End isCodeGenOnly = 0 } -multiclass VOP3_Helper pat, int NumSrcArgs, bit HasMods, - bit VOP3Only = 0> : VOP3_m < - op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods, VOP3Only ->; - -multiclass VOP3Inst : - VOP3_Helper < - op, opName, (outs P.DstRC.RegClass:$vdst), P.Ins64, P.Asm64, - !if(!eq(P.NumSrcArgs, 3), - !if(P.HasModifiers, - [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i1:$clamp, i32:$omod)), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), - (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, - P.Src2VT:$src2))]), - !if(!eq(P.NumSrcArgs, 2), - !if(P.HasModifiers, - [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i1:$clamp, i32:$omod)), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]) - /* P.NumSrcArgs == 1 */, - !if(P.HasModifiers, - [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i1:$clamp, i32:$omod))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]))), - P.NumSrcArgs, P.HasModifiers, VOP3Only ->; - -// Special case for v_div_fmas_{f32|f64}, since it seems to be the -// only VOP instruction that implicitly reads VCC. 
-multiclass VOP3_VCC_Inst : VOP3_Helper < - op, opName, - (outs P.DstRC.RegClass:$vdst), - P.Ins64, - "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", - [(set P.DstVT:$vdst, - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i1:$clamp, i32:$omod)), - (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), - (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)), - (i1 VCC)))], - 3, 1 ->; - -multiclass VOP3bInst pattern = [], bit VOP3Only = 0> : - VOP3b_2_3_m < - op, P.Outs64, P.Ins64, - opName#" "#P.Asm64, pattern, - opName, "", 1, 1, VOP3Only ->; - class Vop3ModPat : Pat< (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td @@ -509,256 +509,6 @@ >; //===----------------------------------------------------------------------===// -// VOP3 Instructions -//===----------------------------------------------------------------------===// - -let isCommutable = 1 in { -defm V_MAD_LEGACY_F32 : VOP3Inst , "v_mad_legacy_f32", - VOP_F32_F32_F32_F32 ->; - -defm V_MAD_F32 : VOP3Inst , "v_mad_f32", - VOP_F32_F32_F32_F32, fmad ->; - -defm V_MAD_I32_I24 : VOP3Inst , "v_mad_i32_i24", - VOP_I32_I32_I32_I32, AMDGPUmad_i24 ->; -defm V_MAD_U32_U24 : VOP3Inst , "v_mad_u32_u24", - VOP_I32_I32_I32_I32, AMDGPUmad_u24 ->; -} // End isCommutable = 1 - -defm V_CUBEID_F32 : VOP3Inst , "v_cubeid_f32", - VOP_F32_F32_F32_F32, int_amdgcn_cubeid ->; -defm V_CUBESC_F32 : VOP3Inst , "v_cubesc_f32", - VOP_F32_F32_F32_F32, int_amdgcn_cubesc ->; -defm V_CUBETC_F32 : VOP3Inst , "v_cubetc_f32", - VOP_F32_F32_F32_F32, int_amdgcn_cubetc ->; -defm V_CUBEMA_F32 : VOP3Inst , "v_cubema_f32", - VOP_F32_F32_F32_F32, int_amdgcn_cubema ->; - -defm 
V_BFE_U32 : VOP3Inst , "v_bfe_u32", - VOP_I32_I32_I32_I32, AMDGPUbfe_u32 ->; -defm V_BFE_I32 : VOP3Inst , "v_bfe_i32", - VOP_I32_I32_I32_I32, AMDGPUbfe_i32 ->; - -defm V_BFI_B32 : VOP3Inst , "v_bfi_b32", - VOP_I32_I32_I32_I32, AMDGPUbfi ->; - -let isCommutable = 1 in { -defm V_FMA_F32 : VOP3Inst , "v_fma_f32", - VOP_F32_F32_F32_F32, fma ->; -defm V_FMA_F64 : VOP3Inst , "v_fma_f64", - VOP_F64_F64_F64_F64, fma ->; - -defm V_LERP_U8 : VOP3Inst , "v_lerp_u8", - VOP_I32_I32_I32_I32, int_amdgcn_lerp ->; -} // End isCommutable = 1 - -//def V_LERP_U8 : VOP3_U8 <0x0000014d, "v_lerp_u8", []>; -defm V_ALIGNBIT_B32 : VOP3Inst , "v_alignbit_b32", - VOP_I32_I32_I32_I32 ->; -defm V_ALIGNBYTE_B32 : VOP3Inst , "v_alignbyte_b32", - VOP_I32_I32_I32_I32 ->; - -defm V_MIN3_F32 : VOP3Inst , "v_min3_f32", - VOP_F32_F32_F32_F32, AMDGPUfmin3>; - -defm V_MIN3_I32 : VOP3Inst , "v_min3_i32", - VOP_I32_I32_I32_I32, AMDGPUsmin3 ->; -defm V_MIN3_U32 : VOP3Inst , "v_min3_u32", - VOP_I32_I32_I32_I32, AMDGPUumin3 ->; -defm V_MAX3_F32 : VOP3Inst , "v_max3_f32", - VOP_F32_F32_F32_F32, AMDGPUfmax3 ->; -defm V_MAX3_I32 : VOP3Inst , "v_max3_i32", - VOP_I32_I32_I32_I32, AMDGPUsmax3 ->; -defm V_MAX3_U32 : VOP3Inst , "v_max3_u32", - VOP_I32_I32_I32_I32, AMDGPUumax3 ->; -defm V_MED3_F32 : VOP3Inst , "v_med3_f32", - VOP_F32_F32_F32_F32, AMDGPUfmed3 ->; -defm V_MED3_I32 : VOP3Inst , "v_med3_i32", - VOP_I32_I32_I32_I32, AMDGPUsmed3 ->; -defm V_MED3_U32 : VOP3Inst , "v_med3_u32", - VOP_I32_I32_I32_I32, AMDGPUumed3 ->; - -defm V_SAD_U8 : VOP3Inst , "v_sad_u8", - VOP_I32_I32_I32_I32, int_amdgcn_sad_u8>; - -defm V_SAD_HI_U8 : VOP3Inst , "v_sad_hi_u8", - VOP_I32_I32_I32_I32, int_amdgcn_sad_hi_u8>; - -defm V_SAD_U16 : VOP3Inst , "v_sad_u16", - VOP_I32_I32_I32_I32, int_amdgcn_sad_u16>; - -defm V_SAD_U32 : VOP3Inst , "v_sad_u32", - VOP_I32_I32_I32_I32 ->; - -defm V_CVT_PK_U8_F32 : VOP3Inst, "v_cvt_pk_u8_f32", - VOP_I32_F32_I32_I32, int_amdgcn_cvt_pk_u8_f32 ->; - -//def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, 
"v_cvt_pk_u8_f32", []>; -defm V_DIV_FIXUP_F32 : VOP3Inst < - vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup ->; - -let SchedRW = [WriteDoubleAdd] in { - -defm V_DIV_FIXUP_F64 : VOP3Inst < - vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup ->; - -} // End SchedRW = [WriteDouble] - -let SchedRW = [WriteDoubleAdd] in { -let isCommutable = 1 in { - -defm V_ADD_F64 : VOP3Inst , "v_add_f64", - VOP_F64_F64_F64, fadd, 1 ->; -defm V_MUL_F64 : VOP3Inst , "v_mul_f64", - VOP_F64_F64_F64, fmul, 1 ->; - -defm V_MIN_F64 : VOP3Inst , "v_min_f64", - VOP_F64_F64_F64, fminnum, 1 ->; -defm V_MAX_F64 : VOP3Inst , "v_max_f64", - VOP_F64_F64_F64, fmaxnum, 1 ->; - -} // End isCommutable = 1 - -defm V_LDEXP_F64 : VOP3Inst , "v_ldexp_f64", - VOP_F64_F64_I32, AMDGPUldexp, 1 ->; - -} // End let SchedRW = [WriteDoubleAdd] - -let isCommutable = 1, SchedRW = [WriteQuarterRate32] in { - -defm V_MUL_LO_U32 : VOP3Inst , "v_mul_lo_u32", - VOP_I32_I32_I32 ->; -defm V_MUL_HI_U32 : VOP3Inst , "v_mul_hi_u32", - VOP_I32_I32_I32, mulhu ->; - -let DisableVIDecoder=1 in { // removed from VI as identical to V_MUL_LO_U32 -defm V_MUL_LO_I32 : VOP3Inst , "v_mul_lo_i32", - VOP_I32_I32_I32 ->; -} - -defm V_MUL_HI_I32 : VOP3Inst , "v_mul_hi_i32", - VOP_I32_I32_I32, mulhs ->; - -} // End isCommutable = 1, SchedRW = [WriteQuarterRate32] - -let SchedRW = [WriteFloatFMA, WriteSALU] in { -defm V_DIV_SCALE_F32 : VOP3bInst , "v_div_scale_f32", - VOP3b_F32_I1_F32_F32_F32, [], 1 ->; -} - -let SchedRW = [WriteDouble, WriteSALU] in { -// Double precision division pre-scale. 
-defm V_DIV_SCALE_F64 : VOP3bInst , "v_div_scale_f64", - VOP3b_F64_I1_F64_F64_F64, [], 1 ->; -} // End SchedRW = [WriteDouble] - -let isCommutable = 1, Uses = [VCC, EXEC] in { - -let SchedRW = [WriteFloatFMA] in { -// v_div_fmas_f32: -// result = src0 * src1 + src2 -// if (vcc) -// result *= 2^32 -// -defm V_DIV_FMAS_F32 : VOP3_VCC_Inst , "v_div_fmas_f32", - VOP_F32_F32_F32_F32, AMDGPUdiv_fmas ->; -} - -let SchedRW = [WriteDouble] in { -// v_div_fmas_f64: -// result = src0 * src1 + src2 -// if (vcc) -// result *= 2^64 -// -defm V_DIV_FMAS_F64 : VOP3_VCC_Inst , "v_div_fmas_f64", - VOP_F64_F64_F64_F64, AMDGPUdiv_fmas ->; - -} // End SchedRW = [WriteDouble] -} // End isCommutable = 1, Uses = [VCC, EXEC] - -defm V_MSAD_U8 : VOP3Inst , "v_msad_u8", - VOP_I32_I32_I32_I32, int_amdgcn_msad_u8>; - -defm V_MQSAD_PK_U16_U8 : VOP3Inst , "v_mqsad_pk_u16_u8", - VOP_I64_I64_I32_I64, int_amdgcn_mqsad_pk_u16_u8>; - -//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>; - -let SchedRW = [WriteDouble] in { -defm V_TRIG_PREOP_F64 : VOP3Inst < - vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop ->; - -} // End SchedRW = [WriteDouble] - -// These instructions only exist on SI and CI -let SubtargetPredicate = isSICI in { - -defm V_LSHL_B64 : VOP3Inst , "v_lshl_b64", VOP_I64_I64_I32>; -defm V_LSHR_B64 : VOP3Inst , "v_lshr_b64", VOP_I64_I64_I32>; -defm V_ASHR_I64 : VOP3Inst , "v_ashr_i64", VOP_I64_I64_I32>; - -defm V_MULLIT_F32 : VOP3Inst , "v_mullit_f32", - VOP_F32_F32_F32_F32>; - -} // End SubtargetPredicate = isSICI - -let SubtargetPredicate = isVI, DisableSIDecoder = 1 in { - -defm V_LSHLREV_B64 : VOP3Inst , "v_lshlrev_b64", - VOP_I64_I32_I64 ->; -defm V_LSHRREV_B64 : VOP3Inst , "v_lshrrev_b64", - VOP_I64_I32_I64 ->; -defm V_ASHRREV_I64 : VOP3Inst , "v_ashrrev_i64", - VOP_I64_I32_I64 ->; - -} // End SubtargetPredicate = isVI - -//===----------------------------------------------------------------------===// // Pseudo Instructions 
//===----------------------------------------------------------------------===// Index: llvm/trunk/lib/Target/AMDGPU/VIInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VIInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/VIInstructions.td @@ -75,14 +75,6 @@ } // End isCommutable = 1 defm V_LDEXP_F16 : VOP2Inst , "v_ldexp_f16", VOP_F16_F16_I16>; -//===----------------------------------------------------------------------===// -// VOP3 Instructions -//===----------------------------------------------------------------------===// -let isCommutable = 1 in { - defm V_MAD_F16 : VOP3Inst , "v_mad_f16", VOP_F16_F16_F16_F16>; - defm V_MAD_U16 : VOP3Inst , "v_mad_u16", VOP_I16_I16_I16_I16>; - defm V_MAD_I16 : VOP3Inst , "v_mad_i16", VOP_I16_I16_I16_I16>; -} } // let DisableSIDecoder = 1 // Aliases to simplify matching of floating-point instructions that Index: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td @@ -0,0 +1,404 @@ +//===-- VOP3Instructions.td - Vector Instruction Definitions --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// VOP3 Classes +//===----------------------------------------------------------------------===// + +class getVOP3ModPat { + list ret3 = [(set P.DstVT:$vdst, + (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), + (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))]; + + list ret2 = [(set P.DstVT:$vdst, + (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))]; + + list ret1 = [(set P.DstVT:$vdst, + (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod))))]; + + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class getVOP3Pat { + list ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))]; + list ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]; + list ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]; + list ret = !if(!eq(P.NumSrcArgs, 3), ret3, + !if(!eq(P.NumSrcArgs, 2), ret2, + ret1)); +} + +class VOP3Inst : + VOP3_PseudoNew.ret, getVOP3Pat.ret), + VOP3Only>; + +// Special case for v_div_fmas_{f32|f64}, since it seems to be the +// only VOP instruction that implicitly reads VCC. 
+let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in { +def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> { + let Outs64 = (outs DstRC.RegClass:$vdst); +} +def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> { + let Outs64 = (outs DstRC.RegClass:$vdst); +} +} + +class getVOP3VCC { + list ret = + [(set P.DstVT:$vdst, + (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), + (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), + (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)), + (i1 VCC)))]; +} + +class VOP3_Profile : VOPProfile { + // FIXME: Hack to stop printing _e64 + let Outs64 = (outs DstRC.RegClass:$vdst); + let Asm64 = " " # P.Asm64; +} + +class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { + let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); + let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod"; +} + +def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile { + // FIXME: Hack to stop printing _e64 + let DstRC = RegisterOperand; +} + +def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile { + // FIXME: Hack to stop printing _e64 + let DstRC = RegisterOperand; +} + +//===----------------------------------------------------------------------===// +// VOP3 Instructions +//===----------------------------------------------------------------------===// + +let isCommutable = 1 in { + +def V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile>; +def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile, fmad>; +def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile, AMDGPUmad_i24>; +def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile, AMDGPUmad_u24>; +def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile, fma>; +def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile, fma>; +def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile, int_amdgcn_lerp>; + +let SchedRW = [WriteDoubleAdd] in { +def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile, 
fadd, 1>; +def V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile, fmul, 1>; +def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile, fminnum, 1>; +def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile, fmaxnum, 1>; +} // End SchedRW = [WriteDoubleAdd] + +let SchedRW = [WriteQuarterRate32] in { +def V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile>; +def V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile, mulhu>; +def V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile>; +def V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile, mulhs>; +} // End SchedRW = [WriteQuarterRate32] + +let Uses = [VCC, EXEC] in { +// v_div_fmas_f32: +// result = src0 * src1 + src2 +// if (vcc) +// result *= 2^32 +// +def V_DIV_FMAS_F32 : VOP3_PseudoNew <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, + getVOP3VCC.ret> { + let SchedRW = [WriteFloatFMA]; +} +// v_div_fmas_f64: +// result = src0 * src1 + src2 +// if (vcc) +// result *= 2^64 +// +def V_DIV_FMAS_F64 : VOP3_PseudoNew <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, + getVOP3VCC.ret> { + let SchedRW = [WriteDouble]; +} +} // End Uses = [VCC, EXEC] + +} // End isCommutable = 1 + +def V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile, int_amdgcn_cubeid>; +def V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile, int_amdgcn_cubesc>; +def V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile, int_amdgcn_cubetc>; +def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile, int_amdgcn_cubema>; +def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGPUbfe_u32>; +def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; +def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; +def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile>; +def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile>; +def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile, AMDGPUfmin3>; +def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile, AMDGPUsmin3>; +def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile, AMDGPUumin3>; +def 
V_MAX3_F32 : VOP3Inst <"v_max3_f32", VOP3_Profile, AMDGPUfmax3>; +def V_MAX3_I32 : VOP3Inst <"v_max3_i32", VOP3_Profile, AMDGPUsmax3>; +def V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile, AMDGPUumax3>; +def V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile, AMDGPUfmed3>; +def V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile, AMDGPUsmed3>; +def V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile, AMDGPUumed3>; +def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile, int_amdgcn_sad_u8>; +def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile, int_amdgcn_sad_hi_u8>; +def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile, int_amdgcn_sad_u16>; +def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile>; +def V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile, int_amdgcn_cvt_pk_u8_f32>; +def V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile, AMDGPUdiv_fixup>; + +let SchedRW = [WriteDoubleAdd] in { +def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile, AMDGPUdiv_fixup>; +def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile, AMDGPUldexp, 1>; +} // End SchedRW = [WriteDoubleAdd] + +def V_DIV_SCALE_F32 : VOP3_PseudoNew <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> { + let SchedRW = [WriteFloatFMA, WriteSALU]; +} + +// Double precision division pre-scale. 
+def V_DIV_SCALE_F64 : VOP3_PseudoNew <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> { + let SchedRW = [WriteDouble, WriteSALU]; +} + +def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile, int_amdgcn_msad_u8>; +def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile, int_amdgcn_mqsad_pk_u16_u8>; + +def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile, AMDGPUtrig_preop> { + let SchedRW = [WriteDouble]; +} + +// These instructions only exist on SI and CI +let SubtargetPredicate = isSICI in { +def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile>; +def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile>; +def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile>; +def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile>; +} // End SubtargetPredicate = isSICI + +let SubtargetPredicate = isVI in { +def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile>; +def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile>; +def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile>; +} // End SubtargetPredicate = isVI + + +let SubtargetPredicate = isCIVI in { + +def V_MQSAD_U16_U8 : VOP3Inst <"v_mqsad_u16_u8", VOP3_Profile>; +def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile, int_amdgcn_qsad_pk_u16_u8>; +def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile, int_amdgcn_mqsad_u32_u8>; + +let isCommutable = 1 in { +def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3_Profile>; + +// XXX - Does this set VCC? 
+def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3_Profile>; +} // End isCommutable = 1 + +} // End SubtargetPredicate = isCIVI + + +let SubtargetPredicate = isVI in { + +let isCommutable = 1 in { + def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile>; + def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; + def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; +} + +} // End SubtargetPredicate = isVI + + +//===----------------------------------------------------------------------===// +// Target +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SI +//===----------------------------------------------------------------------===// + +let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in { + +multiclass VOP3_Real_si op> { + def _si : VOP3_Real(NAME), SIEncodingFamily.SI>, + VOP3e_siNew (NAME).Pfl>; +} + +multiclass VOP3be_Real_si op> { + def _si : VOP3_Real(NAME), SIEncodingFamily.SI>, + VOP3be_siNew (NAME).Pfl>; +} + +} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" + +defm V_MAD_LEGACY_F32 : VOP3_Real_si <0x140>; +defm V_MAD_F32 : VOP3_Real_si <0x141>; +defm V_MAD_I32_I24 : VOP3_Real_si <0x142>; +defm V_MAD_U32_U24 : VOP3_Real_si <0x143>; +defm V_CUBEID_F32 : VOP3_Real_si <0x144>; +defm V_CUBESC_F32 : VOP3_Real_si <0x145>; +defm V_CUBETC_F32 : VOP3_Real_si <0x146>; +defm V_CUBEMA_F32 : VOP3_Real_si <0x147>; +defm V_BFE_U32 : VOP3_Real_si <0x148>; +defm V_BFE_I32 : VOP3_Real_si <0x149>; +defm V_BFI_B32 : VOP3_Real_si <0x14a>; +defm V_FMA_F32 : VOP3_Real_si <0x14b>; +defm V_FMA_F64 : VOP3_Real_si <0x14c>; +defm V_LERP_U8 : VOP3_Real_si <0x14d>; +defm V_ALIGNBIT_B32 : VOP3_Real_si <0x14e>; +defm V_ALIGNBYTE_B32 : VOP3_Real_si <0x14f>; +defm V_MULLIT_F32 : VOP3_Real_si <0x150>; +defm V_MIN3_F32 : VOP3_Real_si <0x151>; +defm V_MIN3_I32 : VOP3_Real_si <0x152>; +defm V_MIN3_U32 : VOP3_Real_si <0x153>; +defm V_MAX3_F32 : 
VOP3_Real_si <0x154>; +defm V_MAX3_I32 : VOP3_Real_si <0x155>; +defm V_MAX3_U32 : VOP3_Real_si <0x156>; +defm V_MED3_F32 : VOP3_Real_si <0x157>; +defm V_MED3_I32 : VOP3_Real_si <0x158>; +defm V_MED3_U32 : VOP3_Real_si <0x159>; +defm V_SAD_U8 : VOP3_Real_si <0x15a>; +defm V_SAD_HI_U8 : VOP3_Real_si <0x15b>; +defm V_SAD_U16 : VOP3_Real_si <0x15c>; +defm V_SAD_U32 : VOP3_Real_si <0x15d>; +defm V_CVT_PK_U8_F32 : VOP3_Real_si <0x15e>; +defm V_DIV_FIXUP_F32 : VOP3_Real_si <0x15f>; +defm V_DIV_FIXUP_F64 : VOP3_Real_si <0x160>; +defm V_LSHL_B64 : VOP3_Real_si <0x161>; +defm V_LSHR_B64 : VOP3_Real_si <0x162>; +defm V_ASHR_I64 : VOP3_Real_si <0x163>; +defm V_ADD_F64 : VOP3_Real_si <0x164>; +defm V_MUL_F64 : VOP3_Real_si <0x165>; +defm V_MIN_F64 : VOP3_Real_si <0x166>; +defm V_MAX_F64 : VOP3_Real_si <0x167>; +defm V_LDEXP_F64 : VOP3_Real_si <0x168>; +defm V_MUL_LO_U32 : VOP3_Real_si <0x169>; +defm V_MUL_HI_U32 : VOP3_Real_si <0x16a>; +defm V_MUL_LO_I32 : VOP3_Real_si <0x16b>; +defm V_MUL_HI_I32 : VOP3_Real_si <0x16c>; +defm V_DIV_SCALE_F32 : VOP3be_Real_si <0x16d>; +defm V_DIV_SCALE_F64 : VOP3be_Real_si <0x16e>; +defm V_DIV_FMAS_F32 : VOP3_Real_si <0x16f>; +defm V_DIV_FMAS_F64 : VOP3_Real_si <0x170>; +defm V_MSAD_U8 : VOP3_Real_si <0x171>; +defm V_MQSAD_PK_U16_U8 : VOP3_Real_si <0x173>; +defm V_TRIG_PREOP_F64 : VOP3_Real_si <0x174>; + +//===----------------------------------------------------------------------===// +// CI +//===----------------------------------------------------------------------===// + +multiclass VOP3_Real_ci op> { + def _ci : VOP3_Real(NAME), SIEncodingFamily.SI>, + VOP3e_siNew (NAME).Pfl> { + let AssemblerPredicates = [isCIOnly]; + let DecoderNamespace = "CI"; + } +} + +defm V_MQSAD_U16_U8 : VOP3_Real_ci <0x172>; +defm V_QSAD_PK_U16_U8 : VOP3_Real_ci <0x172>; +defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x174>; +defm V_MAD_U64_U32 : VOP3_Real_ci <0x176>; +defm V_MAD_I64_I32 : VOP3_Real_ci <0x177>; + 
+//===----------------------------------------------------------------------===// +// VI +//===----------------------------------------------------------------------===// + +let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { + +multiclass VOP3_Real_vi op> { + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3e_viNew (NAME).Pfl>; +} + +multiclass VOP3be_Real_vi op> { + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3be_viNew (NAME).Pfl>; +} + +} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" + +defm V_MQSAD_U16_U8 : VOP3_Real_vi <0x172>; +defm V_MAD_U64_U32 : VOP3_Real_vi <0x176>; +defm V_MAD_I64_I32 : VOP3_Real_vi <0x177>; + +defm V_MAD_LEGACY_F32 : VOP3_Real_vi <0x1c0>; +defm V_MAD_F32 : VOP3_Real_vi <0x1c1>; +defm V_MAD_I32_I24 : VOP3_Real_vi <0x1c2>; +defm V_MAD_U32_U24 : VOP3_Real_vi <0x1c3>; +defm V_CUBEID_F32 : VOP3_Real_vi <0x1c4>; +defm V_CUBESC_F32 : VOP3_Real_vi <0x1c5>; +defm V_CUBETC_F32 : VOP3_Real_vi <0x1c6>; +defm V_CUBEMA_F32 : VOP3_Real_vi <0x1c7>; +defm V_BFE_U32 : VOP3_Real_vi <0x1c8>; +defm V_BFE_I32 : VOP3_Real_vi <0x1c9>; +defm V_BFI_B32 : VOP3_Real_vi <0x1ca>; +defm V_FMA_F32 : VOP3_Real_vi <0x1cb>; +defm V_FMA_F64 : VOP3_Real_vi <0x1cc>; +defm V_LERP_U8 : VOP3_Real_vi <0x1cd>; +defm V_ALIGNBIT_B32 : VOP3_Real_vi <0x1ce>; +defm V_ALIGNBYTE_B32 : VOP3_Real_vi <0x1cf>; +defm V_MIN3_F32 : VOP3_Real_vi <0x1d0>; +defm V_MIN3_I32 : VOP3_Real_vi <0x1d1>; +defm V_MIN3_U32 : VOP3_Real_vi <0x1d2>; +defm V_MAX3_F32 : VOP3_Real_vi <0x1d3>; +defm V_MAX3_I32 : VOP3_Real_vi <0x1d4>; +defm V_MAX3_U32 : VOP3_Real_vi <0x1d5>; +defm V_MED3_F32 : VOP3_Real_vi <0x1d6>; +defm V_MED3_I32 : VOP3_Real_vi <0x1d7>; +defm V_MED3_U32 : VOP3_Real_vi <0x1d8>; +defm V_SAD_U8 : VOP3_Real_vi <0x1d9>; +defm V_SAD_HI_U8 : VOP3_Real_vi <0x1da>; +defm V_SAD_U16 : VOP3_Real_vi <0x1db>; +defm V_SAD_U32 : VOP3_Real_vi <0x1dc>; +defm V_CVT_PK_U8_F32 : VOP3_Real_vi <0x1dd>; +defm V_DIV_FIXUP_F32 : VOP3_Real_vi <0x1de>; +defm V_DIV_FIXUP_F64 : 
VOP3_Real_vi <0x1df>; +defm V_DIV_SCALE_F32 : VOP3be_Real_vi <0x1e0>; +defm V_DIV_SCALE_F64 : VOP3be_Real_vi <0x1e1>; +defm V_DIV_FMAS_F32 : VOP3_Real_vi <0x1e2>; +defm V_DIV_FMAS_F64 : VOP3_Real_vi <0x1e3>; +defm V_MSAD_U8 : VOP3_Real_vi <0x1e4>; +defm V_QSAD_PK_U16_U8 : VOP3_Real_vi <0x1e5>; +defm V_MQSAD_PK_U16_U8 : VOP3_Real_vi <0x1e6>; +defm V_MQSAD_U32_U8 : VOP3_Real_vi <0x1e7>; + +defm V_MAD_F16 : VOP3_Real_vi <0x1ea>; +defm V_MAD_U16 : VOP3_Real_vi <0x1eb>; +defm V_MAD_I16 : VOP3_Real_vi <0x1ec>; + +defm V_ADD_F64 : VOP3_Real_vi <0x280>; +defm V_MUL_F64 : VOP3_Real_vi <0x281>; +defm V_MIN_F64 : VOP3_Real_vi <0x282>; +defm V_MAX_F64 : VOP3_Real_vi <0x283>; +defm V_LDEXP_F64 : VOP3_Real_vi <0x284>; +defm V_MUL_LO_U32 : VOP3_Real_vi <0x285>; + +// removed from VI as identical to V_MUL_LO_U32 +let isAsmParserOnly = 1 in { +defm V_MUL_LO_I32 : VOP3_Real_vi <0x285>; +} + +defm V_MUL_HI_U32 : VOP3_Real_vi <0x286>; +defm V_MUL_HI_I32 : VOP3_Real_vi <0x287>; + +defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>; +defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>; +defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>; +defm V_TRIG_PREOP_F64 : VOP3_Real_vi <0x292>; Index: llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td @@ -100,6 +100,49 @@ let Inst{15} = !if(P.HasClamp, clamp, 0); } +class VOP3e_siNew op, VOPProfile P> : VOP3a_siNew { + bits<8> vdst; + let Inst{7-0} = vdst; +} + +class VOP3e_viNew op, VOPProfile P> : VOP3a_viNew { + bits<8> vdst; + let Inst{7-0} = vdst; +} + +class VOP3beNew : Enc64 { + bits<8> vdst; + bits<2> src0_modifiers; + bits<9> src0; + bits<2> src1_modifiers; + bits<9> src1; + bits<2> src2_modifiers; + bits<9> src2; + bits<7> sdst; + bits<2> omod; + + let Inst{7-0} = vdst; + let Inst{14-8} = sdst; + let Inst{31-26} = 0x34; //encoding + let Inst{40-32} = !if(P.HasSrc0, src0, 0); + let Inst{49-41} = 
!if(P.HasSrc1, src1, 0); + let Inst{58-50} = !if(P.HasSrc2, src2, 0); + let Inst{60-59} = !if(P.HasOMod, omod, 0); + let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); + let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); + let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); +} + +class VOP3be_siNew op, VOPProfile P> : VOP3beNew

{ + let Inst{25-17} = op; +} + +class VOP3be_viNew op, VOPProfile P> : VOP3beNew

{ + bits<1> clamp; + let Inst{25-16} = op; + let Inst{15} = !if(P.HasClamp, clamp, 0); +} + class VOP_SDWAeNew : Enc64 { bits<8> src0; bits<3> src0_sel; @@ -128,3 +171,4 @@ } include "VOPCInstructions.td" +include "VOP3Instructions.td"