Index: lib/Target/AMDGPU/CIInstructions.td =================================================================== --- lib/Target/AMDGPU/CIInstructions.td +++ lib/Target/AMDGPU/CIInstructions.td @@ -13,35 +13,3 @@ // S_CBRANCH_CDBGSYS // S_CBRANCH_CDBGSYS_OR_USER // S_CBRANCH_CDBGSYS_AND_USER - -//===----------------------------------------------------------------------===// -// VOP1 Instructions -//===----------------------------------------------------------------------===// - -let SubtargetPredicate = isCIVI in { - -let SchedRW = [WriteDoubleAdd] in { -defm V_TRUNC_F64 : VOP1Inst , "v_trunc_f64", - VOP_F64_F64, ftrunc ->; -defm V_CEIL_F64 : VOP1Inst , "v_ceil_f64", - VOP_F64_F64, fceil ->; -defm V_FLOOR_F64 : VOP1Inst , "v_floor_f64", - VOP_F64_F64, ffloor ->; -defm V_RNDNE_F64 : VOP1Inst , "v_rndne_f64", - VOP_F64_F64, frint ->; -} // End SchedRW = [WriteDoubleAdd] - -let SchedRW = [WriteQuarterRate32] in { -defm V_LOG_LEGACY_F32 : VOP1Inst , "v_log_legacy_f32", - VOP_F32_F32 ->; -defm V_EXP_LEGACY_F32 : VOP1Inst , "v_exp_legacy_f32", - VOP_F32_F32 ->; -} // End SchedRW = [WriteQuarterRate32] - -} // End SubtargetPredicate = isCIVI Index: lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/SIInstrFormats.td +++ lib/Target/AMDGPU/SIInstrFormats.td @@ -157,13 +157,6 @@ let VALU = 1; } -class VOP1Common pattern> : - VOPAnyCommon { - - let VOP1 = 1; - let Size = 4; -} - class VOP2Common pattern> : VOPAnyCommon { @@ -208,16 +201,6 @@ // Vector ALU operations //===----------------------------------------------------------------------===// -class VOP1e op> : Enc32 { - bits<8> vdst; - bits<9> src0; - - let Inst{8-0} = src0; - let Inst{16-9} = op; - let Inst{24-17} = vdst; - let Inst{31-25} = 0x3f; //encoding -} - class VOP2e op> : Enc32 { bits<8> vdst; bits<9> src0; @@ -369,12 +352,6 @@ let Uses = [EXEC] in { -class VOP1 op, dag outs, dag ins, string asm, list pattern> : - VOP1Common , - VOP1e { - let isCodeGenOnly = 0; -} - class VOP2 op, dag outs, dag ins, string asm, list pattern> : VOP2Common , VOP2e { let isCodeGenOnly = 0; Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -19,14 +19,6 @@ field bits<10> VI3; } -class vop1 si, bits<8> vi = si> : vop { - field bits<8> SI = si; - field bits<8> VI = vi; - - field bits<9> SI3 = {1, 1, si{6-0}}; - field bits<10> VI3 = !add(0x140, vi); -} - class vop2 si, bits<6> vi = si> : vop { field bits<6> SI = si; field bits<6> VI = vi; @@ -641,16 +633,18 @@ // XXX - do f16 instructions? 
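Note: the change below widens the source-modifier queries to cover the 16-bit types used by the new VI f16/i16 instructions. A minimal sketch of how the nested !if chains resolve after the change (class names as in this file; each result follows directly from the added case):

  hasModifiers<f16>.ret     // 1 (newly added case)
  hasModifiers<f64>.ret     // 1 (unchanged)
  hasIntModifiers<i16>.ret  // 1 (newly added case)
  hasIntModifiers<f32>.ret  // 0 (falls through to the final 0)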
class hasModifiers { bit ret = + !if(!eq(SrcVT.Value, f16.Value), 1, !if(!eq(SrcVT.Value, f32.Value), 1, !if(!eq(SrcVT.Value, f64.Value), 1, - 0)); + 0))); } class hasIntModifiers { bit ret = + !if(!eq(SrcVT.Value, i16.Value), 1, !if(!eq(SrcVT.Value, i32.Value), 1, !if(!eq(SrcVT.Value, i64.Value), 1, - 0)); + 0))); } @@ -918,6 +912,7 @@ field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); field bit HasDst32 = HasDst; + field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case field int NumSrcArgs = getNumSrcArgs.ret; field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1); field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1); @@ -997,41 +992,6 @@ def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; -// Restrict src0 to be VGPR -def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> { - let Src0RC32 = VRegSrc_32; - let Src0RC64 = VRegSrc_32; - - let HasExt = 0; -} - -// Special case because there are no true output operands. Hack vdst -// to be a src operand. The custom inserter must add a tied implicit -// def and use of the super register since there seems to be no way to -// add an implicit def of a virtual register in tablegen. -def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { - let Src0RC32 = VOPDstOperand; - let Src0RC64 = VOPDstOperand; - - let Outs = (outs); - let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0); - let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); - - let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, - bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); - let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_imodifiers, VCSrc_b32:$src0, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel); - - let Asm32 = getAsm32<1, 1>.ret; - let Asm64 = getAsm64<1, 1, 0>.ret; - let AsmDPP = getAsmDPP<1, 1, 0>.ret; - let AsmSDWA = getAsmSDWA<1, 1, 0>.ret; - - let HasExt = 0; - let HasDst = 0; -} - // Write out to vcc or arbitrary SGPR. def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { let Asm32 = "$vdst, vcc, $src0, $src1"; @@ -1127,56 +1087,6 @@ bit IsRet = isRet; } -class VOP1_Pseudo pattern, string opName> : - VOP1Common , - VOP , - SIMCInstr , - MnemonicAlias { - let isPseudo = 1; - let isCodeGenOnly = 1; - - field bits<8> vdst; - field bits<9> src0; -} - -class VOP1_Real_si : - VOP1, - SIMCInstr { - let AssemblerPredicate = SIAssemblerPredicate; - let DecoderNamespace = "SICI"; - let DisableDecoder = DisableSIDecoder; -} - -class VOP1_Real_vi : - VOP1, - SIMCInstr { - let AssemblerPredicates = [isVI]; - let DecoderNamespace = "VI"; - let DisableDecoder = DisableVIDecoder; -} - -multiclass VOP1_m pattern, - string asm = opName#p.Asm32> { - def "" : VOP1_Pseudo ; - - def _si : VOP1_Real_si ; - - def _vi : VOP1_Real_vi ; - -} - -class VOP1_DPP : - VOP1_DPPe , - VOP_DPP { - let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); - let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.DPP, - AMDGPUAsmVariants.Disable); - let DecoderNamespace = "DPP"; - let DisableDecoder = DisableVIDecoder; - let src0_modifiers = !if(p.HasModifiers, ?, 0); - let src1_modifiers = 0; -} - class SDWADisableFields { bits<8> src0 = !if(!eq(p.NumSrcArgs, 0), 0, ?); bits<3> src0_sel = !if(!eq(p.NumSrcArgs, 0), 6, ?); @@ -1200,26 +1110,6 @@ bits<1> clamp = !if(!eq(p.NumSrcArgs, 0), 0, ?); } -class VOP1_SDWA : - VOP1_SDWAe , - VOP_SDWA , - SDWADisableFields
<p>
{ - let AsmMatchConverter = "cvtSdwaVOP1"; - let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); - let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA, - AMDGPUAsmVariants.Disable); - let DecoderNamespace = "SDWA"; - let DisableDecoder = DisableVIDecoder; -} - -multiclass VOP1SI_m pattern, - string asm = opName#p.Asm32> { - - def "" : VOP1_Pseudo ; - - def _si : VOP1_Real_si ; -} - class VOP2_Pseudo pattern, string opName> : VOP2Common , VOP , @@ -1385,28 +1275,6 @@ let DisableDecoder = DisableVIDecoder; } -multiclass VOP3_1_m pattern, string opName, bit HasMods = 1> { - - def "" : VOP3_Pseudo ; - - def _si : VOP3_Real_si , - VOP3DisableFields<0, 0, HasMods>; - - def _vi : VOP3_Real_vi , - VOP3DisableFields<0, 0, HasMods>; -} - -multiclass VOP3SI_1_m pattern, string opName, bit HasMods = 1> { - - def "" : VOP3_Pseudo ; - - def _si : VOP3_Real_si , - VOP3DisableFields<0, 0, HasMods>; - // No VI instruction. This class is for SI only. -} - multiclass VOP3_2_m pattern, string opName, string revOp, bit HasMods = 1> { @@ -1487,41 +1355,6 @@ } } -multiclass VOP1_Helper pat32, - list pat64> { - - defm _e32 : VOP1_m ; - - defm _e64 : VOP3_1_m ; - - def _dpp : VOP1_DPP ; - - def _sdwa : VOP1_SDWA ; -} - -multiclass VOP1Inst : VOP1_Helper < - op, opName, P, [], - !if(P.HasModifiers, - [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, - i32:$src0_modifiers, i1:$clamp, i32:$omod))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]) ->; - -multiclass VOP1InstSI { - - defm _e32 : VOP1SI_m ; - - defm _e64 : VOP3SI_1_m ; -} - multiclass VOP2_Helper pat32, list pat64, string revOp> { Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -36,236 +36,6 @@ defm EXP : EXP_m; //===----------------------------------------------------------------------===// -// VOP1 Instructions -//===----------------------------------------------------------------------===// - -let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { -defm V_NOP : VOP1Inst , "v_nop", VOP_NONE>; -} - -let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in { -defm V_MOV_B32 : VOP1Inst , "v_mov_b32", VOP_I32_I32>; -} // End isMoveImm = 1 - -let Uses = [EXEC] in { - -// FIXME: Specify SchedRW for READFIRSTLANE_B32 - -def V_READFIRSTLANE_B32 : VOP1 < - 0x00000002, - (outs SReg_32:$vdst), - (ins VGPR_32:$src0), - "v_readfirstlane_b32 $vdst, $src0", - [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))] -> { - let isConvergent = 1; -} - -} - -let SchedRW = [WriteQuarterRate32] in { - -defm V_CVT_I32_F64 : VOP1Inst , "v_cvt_i32_f64", - VOP_I32_F64, fp_to_sint ->; -defm V_CVT_F64_I32 : VOP1Inst , "v_cvt_f64_i32", - VOP_F64_I32, sint_to_fp ->; -defm V_CVT_F32_I32 : VOP1Inst , "v_cvt_f32_i32", - VOP_F32_I32, sint_to_fp ->; -defm V_CVT_F32_U32 : VOP1Inst , "v_cvt_f32_u32", - VOP_F32_I32, uint_to_fp ->; -defm V_CVT_U32_F32 : VOP1Inst , "v_cvt_u32_f32", - VOP_I32_F32, fp_to_uint ->; -defm V_CVT_I32_F32 : VOP1Inst , "v_cvt_i32_f32", - VOP_I32_F32, fp_to_sint ->; -defm V_CVT_F16_F32 : VOP1Inst , "v_cvt_f16_f32", - VOP_I32_F32, fp_to_f16 ->; -defm V_CVT_F32_F16 : VOP1Inst , "v_cvt_f32_f16", - VOP_F32_I32, f16_to_fp ->; -defm V_CVT_RPI_I32_F32 : VOP1Inst , "v_cvt_rpi_i32_f32", - VOP_I32_F32, cvt_rpi_i32_f32>; -defm V_CVT_FLR_I32_F32 : VOP1Inst , "v_cvt_flr_i32_f32", - VOP_I32_F32, cvt_flr_i32_f32>; -defm V_CVT_OFF_F32_I4 : VOP1Inst , "v_cvt_off_f32_i4", VOP_F32_I32>; -defm 
V_CVT_F32_F64 : VOP1Inst , "v_cvt_f32_f64", - VOP_F32_F64, fpround ->; -defm V_CVT_F64_F32 : VOP1Inst , "v_cvt_f64_f32", - VOP_F64_F32, fpextend ->; -defm V_CVT_F32_UBYTE0 : VOP1Inst , "v_cvt_f32_ubyte0", - VOP_F32_I32, AMDGPUcvt_f32_ubyte0 ->; -defm V_CVT_F32_UBYTE1 : VOP1Inst , "v_cvt_f32_ubyte1", - VOP_F32_I32, AMDGPUcvt_f32_ubyte1 ->; -defm V_CVT_F32_UBYTE2 : VOP1Inst , "v_cvt_f32_ubyte2", - VOP_F32_I32, AMDGPUcvt_f32_ubyte2 ->; -defm V_CVT_F32_UBYTE3 : VOP1Inst , "v_cvt_f32_ubyte3", - VOP_F32_I32, AMDGPUcvt_f32_ubyte3 ->; -defm V_CVT_U32_F64 : VOP1Inst , "v_cvt_u32_f64", - VOP_I32_F64, fp_to_uint ->; -defm V_CVT_F64_U32 : VOP1Inst , "v_cvt_f64_u32", - VOP_F64_I32, uint_to_fp ->; - -} // End SchedRW = [WriteQuarterRate32] - -defm V_FRACT_F32 : VOP1Inst , "v_fract_f32", - VOP_F32_F32, AMDGPUfract ->; -defm V_TRUNC_F32 : VOP1Inst , "v_trunc_f32", - VOP_F32_F32, ftrunc ->; -defm V_CEIL_F32 : VOP1Inst , "v_ceil_f32", - VOP_F32_F32, fceil ->; -defm V_RNDNE_F32 : VOP1Inst , "v_rndne_f32", - VOP_F32_F32, frint ->; -defm V_FLOOR_F32 : VOP1Inst , "v_floor_f32", - VOP_F32_F32, ffloor ->; -defm V_EXP_F32 : VOP1Inst , "v_exp_f32", - VOP_F32_F32, fexp2 ->; - -let SchedRW = [WriteQuarterRate32] in { - -defm V_LOG_F32 : VOP1Inst , "v_log_f32", - VOP_F32_F32, flog2 ->; -defm V_RCP_F32 : VOP1Inst , "v_rcp_f32", - VOP_F32_F32, AMDGPUrcp ->; -defm V_RCP_IFLAG_F32 : VOP1Inst , "v_rcp_iflag_f32", - VOP_F32_F32 ->; -defm V_RSQ_F32 : VOP1Inst , "v_rsq_f32", - VOP_F32_F32, AMDGPUrsq ->; - -} // End SchedRW = [WriteQuarterRate32] - -let SchedRW = [WriteDouble] in { - -defm V_RCP_F64 : VOP1Inst , "v_rcp_f64", - VOP_F64_F64, AMDGPUrcp ->; -defm V_RSQ_F64 : VOP1Inst , "v_rsq_f64", - VOP_F64_F64, AMDGPUrsq ->; - -} // End SchedRW = [WriteDouble]; - -defm V_SQRT_F32 : VOP1Inst , "v_sqrt_f32", - VOP_F32_F32, fsqrt ->; - -let SchedRW = [WriteDouble] in { - -defm V_SQRT_F64 : VOP1Inst , "v_sqrt_f64", - VOP_F64_F64, fsqrt ->; - -} // End SchedRW = [WriteDouble] - -let SchedRW = [WriteQuarterRate32] in { - -defm V_SIN_F32 : VOP1Inst , "v_sin_f32", - VOP_F32_F32, AMDGPUsin ->; -defm V_COS_F32 : VOP1Inst , "v_cos_f32", - VOP_F32_F32, AMDGPUcos ->; - -} // End SchedRW = [WriteQuarterRate32] - -defm V_NOT_B32 : VOP1Inst , "v_not_b32", VOP_I32_I32>; -defm V_BFREV_B32 : VOP1Inst , "v_bfrev_b32", VOP_I32_I32>; -defm V_FFBH_U32 : VOP1Inst , "v_ffbh_u32", VOP_I32_I32>; -defm V_FFBL_B32 : VOP1Inst , "v_ffbl_b32", VOP_I32_I32>; -defm V_FFBH_I32 : VOP1Inst , "v_ffbh_i32", VOP_I32_I32>; -defm V_FREXP_EXP_I32_F64 : VOP1Inst , "v_frexp_exp_i32_f64", - VOP_I32_F64, int_amdgcn_frexp_exp ->; - -let SchedRW = [WriteDoubleAdd] in { -defm V_FREXP_MANT_F64 : VOP1Inst , "v_frexp_mant_f64", - VOP_F64_F64, int_amdgcn_frexp_mant ->; - -defm V_FRACT_F64 : VOP1Inst , "v_fract_f64", - VOP_F64_F64, AMDGPUfract ->; -} // End SchedRW = [WriteDoubleAdd] - - -defm V_FREXP_EXP_I32_F32 : VOP1Inst , "v_frexp_exp_i32_f32", - VOP_I32_F32, int_amdgcn_frexp_exp ->; -defm V_FREXP_MANT_F32 : VOP1Inst , "v_frexp_mant_f32", - VOP_F32_F32, int_amdgcn_frexp_mant ->; -let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { -defm V_CLREXCP : VOP1Inst , "v_clrexcp", VOP_NO_EXT>; -} - -let Uses = [M0, EXEC] in { -// v_movreld_b32 is a special case because the destination output - // register is really a source. It isn't actually read (but may be - // written), and is only to provide the base register to start - // indexing from. 
Tablegen seems to not let you define an implicit - // virtual register output for the super register being written into, - // so this must have an implicit def of the register added to it. -defm V_MOVRELD_B32 : VOP1Inst , "v_movreld_b32", VOP_MOVRELD>; -defm V_MOVRELS_B32 : VOP1Inst , "v_movrels_b32", VOP_I32_VI32_NO_EXT>; -defm V_MOVRELSD_B32 : VOP1Inst , "v_movrelsd_b32", VOP_NO_EXT>; - -} // End Uses = [M0, EXEC] - -// These instruction only exist on SI and CI -let SubtargetPredicate = isSICI in { - -let SchedRW = [WriteQuarterRate32] in { - -defm V_MOV_FED_B32 : VOP1InstSI , "v_mov_fed_b32", VOP_I32_I32>; -defm V_LOG_CLAMP_F32 : VOP1InstSI , "v_log_clamp_f32", - VOP_F32_F32, int_amdgcn_log_clamp>; -defm V_RCP_CLAMP_F32 : VOP1InstSI , "v_rcp_clamp_f32", VOP_F32_F32>; -defm V_RCP_LEGACY_F32 : VOP1InstSI , "v_rcp_legacy_f32", - VOP_F32_F32, AMDGPUrcp_legacy>; -defm V_RSQ_CLAMP_F32 : VOP1InstSI , "v_rsq_clamp_f32", - VOP_F32_F32, AMDGPUrsq_clamp ->; -defm V_RSQ_LEGACY_F32 : VOP1InstSI , "v_rsq_legacy_f32", - VOP_F32_F32, AMDGPUrsq_legacy ->; - -} // End SchedRW = [WriteQuarterRate32] - -let SchedRW = [WriteDouble] in { - -defm V_RCP_CLAMP_F64 : VOP1InstSI , "v_rcp_clamp_f64", VOP_F64_F64>; -defm V_RSQ_CLAMP_F64 : VOP1InstSI , "v_rsq_clamp_f64", - VOP_F64_F64, AMDGPUrsq_clamp ->; - -} // End SchedRW = [WriteDouble] - -} // End SubtargetPredicate = isSICI - -//===----------------------------------------------------------------------===// // VINTRP Instructions //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/VIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/VIInstrFormats.td +++ lib/Target/AMDGPU/VIInstrFormats.td @@ -97,15 +97,6 @@ let Inst{63-60} = row_mask; } -class VOP1_DPPe op> : VOP_DPPe { - bits<8> vdst; - - let Inst{8-0} = 0xfa; // dpp - let Inst{16-9} = op; - let Inst{24-17} = vdst; - let Inst{31-25} = 0x3f; //encoding -} - class VOP2_DPPe op> : VOP_DPPe { bits<8> vdst; bits<8> src1; @@ -148,15 +139,6 @@ let Inst{59} = src1_imodifiers; } -class VOP1_SDWAe op> : VOP_SDWAe { - bits<8> vdst; - - let Inst{8-0} = 0xf9; // sdwa - let Inst{16-9} = op; - let Inst{24-17} = vdst; - let Inst{31-25} = 0x3f; // encoding -} - class VOP2_SDWAe op> : VOP_SDWAe { bits<8> vdst; bits<8> src1; Index: lib/Target/AMDGPU/VIInstructions.td =================================================================== --- lib/Target/AMDGPU/VIInstructions.td +++ lib/Target/AMDGPU/VIInstructions.td @@ -14,33 +14,6 @@ let DisableSIDecoder = 1 in { //===----------------------------------------------------------------------===// -// VOP1 Instructions -//===----------------------------------------------------------------------===// - -defm V_CVT_F16_U16 : VOP1Inst , "v_cvt_f16_u16", VOP_F16_I16>; -defm V_CVT_F16_I16 : VOP1Inst , "v_cvt_f16_i16", VOP_F16_I16>; -defm V_CVT_U16_F16 : VOP1Inst , "v_cvt_u16_f16", VOP_I16_F16>; -defm V_CVT_I16_F16 : VOP1Inst , "v_cvt_i16_f16", VOP_I16_F16>; -defm V_RCP_F16 : VOP1Inst , "v_rcp_f16", VOP_F16_F16>; -defm V_SQRT_F16 : VOP1Inst , "v_sqrt_f16", VOP_F16_F16>; -defm V_RSQ_F16 : VOP1Inst , "v_rsq_f16", VOP_F16_F16>; -defm V_LOG_F16 : VOP1Inst , "v_log_f16", VOP_F16_F16>; -defm V_EXP_F16 : VOP1Inst , "v_exp_f16", VOP_F16_F16>; -defm V_FREXP_MANT_F16 : VOP1Inst , "v_frexp_mant_f16", - VOP_F16_F16 ->; -defm V_FREXP_EXP_I16_F16 : VOP1Inst , "v_frexp_exp_i16_f16", - VOP_I16_F16 ->; -defm V_FLOOR_F16 : VOP1Inst , "v_floor_f16", VOP_F16_F16>; -defm V_CEIL_F16 : VOP1Inst , 
"v_ceil_f16", VOP_F16_F16>; -defm V_TRUNC_F16 : VOP1Inst , "v_trunc_f16", VOP_F16_F16>; -defm V_RNDNE_F16 : VOP1Inst , "v_rndne_f16", VOP_F16_F16>; -defm V_FRACT_F16 : VOP1Inst , "v_fract_f16", VOP_F16_F16>; -defm V_SIN_F16 : VOP1Inst , "v_sin_f16", VOP_F16_F16>; -defm V_COS_F16 : VOP1Inst , "v_cos_f16", VOP_F16_F16>; - -//===----------------------------------------------------------------------===// // VOP2 Instructions //===----------------------------------------------------------------------===// @@ -102,13 +75,6 @@ // DPP Patterns //===----------------------------------------------------------------------===// -def : Pat < - (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, - imm:$bound_ctrl), - (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask), - (as_i32imm $bank_mask), (as_i1imm $bound_ctrl)) ->; - //===----------------------------------------------------------------------===// // Misc Patterns //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/VOP1Instructions.td =================================================================== --- /dev/null +++ lib/Target/AMDGPU/VOP1Instructions.td @@ -0,0 +1,556 @@ +//===-- VOP1Instructions.td - Vector Instruction Defintions ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// VOP1 Classes +//===----------------------------------------------------------------------===// + +class VOP1e op, VOPProfile P> : Enc32 { + bits<8> vdst; + bits<9> src0; + + let Inst{8-0} = !if(P.HasSrc0, src0{8-0}, 0); + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; //encoding +} + +class VOP1_Pseudo pattern=[]> : + InstSI , + VOP , + SIMCInstr , + MnemonicAlias { + + let isPseudo = 1; + let isCodeGenOnly = 1; + let UseNamedOperandTable = 1; + + string Mnemonic = opName; + string AsmOperands = P.Asm32; + + let Size = 4; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let SubtargetPredicate = isGCN; + + let VOP1 = 1; + let VALU = 1; + let Uses = [EXEC]; + + let AsmVariantName = AMDGPUAsmVariants.Default; + + VOPProfile Pfl = P; +} + +class VOP1_Real : + InstSI , + SIMCInstr { + + let isPseudo = 0; + let isCodeGenOnly = 0; + + // copy relevant pseudo op flags + let SubtargetPredicate = ps.SubtargetPredicate; + let AsmMatchConverter = ps.AsmMatchConverter; + let AsmVariantName = ps.AsmVariantName; + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + let TSFlags = ps.TSFlags; +} + +multiclass VOP1Inst { + def _e32 : VOP1_Pseudo ; + def _e64 : VOP3_PseudoNew ; +} + +//===----------------------------------------------------------------------===// +// VOP1 Instructions +//===----------------------------------------------------------------------===// + +let VOPAsmPrefer32Bit = 1 in { +defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>; +} + +let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in { +defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>; +} // End isMoveImm = 1 + +// FIXME: Specify SchedRW for READFIRSTLANE_B32 +// TODO: Make profile for this, there is VOP3 encoding also +def V_READFIRSTLANE_B32 : + InstSI <(outs SReg_32:$vdst), + (ins VGPR_32:$src0), + 
"v_readfirstlane_b32 $vdst, $src0", + [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>, + Enc32 { + + let isCodeGenOnly = 0; + let UseNamedOperandTable = 1; + + let Size = 4; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let SubtargetPredicate = isGCN; + + let VOP1 = 1; + let VALU = 1; + let Uses = [EXEC]; + let isConvergent = 1; + + bits<8> vdst; + bits<9> src0; + + let Inst{8-0} = src0; + let Inst{16-9} = 0x2; + let Inst{24-17} = vdst; + let Inst{31-25} = 0x3f; //encoding +} + +let SchedRW = [WriteQuarterRate32] in { +defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>; +defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>; +defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>; +defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>; +defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>; +defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>; +defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>; +defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>; +defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; +defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>; +defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>; +defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; +defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>; +defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>; +defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>; +defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>; +defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>; +defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; +defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>; +} // End SchedRW = [WriteQuarterRate32] + +defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>; +defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>; +defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>; +defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>; +defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>; +defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>; + +let SchedRW = [WriteQuarterRate32] in { +defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>; +defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>; +defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32>; +defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>; +} // End SchedRW = [WriteQuarterRate32] + +let SchedRW = [WriteDouble] in { +defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>; +defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>; +} // End SchedRW = [WriteDouble]; + +defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>; + +let SchedRW = [WriteDouble] in { +defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>; +} // End SchedRW = [WriteDouble] + +let SchedRW = [WriteQuarterRate32] in { +defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>; +defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>; +} // End SchedRW = [WriteQuarterRate32] + +defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>; +defm V_BFREV_B32 : VOP1Inst 
<"v_bfrev_b32", VOP_I32_I32>; +defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>; +defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>; +defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>; +defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>; + +let SchedRW = [WriteDoubleAdd] in { +defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>; +defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>; +} // End SchedRW = [WriteDoubleAdd] + +defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>; +defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>; + +let VOPAsmPrefer32Bit = 1 in { +defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT>; +} + +// Restrict src0 to be VGPR +def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> { + let Src0RC32 = VRegSrc_32; + let Src0RC64 = VRegSrc_32; + + let HasExt = 0; +} + +// Special case because there are no true output operands. Hack vdst +// to be a src operand. The custom inserter must add a tied implicit +// def and use of the super register since there seems to be no way to +// add an implicit def of a virtual register in tablegen. +def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { + let Src0RC32 = VOPDstOperand; + let Src0RC64 = VOPDstOperand; + + let Outs = (outs); + let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0); + let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); + + let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_imodifiers, VCSrc_b32:$src0, + clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel); + + let Asm32 = getAsm32<1, 1>.ret; + let Asm64 = getAsm64<1, 1, 0>.ret; + let AsmDPP = getAsmDPP<1, 1, 0>.ret; + let AsmSDWA = getAsmSDWA<1, 1, 0>.ret; + + let HasExt = 0; + let HasDst = 0; + let EmitDst = 1; // force vdst emission +} + +let Uses = [M0, EXEC] in { +// v_movreld_b32 is a special case because the destination output + // register is really a source. It isn't actually read (but may be + // written), and is only to provide the base register to start + // indexing from. Tablegen seems to not let you define an implicit + // virtual register output for the super register being written into, + // so this must have an implicit def of the register added to it. 
+defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>; +defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>; +defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT>; +} // End Uses = [M0, EXEC] + +// These instruction only exist on SI and CI +let SubtargetPredicate = isSICI in { + +let SchedRW = [WriteQuarterRate32] in { +defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>; +defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>; +defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>; +defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>; +defm V_RSQ_CLAMP_F32 : VOP1Inst <"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>; +defm V_RSQ_LEGACY_F32 : VOP1Inst <"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>; +} // End SchedRW = [WriteQuarterRate32] + +let SchedRW = [WriteDouble] in { +defm V_RCP_CLAMP_F64 : VOP1Inst <"v_rcp_clamp_f64", VOP_F64_F64>; +defm V_RSQ_CLAMP_F64 : VOP1Inst <"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>; +} // End SchedRW = [WriteDouble] + +} // End SubtargetPredicate = isSICI + + +let SubtargetPredicate = isCIVI in { + +let SchedRW = [WriteDoubleAdd] in { +defm V_TRUNC_F64 : VOP1Inst <"v_trunc_f64", VOP_F64_F64, ftrunc>; +defm V_CEIL_F64 : VOP1Inst <"v_ceil_f64", VOP_F64_F64, fceil>; +defm V_FLOOR_F64 : VOP1Inst <"v_floor_f64", VOP_F64_F64, ffloor>; +defm V_RNDNE_F64 : VOP1Inst <"v_rndne_f64", VOP_F64_F64, frint>; +} // End SchedRW = [WriteDoubleAdd] + +let SchedRW = [WriteQuarterRate32] in { +defm V_LOG_LEGACY_F32 : VOP1Inst <"v_log_legacy_f32", VOP_F32_F32>; +defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>; +} // End SchedRW = [WriteQuarterRate32] + +} // End SubtargetPredicate = isCIVI + + +let SubtargetPredicate = isVI in { + +defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16>; +defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16>; +defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16>; +defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16>; +defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16>; +defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16>; +defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16>; +defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16>; +defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16>; +defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16>; +defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16>; +defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16>; +defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16>; +defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16>; +defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16>; +defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16>; +defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16>; +defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16>; + +} + +//===----------------------------------------------------------------------===// +// Target +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SI +//===----------------------------------------------------------------------===// + +multiclass VOP1_Real_si op> { + let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in { + def _e32_si : + VOP1_Real(NAME#"_e32"), SIEncodingFamily.SI>, + VOP1e(NAME#"_e32").Pfl>; + def _e64_si : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, + VOP3e_siNew <{1, 1, op{6-0}}, 
!cast(NAME#"_e64").Pfl>; + } +} + +defm V_NOP : VOP1_Real_si <0x0>; +defm V_MOV_B32 : VOP1_Real_si <0x1>; +defm V_CVT_I32_F64 : VOP1_Real_si <0x3>; +defm V_CVT_F64_I32 : VOP1_Real_si <0x4>; +defm V_CVT_F32_I32 : VOP1_Real_si <0x5>; +defm V_CVT_F32_U32 : VOP1_Real_si <0x6>; +defm V_CVT_U32_F32 : VOP1_Real_si <0x7>; +defm V_CVT_I32_F32 : VOP1_Real_si <0x8>; +defm V_MOV_FED_B32 : VOP1_Real_si <0x9>; +defm V_CVT_F16_F32 : VOP1_Real_si <0xa>; +defm V_CVT_F32_F16 : VOP1_Real_si <0xb>; +defm V_CVT_RPI_I32_F32 : VOP1_Real_si <0xc>; +defm V_CVT_FLR_I32_F32 : VOP1_Real_si <0xd>; +defm V_CVT_OFF_F32_I4 : VOP1_Real_si <0xe>; +defm V_CVT_F32_F64 : VOP1_Real_si <0xf>; +defm V_CVT_F64_F32 : VOP1_Real_si <0x10>; +defm V_CVT_F32_UBYTE0 : VOP1_Real_si <0x11>; +defm V_CVT_F32_UBYTE1 : VOP1_Real_si <0x12>; +defm V_CVT_F32_UBYTE2 : VOP1_Real_si <0x13>; +defm V_CVT_F32_UBYTE3 : VOP1_Real_si <0x14>; +defm V_CVT_U32_F64 : VOP1_Real_si <0x15>; +defm V_CVT_F64_U32 : VOP1_Real_si <0x16>; +defm V_FRACT_F32 : VOP1_Real_si <0x20>; +defm V_TRUNC_F32 : VOP1_Real_si <0x21>; +defm V_CEIL_F32 : VOP1_Real_si <0x22>; +defm V_RNDNE_F32 : VOP1_Real_si <0x23>; +defm V_FLOOR_F32 : VOP1_Real_si <0x24>; +defm V_EXP_F32 : VOP1_Real_si <0x25>; +defm V_LOG_CLAMP_F32 : VOP1_Real_si <0x26>; +defm V_LOG_F32 : VOP1_Real_si <0x27>; +defm V_RCP_CLAMP_F32 : VOP1_Real_si <0x28>; +defm V_RCP_LEGACY_F32 : VOP1_Real_si <0x29>; +defm V_RCP_F32 : VOP1_Real_si <0x2a>; +defm V_RCP_IFLAG_F32 : VOP1_Real_si <0x2b>; +defm V_RSQ_CLAMP_F32 : VOP1_Real_si <0x2c>; +defm V_RSQ_LEGACY_F32 : VOP1_Real_si <0x2d>; +defm V_RSQ_F32 : VOP1_Real_si <0x2e>; +defm V_RCP_F64 : VOP1_Real_si <0x2f>; +defm V_RCP_CLAMP_F64 : VOP1_Real_si <0x30>; +defm V_RSQ_F64 : VOP1_Real_si <0x31>; +defm V_RSQ_CLAMP_F64 : VOP1_Real_si <0x32>; +defm V_SQRT_F32 : VOP1_Real_si <0x33>; +defm V_SQRT_F64 : VOP1_Real_si <0x34>; +defm V_SIN_F32 : VOP1_Real_si <0x35>; +defm V_COS_F32 : VOP1_Real_si <0x36>; +defm V_NOT_B32 : VOP1_Real_si <0x37>; +defm V_BFREV_B32 : VOP1_Real_si <0x38>; +defm V_FFBH_U32 : VOP1_Real_si <0x39>; +defm V_FFBL_B32 : VOP1_Real_si <0x3a>; +defm V_FFBH_I32 : VOP1_Real_si <0x3b>; +defm V_FREXP_EXP_I32_F64 : VOP1_Real_si <0x3c>; +defm V_FREXP_MANT_F64 : VOP1_Real_si <0x3d>; +defm V_FRACT_F64 : VOP1_Real_si <0x3e>; +defm V_FREXP_EXP_I32_F32 : VOP1_Real_si <0x3f>; +defm V_FREXP_MANT_F32 : VOP1_Real_si <0x40>; +defm V_CLREXCP : VOP1_Real_si <0x41>; +defm V_MOVRELD_B32 : VOP1_Real_si <0x42>; +defm V_MOVRELS_B32 : VOP1_Real_si <0x43>; +defm V_MOVRELSD_B32 : VOP1_Real_si <0x44>; + +//===----------------------------------------------------------------------===// +// CI +//===----------------------------------------------------------------------===// + +multiclass VOP1_Real_ci op> { + let AssemblerPredicates = [isCIOnly], DecoderNamespace = "CI" in { + def _e32_ci : + VOP1_Real(NAME#"_e32"), SIEncodingFamily.SI>, + VOP1e(NAME#"_e32").Pfl>; + def _e64_ci : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, + VOP3e_siNew <{1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; + } +} + +defm V_TRUNC_F64 : VOP1_Real_ci <0x17>; +defm V_CEIL_F64 : VOP1_Real_ci <0x18>; +defm V_FLOOR_F64 : VOP1_Real_ci <0x1A>; +defm V_RNDNE_F64 : VOP1_Real_ci <0x19>; +defm V_LOG_LEGACY_F32 : VOP1_Real_ci <0x45>; +defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>; + +//===----------------------------------------------------------------------===// +// VI +//===----------------------------------------------------------------------===// + +class VOP1_SDWAe op, VOPProfile P> : VOP_SDWAeNew
<P>
{ + bits<8> vdst; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; // encoding +} + +class VOP1_SDWA op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : + VOP_SDWA , + VOP1_SDWAe { + let Defs = ps.Defs; + let Uses = ps.Uses; + let SchedRW = ps.SchedRW; + let hasSideEffects = ps.hasSideEffects; + let SubtargetPredicate = isVI; + let AsmMatchConverter = "cvtSdwaVOP1"; + let AssemblerPredicate = !if(p.HasExt, isVI, DisableInst); + let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.SDWA, + AMDGPUAsmVariants.Disable); + let DecoderNamespace = "SDWA"; +} + +class VOP1_DPPe op, VOPProfile P> : VOP_DPPeNew
<P>
{ + bits<8> vdst; + + let Inst{8-0} = 0xfa; // dpp + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; //encoding +} + +class VOP1_DPP op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : + VOP_DPP , + VOP1_DPPe { + let Defs = ps.Defs; + let Uses = ps.Uses; + let SchedRW = ps.SchedRW; + let hasSideEffects = ps.hasSideEffects; + let SubtargetPredicate = isVI; + let AssemblerPredicate = !if(p.HasExt, isVI, DisableInst); + let AsmVariantName = !if(p.HasExt, AMDGPUAsmVariants.DPP, + AMDGPUAsmVariants.Disable); + let DecoderNamespace = "DPP"; +} + +multiclass VOP1_Real_vi op> { + let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { + def _e32_vi : + VOP1_Real(NAME#"_e32"), SIEncodingFamily.VI>, + VOP1e(NAME#"_e32").Pfl>; + def _e64_vi : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3e_viNew (NAME#"_e64").Pfl>; + } + + // for now left sdwa/dpp only for asm/dasm + // TODO: add corresponding pseudo + def _sdwa : VOP1_SDWA(NAME#"_e32")>; + def _dpp : VOP1_DPP(NAME#"_e32")>; +} + +defm V_NOP : VOP1_Real_vi <0x0>; +defm V_MOV_B32 : VOP1_Real_vi <0x1>; +defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>; +defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>; +defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>; +defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>; +defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>; +defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>; +defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>; +defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>; +defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>; +defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>; +defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>; +defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>; +defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>; +defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>; +defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>; +defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>; +defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>; +defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>; +defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>; +defm V_FRACT_F32 : VOP1_Real_vi <0x1b>; +defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>; +defm V_CEIL_F32 : VOP1_Real_vi <0x1d>; +defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>; +defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>; +defm V_EXP_F32 : VOP1_Real_vi <0x20>; +defm V_LOG_F32 : VOP1_Real_vi <0x21>; +defm V_RCP_F32 : VOP1_Real_vi <0x22>; +defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>; +defm V_RSQ_F32 : VOP1_Real_vi <0x24>; +defm V_RCP_F64 : VOP1_Real_vi <0x25>; +defm V_RSQ_F64 : VOP1_Real_vi <0x26>; +defm V_SQRT_F32 : VOP1_Real_vi <0x27>; +defm V_SQRT_F64 : VOP1_Real_vi <0x28>; +defm V_SIN_F32 : VOP1_Real_vi <0x29>; +defm V_COS_F32 : VOP1_Real_vi <0x2a>; +defm V_NOT_B32 : VOP1_Real_vi <0x2b>; +defm V_BFREV_B32 : VOP1_Real_vi <0x2c>; +defm V_FFBH_U32 : VOP1_Real_vi <0x2d>; +defm V_FFBL_B32 : VOP1_Real_vi <0x2e>; +defm V_FFBH_I32 : VOP1_Real_vi <0x2f>; +defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>; +defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>; +defm V_FRACT_F64 : VOP1_Real_vi <0x32>; +defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>; +defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>; +defm V_CLREXCP : VOP1_Real_vi <0x35>; +defm V_MOVRELD_B32 : VOP1_Real_vi <0x36>; +defm V_MOVRELS_B32 : VOP1_Real_vi <0x37>; +defm V_MOVRELSD_B32 : VOP1_Real_vi <0x38>; +defm V_TRUNC_F64 : VOP1_Real_vi <0x17>; +defm V_CEIL_F64 : VOP1_Real_vi <0x18>; +defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>; +defm V_RNDNE_F64 : VOP1_Real_vi <0x19>; +defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>; +defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>; +defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>; +defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>; +defm V_CVT_U16_F16 : 
VOP1_Real_vi <0x3b>; +defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>; +defm V_RCP_F16 : VOP1_Real_vi <0x3d>; +defm V_SQRT_F16 : VOP1_Real_vi <0x3e>; +defm V_RSQ_F16 : VOP1_Real_vi <0x3f>; +defm V_LOG_F16 : VOP1_Real_vi <0x40>; +defm V_EXP_F16 : VOP1_Real_vi <0x41>; +defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>; +defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>; +defm V_FLOOR_F16 : VOP1_Real_vi <0x44>; +defm V_CEIL_F16 : VOP1_Real_vi <0x45>; +defm V_TRUNC_F16 : VOP1_Real_vi <0x46>; +defm V_RNDNE_F16 : VOP1_Real_vi <0x47>; +defm V_FRACT_F16 : VOP1_Real_vi <0x48>; +defm V_SIN_F16 : VOP1_Real_vi <0x49>; +defm V_COS_F16 : VOP1_Real_vi <0x4a>; + +let Predicates = [isVI] in { + +def : Pat < + (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, + imm:$bound_ctrl), + (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask), + (as_i32imm $bank_mask), (as_i1imm $bound_ctrl)) +>; + +} // End Predicates = [isVI] + + Index: lib/Target/AMDGPU/VOPInstructions.td =================================================================== --- lib/Target/AMDGPU/VOPInstructions.td +++ lib/Target/AMDGPU/VOPInstructions.td @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -class VOP3_PseudoNew pattern, bit VOP3Only = 0> : +class VOP3_PseudoNew pattern=[], bit VOP3Only = 0> : InstSI , VOP , SIMCInstr, @@ -81,7 +81,7 @@ let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); let Inst{31-26} = 0x34; //encoding - let Inst{40-32} = src0; + let Inst{40-32} = !if(P.HasSrc0, src0, 0); let Inst{49-41} = !if(P.HasSrc1, src1, 0); let Inst{58-50} = !if(P.HasSrc2, src2, 0); let Inst{60-59} = !if(P.HasOMod, omod, 0); @@ -92,22 +92,22 @@ class VOP3a_siNew op, VOPProfile P> : VOP3aNew
<P>
{ let Inst{25-17} = op; - let Inst{11} = !if(P.HasClamp, clamp, 0); + let Inst{11} = !if(P.HasClamp, clamp{0}, 0); } class VOP3a_viNew op, VOPProfile P> : VOP3aNew
<P>
{ let Inst{25-16} = op; - let Inst{15} = !if(P.HasClamp, clamp, 0); + let Inst{15} = !if(P.HasClamp, clamp{0}, 0); } class VOP3e_siNew op, VOPProfile P> : VOP3a_siNew { bits<8> vdst; - let Inst{7-0} = vdst; + let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0); } class VOP3e_viNew op, VOPProfile P> : VOP3a_viNew { bits<8> vdst; - let Inst{7-0} = vdst; + let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0); } class VOP3beNew : Enc64 { @@ -140,7 +140,7 @@ class VOP3be_viNew op, VOPProfile P> : VOP3beNew
<P>
{ bits<1> clamp; let Inst{25-16} = op; - let Inst{15} = !if(P.HasClamp, clamp, 0); + let Inst{15} = !if(P.HasClamp, clamp{0}, 0); } class VOP_SDWAeNew : Enc64 { @@ -159,8 +159,8 @@ bits<2> SDWA_UNUSED_PRESERVE = 2; let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); - let Inst{42-40} = !if(P.HasDst, dst_sel{2-0}, SDWA_DWORD{2-0}); - let Inst{44-43} = !if(P.HasDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0}); + let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA_DWORD{2-0}); + let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA_UNUSED_PRESERVE{1-0}); let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA_DWORD{2-0}); let Inst{53-52} = !if(P.HasSrc0Mods, src0_fmodifiers{1-0}, 0); @@ -170,5 +170,26 @@ let Inst{59} = !if(P.HasSrc1IntMods, src1_imodifiers{0}, 0); } +class VOP_DPPeNew : Enc64 { + bits<2> src0_modifiers; + bits<8> src0; + bits<2> src1_modifiers; + bits<9> dpp_ctrl; + bits<1> bound_ctrl; + bits<4> bank_mask; + bits<4> row_mask; + + let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{48-40} = dpp_ctrl; + let Inst{51} = bound_ctrl; + let Inst{52} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg + let Inst{53} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs + let Inst{54} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // src1_neg + let Inst{55} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // src1_abs + let Inst{59-56} = bank_mask; + let Inst{63-60} = row_mask; +} + include "VOPCInstructions.td" +include "VOP1Instructions.td" include "VOP3Instructions.td"
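Note: for reference, a minimal end-to-end sketch of the VOP1 scheme this patch introduces. The multiclass names are the ones defined above; v_example_f32 and the 0x7f opcode are hypothetical placeholders:

  // One target-independent pseudo pair per instruction:
  defm V_EXAMPLE_F32 : VOP1Inst <"v_example_f32", VOP_F32_F32, fexp2>;
  //   produces V_EXAMPLE_F32_e32 (VOP1_Pseudo) and V_EXAMPLE_F32_e64 (VOP3_PseudoNew)

  // Per-subtarget reals bind the opcode and pick the encoding:
  defm V_EXAMPLE_F32 : VOP1_Real_si <0x7f>;  // defines _e32_si and _e64_si
  defm V_EXAMPLE_F32 : VOP1_Real_vi <0x7f>;  // defines _e32_vi, _e64_vi, _sdwa, _dpp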