Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -1890,6 +1890,8 @@ getAsmDPP.ret, ""); field string AsmSDWA = getAsmSDWA.ret; field string AsmSDWA9 = getAsmSDWA9.ret; + + field string TieRegDPP = "$old"; } class VOP_NO_EXT : VOPProfile { @@ -2104,7 +2106,9 @@ // does not actually change the encoding, and thus may be // removed later. [!cast(SIEncodingFamily.GFX80)], - [!cast(SIEncodingFamily.GFX9)]]; + [!cast(SIEncodingFamily.GFX9)], + [!cast(SIEncodingFamily.GFX10)], + [!cast(SIEncodingFamily.SDWA10)]]; } // Get equivalent SOPK instruction. Index: llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td @@ -169,13 +169,16 @@ let Inst{31-25} = 0x3f; //encoding } -let SchedRW = [WriteQuarterRate32] in { +let SchedRW = [WriteDoubleCvt] in { defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>; defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>; defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>; defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>; +} // End SchedRW = [WriteDoubleCvt] + +let SchedRW = [WriteQuarterRate32] in { defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>; defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>; defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>; @@ -409,106 +412,221 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>; } // End SubtargetPredicate = isGFX9Only -//===----------------------------------------------------------------------===// -// Target -//===----------------------------------------------------------------------===// +let SubtargetPredicate = isGFX10Plus in { + defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NONE>; + + let Uses = [M0] in { + // FIXME-GFX10: Should V_MOVRELSD_2_B32 be VOP_NO_EXT? + defm V_MOVRELSD_2_B32 : + VOP1Inst<"v_movrelsd_2_b32", VOP_NO_EXT>; + + def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> { + let Constraints = "$vdst = $src1, $vdst1 = $src0"; + let DisableEncoding = "$vdst1,$src1"; + let SchedRW = [Write64Bit, Write64Bit]; + } + } // End Uses = [M0] +} // End SubtargetPredicate = isGFX10Plus + +//===----------------------------------------------------------------------===// +// Target-specific instruction encodings. +//===----------------------------------------------------------------------===// + +class VOP1_DPP op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : + VOP_DPP { + let hasSideEffects = ps.hasSideEffects; + let Defs = ps.Defs; + let SchedRW = ps.SchedRW; + let Uses = ps.Uses; + + bits<8> vdst; + let Inst{8-0} = 0xfa; + let Inst{16-9} = op; + let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; +} //===----------------------------------------------------------------------===// -// SI +// GFX10. //===----------------------------------------------------------------------===// -multiclass VOP1_Real_si op> { - let AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7" in { - def _e32_si : +let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { + multiclass VOP1Only_Real_gfx10 op> { + def _gfx10 : + VOP1_Real(NAME), SIEncodingFamily.GFX10>, + VOP1e(NAME).Pfl>; + } + multiclass VOP1_Real_e32_gfx10 op> { + def _e32_gfx10 : + VOP1_Real(NAME#"_e32"), SIEncodingFamily.GFX10>, + VOP1e(NAME#"_e32").Pfl>; + } + multiclass VOP1_Real_e64_gfx10 op> { + def _e64_gfx10 : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX10>, + VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; + } + multiclass VOP1_Real_sdwa_gfx10 op> { + def _sdwa_gfx10 : + VOP_SDWA10_Real(NAME#"_sdwa")>, + VOP1_SDWA9Ae(NAME#"_sdwa").Pfl> { + let DecoderNamespace = "SDWA10"; + } + } +} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" + +multiclass VOP1_Real_gfx10_no_dpp op> : + VOP1_Real_e32_gfx10, VOP1_Real_e64_gfx10, + VOP1_Real_sdwa_gfx10; + +multiclass VOP1_Real_gfx10 op> : + VOP1_Real_e32_gfx10, VOP1_Real_e64_gfx10, + VOP1_Real_sdwa_gfx10; + +defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>; +defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>; +defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>; +defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>; +defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>; +defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>; +defm V_RCP_F16 : VOP1_Real_gfx10<0x054>; +defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>; +defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>; +defm V_LOG_F16 : VOP1_Real_gfx10<0x057>; +defm V_EXP_F16 : VOP1_Real_gfx10<0x058>; +defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>; +defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>; +defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>; +defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>; +defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>; +defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>; +defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>; +defm V_SIN_F16 : VOP1_Real_gfx10<0x060>; +defm V_COS_F16 : VOP1_Real_gfx10<0x061>; +defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>; +defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>; +defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>; + +defm V_SWAP_B32 : VOP1Only_Real_gfx10<0x065>; +defm V_SWAPREL_B32 : VOP1Only_Real_gfx10<0x068>; + +//===----------------------------------------------------------------------===// +// GFX7, GFX10. +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in { + multiclass VOP1_Real_e32_gfx7 op> { + def _e32_gfx7 : VOP1_Real(NAME#"_e32"), SIEncodingFamily.SI>, VOP1e(NAME#"_e32").Pfl>; - def _e64_si : + } + multiclass VOP1_Real_e64_gfx7 op> { + def _e64_gfx7 : VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, - VOP3e_si <{1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; + VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; } -} +} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" + +multiclass VOP1_Real_gfx7 op> : + VOP1_Real_e32_gfx7, VOP1_Real_e64_gfx7; + +multiclass VOP1_Real_gfx7_gfx10 op> : + VOP1_Real_gfx7, VOP1_Real_gfx10; + +defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>; +defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>; + +defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10<0x017>; +defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10<0x018>; +defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10<0x019>; +defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10<0x01a>; -defm V_NOP : VOP1_Real_si <0x0>; -defm V_MOV_B32 : VOP1_Real_si <0x1>; -defm V_CVT_I32_F64 : VOP1_Real_si <0x3>; -defm V_CVT_F64_I32 : VOP1_Real_si <0x4>; -defm V_CVT_F32_I32 : VOP1_Real_si <0x5>; -defm V_CVT_F32_U32 : VOP1_Real_si <0x6>; -defm V_CVT_U32_F32 : VOP1_Real_si <0x7>; -defm V_CVT_I32_F32 : VOP1_Real_si <0x8>; -defm V_MOV_FED_B32 : VOP1_Real_si <0x9>; -defm V_CVT_F16_F32 : VOP1_Real_si <0xa>; -defm V_CVT_F32_F16 : VOP1_Real_si <0xb>; -defm V_CVT_RPI_I32_F32 : VOP1_Real_si <0xc>; -defm V_CVT_FLR_I32_F32 : VOP1_Real_si <0xd>; -defm V_CVT_OFF_F32_I4 : VOP1_Real_si <0xe>; -defm V_CVT_F32_F64 : VOP1_Real_si <0xf>; -defm V_CVT_F64_F32 : VOP1_Real_si <0x10>; -defm V_CVT_F32_UBYTE0 : VOP1_Real_si <0x11>; -defm V_CVT_F32_UBYTE1 : VOP1_Real_si <0x12>; -defm V_CVT_F32_UBYTE2 : VOP1_Real_si <0x13>; -defm V_CVT_F32_UBYTE3 : VOP1_Real_si <0x14>; -defm V_CVT_U32_F64 : VOP1_Real_si <0x15>; -defm V_CVT_F64_U32 : VOP1_Real_si <0x16>; -defm V_FRACT_F32 : VOP1_Real_si <0x20>; -defm V_TRUNC_F32 : VOP1_Real_si <0x21>; -defm V_CEIL_F32 : VOP1_Real_si <0x22>; -defm V_RNDNE_F32 : VOP1_Real_si <0x23>; -defm V_FLOOR_F32 : VOP1_Real_si <0x24>; -defm V_EXP_F32 : VOP1_Real_si <0x25>; -defm V_LOG_CLAMP_F32 : VOP1_Real_si <0x26>; -defm V_LOG_F32 : VOP1_Real_si <0x27>; -defm V_RCP_CLAMP_F32 : VOP1_Real_si <0x28>; -defm V_RCP_LEGACY_F32 : VOP1_Real_si <0x29>; -defm V_RCP_F32 : VOP1_Real_si <0x2a>; -defm V_RCP_IFLAG_F32 : VOP1_Real_si <0x2b>; -defm V_RSQ_CLAMP_F32 : VOP1_Real_si <0x2c>; -defm V_RSQ_LEGACY_F32 : VOP1_Real_si <0x2d>; -defm V_RSQ_F32 : VOP1_Real_si <0x2e>; -defm V_RCP_F64 : VOP1_Real_si <0x2f>; -defm V_RCP_CLAMP_F64 : VOP1_Real_si <0x30>; -defm V_RSQ_F64 : VOP1_Real_si <0x31>; -defm V_RSQ_CLAMP_F64 : VOP1_Real_si <0x32>; -defm V_SQRT_F32 : VOP1_Real_si <0x33>; -defm V_SQRT_F64 : VOP1_Real_si <0x34>; -defm V_SIN_F32 : VOP1_Real_si <0x35>; -defm V_COS_F32 : VOP1_Real_si <0x36>; -defm V_NOT_B32 : VOP1_Real_si <0x37>; -defm V_BFREV_B32 : VOP1_Real_si <0x38>; -defm V_FFBH_U32 : VOP1_Real_si <0x39>; -defm V_FFBL_B32 : VOP1_Real_si <0x3a>; -defm V_FFBH_I32 : VOP1_Real_si <0x3b>; -defm V_FREXP_EXP_I32_F64 : VOP1_Real_si <0x3c>; -defm V_FREXP_MANT_F64 : VOP1_Real_si <0x3d>; -defm V_FRACT_F64 : VOP1_Real_si <0x3e>; -defm V_FREXP_EXP_I32_F32 : VOP1_Real_si <0x3f>; -defm V_FREXP_MANT_F32 : VOP1_Real_si <0x40>; -defm V_CLREXCP : VOP1_Real_si <0x41>; -defm V_MOVRELD_B32 : VOP1_Real_si <0x42>; -defm V_MOVRELS_B32 : VOP1_Real_si <0x43>; -defm V_MOVRELSD_B32 : VOP1_Real_si <0x44>; - -//===----------------------------------------------------------------------===// -// CI -//===----------------------------------------------------------------------===// - -multiclass VOP1_Real_ci op> { - let AssemblerPredicates = [isGFX7Only], DecoderNamespace = "GFX7" in { - def _e32_ci : +//===----------------------------------------------------------------------===// +// GFX6, GFX7, GFX10. +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { + multiclass VOP1_Real_e32_gfx6_gfx7 op> { + def _e32_gfx6_gfx7 : VOP1_Real(NAME#"_e32"), SIEncodingFamily.SI>, VOP1e(NAME#"_e32").Pfl>; - def _e64_ci : + } + multiclass VOP1_Real_e64_gfx6_gfx7 op> { + def _e64_gfx6_gfx7 : VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, - VOP3e_si <{1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; + VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; } -} +} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" + +multiclass VOP1_Real_gfx6_gfx7 op> : + VOP1_Real_e32_gfx6_gfx7, VOP1_Real_e64_gfx6_gfx7; + +multiclass VOP1_Real_gfx6_gfx7_gfx10 op> : + VOP1_Real_gfx6_gfx7, VOP1_Real_gfx10; -defm V_TRUNC_F64 : VOP1_Real_ci <0x17>; -defm V_CEIL_F64 : VOP1_Real_ci <0x18>; -defm V_FLOOR_F64 : VOP1_Real_ci <0x1A>; -defm V_RNDNE_F64 : VOP1_Real_ci <0x19>; -defm V_LOG_LEGACY_F32 : VOP1_Real_ci <0x45>; -defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>; +multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp op> : + VOP1_Real_gfx6_gfx7, VOP1_Real_gfx10_no_dpp; + +defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>; +defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>; +defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>; +defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>; +defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>; +defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>; +defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>; + +defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10<0x000>; +defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x001>; +defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x003>; +defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x004>; +defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x005>; +defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x006>; +defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x007>; +defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x008>; +defm V_MOV_FED_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x009>; +defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>; +defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>; +defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>; +defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>; +defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10<0x00e>; +defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x00f>; +defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x010>; +defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10<0x011>; +defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10<0x012>; +defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10<0x013>; +defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10<0x014>; +defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x015>; +defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x016>; +defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x020>; +defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x021>; +defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x022>; +defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x023>; +defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x024>; +defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x025>; +defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x027>; +defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02a>; +defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02b>; +defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02e>; +defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x02f>; +defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x031>; +defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x033>; +defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x034>; +defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x035>; +defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x036>; +defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x037>; +defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x038>; +defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>; +defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>; +defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>; +defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03c>; +defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03d>; +defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03e>; +defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>; +defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>; +defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>; +defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x042>; +defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x043>; +defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x044>; //===----------------------------------------------------------------------===// // GFX8, GFX9 (VI). Index: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td @@ -731,13 +731,13 @@ multiclass VOP2_Real_e32e64_si op> : VOP2_Real_e32_si { def _e64_si : VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, - VOP3e_si <{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; + VOP3e_gfx6_gfx7 <{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; } multiclass VOP2be_Real_e32e64_si op> : VOP2_Real_e32_si { def _e64_si : VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, - VOP3be_si <{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; + VOP3be_gfx6_gfx7 <{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; } } // End AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7" Index: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td @@ -671,12 +671,12 @@ multiclass VOP3_Real_si op> { def _si : VOP3_Real(NAME), SIEncodingFamily.SI>, - VOP3e_si (NAME).Pfl>; + VOP3e_gfx6_gfx7 (NAME).Pfl>; } multiclass VOP3be_Real_si op> { def _si : VOP3_Real(NAME), SIEncodingFamily.SI>, - VOP3be_si (NAME).Pfl>; + VOP3be_gfx6_gfx7 (NAME).Pfl>; } } // End AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7" @@ -740,7 +740,7 @@ multiclass VOP3_Real_ci op> { def _ci : VOP3_Real(NAME), SIEncodingFamily.SI>, - VOP3e_si (NAME).Pfl> { + VOP3e_gfx6_gfx7 (NAME).Pfl> { let AssemblerPredicates = [isGFX7Only]; let DecoderNamespace = "GFX7"; } @@ -748,7 +748,7 @@ multiclass VOP3be_Real_ci op> { def _ci : VOP3_Real(NAME), SIEncodingFamily.SI>, - VOP3be_si (NAME).Pfl> { + VOP3be_gfx6_gfx7 (NAME).Pfl> { let AssemblerPredicates = [isGFX7Only]; let DecoderNamespace = "GFX7"; } Index: llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td @@ -708,8 +708,8 @@ VOPCe; def _e64_si : - VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, - VOP3a_si (NAME#"_e64").Pfl> { + VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, + VOP3a_gfx6_gfx7(NAME#"_e64").Pfl> { // Encoding used for VOPC instructions encoded as VOP3 // Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst bits<8> sdst; Index: llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOPInstructions.td @@ -188,9 +188,15 @@ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); } -class VOP3a_si op, VOPProfile P> : VOP3a

{ +class VOP3a_gfx6_gfx7 op, VOPProfile p> : VOP3a

{ + let Inst{11} = !if(p.HasClamp, clamp{0}, 0); let Inst{25-17} = op; - let Inst{11} = !if(P.HasClamp, clamp{0}, 0); +} + +class VOP3a_gfx10 op, VOPProfile p> : VOP3a

{ + let Inst{15} = !if(p.HasClamp, clamp{0}, 0); + let Inst{25-16} = op; + let Inst{31-26} = 0x35; } class VOP3a_vi op, VOPProfile P> : VOP3a

{ @@ -198,9 +204,14 @@ let Inst{15} = !if(P.HasClamp, clamp{0}, 0); } -class VOP3e_si op, VOPProfile P> : VOP3a_si { +class VOP3e_gfx6_gfx7 op, VOPProfile p> : VOP3a_gfx6_gfx7 { bits<8> vdst; - let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{7-0} = !if(p.EmitDst, vdst{7-0}, 0); +} + +class VOP3e_gfx10 op, VOPProfile p> : VOP3a_gfx10 { + bits<8> vdst; + let Inst{7-0} = !if(p.EmitDst, vdst{7-0}, 0); } class VOP3e_vi op, VOPProfile P> : VOP3a_vi { @@ -215,6 +226,13 @@ let Inst{14} = !if(P.HasDst, src0_modifiers{3}, 0); } +class VOP3OpSel_gfx10 op, VOPProfile p> : VOP3e_gfx10 { + let Inst{11} = !if(p.HasSrc0, src0_modifiers{2}, 0); + let Inst{12} = !if(p.HasSrc1, src1_modifiers{2}, 0); + let Inst{13} = !if(p.HasSrc2, src2_modifiers{2}, 0); + let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0); +} + // NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa class VOP3Interp_vi op, VOPProfile P> : VOP3e_vi { bits<2> attrchan; @@ -234,6 +252,21 @@ let Inst{49-41} = src0; } +class VOP3Interp_gfx10 op, VOPProfile p> : VOP3e_gfx10 { + bits<6> attr; + bits<2> attrchan; + bits<1> high; + + let Inst{8} = 0; + let Inst{9} = !if(p.HasSrc0Mods, src0_modifiers{1}, 0); + let Inst{37-32} = attr; + let Inst{39-38} = attrchan; + let Inst{40} = !if(p.HasHigh, high, 0); + let Inst{49-41} = src0; + let Inst{61} = 0; + let Inst{62} = !if(p.HasSrc0Mods, src0_modifiers{0}, 0); +} + class VOP3be : Enc64 { bits<8> vdst; bits<2> src0_modifiers; @@ -293,10 +326,21 @@ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo) } -class VOP3be_si op, VOPProfile P> : VOP3be

{ +class VOP3Pe_gfx10 op, VOPProfile P> : VOP3Pe { + let Inst{31-26} = 0x33; //encoding +} + +class VOP3be_gfx6_gfx7 op, VOPProfile p> : VOP3be

{ let Inst{25-17} = op; } +class VOP3be_gfx10 op, VOPProfile p> : VOP3be

{ + bits<1> clamp; + let Inst{15} = !if(p.HasClamp, clamp{0}, 0); + let Inst{25-16} = op; + let Inst{31-26} = 0x35; +} + class VOP3be_vi op, VOPProfile P> : VOP3be

{ bits<1> clamp; let Inst{25-16} = op; @@ -391,7 +435,7 @@ class VOP_SDWA9Be : VOP_SDWA9e

{ bits<8> sdst; // {vcc_sdst{0}, sdst{6-0}} - let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, 0); + let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, ?); let Inst{47} = !if(P.EmitDst, sdst{7}, 0); } @@ -454,9 +498,8 @@ let TSFlags = ps.TSFlags; } -class VOP_SDWA9_Real : - InstSI , - SIMCInstr { +class Base_VOP_SDWA9_Real : + InstSI { let isPseudo = 0; let isCodeGenOnly = 0; @@ -483,6 +526,19 @@ let TSFlags = ps.TSFlags; } +class VOP_SDWA9_Real : + Base_VOP_SDWA9_Real , + SIMCInstr ; + +class Base_VOP_SDWA10_Real : Base_VOP_SDWA9_Real { + let SubtargetPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA10, DisableInst); + let AssemblerPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA10, DisableInst); + let DecoderNamespace = "SDWA10"; +} + +class VOP_SDWA10_Real : + Base_VOP_SDWA10_Real, SIMCInstr; + class VOP_DPPe : Enc64 { bits<2> src0_modifiers; bits<8> src0; @@ -491,6 +547,7 @@ bits<1> bound_ctrl; bits<4> bank_mask; bits<4> row_mask; + bit fi; let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); let Inst{48-40} = dpp_ctrl; @@ -531,8 +588,8 @@ let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst); let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP, AMDGPUAsmVariants.Disable); - let Constraints = !if(P.NumSrcArgs, "$old = $vdst", ""); - let DisableEncoding = !if(P.NumSrcArgs, "$old", ""); + let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", ""); + let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, ""); let DecoderNamespace = "DPP"; VOPProfile Pfl = P; @@ -566,6 +623,31 @@ let TSFlags = ps.TSFlags; } +class VOP_DPP : + InstSI , + VOP_DPPe

{ + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let UseNamedOperandTable = 1; + + let VALU = 1; + let DPP = 1; + let Size = 8; + + let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", ""); + let SubtargetPredicate = HasDPP; + let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst); + let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP, + AMDGPUAsmVariants.Disable); + let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", ""); + let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, ""); + let DecoderNamespace = "DPP"; +} + class getNumNodeArgs { SDNode N = !cast(Op); SDTypeProfile TP = N.TypeProfile;