Index: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td +++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td @@ -143,10 +143,10 @@ "Additional instructions for CI+" >; -def FeatureVIInsts : SubtargetFeature<"vi-insts", - "VIInsts", +def FeatureGFX8Insts : SubtargetFeature<"gfx8-insts", + "GFX8Insts", "true", - "Additional instructions for VI+" + "Additional instructions for GFX8+" >; def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts", @@ -155,6 +155,12 @@ "Additional instructions for GFX9+" >; +def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts", + "GFX7GFX8GFX9Insts", + "true", + "Instructions shared in GFX7, GFX8, GFX9" +>; + def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime", "HasSMemRealTime", "true", @@ -454,18 +460,19 @@ [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange, - FeatureDoesNotSupportSRAMECC] + FeatureGFX7GFX8GFX9Insts, FeatureDoesNotSupportSRAMECC] >; def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", "volcanic-islands", [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureFlatAddressSpace, - FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts, + FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP, - FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC + FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC, + FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts ] >; @@ -473,13 +480,14 @@ "gfx9", [FeatureFP64, FeatureLocalMemorySize65536, FeatureWavefrontSize64, FeatureFlatAddressSpace, - FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts, + FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp, FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, - FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16 + FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, + FeatureScalarAtomics, FeatureR128A16 ] >; @@ -672,23 +680,44 @@ // Predicate helper class //===----------------------------------------------------------------------===// -def isSICI : Predicate< - "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" - "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" ->, AssemblerPredicate<"!FeatureGCN3Encoding">; +def isGFX6 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">, + AssemblerPredicate<"FeatureSouthernIslands">; + +def isGFX6GFX7 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">, + AssemblerPredicate<"!FeatureGCN3Encoding">; + +def isGFX7 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">, + AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">; + +def isGFX7GFX8GFX9 : + Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, + AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">; + +def isGFX7Plus : + Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">, + AssemblerPredicate<"FeatureCIInsts">; + +def isGFX8Plus : + Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, + AssemblerPredicate<"FeatureGFX8Insts">; + +def isGFX9Plus : + Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, + AssemblerPredicate<"FeatureGFX9Insts">; def isVI : Predicate < "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, AssemblerPredicate<"FeatureGCN3Encoding">; def isGFX9 : Predicate < - "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, - AssemblerPredicate<"FeatureGFX9Insts">; - -// TODO: Either the name to be changed or we simply use IsCI! -def isCIVI : Predicate < - "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">, - AssemblerPredicate<"FeatureCIInsts">; + "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">, + AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts">; def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">, AssemblerPredicate<"FeatureFlatAddressSpace">; @@ -728,11 +757,12 @@ def HasSDWA : Predicate<"Subtarget->hasSDWA()">, AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">; -def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">, - AssemblerPredicate<"FeatureSDWA,FeatureGFX9">; +def HasSDWA9 : + Predicate<"Subtarget->hasSDWA()">, + AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">; def HasDPP : Predicate<"Subtarget->hasDPP()">, - AssemblerPredicate<"FeatureDPP">; + AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">; def HasR128A16 : Predicate<"Subtarget->hasR128A16()">, AssemblerPredicate<"FeatureR128A16">; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td @@ -130,7 +130,7 @@ def : GISelSop2Pat ; let AddedComplexity = 100 in { -let SubtargetPredicate = isSICI in { +let SubtargetPredicate = isGFX6GFX7 in { def : GISelVop2Pat ; } def : GISelVop2CommutePat ; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -311,8 +311,9 @@ bool IsGCN; bool GCN3Encoding; bool CIInsts; - bool VIInsts; + bool GFX8Insts; bool GFX9Insts; + bool GFX7GFX8GFX9Insts; bool SGPRInitBug; bool HasSMemRealTime; bool HasIntClamp; @@ -770,7 +771,7 @@ } bool hasLDSFPAtomics() const { - return VIInsts; + return GFX8Insts; } bool hasDPP() const { @@ -803,15 +804,16 @@ } bool hasSMovFedHazard() const { - return getGeneration() >= AMDGPUSubtarget::GFX9; + return getGeneration() == AMDGPUSubtarget::GFX9; } bool hasReadM0MovRelInterpHazard() const { - return getGeneration() >= AMDGPUSubtarget::GFX9; + return getGeneration() == AMDGPUSubtarget::GFX9; } bool hasReadM0SendMsgHazard() const { - return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS; + return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && + getGeneration() <= AMDGPUSubtarget::GFX9; } /// Return the maximum number of waves per SIMD for kernels using \p SGPRs Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -194,8 +194,9 @@ FP64(false), GCN3Encoding(false), CIInsts(false), - VIInsts(false), + GFX8Insts(false), GFX9Insts(false), + GFX7GFX8GFX9Insts(false), SGPRInitBug(false), HasSMemRealTime(false), HasIntClamp(false), Index: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td @@ -943,7 +943,7 @@ def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">; } -let SubtargetPredicate = isSI in { // isn't on CI & VI +let SubtargetPredicate = isGFX6 in { // isn't on CI & VI /* defm BUFFER_ATOMIC_RSUB : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">; defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">; @@ -1040,7 +1040,7 @@ defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64>; } // End HasPackedD16VMem. -let SubtargetPredicate = isCIVI in { +let SubtargetPredicate = isGFX7Plus in { //===----------------------------------------------------------------------===// // Instruction definitions for CI and newer. @@ -1049,7 +1049,7 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol>; -} // End let SubtargetPredicate = isCIVI +} // End let SubtargetPredicate = isGFX7Plus //===----------------------------------------------------------------------===// // MUBUF Patterns @@ -1340,7 +1340,7 @@ >; } -let SubtargetPredicate = isSICI in { +let SubtargetPredicate = isGFX6GFX7 in { def : MUBUFLoad_PatternADDR64 ; def : MUBUFLoad_PatternADDR64 ; def : MUBUFLoad_PatternADDR64 ; @@ -1348,7 +1348,7 @@ defm : MUBUFLoad_Atomic_Pattern ; defm : MUBUFLoad_Atomic_Pattern ; -} // End SubtargetPredicate = isSICI +} // End SubtargetPredicate = isGFX6GFX7 multiclass MUBUFLoad_Pattern { @@ -1428,6 +1428,7 @@ defm : MUBUFScratchLoadPat_D16; defm : MUBUFScratchLoadPat_D16; } + multiclass MUBUFStore_Atomic_Pattern { // Store follows atomic op convention so address is forst @@ -1442,10 +1443,10 @@ (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0) >; } -let SubtargetPredicate = isSICI in { +let SubtargetPredicate = isGFX6GFX7 in { defm : MUBUFStore_Atomic_Pattern ; defm : MUBUFStore_Atomic_Pattern ; -} // End Predicates = isSICI +} // End Predicates = isGFX6GFX7 multiclass MUBUFStore_Pattern op, MUBUF_Pseudo ps> : MUBUF_Real, Enc64, SIMCInstr { - let AssemblerPredicate=isSICI; - let DecoderNamespace="SICI"; + let AssemblerPredicate=isGFX6GFX7; + let DecoderNamespace="GFX6GFX7"; let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; @@ -1759,8 +1760,8 @@ MTBUF_Real, Enc64, SIMCInstr { - let AssemblerPredicate=isSICI; - let DecoderNamespace="SICI"; + let AssemblerPredicate=isGFX6GFX7; + let DecoderNamespace="GFX6GFX7"; let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; @@ -1804,14 +1805,14 @@ class MUBUF_Real_ci op, MUBUF_Pseudo ps> : MUBUF_Real_si { let AssemblerPredicate=isCIOnly; - let DecoderNamespace="CI"; + let DecoderNamespace="GFX7"; } def BUFFER_WBINVL1_VOL_ci : MUBUF_Real_ci <0x70, BUFFER_WBINVL1_VOL>; //===----------------------------------------------------------------------===// -// VI +// GFX8, GFX9 (VI). //===----------------------------------------------------------------------===// class MUBUF_Real_vi op, MUBUF_Pseudo ps> : Index: llvm/trunk/lib/Target/AMDGPU/DSInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/DSInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/DSInstructions.td @@ -548,7 +548,7 @@ // Instruction definitions for CI and newer. //===----------------------------------------------------------------------===// -let SubtargetPredicate = isCIVI in { +let SubtargetPredicate = isGFX7Plus in { defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>; defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>; @@ -567,13 +567,13 @@ def DS_NOP : DS_VOID<"ds_nop">; -} // let SubtargetPredicate = isCIVI +} // let SubtargetPredicate = isGFX7Plus //===----------------------------------------------------------------------===// // Instruction definitions for VI and newer. //===----------------------------------------------------------------------===// -let SubtargetPredicate = isVI in { +let SubtargetPredicate = isGFX8Plus in { let Uses = [EXEC] in { def DS_PERMUTE_B32 : DS_1A1D_PERMUTE <"ds_permute_b32", @@ -584,7 +584,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">; -} // let SubtargetPredicate = isVI +} // let SubtargetPredicate = isGFX8Plus //===----------------------------------------------------------------------===// // DS Patterns @@ -711,7 +711,7 @@ // v2i32 loads are split into i32 loads on SI during lowering, due to a bug // related to bounds checking. -let OtherPredicates = [LDSRequiresM0Init, isCIVI] in { +let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in { def : DS64Bit4ByteAlignedReadPat; def : DS64Bit4ByteAlignedWritePat; } @@ -804,18 +804,18 @@ >; //===----------------------------------------------------------------------===// -// Real instructions +// Target-specific instruction encodings. //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// SIInstructions.td +// Base ENC_DS for GFX6, GFX7. //===----------------------------------------------------------------------===// class DS_Real_si op, DS_Pseudo ds> : DS_Real , SIMCInstr { - let AssemblerPredicates=[isSICI]; - let DecoderNamespace="SICI"; + let AssemblerPredicates=[isGFX6GFX7]; + let DecoderNamespace="GFX6GFX7"; // encoding let Inst{7-0} = !if(ds.has_offset0, offset0, 0); @@ -979,7 +979,7 @@ def DS_READ_B128_si : DS_Real_si<0xff, DS_READ_B128>; //===----------------------------------------------------------------------===// -// VIInstructions.td +// GFX8, GFX9 (VI). //===----------------------------------------------------------------------===// class DS_Real_vi op, DS_Pseudo ds> : Index: llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td @@ -63,6 +63,8 @@ // and are not considered done until both have been decremented. let VM_CNT = 1; let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1); + + let IsNonFlatSeg = !if(!or(is_flat_global, is_flat_scratch), 1, 0); } class FLAT_Real op, FLAT_Pseudo ps> : @@ -490,7 +492,8 @@ defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat>; -let SubtargetPredicate = isCI in { // CI Only flat instructions : FIXME Only? +// GFX7-only flat instructions. +let SubtargetPredicate = isGFX7 in { defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>; @@ -510,7 +513,7 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", VReg_64, f64>; -} // End SubtargetPredicate = isCI +} // End SubtargetPredicate = isGFX7 let SubtargetPredicate = HasFlatGlobalInsts in { defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; @@ -890,7 +893,7 @@ FLAT_Real , SIMCInstr { let AssemblerPredicate = isCIOnly; - let DecoderNamespace="CI"; + let DecoderNamespace="GFX7"; } def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>; Index: llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td @@ -260,10 +260,10 @@ multiclass MIMG_Atomic_Helper_m { let ssamp = 0, d16 = 0 in { - def _si : MIMG_Atomic_Helper, + def _si : MIMG_Atomic_Helper, SIMCInstr, MIMGe { - let AssemblerPredicates = [isSICI]; + let AssemblerPredicates = [isGFX6GFX7]; let DisableDecoder = DisableSIDecoder; } Index: llvm/trunk/lib/Target/AMDGPU/SIDefines.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIDefines.h +++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h @@ -89,8 +89,11 @@ // Is a D16 buffer instruction. D16Buf = UINT64_C(1) << 50, + // FLAT instruction accesses FLAT_GLBL or FLAT_SCRATCH segment. + IsNonFlatSeg = UINT64_C(1) << 51, + // Uses floating point double precision rounding mode - FPDPRounding = UINT64_C(1) << 51 + FPDPRounding = UINT64_C(1) << 52 }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. Index: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td @@ -10,11 +10,6 @@ // //===----------------------------------------------------------------------===// -def isSI : Predicate<"Subtarget->getGeneration() " - "== AMDGPUSubtarget::SOUTHERN_ISLANDS">, - AssemblerPredicate<"FeatureSouthernIslands">; - - class InstSI pattern = []> : AMDGPUInst, GCNPredicateControl { @@ -115,6 +110,10 @@ // This bit indicates that this is a D16 buffer instruction. field bit D16Buf = 0; + // This field indicates that FLAT instruction accesses FLAT_GLBL or + // FLAT_SCRATCH segment. Must be 0 for non-FLAT instructions. + field bit IsNonFlatSeg = 0; + // This bit indicates that this uses the floating point double precision // rounding mode flags field bit FPDPRounding = 0; @@ -176,7 +175,9 @@ let TSFlags{50} = D16Buf; - let TSFlags{51} = FPDPRounding; + let TSFlags{51} = IsNonFlatSeg; + + let TSFlags{52} = FPDPRounding; let SchedRW = [Write32Bit]; Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -17,7 +17,7 @@ def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">; class GCNPredicateControl : PredicateControl { - Predicate SIAssemblerPredicate = isSICI; + Predicate SIAssemblerPredicate = isGFX6GFX7; Predicate VIAssemblerPredicate = isVI; } @@ -1149,8 +1149,8 @@ def _si : EXP_Helper, SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>, EXPe { - let AssemblerPredicates = [isSICI]; - let DecoderNamespace = "SICI"; + let AssemblerPredicates = [isGFX6GFX7]; + let DecoderNamespace = "GFX6GFX7"; let DisableDecoder = DisableSIDecoder; } @@ -2006,7 +2006,7 @@ VINTRPe , SIMCInstr { let AssemblerPredicate = SIAssemblerPredicate; - let DecoderNamespace = "SICI"; + let DecoderNamespace = "GFX6GFX7"; let DisableDecoder = DisableSIDecoder; } Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td @@ -1614,7 +1614,7 @@ // Fract Patterns //===----------------------------------------------------------------------===// -let SubtargetPredicate = isSI in { +let SubtargetPredicate = isGFX6 in { // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is // used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient @@ -1641,7 +1641,7 @@ DSTCLAMP.NONE, DSTOMOD.NONE) >; -} // End SubtargetPredicates = isSI +} // End SubtargetPredicates = isGFX6 //============================================================================// // Miscellaneous Optimization Patterns @@ -1725,8 +1725,8 @@ def : FPMed3Pat; -let OtherPredicates = [isGFX9] in { +let OtherPredicates = [isGFX9Plus] in { def : FP16Med3Pat; defm : Int16Med3Pat; defm : Int16Med3Pat; -} // End Predicates = [isGFX9] +} // End Predicates = [isGFX9Plus] Index: llvm/trunk/lib/Target/AMDGPU/SMInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SMInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SMInstructions.td @@ -170,7 +170,7 @@ let has_offset = 0; } -class SM_Inval_Pseudo : SM_Pseudo< +class SM_Inval_Pseudo : SM_Pseudo< opName, (outs), (ins), "", [(node)]> { let hasSideEffects = 1; let mayStore = 1; @@ -292,18 +292,18 @@ def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>; def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>; -let SubtargetPredicate = isCIVI in { +let SubtargetPredicate = isGFX7GFX8GFX9 in { def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>; -} // let SubtargetPredicate = isCIVI +} // let SubtargetPredicate = isGFX7GFX8GFX9 -let SubtargetPredicate = isVI in { +let SubtargetPredicate = isGFX8Plus in { def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>; def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>; def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>; defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>; defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>; -} // SubtargetPredicate = isVI +} // SubtargetPredicate = isGFX8Plus let SubtargetPredicate = HasFlatScratchInsts, Uses = [FLAT_SCR] in { defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>; @@ -393,8 +393,8 @@ , SIMCInstr , Enc32 { - let AssemblerPredicates = [isSICI]; - let DecoderNamespace = "SICI"; + let AssemblerPredicates = [isGFX6GFX7]; + let DecoderNamespace = "GFX6GFX7"; let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?); let Inst{8} = imm; @@ -636,8 +636,8 @@ SM_Real, Enc64 { - let AssemblerPredicates = [isCIOnly]; - let DecoderNamespace = "CI"; + let AssemblerPredicates = [isGFX7]; + let DecoderNamespace = "GFX7"; let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc); let LGKM_CNT = ps.LGKM_CNT; @@ -673,8 +673,8 @@ , SIMCInstr , Enc32 { - let AssemblerPredicates = [isCIOnly]; - let DecoderNamespace = "CI"; + let AssemblerPredicates = [isGFX7]; + let DecoderNamespace = "GFX7"; let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?); let Inst{8} = imm; @@ -725,7 +725,7 @@ def : GCNPat < (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), (vt (!cast(Instr#"_IMM_ci") $sbase, $offset, 0))> { - let OtherPredicates = [isCIOnly]; + let OtherPredicates = [isGFX7]; } // 3. SGPR offset @@ -786,7 +786,7 @@ defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>; } // End let AddedComplexity = 100 -let OtherPredicates = [isSICI] in { +let OtherPredicates = [isGFX6GFX7] in { def : GCNPat < (i64 (readcyclecounter)), (S_MEMTIME) Index: llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SOPInstructions.td @@ -248,6 +248,7 @@ def S_CBRANCH_JOIN : SOP1_0_32R <"s_cbranch_join">; def S_MOV_REGRD_B32 : SOP1_32 <"s_mov_regrd_b32">; + let Defs = [SCC] in { def S_ABS_I32 : SOP1_32 <"s_abs_i32">; } // End Defs = [SCC] @@ -260,7 +261,7 @@ } } -let SubtargetPredicate = isGFX9 in { +let SubtargetPredicate = isGFX9Plus in { let hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC] in { def S_ANDN1_SAVEEXEC_B64 : SOP1_64<"s_andn1_saveexec_b64">; def S_ORN1_SAVEEXEC_B64 : SOP1_64<"s_orn1_saveexec_b64">; @@ -269,7 +270,7 @@ } // End hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC] def S_BITREPLICATE_B64_B32 : SOP1_64_32<"s_bitreplicate_b64_b32">; -} // End SubtargetPredicate = isGFX9 +} // End SubtargetPredicate = isGFX9Plus //===----------------------------------------------------------------------===// // SOP2 Instructions @@ -536,7 +537,7 @@ } } -let SubtargetPredicate = isGFX9 in { +let SubtargetPredicate = isGFX9Plus in { def S_PACK_LL_B32_B16 : SOP2_32<"s_pack_ll_b32_b16">; def S_PACK_LH_B32_B16 : SOP2_32<"s_pack_lh_b32_b16">; def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">; @@ -554,7 +555,7 @@ def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32", [(set i32:$sdst, (UniformBinFrag SSrc_b32:$src0, SSrc_b32:$src1))]>; } -} +} // End SubtargetPredicate = isGFX9Plus //===----------------------------------------------------------------------===// // SOPK Instructions @@ -730,7 +731,7 @@ } // End hasSideEffects = 1 -let SubtargetPredicate = isGFX9 in { +let SubtargetPredicate = isGFX9Plus in { def S_CALL_B64 : SOPK_Pseudo< "s_call_b64", (outs SReg_64:$sdst), @@ -738,7 +739,7 @@ "$sdst, $simm16"> { let isCall = 1; } -} +} // End SubtargetPredicate = isGFX9Plus //===----------------------------------------------------------------------===// // SOPC Instructions @@ -822,10 +823,10 @@ def S_BITCMP1_B64 : SOPC_64_32 <0x0f, "s_bitcmp1_b64">; def S_SETVSKIP : SOPC_32 <0x10, "s_setvskip">; -let SubtargetPredicate = isVI in { +let SubtargetPredicate = isGFX8Plus in { def S_CMP_EQ_U64 : SOPC_CMP_64 <0x12, "s_cmp_eq_u64", COND_EQ>; def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>; -} +} // End SubtargetPredicate = isGFX8Plus let SubtargetPredicate = HasVGPRIndexMode in { def S_SET_GPR_IDX_ON : SOPC <0x11, @@ -875,20 +876,19 @@ let isReturn = 1; } -let SubtargetPredicate = isVI in { def S_ENDPGM_SAVED : SOPP <0x0000001B, (ins), "s_endpgm_saved"> { + let SubtargetPredicate = isGFX8Plus; let simm16 = 0; let isBarrier = 1; let isReturn = 1; } -} -let SubtargetPredicate = isGFX9 in { +let SubtargetPredicate = isGFX9Plus in { let isBarrier = 1, isReturn = 1, simm16 = 0 in { def S_ENDPGM_ORDERED_PS_DONE : SOPP<0x01e, (ins), "s_endpgm_ordered_ps_done">; } // End isBarrier = 1, isReturn = 1, simm16 = 0 -} // End SubtargetPredicate = isGFX9 +} // End SubtargetPredicate = isGFX9Plus let isBranch = 1, SchedRW = [WriteBranch] in { def S_BRANCH : SOPP < @@ -963,13 +963,12 @@ let isConvergent = 1; } -let SubtargetPredicate = isVI in { def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> { + let SubtargetPredicate = isGFX8Plus; let simm16 = 0; let mayLoad = 1; let mayStore = 1; } -} let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">; @@ -1120,13 +1119,13 @@ //===----------------------------------------------------------------------===// -// Real target instructions, move this to the appropriate subtarget TD file +// Target-specific instruction encodings. //===----------------------------------------------------------------------===// class Select_si : SIMCInstr { - list AssemblerPredicates = [isSICI]; - string DecoderNamespace = "SICI"; + list AssemblerPredicates = [isGFX6GFX7]; + string DecoderNamespace = "GFX6GFX7"; } class SOP1_Real_si op, SOP1_Pseudo ps> : Index: llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP1Instructions.td @@ -170,8 +170,12 @@ } let SchedRW = [WriteQuarterRate32] in { -defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>; +defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>; defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>; +defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; +defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>; +defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; +defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>; defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>; defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>; defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>; @@ -183,10 +187,6 @@ defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>; defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>; -defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; -defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>; -defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; -defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>; } // End SchedRW = [WriteQuarterRate32] defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>; @@ -303,41 +303,43 @@ defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>; -// These instruction only exist on SI and CI -let SubtargetPredicate = isSICI in { - -let SchedRW = [WriteQuarterRate32] in { -defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>; -defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>; -defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>; -defm V_RSQ_CLAMP_F32 : VOP1Inst <"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>; -defm V_RSQ_LEGACY_F32 : VOP1Inst <"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>; -} // End SchedRW = [WriteQuarterRate32] - -let SchedRW = [WriteDouble] in { -defm V_RCP_CLAMP_F64 : VOP1Inst <"v_rcp_clamp_f64", VOP_F64_F64>; -defm V_RSQ_CLAMP_F64 : VOP1Inst <"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>; -} // End SchedRW = [WriteDouble] - -} // End SubtargetPredicate = isSICI - - -let SubtargetPredicate = isCIVI in { - -let SchedRW = [WriteDoubleAdd] in { -defm V_TRUNC_F64 : VOP1Inst <"v_trunc_f64", VOP_F64_F64, ftrunc>; -defm V_CEIL_F64 : VOP1Inst <"v_ceil_f64", VOP_F64_F64, fceil>; -defm V_FLOOR_F64 : VOP1Inst <"v_floor_f64", VOP_F64_F64, ffloor>; -defm V_RNDNE_F64 : VOP1Inst <"v_rndne_f64", VOP_F64_F64, frint>; -} // End SchedRW = [WriteDoubleAdd] - -let SchedRW = [WriteQuarterRate32] in { -defm V_LOG_LEGACY_F32 : VOP1Inst <"v_log_legacy_f32", VOP_F32_F32>; -defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>; -} // End SchedRW = [WriteQuarterRate32] - -} // End SubtargetPredicate = isCIVI - +let SubtargetPredicate = isGFX6GFX7 in { + let SchedRW = [WriteQuarterRate32] in { + defm V_LOG_CLAMP_F32 : + VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>; + defm V_RCP_CLAMP_F32 : + VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>; + defm V_RCP_LEGACY_F32 : + VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>; + defm V_RSQ_CLAMP_F32 : + VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>; + defm V_RSQ_LEGACY_F32 : + VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>; + } // End SchedRW = [WriteQuarterRate32] + + let SchedRW = [WriteDouble] in { + defm V_RCP_CLAMP_F64 : + VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>; + defm V_RSQ_CLAMP_F64 : + VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>; + } // End SchedRW = [WriteDouble] +} // End SubtargetPredicate = isGFX6GFX7 + +let SubtargetPredicate = isGFX7GFX8GFX9 in { + let SchedRW = [WriteQuarterRate32] in { + defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>; + defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>; + } // End SchedRW = [WriteQuarterRate32] +} // End SubtargetPredicate = isGFX7GFX8GFX9 + +let SubtargetPredicate = isGFX7Plus in { + let SchedRW = [WriteDoubleAdd] in { + defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>; + defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>; + defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>; + defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>; + } // End SchedRW = [WriteDoubleAdd] +} // End SubtargetPredicate = isGFX7Plus let SubtargetPredicate = Has16BitInsts in { @@ -391,20 +393,20 @@ let Ins64 = (ins); } -let SubtargetPredicate = isGFX9 in { - let Constraints = "$vdst = $src1, $vdst1 = $src0", - DisableEncoding="$vdst1,$src1", - SchedRW = [Write64Bit, Write64Bit] in { -// Never VOP3. Takes as long as 2 v_mov_b32s -def V_SWAP_B32 : VOP1_Pseudo <"v_swap_b32", VOP_SWAP_I32, [], 1>; -} +let SubtargetPredicate = isGFX9Plus in { + def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> { + let Constraints = "$vdst = $src1, $vdst1 = $src0"; + let DisableEncoding = "$vdst1,$src1"; + let SchedRW = [Write64Bit, Write64Bit]; + } -defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>; - -defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>; -defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>; -defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>; + defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>; + defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>; + defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>; +} // End SubtargetPredicate = isGFX9Plus +let SubtargetPredicate = isGFX9 in { + defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>; } // End SubtargetPredicate = isGFX9 //===----------------------------------------------------------------------===// @@ -416,7 +418,7 @@ //===----------------------------------------------------------------------===// multiclass VOP1_Real_si op> { - let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in { + let AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7" in { def _e32_si : VOP1_Real(NAME#"_e32"), SIEncodingFamily.SI>, VOP1e(NAME#"_e32").Pfl>; @@ -491,7 +493,7 @@ //===----------------------------------------------------------------------===// multiclass VOP1_Real_ci op> { - let AssemblerPredicates = [isCIOnly], DecoderNamespace = "CI" in { + let AssemblerPredicates = [isCIOnly], DecoderNamespace = "GFX7" in { def _e32_ci : VOP1_Real(NAME#"_e32"), SIEncodingFamily.SI>, VOP1e(NAME#"_e32").Pfl>; @@ -509,7 +511,7 @@ defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>; //===----------------------------------------------------------------------===// -// VI +// GFX8, GFX9 (VI). //===----------------------------------------------------------------------===// class VOP1_DPPe op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> : @@ -688,6 +690,9 @@ (as_i1imm $bound_ctrl)) >; +} // End OtherPredicates = [isVI] + +let OtherPredicates = [isGFX8Plus] in { def : GCNPat< (i32 (anyext i16:$src)), (COPY $src) @@ -710,7 +715,7 @@ (EXTRACT_SUBREG $src, sub0) >; -} // End OtherPredicates = [isVI] +} // End OtherPredicates = [isGFX8Plus] //===----------------------------------------------------------------------===// // GFX9 Index: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td @@ -481,9 +481,7 @@ (V_SUBB_U32_e64 $src0, $src1, $src2, 0) >; -// These instructions only exist on SI and CI -let SubtargetPredicate = isSICI, Predicates = [isSICI] in { - +let SubtargetPredicate = isGFX6GFX7 in { defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>; defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>; @@ -493,8 +491,7 @@ defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN, sra>; defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN, shl>; } // End isCommutable = 1 - -} // End let SubtargetPredicate = SICI, Predicates = [isSICI] +} // End SubtargetPredicate = isGFX6GFX7 class DivergentBinOp : GCNPat< @@ -660,7 +657,7 @@ $src) >; -let Predicates = [Has16BitInsts] in { +let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; @@ -706,13 +703,13 @@ (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1) >; -} // End Predicates = [Has16BitInsts] +} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9] //===----------------------------------------------------------------------===// // SI //===----------------------------------------------------------------------===// -let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in { +let AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7" in { multiclass VOP2_Real_si op> { def _si : @@ -743,7 +740,7 @@ VOP3be_si <{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; } -} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" +} // End AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7" defm V_CNDMASK_B32 : VOP2_Real_e32e64_si <0x0>; defm V_ADD_F32 : VOP2_Real_e32e64_si <0x3>; Index: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td @@ -386,19 +386,18 @@ } let SchedRW = [Write64Bit] in { -// These instructions only exist on SI and CI -let SubtargetPredicate = isSICI, Predicates = [isSICI] in { +let SubtargetPredicate = isGFX6GFX7, Predicates = [isGFX6GFX7] in { def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile>, shl>; def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile>, srl>; def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile>, sra>; def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile>; -} // End SubtargetPredicate = isSICI, Predicates = [isSICI] +} // End SubtargetPredicate = isGFX6GFX7, Predicates = [isGFX6GFX7] -let SubtargetPredicate = isVI in { +let SubtargetPredicate = isGFX8Plus in { def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile>; def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile>; def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile>; -} // End SubtargetPredicate = isVI +} // End SubtargetPredicate = isGFX8Plus } // End SchedRW = [Write64Bit] let Predicates = [isVI] in { @@ -417,7 +416,13 @@ } -let SubtargetPredicate = isCIVI in { +let SchedRW = [Write32Bit] in { +let SubtargetPredicate = isGFX8Plus in { +def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile, AMDGPUperm>; +} // End SubtargetPredicate = isGFX8Plus +} // End SchedRW = [Write32Bit] + +let SubtargetPredicate = isGFX7Plus in { let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile>; @@ -431,7 +436,7 @@ } // End SchedRW = [WriteDouble, WriteSALU] } // End isCommutable = 1 -} // End SubtargetPredicate = isCIVI +} // End SubtargetPredicate = isGFX7Plus def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup> { @@ -441,7 +446,7 @@ def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9", VOP3_Profile, AMDGPUdiv_fixup> { let renamedInGFX9 = 1; - let Predicates = [Has16BitInsts, isGFX9]; + let Predicates = [Has16BitInsts, isGFX9Plus]; let FPDPRounding = 1; } @@ -451,7 +456,7 @@ } def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile, fma> { let renamedInGFX9 = 1; - let Predicates = [Has16BitInsts, isGFX9]; + let Predicates = [Has16BitInsts, isGFX9Plus]; let FPDPRounding = 1; } @@ -479,10 +484,13 @@ def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile> { let FPDPRounding = 1; } +} // End SubtargetPredicate = isGFX9 + +let SubtargetPredicate = isGFX9Plus in { def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile>; def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile>; def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>; -} // End SubtargetPredicate = isGFX9 +} // End SubtargetPredicate = isGFX9Plus let Uses = [M0, EXEC], FPDPRounding = 1 in { def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>, @@ -509,8 +517,6 @@ def V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>; def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>; def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>; - -def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile, AMDGPUperm>; } // End SubtargetPredicate = isVI let Predicates = [Has16BitInsts] in { @@ -560,7 +566,7 @@ let PredicateCodeUsesOperands = 1; } -let SubtargetPredicate = isGFX9 in { +let SubtargetPredicate = isGFX9Plus in { def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile>; def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile>; def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile>; @@ -610,7 +616,7 @@ def : ThreeOp_i32_Pats; def : ThreeOp_i32_Pats; -} // End SubtargetPredicate = isGFX9 +} // End SubtargetPredicate = isGFX9Plus //===----------------------------------------------------------------------===// // Integer Clamp Patterns @@ -652,15 +658,16 @@ def : IntClampPat; def : IntClampPat; + //===----------------------------------------------------------------------===// -// Target +// Target-specific instruction encodings. //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// SI +// GFX6, GFX7. //===----------------------------------------------------------------------===// -let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in { +let AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7" in { multiclass VOP3_Real_si op> { def _si : VOP3_Real(NAME), SIEncodingFamily.SI>, @@ -672,7 +679,7 @@ VOP3be_si (NAME).Pfl>; } -} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" +} // End AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7" defm V_MAD_LEGACY_F32 : VOP3_Real_si <0x140>; defm V_MAD_F32 : VOP3_Real_si <0x141>; @@ -728,14 +735,14 @@ defm V_TRIG_PREOP_F64 : VOP3_Real_si <0x174>; //===----------------------------------------------------------------------===// -// CI +// GFX7. //===----------------------------------------------------------------------===// multiclass VOP3_Real_ci op> { def _ci : VOP3_Real(NAME), SIEncodingFamily.SI>, VOP3e_si (NAME).Pfl> { let AssemblerPredicates = [isCIOnly]; - let DecoderNamespace = "CI"; + let DecoderNamespace = "GFX7"; } } @@ -743,7 +750,7 @@ def _ci : VOP3_Real(NAME), SIEncodingFamily.SI>, VOP3be_si (NAME).Pfl> { let AssemblerPredicates = [isCIOnly]; - let DecoderNamespace = "CI"; + let DecoderNamespace = "GFX7"; } } @@ -753,7 +760,7 @@ defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>; //===----------------------------------------------------------------------===// -// VI +// GFX8, GFX9 (VI). //===----------------------------------------------------------------------===// let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { Index: llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOPCInstructions.td @@ -306,7 +306,7 @@ defm V_CMPX_NLT_F64 : VOPCX_F64 <"v_cmpx_nlt_f64">; defm V_CMPX_TRU_F64 : VOPCX_F64 <"v_cmpx_tru_f64">; -let SubtargetPredicate = isSICI in { +let SubtargetPredicate = isGFX6GFX7 in { defm V_CMPS_F_F32 : VOPC_F32 <"v_cmps_f_f32">; defm V_CMPS_LT_F32 : VOPC_F32 <"v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">; @@ -376,7 +376,7 @@ defm V_CMPSX_NLT_F64 : VOPCX_F64 <"v_cmpsx_nlt_f64">; defm V_CMPSX_TRU_F64 : VOPCX_F64 <"v_cmpsx_tru_f64">; -} // End SubtargetPredicate = isSICI +} // End SubtargetPredicate = isGFX6GFX7 let SubtargetPredicate = Has16BitInsts in { @@ -694,7 +694,7 @@ def : FCMP_Pattern ; //===----------------------------------------------------------------------===// -// Target +// Target-specific instruction encodings. //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -702,7 +702,7 @@ //===----------------------------------------------------------------------===// multiclass VOPC_Real_si op> { - let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in { + let AssemblerPredicates = [isGFX6GFX7], DecoderNamespace = "GFX6GFX7" in { def _e32_si : VOPC_Real(NAME#"_e32"), SIEncodingFamily.SI>, VOPCe; @@ -718,7 +718,7 @@ } def : VOPCInstAlias (NAME#"_e64"), !cast(NAME#"_e32_si")> { - let AssemblerPredicate = isSICI; + let AssemblerPredicate = isGFX6GFX7; } }