Index: lib/Target/AMDGPU/CIInstructions.td
===================================================================
--- lib/Target/AMDGPU/CIInstructions.td
+++ lib/Target/AMDGPU/CIInstructions.td
@@ -71,14 +71,6 @@
 >;
 } // End isCommutable = 1
-
-//===----------------------------------------------------------------------===//
-// SMRD Instructions
-//===----------------------------------------------------------------------===//
-
-defm S_DCACHE_INV_VOL : SMRD_Inval <smrd<0x1d, 0x22>,
-  "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
-
 //===----------------------------------------------------------------------===//
 // MUBUF Instructions
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- lib/Target/AMDGPU/SIInstrFormats.td
+++ lib/Target/AMDGPU/SIInstrFormats.td
@@ -200,60 +200,6 @@
 } // End Uses = [EXEC]
 
 //===----------------------------------------------------------------------===//
-// Scalar operations
-//===----------------------------------------------------------------------===//
-
-class SMRDe <bits<5> op, bits<1> imm> : Enc32 {
-  bits<7> sdst;
-  bits<7> sbase;
-
-  let Inst{8} = imm;
-  let Inst{14-9} = sbase{6-1};
-  let Inst{21-15} = sdst;
-  let Inst{26-22} = op;
-  let Inst{31-27} = 0x18; //encoding
-}
-
-class SMRD_IMMe <bits<5> op> : SMRDe<op, 1> {
-  bits<8> offset;
-  let Inst{7-0} = offset;
-}
-
-class SMRD_SOFFe <bits<5> op> : SMRDe<op, 0> {
-  bits<8> soff;
-  let Inst{7-0} = soff;
-}
-
-
-
-class SMRD_IMMe_ci <bits<5> op> : Enc64 {
-  bits<7> sdst;
-  bits<7> sbase;
-  bits<32> offset;
-
-  let Inst{7-0} = 0xff;
-  let Inst{8} = 0;
-  let Inst{14-9} = sbase{6-1};
-  let Inst{21-15} = sdst;
-  let Inst{26-22} = op;
-  let Inst{31-27} = 0x18; //encoding
-  let Inst{63-32} = offset;
-}
-
-
-class SMRD <dag outs, dag ins, string asm, list<dag> pattern> :
-    InstSI <outs, ins, asm, pattern> {
-
-  let LGKM_CNT = 1;
-  let SMRD = 1;
-  let mayStore = 0;
-  let mayLoad = 1;
-  let hasSideEffects = 0;
-  let UseNamedOperandTable = 1;
-  let SchedRW = [WriteSMEM];
-}
-
-//===----------------------------------------------------------------------===//
 // Vector ALU operations
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -54,14 +54,6 @@
   let VI3 = vi;
 }
 
-// Specify an SMRD opcode for SI and SMEM opcode for VI
-
-// FIXME: This should really be bits<5> si, Tablegen crashes if
-// parameter default value is other parameter with different bit size
-class smrd<bits<8> si, bits<8> vi = si> {
-  field bits<5> SI = si{4-0};
-  field bits<8> VI = vi;
-}
 // Execpt for the NONE field, this must be kept in sync with the
 // SIEncodingFamily enum in AMDGPUInstrInfo.cpp
@@ -173,13 +165,6 @@
 
 def mubuf_load_atomic : MubufLoad <atomic_load>;
 
-def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
-  auto Ld = cast<LoadSDNode>(N);
-  return Ld->getAlignment() >= 4 &&
-    Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
-    static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N);
-}]>;
-
 //===----------------------------------------------------------------------===//
 // PatFrags for global memory operations
 //===----------------------------------------------------------------------===//
@@ -477,8 +462,6 @@
 def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
 def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
 
-def smrd_offset : NamedOperandU32<"SMRDOffset", NamedMatchClass<"SMRDOffset">>;
-def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset", NamedMatchClass<"SMRDLiteralOffset">>;
 def glc : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
 def slc : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
@@ -547,13 +530,6 @@
 def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
 def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
 
-def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
-def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
-def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
-def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
-def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
-def SMRDBufferSgpr : ComplexPattern<i32, 1, "SelectSMRDBufferSgpr">;
-
 def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;
 
 def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
@@ -641,159 +617,6 @@
 }
 
 //===----------------------------------------------------------------------===//
-// Scalar classes
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// SMRD classes
-//===----------------------------------------------------------------------===//
-
-class SMRD_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
-  SMRD <outs, ins, "", pattern>,
-  SIMCInstr<opName, SIEncodingFamily.NONE> {
-  let isPseudo = 1;
-  let isCodeGenOnly = 1;
-}
-
-class SMRD_IMM_Real_si <bits<5> op, string opName, dag outs, dag ins,
-                        string asm> :
-  SMRD <outs, ins, asm, []>,
-  SMRD_IMMe <op>,
-  SIMCInstr<opName, SIEncodingFamily.SI> {
-  let AssemblerPredicates = [isSICI];
-  let DecoderNamespace = "SICI";
-  let DisableDecoder = DisableSIDecoder;
-}
-
-class SMRD_SOFF_Real_si <bits<5> op, string opName, dag outs, dag ins,
-                         string asm> :
-  SMRD <outs, ins, asm, []>,
-  SMRD_SOFFe <op>,
-  SIMCInstr<opName, SIEncodingFamily.SI> {
-  let AssemblerPredicates = [isSICI];
-  let DecoderNamespace = "SICI";
-  let DisableDecoder = DisableSIDecoder;
-}
-
-
-class SMRD_IMM_Real_vi <bits<8> op, string opName, dag outs, dag ins,
-                        string asm, list<dag> pattern = []> :
-  SMRD <outs, ins, asm, pattern>,
-  SMEM_IMMe_vi <op>,
-  SIMCInstr<opName, SIEncodingFamily.VI> {
-  let AssemblerPredicates = [isVI];
-  let DecoderNamespace = "VI";
-  let DisableDecoder = DisableVIDecoder;
-}
-
-class SMRD_SOFF_Real_vi <bits<8> op, string opName, dag outs, dag ins,
-                         string asm, list<dag> pattern = []> :
-  SMRD <outs, ins, asm, pattern>,
-  SMEM_SOFFe_vi <op>,
-  SIMCInstr<opName, SIEncodingFamily.VI> {
-  let AssemblerPredicates = [isVI];
-  let DecoderNamespace = "VI";
-  let DisableDecoder = DisableVIDecoder;
-}
-
-
-multiclass SMRD_IMM_m <smrd op, string opName, dag outs, dag ins,
-                       string asm, list<dag> pattern> {
-
-  def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
-
-  def _si : SMRD_IMM_Real_si <op.SI, opName, outs, ins, asm>;
-
-  // glc is only applicable to scalar stores, which are not yet
-  // implemented.
-  let glc = 0 in {
-    def _vi : SMRD_IMM_Real_vi <op.VI, opName, outs, ins, asm>;
-  }
-}
-
-multiclass SMRD_SOFF_m <smrd op, string opName, dag outs, dag ins,
-                        string asm, list<dag> pattern> {
-
-  def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
-
-  def _si : SMRD_SOFF_Real_si <op.SI, opName, outs, ins, asm>;
-
-  // glc is only applicable to scalar stores, which are not yet
-  // implemented.
-  let glc = 0 in {
-    def _vi : SMRD_SOFF_Real_vi <op.VI, opName, outs, ins, asm>;
-  }
-}
-
-multiclass SMRD_Special <smrd op, string opName, dag outs,
-                         int sdst_ = ?, string opStr = "",
-                         list<dag> pattern = []> {
-  let hasSideEffects = 1 in {
-    def "" : SMRD_Pseudo <opName, outs, (ins), pattern>;
-
-    let sbase = 0, soff = 0, sdst = sdst_ in {
-      def _si : SMRD_SOFF_Real_si <op.SI, opName, outs, (ins), opName # opStr>;
-
-      let glc = 0 in {
-        def _vi : SMRD_SOFF_Real_vi <op.VI, opName, outs, (ins), opName # opStr>;
-      }
-    }
-  }
-}
-
-multiclass SMRD_Inval <smrd op, string opName, SDPatternOperator node> {
-  let mayStore = 1 in {
-    defm : SMRD_Special <op, opName, (outs), 0, "", [(node)]>;
-  }
-}
-
-class SMEM_Inval <bits<8> op, string opName, SDPatternOperator node> :
-  SMRD_SOFF_Real_vi <op, opName, (outs), (ins), opName, [(node)]> {
-  let hasSideEffects = 1;
-  let mayStore = 1;
-  let sbase = 0;
-  let sdst = 0;
-  let glc = 0;
-  let soff = 0;
-}
-
-class SMEM_Ret <bits<8> op, string opName, SDPatternOperator node> :
-  SMRD_SOFF_Real_vi <op, opName, (outs SReg_64:$sdst), (ins),
-                     opName # " $sdst", [(set i64:$sdst, (node))]> {
-  let hasSideEffects = 1;
-  let mayStore = ?;
-  let mayLoad = ?;
-  let sbase = 0;
-  let glc = 0;
-  let soff = 0;
-}
-
-multiclass SMRD_Helper <smrd op, string opName, RegisterClass baseClass,
-                        RegisterClass dstClass> {
-  defm _IMM : SMRD_IMM_m <
-    op, opName#"_IMM", (outs dstClass:$sdst),
-    (ins baseClass:$sbase, smrd_offset:$offset),
-    opName#" $sdst, $sbase, $offset", []
-  >;
-
-  def _IMM_ci : SMRD <
-    (outs dstClass:$sdst), (ins baseClass:$sbase, smrd_literal_offset:$offset),
-    opName#" $sdst, $sbase, $offset", []>, SMRD_IMMe_ci <op.SI> {
-    let AssemblerPredicates = [isCIOnly];
-    let DecoderNamespace = "CI";
-  }
-
-  defm _SGPR : SMRD_SOFF_m <
-    op, opName#"_SGPR", (outs dstClass:$sdst),
-    (ins baseClass:$sbase, SReg_32:$soff),
-    opName#" $sdst, $sbase, $soff", []
-  >;
-}
-
-//===----------------------------------------------------------------------===//
 // Vector ALU classes
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -22,6 +22,7 @@
 def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
 
 include "SOPInstructions.td"
+include "SMInstructions.td"
 
 let SubtargetPredicate = isGCN in {
 
@@ -32,53 +33,6 @@
 defm EXP : EXP_m;
 
 //===----------------------------------------------------------------------===//
-// SMRD Instructions
-//===----------------------------------------------------------------------===//
-
-// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
-// SMRD instructions, because the SReg_32_XM0 register class does not include M0
-// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SReg_32_XM0>;
-defm S_LOAD_DWORDX2 : SMRD_Helper <smrd<0x01>, "s_load_dwordx2", SReg_64, SReg_64>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <smrd<0x02>, "s_load_dwordx4", SReg_64, SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <smrd<0x03>, "s_load_dwordx8", SReg_64, SReg_256>;
-defm S_LOAD_DWORDX16 : SMRD_Helper <smrd<0x04>, "s_load_dwordx16", SReg_64, SReg_512>;
-
-defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
-  smrd<0x08>, "s_buffer_load_dword", SReg_128, SReg_32_XM0
->;
-
-defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
-  smrd<0x09>, "s_buffer_load_dwordx2", SReg_128, SReg_64
->;
-
-defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
-  smrd<0x0a>, "s_buffer_load_dwordx4", SReg_128, SReg_128
->;
-
-defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
-  smrd<0x0b>, "s_buffer_load_dwordx8", SReg_128, SReg_256
->;
-
-defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
-  smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512
->;
-
-let mayStore = ? in {
-// FIXME: mayStore = ? is a workaround for tablegen bug for different
-// inferred mayStore flags for the instruction pattern vs. standalone
-// Pat. Each considers the other contradictory.
-
-defm S_MEMTIME : SMRD_Special <smrd<0x1e, 0x24>, "s_memtime",
-  (outs SReg_64:$sdst), ?, " $sdst", [(set i64:$sdst, (int_amdgcn_s_memtime))]
->;
-}
-
-defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv",
-  int_amdgcn_s_dcache_inv>;
-
-//===----------------------------------------------------------------------===//
-
 // VOPC Instructions
 //===----------------------------------------------------------------------===//
@@ -1826,64 +1780,6 @@
 def : FCMP_Pattern ;
 def : FCMP_Pattern ;
 def : FCMP_Pattern ;
-// SMRD Patterns
-//===----------------------------------------------------------------------===//
-
-multiclass SMRD_Pattern <string Instr, ValueType vt> {
-
-  // 1. IMM offset
-  def : Pat <
-    (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
-    (vt (!cast<SMRD>(Instr#"_IMM") $sbase, $offset))
-  >;
-
-  // 2. SGPR offset
-  def : Pat <
-    (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
-    (vt (!cast<SMRD>(Instr#"_SGPR") $sbase, $offset))
-  >;
-
-  def : Pat <
-    (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
-    (vt (!cast<SMRD>(Instr#"_IMM_ci") $sbase, $offset))
-  > {
-    let Predicates = [isCIOnly];
-  }
-}
-
-// Global and constant loads can be selected to either MUBUF or SMRD
-// instructions, but SMRD instructions are faster so we want the instruction
-// selector to prefer those.
-let AddedComplexity = 100 in {
-
-defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
-
-// 1. Offset as an immediate
-def : Pat <
-  (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)),
-  (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset)
->;
-
-// 2. Offset loaded in an 32bit SGPR
-def : Pat <
-  (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)),
-  (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset)
->;
-
-let Predicates = [isCI] in {
-
-def : Pat <
-  (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)),
-  (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset)
->;
-
-} // End Predicates = [isCI]
-
-} // End let AddedComplexity = 10000
 //===----------------------------------------------------------------------===//
 // VOP1 Patterns
 //===----------------------------------------------------------------------===//
@@ -2983,13 +2879,6 @@
 def : BFEPattern ;
-let Predicates = [isSICI] in {
-def : Pat <
-  (i64 (readcyclecounter)),
-  (S_MEMTIME)
->;
-}
-
 def : Pat<
   (fcanonicalize f32:$src),
   (V_MUL_F32_e64 0, CONST.FP32_ONE, 0, $src, 0, 0)
Index: lib/Target/AMDGPU/SMInstructions.td
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/SMInstructions.td
@@ -0,0 +1,430 @@
+//===---- SMInstructions.td - Scalar Memory Instruction Definitions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def smrd_offset : NamedOperandU32<"SMRDOffset",
+                                  NamedMatchClass<"SMRDOffset">> {
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
+
+//===----------------------------------------------------------------------===//
+// Scalar Memory classes
+//===----------------------------------------------------------------------===//
+
+class SM_Pseudo <string opName, dag outs, dag ins, string asmOps,
+                 list<dag> pattern=[]> :
+  InstSI <outs, ins, "", pattern>,
+  SIMCInstr<opName, SIEncodingFamily.NONE> {
+  let isPseudo = 1;
+  let isCodeGenOnly = 1;
+
+  let LGKM_CNT = 1;
+  let SMRD = 1;
+  let mayStore = 0;
+  let mayLoad = 1;
+  let hasSideEffects = 0;
+  let UseNamedOperandTable = 1;
+  let SchedRW = [WriteSMEM];
+  let SubtargetPredicate = isGCN;
+
+  string Mnemonic = opName;
+  string AsmOperands = asmOps;
+
+  bits<1> has_sbase = 1;
+  bits<1> has_sdst = 1;
+  bits<1> has_offset = 1;
+  bits<1> offset_is_imm = 0;
+}
+
+class SM_Real <SM_Pseudo ps>
+  : InstSI<ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
+
+  let isPseudo = 0;
+  let isCodeGenOnly = 0;
+
+  // copy relevant pseudo op flags
+  let SubtargetPredicate = ps.SubtargetPredicate;
+  let AsmMatchConverter = ps.AsmMatchConverter;
+
+  // encoding
+  bits<7> sbase;
+  bits<7> sdst;
+  bits<32> offset;
+  bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0);
+}
+
+class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps,
+                      list<dag> pattern=[]>
+  : SM_Pseudo<opName, outs, ins, asmOps, pattern> {
+  RegisterClass BaseClass;
+}
+
+multiclass SM_Pseudo_Loads<string opName,
+                           RegisterClass baseClass,
+                           RegisterClass dstClass> {
+  def _IMM : SM_Load_Pseudo <opName,
+                             (outs dstClass:$sdst),
+                             (ins baseClass:$sbase, i32imm:$offset),
+                             " $sdst, $sbase, $offset", []> {
+    let offset_is_imm = 1;
+    let BaseClass = baseClass;
+    let PseudoInstr = opName # "_IMM";
+  }
+  def _SGPR : SM_Load_Pseudo <opName,
+                              (outs dstClass:$sdst),
+                              (ins baseClass:$sbase, SReg_32:$soff),
+                              " $sdst, $sbase, $offset", []> {
+    let BaseClass = baseClass;
+    let PseudoInstr = opName # "_SGPR";
+  }
+}
+
+class SM_Time_Pseudo <string opName, SDPatternOperator node> : SM_Pseudo<
+  opName, (outs SReg_64:$sdst), (ins),
+  " $sdst", [(set i64:$sdst, (node))]> {
+  let hasSideEffects = 1;
+  // FIXME: mayStore = ? is a workaround for tablegen bug for different
+  // inferred mayStore flags for the instruction pattern vs. standalone
+  // Pat. Each considers the other contradictory.
+  let mayStore = ?;
+  let mayLoad = ?;
+  let has_sbase = 0;
+  let has_offset = 0;
+}
+
+class SM_Inval_Pseudo <string opName, SDPatternOperator node> : SM_Pseudo<
+  opName, (outs), (ins), "", [(node)]> {
+  let hasSideEffects = 1;
+  let mayStore = 1;
+  let has_sdst = 0;
+  let has_sbase = 0;
+  let has_offset = 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Scalar Memory Instructions
+//===----------------------------------------------------------------------===//
+
+// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SReg_32_XM0 register class does not include M0
+// and writing to M0 from an SMRD instruction will hang the GPU.
+defm S_LOAD_DWORD : SM_Pseudo_Loads <"s_load_dword", SReg_64, SReg_32_XM0>;
+defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_load_dwordx2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_load_dwordx4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <"s_load_dwordx8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>;
+
+defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <
+  "s_buffer_load_dword", SReg_128, SReg_32_XM0
+>;
+
+defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <
+  "s_buffer_load_dwordx2", SReg_128, SReg_64
+>;
+
+defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <
+  "s_buffer_load_dwordx4", SReg_128, SReg_128
+>;
+
+defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <
+  "s_buffer_load_dwordx8", SReg_128, SReg_256
+>;
+
+defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
+  "s_buffer_load_dwordx16", SReg_128, SReg_512
+>;
+
+def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
+def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
+
+let SubtargetPredicate = isCIVI in {
+def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
+} // let SubtargetPredicate = isCIVI
+
+let SubtargetPredicate = isVI in {
+def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
+def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
+def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
+} // SubtargetPredicate = isVI
+
+
+
+//===----------------------------------------------------------------------===//
+// Scalar Memory Patterns
+//===----------------------------------------------------------------------===//
+
+def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
+  auto Ld = cast<LoadSDNode>(N);
+  return Ld->getAlignment() >= 4 &&
+    Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+    static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N);
+}]>;
+
+def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
+def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
+def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
+def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
+def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
+def SMRDBufferSgpr : ComplexPattern<i32, 1, "SelectSMRDBufferSgpr">;
+
+let Predicates = [isGCN] in {
+
+multiclass SMRD_Pattern <string Instr, ValueType vt> {
+
+  // 1. IMM offset
+  def : Pat <
+    (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
+    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset))
+  >;
+
+  // 2. SGPR offset
+  def : Pat <
+    (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
+    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset))
+  >;
+}
+
+let Predicates = [isSICI] in {
+def : Pat <
+  (i64 (readcyclecounter)),
+  (S_MEMTIME)
+>;
+}
+
+// Global and constant loads can be selected to either MUBUF or SMRD
+// instructions, but SMRD instructions are faster so we want the instruction
+// selector to prefer those.
+let AddedComplexity = 100 in {
+
+defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
+
+// 1. Offset as an immediate
+def SM_LOAD_PATTERN : Pat < // name this pattern to reuse AddedComplexity on CI
+  (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)),
+  (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset)
+>;
+
+// 2. Offset loaded in a 32-bit SGPR
+def : Pat <
+  (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)),
+  (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset)
+>;
+
+} // End let AddedComplexity = 100
+
+} // let Predicates = [isGCN]
+
+let Predicates = [isVI] in {
+
+// 1. Offset as 20bit DWORD immediate
+def : Pat <
+  (SIload_constant v4i32:$sbase, IMM20bit:$offset),
+  (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
+>;
+
+def : Pat <
+  (i64 (readcyclecounter)),
+  (S_MEMREALTIME)
+>;
+
+} // let Predicates = [isVI]
+
+
+//===----------------------------------------------------------------------===//
+// Targets
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI
+//===----------------------------------------------------------------------===//
+
+class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
+  : SM_Real<ps>
+  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
+  , Enc32 {
+
+  let AssemblerPredicates = [isSICI];
+  let DecoderNamespace = "SICI";
+
+  let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
+  let Inst{8} = imm;
+  let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
+  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
+  let Inst{26-22} = op;
+  let Inst{31-27} = 0x18; //encoding
+}
+
+multiclass SM_Real_Loads_si <bits<5> op, string ps,
+                             SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#"_IMM"),
+                             SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#"_SGPR")> {
+  def _IMM_si : SMRD_Real_si <op, immPs> {
+    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset);
+  }
+  def _SGPR_si : SMRD_Real_si <op, sgprPs> {
+    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset);
+  }
+}
+
+defm S_LOAD_DWORD : SM_Real_Loads_si <0x00, "S_LOAD_DWORD">;
+defm S_LOAD_DWORDX2 : SM_Real_Loads_si <0x01, "S_LOAD_DWORDX2">;
+defm S_LOAD_DWORDX4 : SM_Real_Loads_si <0x02, "S_LOAD_DWORDX4">;
+defm S_LOAD_DWORDX8 : SM_Real_Loads_si <0x03, "S_LOAD_DWORDX8">;
+defm S_LOAD_DWORDX16 : SM_Real_Loads_si <0x04, "S_LOAD_DWORDX16">;
+defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_si <0x08, "S_BUFFER_LOAD_DWORD">;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_si <0x09, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_si <0x0a, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_si <0x0b, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c, "S_BUFFER_LOAD_DWORDX16">;
+
+def S_MEMTIME_si : SMRD_Real_si <0x1e, S_MEMTIME>;
+def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;
+
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
+  : SM_Real<ps>
+  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
+  , Enc64 {
+
+  let AssemblerPredicates = [isVI];
+  let DecoderNamespace = "VI";
+
+  let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
+  let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
+
+  // glc is only applicable to scalar stores, which are not yet
+  // implemented.
+  let Inst{16} = 0; // glc bit
+  let Inst{17} = imm;
+  let Inst{25-18} = op;
+  let Inst{31-26} = 0x30; //encoding
+  let Inst{51-32} = !if(ps.has_offset, offset{19-0}, ?);
+}
+
+multiclass SM_Real_Loads_vi <bits<8> op, string ps,
+                             SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#"_IMM"),
+                             SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#"_SGPR")> {
+  def _IMM_vi : SMEM_Real_vi <op, immPs> {
+    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset);
+  }
+  def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
+    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset);
+  }
+}
+
+defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">;
+defm S_LOAD_DWORDX2 : SM_Real_Loads_vi <0x01, "S_LOAD_DWORDX2">;
+defm S_LOAD_DWORDX4 : SM_Real_Loads_vi <0x02, "S_LOAD_DWORDX4">;
+defm S_LOAD_DWORDX8 : SM_Real_Loads_vi <0x03, "S_LOAD_DWORDX8">;
+defm S_LOAD_DWORDX16 : SM_Real_Loads_vi <0x04, "S_LOAD_DWORDX16">;
+defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_vi <0x08, "S_BUFFER_LOAD_DWORD">;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_vi <0x09, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_vi <0x0a, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_vi <0x0b, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c, "S_BUFFER_LOAD_DWORDX16">;
+
+def S_DCACHE_INV_vi : SMEM_Real_vi <0x20, S_DCACHE_INV>;
+def S_DCACHE_WB_vi : SMEM_Real_vi <0x21, S_DCACHE_WB>;
+def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
+def S_DCACHE_WB_VOL_vi : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
+def S_MEMTIME_vi : SMEM_Real_vi <0x24, S_MEMTIME>;
+def S_MEMREALTIME_vi : SMEM_Real_vi <0x25, S_MEMREALTIME>;
+
+
+//===----------------------------------------------------------------------===//
+// CI
+//===----------------------------------------------------------------------===//
+
+def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset",
+                                          NamedMatchClass<"SMRDLiteralOffset">> {
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
+class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
+  SM_Real<ps>,
+  Enc64 {
+
+  let AssemblerPredicates = [isCIOnly];
+  let DecoderNamespace = "CI";
+  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset);
+
+  let LGKM_CNT = ps.LGKM_CNT;
+  let SMRD = ps.SMRD;
+  let mayLoad = ps.mayLoad;
+  let mayStore = ps.mayStore;
+  let hasSideEffects = ps.hasSideEffects;
+  let SchedRW = ps.SchedRW;
+  let UseNamedOperandTable = ps.UseNamedOperandTable;
+
+  let Inst{7-0} = 0xff;
+  let Inst{8} = 0;
+  let Inst{14-9} = sbase{6-1};
+  let Inst{21-15} = sdst{6-0};
+  let Inst{26-22} = op;
+  let Inst{31-27} = 0x18; //encoding
+  let Inst{63-32} = offset{31-0};
+}
+
+def S_LOAD_DWORD_IMM_ci : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
+def S_LOAD_DWORDX2_IMM_ci : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
+def S_LOAD_DWORDX4_IMM_ci : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
+def S_LOAD_DWORDX8_IMM_ci : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
+def S_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
+def S_BUFFER_LOAD_DWORD_IMM_ci : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
+def S_BUFFER_LOAD_DWORDX2_IMM_ci : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
+def S_BUFFER_LOAD_DWORDX4_IMM_ci : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
+def S_BUFFER_LOAD_DWORDX8_IMM_ci : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
+def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;
+
+class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
+  : SM_Real<ps>
+  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
+  , Enc32 {
+
+  let AssemblerPredicates = [isCIOnly];
+  let DecoderNamespace = "CI";
+
+  let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
+  let Inst{8} = imm;
+  let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
+  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
+  let Inst{26-22} = op;
+  let Inst{31-27} = 0x18; //encoding
+}
+
+def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
+
+let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in {
+
+class SMRD_Pattern_ci <string Instr, ValueType vt> : Pat <
+  (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
+  (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset))> {
+  let Predicates = [isCIOnly];
+}
+
+def : SMRD_Pattern_ci <"S_LOAD_DWORD", i32>;
+def : SMRD_Pattern_ci <"S_LOAD_DWORDX2", v2i32>;
+def : SMRD_Pattern_ci <"S_LOAD_DWORDX4", v4i32>;
+def : SMRD_Pattern_ci <"S_LOAD_DWORDX8", v8i32>;
+def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>;
+
+def : Pat <
+  (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)),
+  (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset)> {
+  let Predicates = [isCI]; // should this be isCIOnly?
+}
+
+} // End let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity
+
Index: lib/Target/AMDGPU/VIInstrFormats.td
===================================================================
--- lib/Target/AMDGPU/VIInstrFormats.td
+++ lib/Target/AMDGPU/VIInstrFormats.td
@@ -69,29 +69,6 @@
   let Inst{63-56} = soffset;
 }
 
-class SMEMe_vi <bits<8> op, bit imm> : Enc64 {
-  bits<7> sbase;
-  bits<7> sdst;
-  bits<1> glc;
-
-  let Inst{5-0} = sbase{6-1};
-  let Inst{12-6} = sdst;
-  let Inst{16} = glc;
-  let Inst{17} = imm;
-  let Inst{25-18} = op;
-  let Inst{31-26} = 0x30; //encoding
-}
-
-class SMEM_IMMe_vi <bits<8> op> : SMEMe_vi<op, 1> {
-  bits<20> offset;
-  let Inst{51-32} = offset;
-}
-
-class SMEM_SOFFe_vi <bits<8> op> : SMEMe_vi<op, 0> {
-  bits<20> soff;
-  let Inst{51-32} = soff;
-}
-
 class VOP3a_vi <bits<10> op> : Enc64 {
   bits<2> src0_modifiers;
   bits<9> src0;
Index: lib/Target/AMDGPU/VIInstructions.td
===================================================================
--- lib/Target/AMDGPU/VIInstructions.td
+++ lib/Target/AMDGPU/VIInstructions.td
@@ -101,29 +101,10 @@
 def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
 def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
 
-//===----------------------------------------------------------------------===//
-// SMEM Instructions
-//===----------------------------------------------------------------------===//
-
-def S_DCACHE_WB : SMEM_Inval <0x21,
-  "s_dcache_wb", int_amdgcn_s_dcache_wb>;
-
-def S_DCACHE_WB_VOL : SMEM_Inval <0x23,
-  "s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
-
-def S_MEMREALTIME : SMEM_Ret<0x25,
-  "s_memrealtime", int_amdgcn_s_memrealtime>;
-
 } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
 
 let Predicates = [isVI] in {
 
-// 1. Offset as 20bit DWORD immediate
-def : Pat <
-  (SIload_constant v4i32:$sbase, IMM20bit:$offset),
-  (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
->;
-
 //===----------------------------------------------------------------------===//
 // DPP Patterns
 //===----------------------------------------------------------------------===//
@@ -139,9 +120,4 @@
 // Misc Patterns
 //===----------------------------------------------------------------------===//
 
-def : Pat <
-  (i64 (readcyclecounter)),
-  (S_MEMREALTIME)
->;
-
 } // End Predicates = [isVI]
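For readers new to the pseudo/real split introduced by this patch, a minimal illustrative sketch follows (not part of the patch; the name S_EXAMPLE and the opcode values are invented, and the intrinsic is only a stand-in): an operation is described once as a subtarget-independent SM_Pseudo, and each encoding family then derives an encoded "real" instruction from that single pseudo.

// Hypothetical illustration only: S_EXAMPLE, the opcodes, and the reuse of
// int_amdgcn_s_dcache_inv are placeholders, not definitions from the patch.
def S_EXAMPLE : SM_Inval_Pseudo <"s_example", int_amdgcn_s_dcache_inv>;
// One real instruction per encoding family, all sharing the same pseudo.
def S_EXAMPLE_si : SMRD_Real_si <0x10, S_EXAMPLE>; // SI/CI SMRD encoding
def S_EXAMPLE_vi : SMEM_Real_vi <0x30, S_EXAMPLE>; // VI SMEM encoding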