Index: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td @@ -0,0 +1,1305 @@ +//===-- BUFInstructions.td - Buffer Instruction Definitions ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def MUBUFAddr32 : ComplexPattern; +def MUBUFAddr64 : ComplexPattern; +def MUBUFAddr64Atomic : ComplexPattern; + +def MUBUFScratch : ComplexPattern; +def MUBUFOffset : ComplexPattern; +def MUBUFOffsetNoGLC : ComplexPattern; +def MUBUFOffsetAtomic : ComplexPattern; +def MUBUFIntrinsicOffset : ComplexPattern; +def MUBUFIntrinsicVOffset : ComplexPattern; + +class MubufLoad : PatFrag < + (ops node:$ptr), (op node:$ptr), [{ + auto const AS = cast(N)->getAddressSpace(); + return AS == AMDGPUAS::GLOBAL_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS; +}]>; + +def mubuf_load : MubufLoad ; +def mubuf_az_extloadi8 : MubufLoad ; +def mubuf_sextloadi8 : MubufLoad ; +def mubuf_az_extloadi16 : MubufLoad ; +def mubuf_sextloadi16 : MubufLoad ; +def mubuf_load_atomic : MubufLoad ; + +def BUFAddrKind { + int Offset = 0; + int OffEn = 1; + int IdxEn = 2; + int BothEn = 3; + int Addr64 = 4; +} + +class getAddrName { + string ret = + !if(!eq(addrKind, BUFAddrKind.Offset), "offset", + !if(!eq(addrKind, BUFAddrKind.OffEn), "offen", + !if(!eq(addrKind, BUFAddrKind.IdxEn), "idxen", + !if(!eq(addrKind, BUFAddrKind.BothEn), "bothen", + !if(!eq(addrKind, BUFAddrKind.Addr64), "addr64", + ""))))); +} + +class MUBUFAddr64Table { + bit IsAddr64 = is_addr64; + string OpName = NAME # suffix; +} + +//===----------------------------------------------------------------------===// +// MTBUF classes +//===----------------------------------------------------------------------===// + +class MTBUF_Pseudo pattern=[]> : + InstSI, + SIMCInstr { + + let isPseudo = 1; + let isCodeGenOnly = 1; + let UseNamedOperandTable = 1; + + string Mnemonic = opName; + string AsmOperands = asmOps; + + let VM_CNT = 1; + let EXP_CNT = 1; + let MTBUF = 1; + let Uses = [EXEC]; + + let hasSideEffects = 0; + let UseNamedOperandTable = 1; + let SchedRW = [WriteVMEM]; +} + +class MTBUF_Real op, MTBUF_Pseudo ps> : + InstSI , + Enc64 { + + let isPseudo = 0; + let isCodeGenOnly = 0; + + // copy relevant pseudo op flags + let SubtargetPredicate = ps.SubtargetPredicate; + let AsmMatchConverter = ps.AsmMatchConverter; + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + let TSFlags = ps.TSFlags; + + bits<8> vdata; + bits<12> offset; + bits<1> offen; + bits<1> idxen; + bits<1> glc; + bits<1> addr64; + bits<4> dfmt; + bits<3> nfmt; + bits<8> vaddr; + bits<7> srsrc; + bits<1> slc; + bits<1> tfe; + bits<8> soffset; + + let Inst{11-0} = offset; + let Inst{12} = offen; + let Inst{13} = idxen; + let Inst{14} = glc; + let Inst{15} = addr64; + let Inst{18-16} = op; + let Inst{22-19} = dfmt; + let Inst{25-23} = nfmt; + let Inst{31-26} = 0x3a; //encoding + let Inst{39-32} = vaddr; + let Inst{47-40} = vdata; + let Inst{52-48} = srsrc{6-2}; + let Inst{54} = slc; + let Inst{55} = tfe; + let Inst{63-56} = soffset; +} + +class MTBUF_Load_Pseudo : MTBUF_Pseudo < + opName, (outs regClass:$dst), + (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, + i8imm:$dfmt,
i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc, + i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset), + " $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"# + " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset" +> { + let mayLoad = 1; + let mayStore = 0; +} + +class MTBUF_Store_Pseudo : MTBUF_Pseudo < + opName, (outs), + (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, + i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, + SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset), + " $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"# + " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset" +> { + let mayLoad = 0; + let mayStore = 1; +} + +//===----------------------------------------------------------------------===// +// MUBUF classes +//===----------------------------------------------------------------------===// + +class MUBUF_Pseudo pattern=[]> : + InstSI, + SIMCInstr { + + let isPseudo = 1; + let isCodeGenOnly = 1; + let UseNamedOperandTable = 1; + + string Mnemonic = opName; + string AsmOperands = asmOps; + + let VM_CNT = 1; + let EXP_CNT = 1; + let MUBUF = 1; + let Uses = [EXEC]; + let hasSideEffects = 0; + let SchedRW = [WriteVMEM]; + + let AsmMatchConverter = "cvtMubuf"; + + bits<1> offen = 0; + bits<1> idxen = 0; + bits<1> addr64 = 0; + bits<1> has_vdata = 1; + bits<1> has_vaddr = 1; + bits<1> has_glc = 1; + bits<1> glc_value = 0; // the value for glc if no such operand + bits<1> has_srsrc = 1; + bits<1> has_soffset = 1; + bits<1> has_offset = 1; + bits<1> has_slc = 1; + bits<1> has_tfe = 1; +} + +class MUBUF_Real op, MUBUF_Pseudo ps> : + InstSI { + + let isPseudo = 0; + let isCodeGenOnly = 0; + + // copy relevant pseudo op flags + let SubtargetPredicate = ps.SubtargetPredicate; + let AsmMatchConverter = ps.AsmMatchConverter; + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + let TSFlags = ps.TSFlags; + + bits<12> offset; + bits<1> glc; + bits<1> lds = 0; + bits<8> vaddr; + bits<8> vdata; + bits<7> srsrc; + bits<1> slc; + bits<1> tfe; + bits<8> soffset; +} + + +// For cache invalidation instructions. +class MUBUF_Invalidate : + MUBUF_Pseudo { + + let AsmMatchConverter = ""; + + let hasSideEffects = 1; + let mayStore = 1; + + // Set everything to 0. 
+ let offen = 0; + let idxen = 0; + let addr64 = 0; + let has_vdata = 0; + let has_vaddr = 0; + let has_glc = 0; + let glc_value = 0; + let has_srsrc = 0; + let has_soffset = 0; + let has_offset = 0; + let has_slc = 0; + let has_tfe = 0; +} + +class getMUBUFInsDA vdataList, + list vaddrList=[]> { + RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList)); + RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); + dag InsNoData = !if(!empty(vaddrList), + (ins SReg_128:$srsrc, SCSrc_b32:$soffset, + offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), + (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, + offset:$offset, glc:$glc, slc:$slc, tfe:$tfe) + ); + dag InsData = !if(!empty(vaddrList), + (ins vdataClass:$vdata, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), + (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc, tfe:$tfe) + ); + dag ret = !if(!empty(vdataList), InsNoData, InsData); +} + +class getMUBUFIns vdataList=[]> { + dag ret = + !if(!eq(addrKind, BUFAddrKind.Offset), getMUBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.OffEn), getMUBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.IdxEn), getMUBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.BothEn), getMUBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.Addr64), getMUBUFInsDA.ret, + (ins)))))); +} + +class getMUBUFAsmOps { + string Pfx = + !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $soffset", + !if(!eq(addrKind, BUFAddrKind.OffEn), "$vaddr, $srsrc, $soffset offen", + !if(!eq(addrKind, BUFAddrKind.IdxEn), "$vaddr, $srsrc, $soffset idxen", + !if(!eq(addrKind, BUFAddrKind.BothEn), "$vaddr, $srsrc, $soffset idxen offen", + !if(!eq(addrKind, BUFAddrKind.Addr64), "$vaddr, $srsrc, $soffset addr64", + ""))))); + string ret = Pfx # "$offset"; +} + + class MUBUF_SetupAddr { + bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1, + !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0)); + + bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1, + !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0)); + + bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0); + + bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1); +} + +class MUBUF_Load_Pseudo pattern=[], + // Workaround bug bz30254 + int addrKindCopy = addrKind> + : MUBUF_Pseudo.ret, + " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc$tfe", + pattern>, + MUBUF_SetupAddr { + let PseudoInstr = opName # "_" # getAddrName.ret; + let mayLoad = 1; + let mayStore = 0; +} + +// FIXME: tfe can't be an operand because it requires a separate +// opcode because it needs an N+1 register class dest register. 
+multiclass MUBUF_Pseudo_Loads { + + def _OFFSET : MUBUF_Load_Pseudo , + MUBUFAddr64Table<0>; + + def _ADDR64 : MUBUF_Load_Pseudo , + MUBUFAddr64Table<1>; + + def _OFFEN : MUBUF_Load_Pseudo ; + def _IDXEN : MUBUF_Load_Pseudo ; + def _BOTHEN : MUBUF_Load_Pseudo ; + + let DisableWQM = 1 in { + def _OFFSET_exact : MUBUF_Load_Pseudo ; + def _OFFEN_exact : MUBUF_Load_Pseudo ; + def _IDXEN_exact : MUBUF_Load_Pseudo ; + def _BOTHEN_exact : MUBUF_Load_Pseudo ; + } +} + +class MUBUF_Store_Pseudo pattern=[], + // Workaround bug bz30254 + int addrKindCopy = addrKind, + RegisterClass vdataClassCopy = vdataClass> + : MUBUF_Pseudo.ret, + " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc$tfe", + pattern>, + MUBUF_SetupAddr { + let PseudoInstr = opName # "_" # getAddrName.ret; + let mayLoad = 0; + let mayStore = 1; +} + +multiclass MUBUF_Pseudo_Stores { + + def _OFFSET : MUBUF_Store_Pseudo , + MUBUFAddr64Table<0>; + + def _ADDR64 : MUBUF_Store_Pseudo , + MUBUFAddr64Table<1>; + + def _OFFEN : MUBUF_Store_Pseudo ; + def _IDXEN : MUBUF_Store_Pseudo ; + def _BOTHEN : MUBUF_Store_Pseudo ; + + let DisableWQM = 1 in { + def _OFFSET_exact : MUBUF_Store_Pseudo ; + def _OFFEN_exact : MUBUF_Store_Pseudo ; + def _IDXEN_exact : MUBUF_Store_Pseudo ; + def _BOTHEN_exact : MUBUF_Store_Pseudo ; + } +} + + +class getMUBUFAtomicInsDA vaddrList=[]> { + RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); + dag ret = !if(vdata_in, + !if(!empty(vaddrList), + (ins vdataClass:$vdata_in, + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc), + (ins vdataClass:$vdata_in, vaddrClass:$vaddr, + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc) + ), + !if(!empty(vaddrList), + (ins vdataClass:$vdata, + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc), + (ins vdataClass:$vdata, vaddrClass:$vaddr, + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc) + )); +} + +class getMUBUFAtomicIns { + dag ret = + !if(!eq(addrKind, BUFAddrKind.Offset), + getMUBUFAtomicInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.OffEn), + getMUBUFAtomicInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.IdxEn), + getMUBUFAtomicInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.BothEn), + getMUBUFAtomicInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.Addr64), + getMUBUFAtomicInsDA.ret, + (ins)))))); +} + +class MUBUF_Atomic_Pseudo pattern=[], + // Workaround bug bz30254 + int addrKindCopy = addrKind> + : MUBUF_Pseudo, + MUBUF_SetupAddr { + let mayStore = 1; + let mayLoad = 1; + let hasPostISelHook = 1; + let hasSideEffects = 1; + let DisableWQM = 1; + let has_glc = 0; + let has_tfe = 0; +} + +class MUBUF_AtomicNoRet_Pseudo pattern=[], + // Workaround bug bz30254 + int addrKindCopy = addrKind, + RegisterClass vdataClassCopy = vdataClass> + : MUBUF_Atomic_Pseudo.ret, + " $vdata, " # getMUBUFAsmOps.ret # "$slc", + pattern>, + AtomicNoRet.ret, 0> { + let PseudoInstr = opName # "_" # getAddrName.ret; + let glc_value = 0; + let AsmMatchConverter = "cvtMubufAtomic"; +} + +class MUBUF_AtomicRet_Pseudo pattern=[], + // Workaround bug bz30254 + int addrKindCopy = addrKind, + RegisterClass vdataClassCopy = vdataClass> + : MUBUF_Atomic_Pseudo.ret, + " $vdata, " # getMUBUFAsmOps.ret # " glc$slc", + pattern>, + AtomicNoRet.ret, 1> { + let PseudoInstr = opName # "_rtn_" # getAddrName.ret; + let glc_value = 1; + let Constraints = "$vdata = $vdata_in"; + let DisableEncoding = "$vdata_in"; + let AsmMatchConverter = "cvtMubufAtomicReturn"; +} + +multiclass MUBUF_Pseudo_Atomics { + + def _OFFSET : MUBUF_AtomicNoRet_Pseudo , + MUBUFAddr64Table <0>; + 
def _ADDR64 : MUBUF_AtomicNoRet_Pseudo , + MUBUFAddr64Table <1>; + def _OFFEN : MUBUF_AtomicNoRet_Pseudo ; + def _IDXEN : MUBUF_AtomicNoRet_Pseudo ; + def _BOTHEN : MUBUF_AtomicNoRet_Pseudo ; + + def _RTN_OFFSET : MUBUF_AtomicRet_Pseudo , + MUBUFAddr64Table <0, "_RTN">; + + def _RTN_ADDR64 : MUBUF_AtomicRet_Pseudo , + MUBUFAddr64Table <1, "_RTN">; + + def _RTN_OFFEN : MUBUF_AtomicRet_Pseudo ; + def _RTN_IDXEN : MUBUF_AtomicRet_Pseudo ; + def _RTN_BOTHEN : MUBUF_AtomicRet_Pseudo ; +} + + +//===----------------------------------------------------------------------===// +// MUBUF Instructions +//===----------------------------------------------------------------------===// + +let SubtargetPredicate = isGCN in { + +defm BUFFER_LOAD_FORMAT_X : MUBUF_Pseudo_Loads < + "buffer_load_format_x", VGPR_32 +>; +defm BUFFER_LOAD_FORMAT_XY : MUBUF_Pseudo_Loads < + "buffer_load_format_xy", VReg_64 +>; +defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Pseudo_Loads < + "buffer_load_format_xyz", VReg_96 +>; +defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Pseudo_Loads < + "buffer_load_format_xyzw", VReg_128 +>; +defm BUFFER_STORE_FORMAT_X : MUBUF_Pseudo_Stores < + "buffer_store_format_x", VGPR_32 +>; +defm BUFFER_STORE_FORMAT_XY : MUBUF_Pseudo_Stores < + "buffer_store_format_xy", VReg_64 +>; +defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Pseudo_Stores < + "buffer_store_format_xyz", VReg_96 +>; +defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Pseudo_Stores < + "buffer_store_format_xyzw", VReg_128 +>; +defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads < + "buffer_load_ubyte", VGPR_32, i32, mubuf_az_extloadi8 +>; +defm BUFFER_LOAD_SBYTE : MUBUF_Pseudo_Loads < + "buffer_load_sbyte", VGPR_32, i32, mubuf_sextloadi8 +>; +defm BUFFER_LOAD_USHORT : MUBUF_Pseudo_Loads < + "buffer_load_ushort", VGPR_32, i32, mubuf_az_extloadi16 +>; +defm BUFFER_LOAD_SSHORT : MUBUF_Pseudo_Loads < + "buffer_load_sshort", VGPR_32, i32, mubuf_sextloadi16 +>; +defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads < + "buffer_load_dword", VGPR_32, i32, mubuf_load +>; +defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads < + "buffer_load_dwordx2", VReg_64, v2i32, mubuf_load +>; +defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads < + "buffer_load_dwordx4", VReg_128, v4i32, mubuf_load +>; +defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores < + "buffer_store_byte", VGPR_32, i32, truncstorei8_global +>; +defm BUFFER_STORE_SHORT : MUBUF_Pseudo_Stores < + "buffer_store_short", VGPR_32, i32, truncstorei16_global +>; +defm BUFFER_STORE_DWORD : MUBUF_Pseudo_Stores < + "buffer_store_dword", VGPR_32, i32, global_store +>; +defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores < + "buffer_store_dwordx2", VReg_64, v2i32, global_store +>; +defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores < + "buffer_store_dwordx4", VReg_128, v4i32, global_store +>; +defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics < + "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global +>; +defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Pseudo_Atomics < + "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag +>; +defm BUFFER_ATOMIC_ADD : MUBUF_Pseudo_Atomics < + "buffer_atomic_add", VGPR_32, i32, atomic_add_global +>; +defm BUFFER_ATOMIC_SUB : MUBUF_Pseudo_Atomics < + "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global +>; +defm BUFFER_ATOMIC_SMIN : MUBUF_Pseudo_Atomics < + "buffer_atomic_smin", VGPR_32, i32, atomic_min_global +>; +defm BUFFER_ATOMIC_UMIN : MUBUF_Pseudo_Atomics < + "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global +>; +defm BUFFER_ATOMIC_SMAX : MUBUF_Pseudo_Atomics < + "buffer_atomic_smax", VGPR_32, i32, atomic_max_global +>; +defm BUFFER_ATOMIC_UMAX : 
MUBUF_Pseudo_Atomics < + "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global +>; +defm BUFFER_ATOMIC_AND : MUBUF_Pseudo_Atomics < + "buffer_atomic_and", VGPR_32, i32, atomic_and_global +>; +defm BUFFER_ATOMIC_OR : MUBUF_Pseudo_Atomics < + "buffer_atomic_or", VGPR_32, i32, atomic_or_global +>; +defm BUFFER_ATOMIC_XOR : MUBUF_Pseudo_Atomics < + "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global +>; +defm BUFFER_ATOMIC_INC : MUBUF_Pseudo_Atomics < + "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global +>; +defm BUFFER_ATOMIC_DEC : MUBUF_Pseudo_Atomics < + "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global +>; +defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global +>; +defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag +>; +defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_add_x2", VReg_64, i64, atomic_add_global +>; +defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_sub_x2", VReg_64, i64, atomic_sub_global +>; +defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_smin_x2", VReg_64, i64, atomic_min_global +>; +defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_umin_x2", VReg_64, i64, atomic_umin_global +>; +defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_smax_x2", VReg_64, i64, atomic_max_global +>; +defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_umax_x2", VReg_64, i64, atomic_umax_global +>; +defm BUFFER_ATOMIC_AND_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_and_x2", VReg_64, i64, atomic_and_global +>; +defm BUFFER_ATOMIC_OR_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_or_x2", VReg_64, i64, atomic_or_global +>; +defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_xor_x2", VReg_64, i64, atomic_xor_global +>; +defm BUFFER_ATOMIC_INC_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global +>; +defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global +>; + +let SubtargetPredicate = isSI in { // isn't on CI & VI +/* +defm BUFFER_ATOMIC_RSUB : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">; +defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">; +defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin">; +defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax">; +defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub_x2">; +defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap_x2">; +defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin_x2">; +defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax_x2">; +*/ + +def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc", + int_amdgcn_buffer_wbinvl1_sc>; +} + +def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1", + int_amdgcn_buffer_wbinvl1>; + +//===----------------------------------------------------------------------===// +// MTBUF Instructions +//===----------------------------------------------------------------------===// + +//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0, "tbuffer_load_format_x", []>; +//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <1, "tbuffer_load_format_xy", []>; +//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <2, "tbuffer_load_format_xyz", []>; +def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Pseudo <"tbuffer_load_format_xyzw", VReg_128>; +def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Pseudo 
<"tbuffer_store_format_x", VGPR_32>; +def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Pseudo <"tbuffer_store_format_xy", VReg_64>; +def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Pseudo <"tbuffer_store_format_xyz", VReg_128>; +def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Pseudo <"tbuffer_store_format_xyzw", VReg_128>; + +} // End let SubtargetPredicate = isGCN + +let SubtargetPredicate = isCIVI in { + +//===----------------------------------------------------------------------===// +// Instruction definitions for CI and newer. +//===----------------------------------------------------------------------===// +// Remaining instructions: +// BUFFER_LOAD_DWORDX3 +// BUFFER_STORE_DWORDX3 + +def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", + int_amdgcn_buffer_wbinvl1_vol>; + +} // End let SubtargetPredicate = isCIVI + +//===----------------------------------------------------------------------===// +// MUBUF Patterns +//===----------------------------------------------------------------------===// + +def mubuf_vaddr_offset : PatFrag< + (ops node:$ptr, node:$offset, node:$imm_offset), + (add (add node:$ptr, node:$offset), node:$imm_offset) +>; + + +let Predicates = [isGCN] in { + +// int_SI_vs_load_input +def : Pat< + (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr), + (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0) +>; + +// Offset in an 32-bit VGPR +def : Pat < + (SIload_constant v4i32:$sbase, i32:$voff), + (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0) +>; + + +//===----------------------------------------------------------------------===// +// buffer_load/store_format patterns +//===----------------------------------------------------------------------===// + +multiclass MUBUF_LoadIntrinsicPat { + def : Pat< + (vt (name v4i32:$rsrc, 0, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$glc, imm:$slc)), + (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), + (as_i1imm $glc), (as_i1imm $slc), 0) + >; + + def : Pat< + (vt (name v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$glc, imm:$slc)), + (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), + (as_i1imm $glc), (as_i1imm $slc), 0) + >; + + def : Pat< + (vt (name v4i32:$rsrc, 0, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$glc, imm:$slc)), + (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), + (as_i1imm $glc), (as_i1imm $slc), 0) + >; + + def : Pat< + (vt (name v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$glc, imm:$slc)), + (!cast(opcode # _BOTHEN) + (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), + $rsrc, $soffset, (as_i16imm $offset), + (as_i1imm $glc), (as_i1imm $slc), 0) + >; +} + +defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; +defm : MUBUF_LoadIntrinsicPat; + +multiclass MUBUF_StoreIntrinsicPat { + def : Pat< + (name vt:$vdata, v4i32:$rsrc, 0, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$glc, imm:$slc), + (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), + (as_i1imm $glc), (as_i1imm $slc), 0) + >; + + def : Pat< + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$glc, imm:$slc), + (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, + (as_i16imm $offset), (as_i1imm $glc), + 
(as_i1imm $slc), 0) + >; + + def : Pat< + (name vt:$vdata, v4i32:$rsrc, 0, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$glc, imm:$slc), + (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, + (as_i16imm $offset), (as_i1imm $glc), + (as_i1imm $slc), 0) + >; + + def : Pat< + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$glc, imm:$slc), + (!cast(opcode # _BOTHEN_exact) + $vdata, + (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), + $rsrc, $soffset, (as_i16imm $offset), + (as_i1imm $glc), (as_i1imm $slc), 0) + >; +} + +defm : MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; +defm : MUBUF_StoreIntrinsicPat; + +//===----------------------------------------------------------------------===// +// buffer_atomic patterns +//===----------------------------------------------------------------------===// + +multiclass BufferAtomicPatterns { + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, 0, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (!cast(opcode # _RTN_OFFSET) $vdata_in, $rsrc, $soffset, + (as_i16imm $offset), (as_i1imm $slc)) + >; + + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (!cast(opcode # _RTN_IDXEN) $vdata_in, $vindex, $rsrc, $soffset, + (as_i16imm $offset), (as_i1imm $slc)) + >; + + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, 0, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$slc), + (!cast(opcode # _RTN_OFFEN) $vdata_in, $voffset, $rsrc, $soffset, + (as_i16imm $offset), (as_i1imm $slc)) + >; + + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$slc), + (!cast(opcode # _RTN_BOTHEN) + $vdata_in, + (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), + $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)) + >; +} + +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; +defm : BufferAtomicPatterns; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, 0, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (EXTRACT_SUBREG + (BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (EXTRACT_SUBREG + (BUFFER_ATOMIC_CMPSWAP_RTN_IDXEN + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, 0, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$slc), + (EXTRACT_SUBREG + (BUFFER_ATOMIC_CMPSWAP_RTN_OFFEN + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicVOffset i32:$soffset, 
i16:$offset, i32:$voffset), + imm:$slc), + (EXTRACT_SUBREG + (BUFFER_ATOMIC_CMPSWAP_RTN_BOTHEN + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), + $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + + +class MUBUFLoad_Pattern : Pat < + (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, + i16:$offset, i1:$glc, i1:$slc, i1:$tfe))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe) + >; + +multiclass MUBUFLoad_Atomic_Pattern { + def : Pat < + (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, + i16:$offset, i1:$slc))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0) + >; + + def : Pat < + (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))), + (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0) + >; +} + +let Predicates = [isSICI] in { +def : MUBUFLoad_Pattern ; +def : MUBUFLoad_Pattern ; +def : MUBUFLoad_Pattern ; +def : MUBUFLoad_Pattern ; + +defm : MUBUFLoad_Atomic_Pattern ; +defm : MUBUFLoad_Atomic_Pattern ; +} // End Predicates = [isSICI] + +class MUBUFScratchLoadPat : Pat < + (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr, + i32:$soffset, u16imm:$offset))), + (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) +>; + +def : MUBUFScratchLoadPat ; +def : MUBUFScratchLoadPat ; +def : MUBUFScratchLoadPat ; +def : MUBUFScratchLoadPat ; +def : MUBUFScratchLoadPat ; +def : MUBUFScratchLoadPat ; +def : MUBUFScratchLoadPat ; + +// BUFFER_LOAD_DWORD*, addr64=0 +multiclass MUBUF_Load_Dword { + + def : Pat < + (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset, + imm:$offset, 0, 0, imm:$glc, imm:$slc, + imm:$tfe)), + (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), + (as_i1imm $slc), (as_i1imm $tfe)) + >; + + def : Pat < + (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, + imm:$offset, 1, 0, imm:$glc, imm:$slc, + imm:$tfe)), + (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), + (as_i1imm $tfe)) + >; + + def : Pat < + (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, + imm:$offset, 0, 1, imm:$glc, imm:$slc, + imm:$tfe)), + (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), + (as_i1imm $slc), (as_i1imm $tfe)) + >; + + def : Pat < + (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset, + imm:$offset, 1, 1, imm:$glc, imm:$slc, + imm:$tfe)), + (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc), + (as_i1imm $tfe)) + >; +} + +defm : MUBUF_Load_Dword ; +defm : MUBUF_Load_Dword ; +defm : MUBUF_Load_Dword ; + +multiclass MUBUFStore_Atomic_Pattern { + // Store follows atomic op convention so address is first + def : Pat < + (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, + i16:$offset, i1:$slc), vt:$val), + (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0) + >; + + def : Pat < + (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val), + (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0) + >; +} +let Predicates = [isSICI] in { +defm : MUBUFStore_Atomic_Pattern ; +defm : MUBUFStore_Atomic_Pattern ; +} // End Predicates = [isSICI] + +class MUBUFScratchStorePat : Pat < + (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset, + u16imm:$offset)), + (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) +>; + +def : MUBUFScratchStorePat ; +def : MUBUFScratchStorePat ; +def :
MUBUFScratchStorePat ; +def : MUBUFScratchStorePat ; +def : MUBUFScratchStorePat ; + +//===----------------------------------------------------------------------===// +// MTBUF Patterns +//===----------------------------------------------------------------------===// + +// TBUFFER_STORE_FORMAT_*, addr64=0 +class MTBUF_StoreResource : Pat< + (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr, + i32:$soffset, imm:$inst_offset, imm:$dfmt, + imm:$nfmt, imm:$offen, imm:$idxen, + imm:$glc, imm:$slc, imm:$tfe), + (opcode + $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen), + (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc, + (as_i1imm $slc), (as_i1imm $tfe), $soffset) +>; + +def : MTBUF_StoreResource ; +def : MTBUF_StoreResource ; +def : MTBUF_StoreResource ; +def : MTBUF_StoreResource ; + +} // End let Predicates = [isGCN] + +//===----------------------------------------------------------------------===// +// Target instructions, move to the appropriate target TD file +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SI +//===----------------------------------------------------------------------===// + +class MUBUF_Real_si op, MUBUF_Pseudo ps> : + MUBUF_Real, + Enc64, + SIMCInstr { + let AssemblerPredicate=isSICI; + let DecoderNamespace="SICI"; + + let Inst{11-0} = !if(ps.has_offset, offset, ?); + let Inst{12} = ps.offen; + let Inst{13} = ps.idxen; + let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{15} = ps.addr64; + let Inst{16} = lds; + let Inst{24-18} = op; + let Inst{31-26} = 0x38; //encoding + let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{55} = !if(ps.has_tfe, tfe, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); +} + +multiclass MUBUF_Real_AllAddr_si op> { + def _OFFSET_si : MUBUF_Real_si (NAME#"_OFFSET")>; + def _ADDR64_si : MUBUF_Real_si (NAME#"_ADDR64")>; + def _OFFEN_si : MUBUF_Real_si (NAME#"_OFFEN")>; + def _IDXEN_si : MUBUF_Real_si (NAME#"_IDXEN")>; + def _BOTHEN_si : MUBUF_Real_si (NAME#"_BOTHEN")>; +} + +multiclass MUBUF_Real_Atomic_si op> : MUBUF_Real_AllAddr_si { + def _RTN_OFFSET_si : MUBUF_Real_si (NAME#"_RTN_OFFSET")>; + def _RTN_ADDR64_si : MUBUF_Real_si (NAME#"_RTN_ADDR64")>; + def _RTN_OFFEN_si : MUBUF_Real_si (NAME#"_RTN_OFFEN")>; + def _RTN_IDXEN_si : MUBUF_Real_si (NAME#"_RTN_IDXEN")>; + def _RTN_BOTHEN_si : MUBUF_Real_si (NAME#"_RTN_BOTHEN")>; +} + +defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_si <0x00>; +defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_si <0x01>; +defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x02>; +defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x03>; +defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_si <0x04>; +defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_si <0x05>; +defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x06>; +defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x07>; +defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_si <0x08>; +defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_si <0x09>; +defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_si <0x0a>; +defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_si <0x0b>; +defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_si <0x0c>; +defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_si <0x0d>; +defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_si <0x0e>; 
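// [Editor's note — illustrative sketch, not part of the patch itself.] Each
// MUBUF_Real_AllAddr_si <op> line in this list stamps out one encoded SI
// "real" instruction per addressing-mode pseudo created earlier by
// MUBUF_Pseudo_Loads / MUBUF_Pseudo_Stores, rejoining the names via NAME
// concatenation. For example, the 0x0c line above expands roughly to:
//
//   def BUFFER_LOAD_DWORD_OFFSET_si : MUBUF_Real_si <0x0c, BUFFER_LOAD_DWORD_OFFSET>;
//   def BUFFER_LOAD_DWORD_ADDR64_si : MUBUF_Real_si <0x0c, BUFFER_LOAD_DWORD_ADDR64>;
//   def BUFFER_LOAD_DWORD_OFFEN_si  : MUBUF_Real_si <0x0c, BUFFER_LOAD_DWORD_OFFEN>;
//   def BUFFER_LOAD_DWORD_IDXEN_si  : MUBUF_Real_si <0x0c, BUFFER_LOAD_DWORD_IDXEN>;
//   def BUFFER_LOAD_DWORD_BOTHEN_si : MUBUF_Real_si <0x0c, BUFFER_LOAD_DWORD_BOTHEN>;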
+defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_si <0x18>; +defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_si <0x1a>; +defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_si <0x1c>; +defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_si <0x1d>; +defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_si <0x1e>; + +defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomic_si <0x30>; +defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_si <0x31>; +defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomic_si <0x32>; +defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomic_si <0x33>; +//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomic_si <0x34>; // isn't on CI & VI +defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomic_si <0x35>; +defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomic_si <0x36>; +defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomic_si <0x37>; +defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomic_si <0x38>; +defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomic_si <0x39>; +defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomic_si <0x3a>; +defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomic_si <0x3b>; +defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_si <0x3c>; +defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_si <0x3d>; + +//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomic_si <0x3e>; // isn't on VI +//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomic_si <0x3f>; // isn't on VI +//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomic_si <0x40>; // isn't on VI +defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomic_si <0x50>; +defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_si <0x51>; +defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomic_si <0x52>; +defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomic_si <0x53>; +//defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Real_Atomic_si <0x54>; // isn't on CI & VI +defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomic_si <0x55>; +defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomic_si <0x56>; +defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomic_si <0x57>; +defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomic_si <0x58>; +defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_si <0x59>; +defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_si <0x5a>; +defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_si <0x5b>; +defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_si <0x5c>; +defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_si <0x5d>; +//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomic_si <0x5e>; // isn't on VI +//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomic_si <0x5f>; // isn't on VI +//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomic_si <0x60>; // isn't on VI + +def BUFFER_WBINVL1_SC_si : MUBUF_Real_si <0x70, BUFFER_WBINVL1_SC>; +def BUFFER_WBINVL1_si : MUBUF_Real_si <0x71, BUFFER_WBINVL1>; + +class MTBUF_Real_si op, MTBUF_Pseudo ps> : + MTBUF_Real, + SIMCInstr { + let AssemblerPredicate=isSICI; + let DecoderNamespace="SICI"; +} + +def TBUFFER_LOAD_FORMAT_XYZW_si : MTBUF_Real_si <3, TBUFFER_LOAD_FORMAT_XYZW>; +def TBUFFER_STORE_FORMAT_X_si : MTBUF_Real_si <4, TBUFFER_STORE_FORMAT_X>; +def TBUFFER_STORE_FORMAT_XY_si : MTBUF_Real_si <5, TBUFFER_STORE_FORMAT_XY>; +def TBUFFER_STORE_FORMAT_XYZ_si : MTBUF_Real_si <6, TBUFFER_STORE_FORMAT_XYZ>; +def TBUFFER_STORE_FORMAT_XYZW_si : MTBUF_Real_si <7, TBUFFER_STORE_FORMAT_XYZW>; + + +//===----------------------------------------------------------------------===// +// CI +//===----------------------------------------------------------------------===// + +class MUBUF_Real_ci op, MUBUF_Pseudo ps> : + MUBUF_Real_si { + let AssemblerPredicate=isCIOnly; + let DecoderNamespace="CI"; +} + +def BUFFER_WBINVL1_VOL_ci : MUBUF_Real_ci <0x70, BUFFER_WBINVL1_VOL>; + + +//===----------------------------------------------------------------------===// +// VI
+//===----------------------------------------------------------------------===// + +class MUBUF_Real_vi op, MUBUF_Pseudo ps> : + MUBUF_Real, + Enc64, + SIMCInstr { + let AssemblerPredicate=isVI; + let DecoderNamespace="VI"; + + let Inst{11-0} = !if(ps.has_offset, offset, ?); + let Inst{12} = ps.offen; + let Inst{13} = ps.idxen; + let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{16} = lds; + let Inst{17} = !if(ps.has_slc, slc, ?); + let Inst{24-18} = op; + let Inst{31-26} = 0x38; //encoding + let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{55} = !if(ps.has_tfe, tfe, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); +} + +multiclass MUBUF_Real_AllAddr_vi op> { + def _OFFSET_vi : MUBUF_Real_vi (NAME#"_OFFSET")>; + def _OFFEN_vi : MUBUF_Real_vi (NAME#"_OFFEN")>; + def _IDXEN_vi : MUBUF_Real_vi (NAME#"_IDXEN")>; + def _BOTHEN_vi : MUBUF_Real_vi (NAME#"_BOTHEN")>; +} + +multiclass MUBUF_Real_Atomic_vi op> : + MUBUF_Real_AllAddr_vi { + def _RTN_OFFSET_vi : MUBUF_Real_vi (NAME#"_RTN_OFFSET")>; + def _RTN_OFFEN_vi : MUBUF_Real_vi (NAME#"_RTN_OFFEN")>; + def _RTN_IDXEN_vi : MUBUF_Real_vi (NAME#"_RTN_IDXEN")>; + def _RTN_BOTHEN_vi : MUBUF_Real_vi (NAME#"_RTN_BOTHEN")>; +} + +defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_vi <0x00>; +defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_vi <0x01>; +defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_vi <0x02>; +defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_vi <0x03>; +defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_vi <0x04>; +defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_vi <0x05>; +defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_vi <0x06>; +defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_vi <0x07>; +defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_vi <0x10>; +defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_vi <0x11>; +defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_vi <0x12>; +defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_vi <0x13>; +defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_vi <0x14>; +defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_vi <0x15>; +defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_vi <0x17>; +defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_vi <0x18>; +defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_vi <0x1a>; +defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_vi <0x1c>; +defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_vi <0x1d>; +defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_vi <0x1f>; + +defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomic_vi <0x40>; +defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_vi <0x41>; +defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomic_vi <0x42>; +defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomic_vi <0x43>; +defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomic_vi <0x44>; +defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomic_vi <0x45>; +defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomic_vi <0x46>; +defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomic_vi <0x47>; +defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomic_vi <0x48>; +defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomic_vi <0x49>; +defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomic_vi <0x4a>; +defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_vi <0x4b>; +defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_vi <0x4c>; + +defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomic_vi <0x60>; +defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_vi <0x61>; +defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomic_vi <0x62>; +defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomic_vi <0x63>; +defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomic_vi <0x64>; +defm BUFFER_ATOMIC_UMIN_X2 : 
MUBUF_Real_Atomic_vi <0x65>; +defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomic_vi <0x66>; +defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomic_vi <0x67>; +defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_vi <0x68>; +defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_vi <0x69>; +defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_vi <0x6a>; +defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_vi <0x6b>; +defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_vi <0x6c>; + +def BUFFER_WBINVL1_vi : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>; +def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>; + +class MTBUF_Real_vi op, MTBUF_Pseudo ps> : + MTBUF_Real, + SIMCInstr { + let AssemblerPredicate=isVI; + let DecoderNamespace="VI"; +} + +def TBUFFER_LOAD_FORMAT_XYZW_vi : MTBUF_Real_vi <3, TBUFFER_LOAD_FORMAT_XYZW>; +def TBUFFER_STORE_FORMAT_X_vi : MTBUF_Real_vi <4, TBUFFER_STORE_FORMAT_X>; +def TBUFFER_STORE_FORMAT_XY_vi : MTBUF_Real_vi <5, TBUFFER_STORE_FORMAT_XY>; +def TBUFFER_STORE_FORMAT_XYZ_vi : MTBUF_Real_vi <6, TBUFFER_STORE_FORMAT_XYZ>; +def TBUFFER_STORE_FORMAT_XYZW_vi : MTBUF_Real_vi <7, TBUFFER_STORE_FORMAT_XYZW>; + Index: llvm/trunk/lib/Target/AMDGPU/CIInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/CIInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/CIInstructions.td @@ -13,8 +13,6 @@ // S_CBRANCH_CDBGSYS // S_CBRANCH_CDBGSYS_OR_USER // S_CBRANCH_CDBGSYS_AND_USER -// BUFFER_LOAD_DWORDX3 -// BUFFER_STORE_DWORDX3 //===----------------------------------------------------------------------===// // VOP1 Instructions @@ -71,14 +69,4 @@ >; } // End isCommutable = 1 -//===----------------------------------------------------------------------===// -// MUBUF Instructions -//===----------------------------------------------------------------------===// - -let DisableSIDecoder = 1 in { -defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate , - "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol ->; -} - } // End SubtargetPredicate = isCIVI Index: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td @@ -332,68 +332,6 @@ let Inst{31-26} = 0x32; // encoding } -class MUBUFe op> : Enc64 { - bits<12> offset; - bits<1> offen; - bits<1> idxen; - bits<1> glc; - bits<1> addr64; - bits<1> lds; - bits<8> vaddr; - bits<8> vdata; - bits<7> srsrc; - bits<1> slc; - bits<1> tfe; - bits<8> soffset; - - let Inst{11-0} = offset; - let Inst{12} = offen; - let Inst{13} = idxen; - let Inst{14} = glc; - let Inst{15} = addr64; - let Inst{16} = lds; - let Inst{24-18} = op; - let Inst{31-26} = 0x38; //encoding - let Inst{39-32} = vaddr; - let Inst{47-40} = vdata; - let Inst{52-48} = srsrc{6-2}; - let Inst{54} = slc; - let Inst{55} = tfe; - let Inst{63-56} = soffset; -} - -class MTBUFe op> : Enc64 { - bits<8> vdata; - bits<12> offset; - bits<1> offen; - bits<1> idxen; - bits<1> glc; - bits<1> addr64; - bits<4> dfmt; - bits<3> nfmt; - bits<8> vaddr; - bits<7> srsrc; - bits<1> slc; - bits<1> tfe; - bits<8> soffset; - - let Inst{11-0} = offset; - let Inst{12} = offen; - let Inst{13} = idxen; - let Inst{14} = glc; - let Inst{15} = addr64; - let Inst{18-16} = op; - let Inst{22-19} = dfmt; - let Inst{25-23} = nfmt; - let Inst{31-26} = 0x3a; //encoding - let Inst{39-32} = vaddr; - let Inst{47-40} = vdata; - let Inst{52-48} = srsrc{6-2}; - let Inst{54} = slc; - let Inst{55} = tfe; - let Inst{63-56} = soffset; -} - class MIMGe op> : Enc64 { 
bits<8> vdata; bits<4> dmask; @@ -472,37 +410,6 @@ } // End Uses = [EXEC] -//===----------------------------------------------------------------------===// -// Vector I/O operations -//===----------------------------------------------------------------------===// - -class MUBUF pattern> : - InstSI { - - let VM_CNT = 1; - let EXP_CNT = 1; - let MUBUF = 1; - let Uses = [EXEC]; - - let hasSideEffects = 0; - let UseNamedOperandTable = 1; - let AsmMatchConverter = "cvtMubuf"; - let SchedRW = [WriteVMEM]; -} - -class MTBUF pattern> : - InstSI { - - let VM_CNT = 1; - let EXP_CNT = 1; - let MTBUF = 1; - let Uses = [EXEC]; - - let hasSideEffects = 0; - let UseNamedOperandTable = 1; - let SchedRW = [WriteVMEM]; -} - class MIMG pattern> : InstSI { Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -54,7 +54,6 @@ let VI3 = vi; } - // Execpt for the NONE field, this must be kept in sync with the // SIEncodingFamily enum in AMDGPUInstrInfo.cpp def SIEncodingFamily { @@ -118,22 +117,6 @@ SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]> >; -class MubufLoad : PatFrag < - (ops node:$ptr), (op node:$ptr), [{ - - const MemSDNode *LD = cast(N); - return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; -}]>; - -def mubuf_load : MubufLoad ; -def mubuf_az_extloadi8 : MubufLoad ; -def mubuf_sextloadi8 : MubufLoad ; -def mubuf_az_extloadi16 : MubufLoad ; -def mubuf_sextloadi16 : MubufLoad ; - -def mubuf_load_atomic : MubufLoad ; - //===----------------------------------------------------------------------===// // PatFrags for global memory operations //===----------------------------------------------------------------------===// @@ -309,11 +292,6 @@ [{return isUInt<20>(N->getZExtValue());}] >; -def mubuf_vaddr_offset : PatFrag< - (ops node:$ptr, node:$offset, node:$imm_offset), - (add (add node:$ptr, node:$offset), node:$imm_offset) ->; - class InlineImm : PatLeaf <(vt imm), [{ return isInlineImmediate(N); }]>; @@ -428,7 +406,6 @@ def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>; def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>; - def glc : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>; def slc : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; def tfe : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; @@ -512,16 +489,6 @@ def DS1Addr1Offset : ComplexPattern; def DS64Bit4ByteAligned : ComplexPattern; -def MUBUFAddr32 : ComplexPattern; -def MUBUFAddr64 : ComplexPattern; -def MUBUFAddr64Atomic : ComplexPattern; -def MUBUFScratch : ComplexPattern; -def MUBUFOffset : ComplexPattern; -def MUBUFOffsetNoGLC : ComplexPattern; -def MUBUFOffsetAtomic : ComplexPattern; -def MUBUFIntrinsicOffset : ComplexPattern; -def MUBUFIntrinsicVOffset : ComplexPattern; - def MOVRELOffset : ComplexPattern; def VOP3Mods0 : ComplexPattern; @@ -2098,450 +2065,6 @@ } //===----------------------------------------------------------------------===// -// MTBUF classes -//===----------------------------------------------------------------------===// - -class MTBUF_Pseudo pattern> : - MTBUF , - SIMCInstr { - let isPseudo = 1; - let isCodeGenOnly = 1; -} - -class MTBUF_Real_si op, string opName, dag outs, dag ins, - string asm> : - MTBUF , - MTBUFe , - SIMCInstr { - let DecoderNamespace="SICI"; - let DisableDecoder = DisableSIDecoder; -} - -class MTBUF_Real_vi op, string opName, dag 
outs, dag ins, string asm> : - MTBUF , - MTBUFe_vi , - SIMCInstr { - let DecoderNamespace="VI"; - let DisableDecoder = DisableVIDecoder; -} - -multiclass MTBUF_m op, string opName, dag outs, dag ins, string asm, - list pattern> { - - def "" : MTBUF_Pseudo ; - - def _si : MTBUF_Real_si ; - - def _vi : MTBUF_Real_vi <{0, op{2}, op{1}, op{0}}, opName, outs, ins, asm>; - -} - -let mayStore = 1, mayLoad = 0 in { - -multiclass MTBUF_Store_Helper op, string opName, - RegisterClass regClass> : MTBUF_m < - op, opName, (outs), - (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, - i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, - SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset), - opName#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt," - #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", [] ->; - -} // mayStore = 1, mayLoad = 0 - -let mayLoad = 1, mayStore = 0 in { - -multiclass MTBUF_Load_Helper op, string opName, - RegisterClass regClass> : MTBUF_m < - op, opName, (outs regClass:$dst), - (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, - i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc, - i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset), - opName#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt," - #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", [] ->; - -} // mayLoad = 1, mayStore = 0 - -//===----------------------------------------------------------------------===// -// MUBUF classes -//===----------------------------------------------------------------------===// - -class mubuf si, bits<7> vi = si> { - field bits<7> SI = si; - field bits<7> VI = vi; -} - -let isCodeGenOnly = 0 in { - -class MUBUF_si op, dag outs, dag ins, string asm, list pattern> : - MUBUF , MUBUFe { - let lds = 0; -} - -} // End let isCodeGenOnly = 0 - -class MUBUF_vi op, dag outs, dag ins, string asm, list pattern> : - MUBUF , MUBUFe_vi { - let lds = 0; -} - -class MUBUFAddr64Table { - bit IsAddr64 = is_addr64; - string OpName = NAME # suffix; -} - -class MUBUF_Pseudo pattern> : - MUBUF , - SIMCInstr { - let isPseudo = 1; - let isCodeGenOnly = 1; - - // dummy fields, so that we can use let statements around multiclasses - bits<1> offen; - bits<1> idxen; - bits<8> vaddr; - bits<1> glc; - bits<1> slc; - bits<1> tfe; - bits<8> soffset; -} - -class MUBUF_Real_si : - MUBUF , - MUBUFe , - SIMCInstr { - let lds = 0; - let AssemblerPredicate = SIAssemblerPredicate; - let DecoderNamespace="SICI"; - let DisableDecoder = DisableSIDecoder; -} - -class MUBUF_Real_vi : - MUBUF , - MUBUFe_vi , - SIMCInstr { - let lds = 0; - let AssemblerPredicate = VIAssemblerPredicate; - let DecoderNamespace="VI"; - let DisableDecoder = DisableVIDecoder; -} - -multiclass MUBUF_m pattern> { - - def "" : MUBUF_Pseudo , - MUBUFAddr64Table <0>; - - let DisableWQM = 1 in { - def "_exact" : MUBUF_Pseudo ; - } - - let addr64 = 0, isCodeGenOnly = 0 in { - def _si : MUBUF_Real_si ; - } - - def _vi : MUBUF_Real_vi ; -} - -multiclass MUBUFAddr64_m pattern> { - - def "" : MUBUF_Pseudo , - MUBUFAddr64Table <1>; - - let addr64 = 1, isCodeGenOnly = 0 in { - def _si : MUBUF_Real_si ; - } - - // There is no VI version. If the pseudo is selected, it should be lowered - // for VI appropriately. 
-} - -multiclass MUBUFAtomicOffset_m pattern, bit is_return> { - - def "" : MUBUF_Pseudo , - MUBUFAddr64Table <0, !if(is_return, "_RTN", "")>, - AtomicNoRet; - - let offen = 0, idxen = 0, tfe = 0, vaddr = 0 in { - let addr64 = 0 in { - def _si : MUBUF_Real_si ; - } - - def _vi : MUBUF_Real_vi ; - } -} - -multiclass MUBUFAtomicAddr64_m pattern, bit is_return> { - - def "" : MUBUF_Pseudo , - MUBUFAddr64Table <1, !if(is_return, "_RTN", "")>, - AtomicNoRet; - - let offen = 0, idxen = 0, addr64 = 1, tfe = 0 in { - def _si : MUBUF_Real_si ; - } - - // There is no VI version. If the pseudo is selected, it should be lowered - // for VI appropriately. -} - -multiclass MUBUFAtomicOther_m pattern, bit is_return> { - - def "" : MUBUF_Pseudo , - AtomicNoRet; - - let tfe = 0 in { - let addr64 = 0 in { - def _si : MUBUF_Real_si ; - } - - def _vi : MUBUF_Real_vi ; - } -} - -multiclass MUBUF_Atomic { - - let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1, - DisableWQM = 1 in { - - // No return variants - let glc = 0, AsmMatchConverter = "cvtMubufAtomic" in { - - defm _ADDR64 : MUBUFAtomicAddr64_m < - op, name#"_addr64", (outs), - (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset$slc", [], 0 - >; - - defm _OFFSET : MUBUFAtomicOffset_m < - op, name#"_offset", (outs), - (ins rc:$vdata, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, - slc:$slc), - name#" $vdata, off, $srsrc, $soffset$offset$slc", [], 0 - >; - - let offen = 1, idxen = 0 in { - defm _OFFEN : MUBUFAtomicOther_m < - op, name#"_offen", (outs), - (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset offen$offset$slc", [], 0 - >; - } - - let offen = 0, idxen = 1 in { - defm _IDXEN : MUBUFAtomicOther_m < - op, name#"_idxen", (outs), - (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$slc", [], 0 - >; - } - - let offen = 1, idxen = 1 in { - defm _BOTHEN : MUBUFAtomicOther_m < - op, name#"_bothen", (outs), - (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$slc", - [], 0 - >; - } - } // glc = 0 - - // Variant that return values - let glc = 1, Constraints = "$vdata = $vdata_in", - AsmMatchConverter = "cvtMubufAtomicReturn", - DisableEncoding = "$vdata_in" in { - - defm _RTN_ADDR64 : MUBUFAtomicAddr64_m < - op, name#"_rtn_addr64", (outs rc:$vdata), - (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset glc$slc", - [(set vt:$vdata, - (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$slc), vt:$vdata_in))], 1 - >; - - defm _RTN_OFFSET : MUBUFAtomicOffset_m < - op, name#"_rtn_offset", (outs rc:$vdata), - (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, slc:$slc), - name#" $vdata, off, $srsrc, $soffset$offset glc$slc", - [(set vt:$vdata, - (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, - i1:$slc), vt:$vdata_in))], 1 - >; - - let offen = 1, idxen = 0 in { - defm _RTN_OFFEN : MUBUFAtomicOther_m < - op, name#"_rtn_offen", (outs rc:$vdata), - (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, slc:$slc), - name#" $vdata, 
$vaddr, $srsrc, $soffset offen$offset glc$slc", - [], 1 - >; - } - - let offen = 0, idxen = 1 in { - defm _RTN_IDXEN : MUBUFAtomicOther_m < - op, name#"_rtn_idxen", (outs rc:$vdata), - (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset glc$slc", - [], 1 - >; - } - - let offen = 1, idxen = 1 in { - defm _RTN_BOTHEN : MUBUFAtomicOther_m < - op, name#"_rtn_bothen", (outs rc:$vdata), - (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset glc$slc", - [], 1 - >; - } - } // glc = 1 - - } // mayStore = 1, mayLoad = 1, hasPostISelHook = 1 -} - -// FIXME: tfe can't be an operand because it requires a separate -// opcode because it needs an N+1 register class dest register. -multiclass MUBUF_Load_Helper { - - let mayLoad = 1, mayStore = 0 in { - let offen = 0, idxen = 0, vaddr = 0 in { - defm _OFFSET : MUBUF_m ; - } - - let offen = 1, idxen = 0 in { - defm _OFFEN : MUBUF_m ; - } - - let offen = 0, idxen = 1 in { - defm _IDXEN : MUBUF_m ; - } - - let offen = 1, idxen = 1 in { - defm _BOTHEN : MUBUF_m ; - } - - let offen = 0, idxen = 0 in { - defm _ADDR64 : MUBUFAddr64_m ; - } - } -} - -multiclass MUBUF_Store_Helper { - let mayLoad = 0, mayStore = 1 in { - let offen = 0, idxen = 0, vaddr = 0 in { - defm _OFFSET : MUBUF_m ; - } // offen = 0, idxen = 0, vaddr = 0 - - let offen = 1, idxen = 0 in { - defm _OFFEN : MUBUF_m ; - } // end offen = 1, idxen = 0 - - let offen = 0, idxen = 1 in { - defm _IDXEN : MUBUF_m ; - } - - let offen = 1, idxen = 1 in { - defm _BOTHEN : MUBUF_m ; - } - - let offen = 0, idxen = 0 in { - defm _ADDR64 : MUBUFAddr64_m ; - } - } // End mayLoad = 0, mayStore = 1 -} - -// For cache invalidation instructions. -multiclass MUBUF_Invalidate { - let hasSideEffects = 1, mayStore = 1, AsmMatchConverter = "" in { - def "" : MUBUF_Pseudo ; - - // Set everything to 0. 
- let offset = 0, offen = 0, idxen = 0, glc = 0, vaddr = 0, - vdata = 0, srsrc = 0, slc = 0, tfe = 0, soffset = 0 in { - let addr64 = 0 in { - def _si : MUBUF_Real_si ; - } - - def _vi : MUBUF_Real_vi ; - } - } // End hasSideEffects = 1, mayStore = 1, AsmMatchConverter = "" -} - -//===----------------------------------------------------------------------===// // Vector instruction mappings //===----------------------------------------------------------------------===// Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td @@ -24,6 +24,7 @@ include "SOPInstructions.td" include "SMInstructions.td" include "FLATInstructions.td" +include "BUFInstructions.td" let SubtargetPredicate = isGCN in { @@ -272,183 +273,6 @@ defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 , "v_cmpx_class_f64">; //===----------------------------------------------------------------------===// -// MUBUF Instructions -//===----------------------------------------------------------------------===// - -defm BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper < - mubuf<0x00>, "buffer_load_format_x", VGPR_32 ->; -defm BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper < - mubuf<0x01>, "buffer_load_format_xy", VReg_64 ->; -defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper < - mubuf<0x02>, "buffer_load_format_xyz", VReg_96 ->; -defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper < - mubuf<0x03>, "buffer_load_format_xyzw", VReg_128 ->; -defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper < - mubuf<0x04>, "buffer_store_format_x", VGPR_32 ->; -defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper < - mubuf<0x05>, "buffer_store_format_xy", VReg_64 ->; -defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper < - mubuf<0x06>, "buffer_store_format_xyz", VReg_96 ->; -defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper < - mubuf<0x07>, "buffer_store_format_xyzw", VReg_128 ->; -defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper < - mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, mubuf_az_extloadi8 ->; -defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper < - mubuf<0x09, 0x11>, "buffer_load_sbyte", VGPR_32, i32, mubuf_sextloadi8 ->; -defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper < - mubuf<0x0a, 0x12>, "buffer_load_ushort", VGPR_32, i32, mubuf_az_extloadi16 ->; -defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper < - mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, mubuf_sextloadi16 ->; -defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper < - mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, mubuf_load ->; -defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper < - mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, mubuf_load ->; -defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper < - mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, mubuf_load ->; - -defm BUFFER_STORE_BYTE : MUBUF_Store_Helper < - mubuf<0x18>, "buffer_store_byte", VGPR_32, i32, truncstorei8_global ->; - -defm BUFFER_STORE_SHORT : MUBUF_Store_Helper < - mubuf<0x1a>, "buffer_store_short", VGPR_32, i32, truncstorei16_global ->; - -defm BUFFER_STORE_DWORD : MUBUF_Store_Helper < - mubuf<0x1c>, "buffer_store_dword", VGPR_32, i32, global_store ->; - -defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper < - mubuf<0x1d>, "buffer_store_dwordx2", VReg_64, v2i32, global_store ->; - -defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < - mubuf<0x1e, 0x1f>, "buffer_store_dwordx4", VReg_128, v4i32, global_store ->; - -defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic < - mubuf<0x30, 0x40>, "buffer_atomic_swap", 
-defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
-  mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
->;
-defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Atomic <
-  mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag
->;
-defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
-  mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
->;
-defm BUFFER_ATOMIC_SUB : MUBUF_Atomic <
-  mubuf<0x33, 0x43>, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
->;
-//def BUFFER_ATOMIC_RSUB : MUBUF_ , "buffer_atomic_rsub", []>; // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic <
-  mubuf<0x35, 0x44>, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
->;
-defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic <
-  mubuf<0x36, 0x45>, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
->;
-defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic <
-  mubuf<0x37, 0x46>, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
->;
-defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic <
-  mubuf<0x38, 0x47>, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
->;
-defm BUFFER_ATOMIC_AND : MUBUF_Atomic <
-  mubuf<0x39, 0x48>, "buffer_atomic_and", VGPR_32, i32, atomic_and_global
->;
-defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
-  mubuf<0x3a, 0x49>, "buffer_atomic_or", VGPR_32, i32, atomic_or_global
->;
-defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
-  mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
->;
-defm BUFFER_ATOMIC_INC : MUBUF_Atomic <
-  mubuf<0x3c, 0x4b>, "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global
->;
-defm BUFFER_ATOMIC_DEC : MUBUF_Atomic <
-  mubuf<0x3d, 0x4c>, "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global
->;
-
-//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_Atomic , "buffer_atomic_fcmpswap", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMIN : MUBUF_Atomic , "buffer_atomic_fmin", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMAX : MUBUF_Atomic , "buffer_atomic_fmax", []>; // isn't on VI
-defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Atomic <
-  mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global
->;
-defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Atomic <
-  mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag
->;
-defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Atomic <
-  mubuf<0x52, 0x62>, "buffer_atomic_add_x2", VReg_64, i64, atomic_add_global
->;
-defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Atomic <
-  mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", VReg_64, i64, atomic_sub_global
->;
-//defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Atomic , "buffer_atomic_rsub_x2", []>; // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Atomic <
-  mubuf<0x55, 0x64>, "buffer_atomic_smin_x2", VReg_64, i64, atomic_min_global
->;
-defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Atomic <
-  mubuf<0x56, 0x65>, "buffer_atomic_umin_x2", VReg_64, i64, atomic_umin_global
->;
-defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Atomic <
-  mubuf<0x57, 0x66>, "buffer_atomic_smax_x2", VReg_64, i64, atomic_max_global
->;
-defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Atomic <
-  mubuf<0x58, 0x67>, "buffer_atomic_umax_x2", VReg_64, i64, atomic_umax_global
->;
-defm BUFFER_ATOMIC_AND_X2 : MUBUF_Atomic <
-  mubuf<0x59, 0x68>, "buffer_atomic_and_x2", VReg_64, i64, atomic_and_global
->;
-defm BUFFER_ATOMIC_OR_X2 : MUBUF_Atomic <
-  mubuf<0x5a, 0x69>, "buffer_atomic_or_x2", VReg_64, i64, atomic_or_global
->;
-defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Atomic <
-  mubuf<0x5b, 0x6a>, "buffer_atomic_xor_x2", VReg_64, i64, atomic_xor_global
->;
-defm BUFFER_ATOMIC_INC_X2 : MUBUF_Atomic <
-  mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global
->;
-defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Atomic <
-  mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global
->;
-//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 , "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 , "buffer_atomic_fmin_x2", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 , "buffer_atomic_fmax_x2", []>; // isn't on VI
-
-let SubtargetPredicate = isSI, DisableVIDecoder = 1 in {
-defm BUFFER_WBINVL1_SC : MUBUF_Invalidate , "buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; // isn't on CI & VI
-}
-
-defm BUFFER_WBINVL1 : MUBUF_Invalidate , "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1>;
-
-//===----------------------------------------------------------------------===//
-// MTBUF Instructions
-//===----------------------------------------------------------------------===//
-
-//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "tbuffer_load_format_x", []>;
-//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "tbuffer_load_format_xy", []>;
-//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "tbuffer_load_format_xyz", []>;
-defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "tbuffer_load_format_xyzw", VReg_128>;
-defm TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "tbuffer_store_format_x", VGPR_32>;
-defm TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "tbuffer_store_format_xy", VReg_64>;
-defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "tbuffer_store_format_xyz", VReg_128>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "tbuffer_store_format_xyzw", VReg_128>;
-
-//===----------------------------------------------------------------------===//
 // VOP1 Instructions
 //===----------------------------------------------------------------------===//
@@ -1418,12 +1242,6 @@
   (SI_KILL 0xbf800000)
 >;
 
-/* int_SI_vs_load_input */
-def : Pat<
-  (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
-  (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
->;
-
 def : Pat <
   (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
                  f32:$src0, f32:$src1, f32:$src2, f32:$src3),
@@ -1432,200 +1250,6 @@
 >;
 
 //===----------------------------------------------------------------------===//
-// buffer_load/store_format patterns
-//===----------------------------------------------------------------------===//
-
-multiclass MUBUF_LoadIntrinsicPat {
-  def : Pat<
-    (vt (name v4i32:$rsrc, 0,
-              (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
-              imm:$glc, imm:$slc)),
-    (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
-      (as_i1imm $glc), (as_i1imm $slc), 0)
-  >;
-
-  def : Pat<
-    (vt (name v4i32:$rsrc, i32:$vindex,
-              (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
-              imm:$glc, imm:$slc)),
-    (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
-      (as_i1imm $glc), (as_i1imm $slc), 0)
-  >;
-
-  def : Pat<
-    (vt (name v4i32:$rsrc, 0,
-              (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
-              imm:$glc, imm:$slc)),
-    (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
-      (as_i1imm $glc), (as_i1imm $slc), 0)
-  >;
-
-  def : Pat<
-    (vt (name v4i32:$rsrc, i32:$vindex,
-              (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
-              imm:$glc, imm:$slc)),
-    (!cast(opcode # _BOTHEN)
-      (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
-      $rsrc, $soffset, (as_i16imm $offset),
-      (as_i1imm $glc), (as_i1imm $slc), 0)
-  >;
-}
-
-defm : MUBUF_LoadIntrinsicPat;
-defm : MUBUF_LoadIntrinsicPat;
-defm : MUBUF_LoadIntrinsicPat;
-defm : MUBUF_LoadIntrinsicPat;
-defm : MUBUF_LoadIntrinsicPat;
-defm : MUBUF_LoadIntrinsicPat;
-
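Each instantiation of MUBUF_LoadIntrinsicPat expands to four selection patterns for one opcode: _OFFSET when both the index and the VGPR offset are zero, _IDXEN for an index only, _OFFEN for a VGPR offset only, and _BOTHEN when both are present, with the index and offset packed into a 64-bit pair by REG_SEQUENCE. The defm lines above take an intrinsic, a result type and an opcode prefix; a plausible form of one instantiation (a hypothetical reconstruction, not verbatim patch text) is:

defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load, f32, "BUFFER_LOAD_DWORD">;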
-multiclass MUBUF_StoreIntrinsicPat {
-  def : Pat<
-    (name vt:$vdata, v4i32:$rsrc, 0,
-          (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
-          imm:$glc, imm:$slc),
-    (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
-      (as_i1imm $glc), (as_i1imm $slc), 0)
-  >;
-
-  def : Pat<
-    (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
-          (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
-          imm:$glc, imm:$slc),
-    (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
-      (as_i16imm $offset), (as_i1imm $glc),
-      (as_i1imm $slc), 0)
-  >;
-
-  def : Pat<
-    (name vt:$vdata, v4i32:$rsrc, 0,
-          (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
-          imm:$glc, imm:$slc),
-    (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
-      (as_i16imm $offset), (as_i1imm $glc),
-      (as_i1imm $slc), 0)
-  >;
-
-  def : Pat<
-    (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
-          (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
-          imm:$glc, imm:$slc),
-    (!cast(opcode # _BOTHEN_exact)
-      $vdata,
-      (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
-      $rsrc, $soffset, (as_i16imm $offset),
-      (as_i1imm $glc), (as_i1imm $slc), 0)
-  >;
-}
-
-defm : MUBUF_StoreIntrinsicPat;
-defm : MUBUF_StoreIntrinsicPat;
-defm : MUBUF_StoreIntrinsicPat;
-defm : MUBUF_StoreIntrinsicPat;
-defm : MUBUF_StoreIntrinsicPat;
-defm : MUBUF_StoreIntrinsicPat;
-
-//===----------------------------------------------------------------------===//
-// buffer_atomic patterns
-//===----------------------------------------------------------------------===//
-multiclass BufferAtomicPatterns {
-  def : Pat<
-    (name i32:$vdata_in, v4i32:$rsrc, 0,
-          (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
-          imm:$slc),
-    (!cast(opcode # _RTN_OFFSET) $vdata_in, $rsrc, $soffset,
-      (as_i16imm $offset), (as_i1imm $slc))
-  >;
-
-  def : Pat<
-    (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
-          (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
-          imm:$slc),
-    (!cast(opcode # _RTN_IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
-      (as_i16imm $offset), (as_i1imm $slc))
-  >;
-
-  def : Pat<
-    (name i32:$vdata_in, v4i32:$rsrc, 0,
-          (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
-          imm:$slc),
-    (!cast(opcode # _RTN_OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
-      (as_i16imm $offset), (as_i1imm $slc))
-  >;
-
-  def : Pat<
-    (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
-          (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
-          imm:$slc),
-    (!cast(opcode # _RTN_BOTHEN)
-      $vdata_in,
-      (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
-      $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc))
-  >;
-}
-
-defm : BufferAtomicPatterns;
-defm : BufferAtomicPatterns;
-defm : BufferAtomicPatterns;
-defm : BufferAtomicPatterns;
-defm : BufferAtomicPatterns;
-defm : BufferAtomicPatterns;
-defm : BufferAtomicPatterns;
-defm : BufferAtomicPatterns;
-defm : BufferAtomicPatterns;
-defm : BufferAtomicPatterns;
-
-def : Pat<
-  (int_amdgcn_buffer_atomic_cmpswap
-      i32:$data, i32:$cmp, v4i32:$rsrc, 0,
-      (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
-      imm:$slc),
-  (EXTRACT_SUBREG
-    (BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET
-      (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
-      $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
-    sub0)
->;
-
-def : Pat<
-  (int_amdgcn_buffer_atomic_cmpswap
-      i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
-      (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
-      imm:$slc),
-  (EXTRACT_SUBREG
-    (BUFFER_ATOMIC_CMPSWAP_RTN_IDXEN
-      (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
-      $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
-    sub0)
->;
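The 32-bit buffer cmpswap patterns above and below all use the same idiom: REG_SEQUENCE packs $data and $cmp into one VReg_64 pair (data in sub0, compare value in sub1), the _RTN instruction writes the pre-operation memory value back into the low half of that pair, and EXTRACT_SUBREG with sub0 recovers it as the i32 result. The idiom in isolation (a standalone sketch; my_cmpswap and MY_CMPSWAP_RTN are hypothetical stand-ins, not names from this patch):

// Pack two i32 operands into a 64-bit pair, then keep only the low
// dword of the 64-bit result.
def : Pat <
  (i32 (my_cmpswap i32:$data, i32:$cmp)),
  (EXTRACT_SUBREG
    (MY_CMPSWAP_RTN (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1)),
    sub0)
>;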
-
-def : Pat<
-  (int_amdgcn_buffer_atomic_cmpswap
-      i32:$data, i32:$cmp, v4i32:$rsrc, 0,
-      (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
-      imm:$slc),
-  (EXTRACT_SUBREG
-    (BUFFER_ATOMIC_CMPSWAP_RTN_OFFEN
-      (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
-      $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
-    sub0)
->;
-
-def : Pat<
-  (int_amdgcn_buffer_atomic_cmpswap
-      i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
-      (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
-      imm:$slc),
-  (EXTRACT_SUBREG
-    (BUFFER_ATOMIC_CMPSWAP_RTN_BOTHEN
-      (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
-      (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
-      $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
-    sub0)
->;
-
-//===----------------------------------------------------------------------===//
 // V_ICMPIntrinsic Pattern.
 //===----------------------------------------------------------------------===//
 
 class ICMP_Pattern : Pat <
@@ -1998,12 +1622,6 @@
 def : Ext32Pat ;
 def : Ext32Pat ;
 
-// Offset in a 32-bit VGPR
-def : Pat <
-  (SIload_constant v4i32:$sbase, i32:$voff),
-  (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
->;
-
 // The multiplication scales from [0,1] to the unsigned integer range
 def : Pat <
   (AMDGPUurecip i32:$src0),
@@ -2022,151 +1640,6 @@
 defm : BFIPatterns ;
 def : ROTRPattern ;
 
-//===----------------------------------------------------------------------===//
-// MUBUF Patterns
-//===----------------------------------------------------------------------===//
-
-class MUBUFLoad_Pattern : Pat <
-  (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                                i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
-  (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
->;
-
-multiclass MUBUFLoad_Atomic_Pattern {
-  def : Pat <
-    (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                                i16:$offset, i1:$slc))),
-    (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
-  >;
-
-  def : Pat <
-    (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
-    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
-  >;
-}
-
-let Predicates = [isSICI] in {
-def : MUBUFLoad_Pattern ;
-def : MUBUFLoad_Pattern ;
-def : MUBUFLoad_Pattern ;
-def : MUBUFLoad_Pattern ;
-
-defm : MUBUFLoad_Atomic_Pattern ;
-defm : MUBUFLoad_Atomic_Pattern ;
-} // End Predicates = [isSICI]
-
-class MUBUFScratchLoadPat : Pat <
-  (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
-                        i32:$soffset, u16imm:$offset))),
-  (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
->;
-
-def : MUBUFScratchLoadPat ;
-def : MUBUFScratchLoadPat ;
-def : MUBUFScratchLoadPat ;
-def : MUBUFScratchLoadPat ;
-def : MUBUFScratchLoadPat ;
-def : MUBUFScratchLoadPat ;
-def : MUBUFScratchLoadPat ;
-
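MUBUFScratchLoadPat and its store counterpart below cover private (scratch) accesses: the MUBUFScratch complex pattern decomposes the address into the scratch resource descriptor ($srsrc), a per-lane VGPR address ($vaddr), the scratch wave offset ($soffset) and an immediate offset, and the selected instruction is emitted with glc, slc and tfe all hard-coded to 0. Each def pairs one load fragment with the matching opcode; a plausible form of one of the argument lists above (hypothetical reconstruction) is:

def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;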
-// BUFFER_LOAD_DWORD*, addr64=0
-multiclass MUBUF_Load_Dword {
-
-  def : Pat <
-    (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
-                                  imm:$offset, 0, 0, imm:$glc, imm:$slc,
-                                  imm:$tfe)),
-    (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
-            (as_i1imm $slc), (as_i1imm $tfe))
-  >;
-
-  def : Pat <
-    (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
-                                  imm:$offset, 1, 0, imm:$glc, imm:$slc,
-                                  imm:$tfe)),
-    (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
-           (as_i1imm $tfe))
-  >;
-
-  def : Pat <
-    (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
-                                  imm:$offset, 0, 1, imm:$glc, imm:$slc,
-                                  imm:$tfe)),
-    (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
-           (as_i1imm $slc), (as_i1imm $tfe))
-  >;
-
-  def : Pat <
-    (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
-                                  imm:$offset, 1, 1, imm:$glc, imm:$slc,
-                                  imm:$tfe)),
-    (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
-            (as_i1imm $tfe))
-  >;
-}
-
-defm : MUBUF_Load_Dword ;
-defm : MUBUF_Load_Dword ;
-defm : MUBUF_Load_Dword ;
-
-multiclass MUBUFStore_Atomic_Pattern {
-  // Store follows atomic op convention so address is first
-  def : Pat <
-    (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                            i16:$offset, i1:$slc), vt:$val),
-    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
-  >;
-
-  def : Pat <
-    (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
-    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
-  >;
-}
-let Predicates = [isSICI] in {
-defm : MUBUFStore_Atomic_Pattern ;
-defm : MUBUFStore_Atomic_Pattern ;
-} // End Predicates = [isSICI]
-
-class MUBUFScratchStorePat : Pat <
-  (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
-                               u16imm:$offset)),
-  (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
->;
-
-def : MUBUFScratchStorePat ;
-def : MUBUFScratchStorePat ;
-def : MUBUFScratchStorePat ;
-def : MUBUFScratchStorePat ;
-def : MUBUFScratchStorePat ;
-
-//===----------------------------------------------------------------------===//
-// MTBUF Patterns
-//===----------------------------------------------------------------------===//
-
-// TBUFFER_STORE_FORMAT_*, addr64=0
-class MTBUF_StoreResource : Pat<
-  (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
-                   i32:$soffset, imm:$inst_offset, imm:$dfmt,
-                   imm:$nfmt, imm:$offen, imm:$idxen,
-                   imm:$glc, imm:$slc, imm:$tfe),
-  (opcode
-    $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
-    (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
-    (as_i1imm $slc), (as_i1imm $tfe), $soffset)
->;
-
-def : MTBUF_StoreResource ;
-def : MTBUF_StoreResource ;
-def : MTBUF_StoreResource ;
-def : MTBUF_StoreResource ;
-
 /********** ====================== **********/
 /**********  Indirect addressing   **********/
 /********** ====================== **********/
Index: llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td
+++ llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td
@@ -11,64 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-class MUBUFe_vi op> : Enc64 {
-  bits<12> offset;
-  bits<1> offen;
-  bits<1> idxen;
-  bits<1> glc;
-  bits<1> lds;
-  bits<8> vaddr;
-  bits<8> vdata;
-  bits<7> srsrc;
-  bits<1> slc;
-  bits<1> tfe;
-  bits<8> soffset;
-
-  let Inst{11-0} = offset;
-  let Inst{12} = offen;
-  let Inst{13} = idxen;
-  let Inst{14} = glc;
-  let Inst{16} = lds;
-  let Inst{17} = slc;
-  let Inst{24-18} = op;
-  let Inst{31-26} = 0x38; // encoding
-  let Inst{39-32} = vaddr;
-  let Inst{47-40} = vdata;
-  let Inst{52-48} = srsrc{6-2};
-  let Inst{55} = tfe;
-  let Inst{63-56} = soffset;
-}
-
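Two details of the VI MUBUF encoding above are worth noting. First, only bits 6-2 of the 7-bit srsrc value are encoded: the 128-bit resource descriptor lives in a 4-aligned SGPR quadruple, so the two low bits of its register number are always zero. Second, unlike the SI layout there is no addr64 bit; GFX8 dropped MUBUF addr64 addressing, and slc sits down in bit 17. The sub-bit slicing idiom in isolation (a standalone sketch, not code from this patch):

class SRsrcEnc <bits<7> regno> {
  // regno is a 4-aligned register number, so regno{1-0} is always 0
  // and only the high five bits need to be stored.
  bits<5> enc = regno{6-2};
}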
-class MTBUFe_vi op> : Enc64 {
-  bits<12> offset;
-  bits<1> offen;
-  bits<1> idxen;
-  bits<1> glc;
-  bits<4> dfmt;
-  bits<3> nfmt;
-  bits<8> vaddr;
-  bits<8> vdata;
-  bits<7> srsrc;
-  bits<1> slc;
-  bits<1> tfe;
-  bits<8> soffset;
-
-  let Inst{11-0} = offset;
-  let Inst{12} = offen;
-  let Inst{13} = idxen;
-  let Inst{14} = glc;
-  let Inst{18-15} = op;
-  let Inst{22-19} = dfmt;
-  let Inst{25-23} = nfmt;
-  let Inst{31-26} = 0x3a; // encoding
-  let Inst{39-32} = vaddr;
-  let Inst{47-40} = vdata;
-  let Inst{52-48} = srsrc{6-2};
-  let Inst{54} = slc;
-  let Inst{55} = tfe;
-  let Inst{63-56} = soffset;
-}
-
 class VOP3a_vi op> : Enc64 {
   bits<2> src0_modifiers;
   bits<9> src0;