Index: lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructions.td
+++ lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -222,6 +222,10 @@
 // Load/Store Pattern Fragments
 //===----------------------------------------------------------------------===//
 
+class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
+  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
+}]>;
+
 class LoadFrag <SDPatternOperator op> : PatFrag<(ops node:$ptr), (op node:$ptr)>;
 
 class StoreFrag<SDPatternOperator op> : PatFrag <
@@ -236,6 +240,10 @@
   return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
 }]>;
 
+class ConstantAddress : CodePatPred<[{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
+}]>;
+
 class LocalAddress : CodePatPred<[{
   return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
 }]>;
@@ -247,72 +255,16 @@
 class FlatLoadAddress : CodePatPred<[{
   const auto AS = cast<MemSDNode>(N)->getAddressSpace();
   return AS == AMDGPUASI.FLAT_ADDRESS ||
-         AS == AMDGPUASI.GLOBAL_ADDRESS;
-}]>;
-
-
-def load_private : LoadFrag <load>, PrivateAddress;
-def truncstorei8_private : StoreFrag<truncstorei8>, PrivateAddress;
-def truncstorei16_private : StoreFrag <truncstorei16>, PrivateAddress;
-def store_private : StoreFrag <store>, PrivateAddress;
-
-def store_private_hi16 : StoreHi16 <truncstorei16>, PrivateAddress;
-def truncstorei8_private_hi16 : StoreHi16<truncstorei8>, PrivateAddress;
-
-
-class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
-}]>;
-
-// Global address space loads
-class GlobalLoad <SDPatternOperator op> : GlobalMemOp <
-  (ops node:$ptr), (op node:$ptr)
->;
-
-def global_load : GlobalLoad <load>;
-def global_atomic_load : GlobalLoad<atomic_load>;
-
-// Global address space stores
-class GlobalStore <SDPatternOperator op> : GlobalMemOp <
-  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
->;
-
-def global_store : GlobalStore <store>;
-def global_store_atomic : GlobalStore<atomic_store>;
-
-
-class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
+         AS == AMDGPUASI.GLOBAL_ADDRESS ||
+         AS == AMDGPUASI.CONSTANT_ADDRESS;
 }]>;
 
-// Constant address space loads
-class ConstantLoad <SDPatternOperator op> : ConstantMemOp <
-  (ops node:$ptr), (op node:$ptr)
->;
-
-def constant_load : ConstantLoad<load>;
-
-class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
-}]>;
-
-// Local address space loads
-class LocalLoad <SDPatternOperator op> : LocalMemOp <
-  (ops node:$ptr), (op node:$ptr)
->;
-
-class LocalStore <SDPatternOperator op> : LocalMemOp <
-  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
->;
-
-class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
-  return cast<MemSDNode>(N)->getAddressSPace() == AMDGPUASI.FLAT_ADDRESS;
+class FlatStoreAddress : CodePatPred<[{
+  const auto AS = cast<MemSDNode>(N)->getAddressSpace();
+  return AS == AMDGPUASI.FLAT_ADDRESS ||
+         AS == AMDGPUASI.GLOBAL_ADDRESS;
 }]>;
 
-class FlatLoad <SDPatternOperator op> : FlatMemOp <
-  (ops node:$ptr), (op node:$ptr)
->;
-
 class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
                                                 (ld_node node:$ptr), [{
   LoadSDNode *L = cast<LoadSDNode>(N);
@@ -326,71 +278,98 @@
   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
 }]>;
 
-def az_extloadi8_global : GlobalLoad <az_extloadi8>;
-def sextloadi8_global : GlobalLoad <sextloadi8>;
+def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
 
-def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
-def sextloadi8_constant : ConstantLoad <sextloadi8>;
+def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
 
-def az_extloadi8_local : LocalLoad <az_extloadi8>;
-def sextloadi8_local : LocalLoad <sextloadi8>;
+class PrivateLoad <SDPatternOperator op> : LoadFrag <op>, PrivateAddress;
+class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;
 
-def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
-def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
+class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
+class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;
 
+class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalAddress;
+class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
 
-def extloadi8_private : LoadFrag <az_extloadi8>, PrivateAddress;
-def sextloadi8_private : LoadFrag <sextloadi8>, PrivateAddress;
+class FlatLoad <SDPatternOperator op> : LoadFrag <op>, FlatLoadAddress;
+class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress;
 
-def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+class ConstantLoad <SDPatternOperator op> : LoadFrag <op>, ConstantAddress;
 
-def az_extloadi16_global : GlobalLoad <az_extloadi16>;
-def sextloadi16_global : GlobalLoad <sextloadi16>;
 
-def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
-def sextloadi16_constant : ConstantLoad <sextloadi16>;
+def load_private : PrivateLoad <load>;
+def extloadi8_private : PrivateLoad <az_extloadi8>;
+def sextloadi8_private : PrivateLoad <sextloadi8>;
+def extloadi16_private : PrivateLoad <az_extloadi16>;
+def sextloadi16_private : PrivateLoad <sextloadi16>;
 
-def az_extloadi16_local : LocalLoad <az_extloadi16>;
-def sextloadi16_local : LocalLoad <sextloadi16>;
+def store_private : PrivateStore <store>;
+def truncstorei8_private : PrivateStore<truncstorei8>;
+def truncstorei16_private : PrivateStore <truncstorei16>;
+def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
+def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;
 
-def extloadi16_private : LoadFrag <az_extloadi16>, PrivateAddress;
-def sextloadi16_private : LoadFrag <sextloadi16>, PrivateAddress;
 
-def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
+def load_global : GlobalLoad <load>;
+def sextloadi8_global : GlobalLoad <sextloadi8>;
+def az_extloadi8_global : GlobalLoad <az_extloadi8>;
+def sextloadi16_global : GlobalLoad <sextloadi16>;
+def az_extloadi16_global : GlobalLoad <az_extloadi16>;
+def atomic_load_global : GlobalLoad<atomic_load>;
 
-def az_extloadi32_global : GlobalLoad <az_extloadi32>;
+def store_global : GlobalStore <store>;
+def truncstorei8_global : GlobalStore <truncstorei8>;
+def truncstorei16_global : GlobalStore <truncstorei16>;
+def store_atomic_global : GlobalStore<atomic_store>;
+def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
+def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;
 
-def az_extloadi32_flat : FlatLoad <az_extloadi32>;
+def load_local : LocalLoad <load>;
+def az_extloadi8_local : LocalLoad <az_extloadi8>;
+def sextloadi8_local : LocalLoad <sextloadi8>;
+def az_extloadi16_local : LocalLoad <az_extloadi16>;
+def sextloadi16_local : LocalLoad <sextloadi16>;
 
-def az_extloadi32_constant : ConstantLoad <az_extloadi32>;
+def store_local : LocalStore <store>;
+def truncstorei8_local : LocalStore <truncstorei8>;
+def truncstorei16_local : LocalStore <truncstorei16>;
+def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
+def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
 
-def truncstorei8_global : GlobalStore <truncstorei8>;
-def truncstorei16_global : GlobalStore <truncstorei16>;
+def load_align8_local : Aligned8Bytes <
+  (ops node:$ptr), (load_local node:$ptr)
+>;
 
-def truncstorei8_global_hi16 : StoreHi16 <truncstorei8>, GlobalAddress;
-def truncstorei16_global_hi16 : StoreHi16 <truncstorei16>, GlobalAddress;
+def store_align8_local : Aligned8Bytes <
+  (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
+>;
 
-def local_store : StoreFrag <store>, LocalAddress;
-def truncstorei8_local : StoreFrag <truncstorei8>, LocalAddress;
-def truncstorei16_local : StoreFrag <truncstorei16>, LocalAddress;
-def local_load : LocalLoad <load>;
+def load_flat : FlatLoad <load>;
+def az_extloadi8_flat : FlatLoad <az_extloadi8>;
+def sextloadi8_flat : FlatLoad <sextloadi8>;
+def az_extloadi16_flat : FlatLoad <az_extloadi16>;
+def sextloadi16_flat : FlatLoad <sextloadi16>;
+def atomic_load_flat : FlatLoad<atomic_load>;
 
-class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
-  return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
-}]>;
+def store_flat : FlatStore <store>;
+def truncstorei8_flat : FlatStore <truncstorei8>;
+def truncstorei16_flat : FlatStore <truncstorei16>;
+def atomic_store_flat : FlatStore <atomic_store>;
+def truncstorei8_hi16_flat : StoreHi16<truncstorei8>, FlatStoreAddress;
+def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;
 
-def local_load_aligned8bytes : Aligned8Bytes <
-  (ops node:$ptr), (local_load node:$ptr)
->;
 
-def local_store_aligned8bytes : Aligned8Bytes <
-  (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
->;
+def constant_load : ConstantLoad<load>;
+def sextloadi8_constant : ConstantLoad <sextloadi8>;
+def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
+def sextloadi16_constant : ConstantLoad <sextloadi16>;
+def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
+
 class local_binary_atomic_op <SDNode atomic_op> : PatFrag<(ops node:$ptr, node:$value),
@@ -398,7 +377,6 @@
   return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
 }]>;
 
-
 def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
 def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
 def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
@@ -465,26 +443,25 @@
 defm atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
 defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
 
-//legacy
+// Legacy.
 def AMDGPUatomic_cmp_swap_global : PatFrag<
-    (ops node:$ptr, node:$value),
-    (AMDGPUatomic_cmp_swap node:$ptr, node:$value),
-    [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
+  (ops node:$ptr, node:$value),
+  (AMDGPUatomic_cmp_swap node:$ptr, node:$value)>, GlobalAddress;
 
 def atomic_cmp_swap_global : PatFrag<
-    (ops node:$ptr, node:$cmp, node:$value),
-    (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
-    [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
+  (ops node:$ptr, node:$cmp, node:$value),
+  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value)>, GlobalAddress;
+
 def atomic_cmp_swap_global_noret : PatFrag<
-    (ops node:$ptr, node:$cmp, node:$value),
-    (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
-    [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+  (ops node:$ptr, node:$cmp, node:$value),
+  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
+  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
 
 def atomic_cmp_swap_global_ret : PatFrag<
-    (ops node:$ptr, node:$cmp, node:$value),
-    (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
-    [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+  (ops node:$ptr, node:$cmp, node:$value),
+  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
+  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
 
 //===----------------------------------------------------------------------===//
 // Misc Pattern Fragments
Index: lib/Target/AMDGPU/BUFInstructions.td
===================================================================
--- lib/Target/AMDGPU/BUFInstructions.td
+++ lib/Target/AMDGPU/BUFInstructions.td
@@ -699,16 +699,16 @@
   "buffer_store_short", VGPR_32, i32, truncstorei16_global
 >;
 defm BUFFER_STORE_DWORD : MUBUF_Pseudo_Stores <
-  "buffer_store_dword", VGPR_32, i32, global_store
+  "buffer_store_dword", VGPR_32, i32, store_global
 >;
 defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores <
-  "buffer_store_dwordx2", VReg_64, v2i32, global_store
+  "buffer_store_dwordx2", VReg_64, v2i32, store_global
 >;
 defm BUFFER_STORE_DWORDX3 : MUBUF_Pseudo_Stores <
-  "buffer_store_dwordx3", VReg_96, untyped, global_store
+  "buffer_store_dwordx3", VReg_96, untyped, store_global
 >;
 defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores <
-  "buffer_store_dwordx4", VReg_128, v4i32, global_store
+  "buffer_store_dwordx4", VReg_128, v4i32, store_global
 >;
 defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics <
   "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
@@ -1219,8 +1219,8 @@
 >;
 }
 let Predicates = [isSICI] in {
-defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, global_store_atomic>;
-defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, global_store_atomic>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, store_atomic_global>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, store_atomic_global>;
 } // End Predicates = [isSICI]
 
@@ -1235,7 +1235,7 @@
 }
 
 defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_global>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, global_store>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, store_global>;
 
@@ ... @@ multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
-defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_D16_HI_OFFEN, BUFFER_STORE_SHORT_D16_HI_OFFSET, i32, store_private_hi16>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D16_HI_OFFSET, i32, truncstorei8_private_hi16>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_D16_HI_OFFEN, BUFFER_STORE_SHORT_D16_HI_OFFSET, i32, store_hi16_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D16_HI_OFFSET, i32, truncstorei8_hi16_private>;
 }
 }
Index: lib/Target/AMDGPU/CaymanInstructions.td
===================================================================
--- lib/Target/AMDGPU/CaymanInstructions.td
+++ lib/Target/AMDGPU/CaymanInstructions.td
@@ -76,7 +76,7 @@
   CF_MEM_RAT_CACHELESS <0x14, 0, mask,
                         (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
                         "STORE_DWORD $rw_gpr, $index_gpr",
-                        [(global_store vt:$rw_gpr, i32:$index_gpr)]> {
+                        [(store_global vt:$rw_gpr, i32:$index_gpr)]> {
   let eop = 0; // This bit is not used on Cayman.
 }
Index: lib/Target/AMDGPU/DSInstructions.td
===================================================================
--- lib/Target/AMDGPU/DSInstructions.td
+++ lib/Target/AMDGPU/DSInstructions.td
@@ -543,25 +543,25 @@
   (inst $ptr, (as_i16imm $offset), (i1 0))
 >;
 
-def : DSReadPat <DS_READ_I8,  i32, si_sextload_local_i8>;
-def : DSReadPat <DS_READ_U8,  i32, si_az_extload_local_i8>;
-def : DSReadPat <DS_READ_I8,  i16, si_sextload_local_i8>;
-def : DSReadPat <DS_READ_U8,  i16, si_az_extload_local_i8>;
-def : DSReadPat <DS_READ_I16, i32, si_sextload_local_i16>;
-def : DSReadPat <DS_READ_U16, i32, si_az_extload_local_i16>;
-def : DSReadPat <DS_READ_U16, i16, si_load_local>;
-def : DSReadPat <DS_READ_B32, i32, si_load_local>;
-def : DSReadPat <DS_READ_B32, f32, si_load_local>;
+def : DSReadPat <DS_READ_I8,  i32, sextloadi8_local_m0>;
+def : DSReadPat <DS_READ_U8,  i32, az_extloadi8_local_m0>;
+def : DSReadPat <DS_READ_I8,  i16, sextloadi8_local_m0>;
+def : DSReadPat <DS_READ_U8,  i16, az_extloadi8_local_m0>;
+def : DSReadPat <DS_READ_I16, i32, sextloadi16_local_m0>;
+def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local_m0>;
+def : DSReadPat <DS_READ_U16, i16, load_local_m0>;
+def : DSReadPat <DS_READ_B32, i32, load_local_m0>;
+def : DSReadPat <DS_READ_B32, f32, load_local_m0>;
 
 let AddedComplexity = 100 in {
 
-def : DSReadPat <DS_READ_B64, v2i32, si_load_local_align8>;
+def : DSReadPat <DS_READ_B64, v2i32, load_align8_local_m0>;
 
 } // End AddedComplexity = 100
 
 def : Pat <
-  (v2i32 (si_load_local (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
-                                             i8:$offset1))),
+  (v2i32 (load_local_m0 (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+                                             i8:$offset1))),
   (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0))
 >;
 
@@ -570,11 +570,11 @@
   (inst $ptr, $value, (as_i16imm $offset), (i1 0))
 >;
 
-def : DSWritePat <DS_WRITE_B8,  i32, si_truncstore_local_i8>;
-def : DSWritePat <DS_WRITE_B16, i32, si_truncstore_local_i16>;
-def : DSWritePat <DS_WRITE_B8,  i16, si_truncstore_local_i8>;
-def : DSWritePat <DS_WRITE_B16, i16, si_store_local>;
-def : DSWritePat <DS_WRITE_B32, i32, si_store_local>;
+def : DSWritePat <DS_WRITE_B8,  i32, truncstorei8_local_m0>;
+def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local_m0>;
+def : DSWritePat <DS_WRITE_B8,  i16, truncstorei8_local_m0>;
+def : DSWritePat <DS_WRITE_B16, i16, store_local_m0>;
+def : DSWritePat <DS_WRITE_B32, i32, store_local_m0>;
 
 let Predicates = [HasD16LoadStore] in {
 def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_local_hi16>;
@@ -583,11 +583,11 @@
 
 let AddedComplexity = 100 in {
 
-def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;
+def : DSWritePat <DS_WRITE_B64, v2i32, store_align8_local_m0>;
 
 } // End AddedComplexity = 100
 
 def : Pat <
-  (si_store_local v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
-                                                     i8:$offset1)),
+  (store_local_m0 v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+                                                     i8:$offset1)),
   (DS_WRITE2_B32 $ptr, (i32 (EXTRACT_SUBREG $value, sub0)),
                  (i32 (EXTRACT_SUBREG $value, sub1)), $offset0, $offset1,
Index: lib/Target/AMDGPU/EvergreenInstructions.td
===================================================================
--- lib/Target/AMDGPU/EvergreenInstructions.td
+++ lib/Target/AMDGPU/EvergreenInstructions.td
@@ -128,21 +128,21 @@
 def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1,
   (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
   "STORE_RAW $rw_gpr, $index_gpr, $eop",
-  [(global_store i32:$rw_gpr, i32:$index_gpr)]
+  [(store_global i32:$rw_gpr, i32:$index_gpr)]
 >;
 
 // 64-bit store
 def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3,
   (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
   "STORE_RAW $rw_gpr.XY, $index_gpr, $eop",
-  [(global_store v2i32:$rw_gpr, i32:$index_gpr)]
+  [(store_global v2i32:$rw_gpr, i32:$index_gpr)]
 >;
 
 //128-bit store
 def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
   (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
   "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop",
-  [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
+  [(store_global v4i32:$rw_gpr, i32:$index_gpr)]
 >;
 
 def RAT_STORE_TYPED_eg: CF_MEM_RAT_STORE_TYPED<1>;
@@ -614,7 +614,7 @@
 def LDS_MIN_UINT : R600_LDS_1A1D_NORET <0x7, "LDS_MIN_UINT", [] >;
 def LDS_MAX_UINT : R600_LDS_1A1D_NORET <0x8, "LDS_MAX_UINT", [] >;
 def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
-  [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
+  [(store_local (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
 >;
 def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE",
   [(truncstorei8_local i32:$src1, i32:$src0)]
@@ -656,7 +656,7 @@
   [(set i32:$dst, (atomic_cmp_swap_32_local i32:$src0, i32:$src1, i32:$src2))]
 >;
 def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
-  [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
+  [(set (i32 R600_Reg32:$dst), (load_local R600_Reg32:$src0))]
 >;
 def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET",
   [(set i32:$dst, (sextloadi8_local i32:$src0))]
Index: lib/Target/AMDGPU/FLATInstructions.td
===================================================================
--- lib/Target/AMDGPU/FLATInstructions.td
+++ lib/Target/AMDGPU/FLATInstructions.td
@@ -620,37 +620,6 @@
 // Flat Patterns
 //===----------------------------------------------------------------------===//
 
-class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
-                                               (ld node:$ptr), [{
-  auto const AS = cast<MemSDNode>(N)->getAddressSpace();
-  return AS == AMDGPUASI.FLAT_ADDRESS ||
-         AS == AMDGPUASI.GLOBAL_ADDRESS ||
-         AS == AMDGPUASI.CONSTANT_ADDRESS;
-}]>;
-
-class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
-                                               (st node:$val, node:$ptr), [{
-  auto const AS = cast<MemSDNode>(N)->getAddressSpace();
-  return AS == AMDGPUASI.FLAT_ADDRESS ||
-         AS == AMDGPUASI.GLOBAL_ADDRESS;
-}]>;
-
-def atomic_flat_load : flat_ld <atomic_load>;
-def flat_load : flat_ld <load>;
-def flat_az_extloadi8 : flat_ld <az_extloadi8>;
-def flat_sextloadi8 : flat_ld <sextloadi8>;
-def flat_az_extloadi16 : flat_ld <az_extloadi16>;
-def flat_sextloadi16 : flat_ld <sextloadi16>;
-
-def atomic_flat_store : flat_st <atomic_store>;
-def flat_store : flat_st <store>;
-def flat_truncstorei8 : flat_st <truncstorei8>;
-def flat_truncstorei16 : flat_st <truncstorei16>;
-
-def flat_truncstorei8_hi16 : StoreHi16<truncstorei8>, FlatLoadAddress;
-def flat_truncstorei16_hi16 : StoreHi16<truncstorei16>, FlatLoadAddress;
-
-
 // Patterns for global loads with no offset.
 class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
   (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
@@ -705,27 +674,27 @@
 
 let Predicates = [HasFlatAddressSpace] in {
 
-def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
-def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
-def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i16>;
-def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i16>;
-def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
-def : FlatLoadPat <FLAT_LOAD_USHORT, flat_load, i16>;
-def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_flat, i16>;
+def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
+def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, v2i32>;
 
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_flat, i32>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_flat, i64>;
 
-def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
-def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
-def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
+def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
+def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
+def : FlatStorePat <FLAT_STORE_DWORD, store_flat, i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32>;
 
-def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat, i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat, i64>;
 
 def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
 def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
@@ -755,12 +724,12 @@
 def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
 def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
 
-def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i16>;
-def : FlatStorePat <FLAT_STORE_SHORT, flat_store, i16>;
+def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
+def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
 
 let Predicates = [HasD16LoadStore] in {
-def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, flat_truncstorei16_hi16, i32>;
-def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, flat_truncstorei8_hi16, i32>;
+def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
+def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
 }
 
 } // End Predicates = [HasFlatAddressSpace]
 
@@ -775,30 +744,28 @@
 
 def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, global_load, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, global_load, v2i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, global_load, v4i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, v2i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;
 
-def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, global_atomic_load, i32>;
-def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, global_atomic_load, i64>;
+def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_global, i32>;
+def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_global, i64>;
 
 def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
 def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
 def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, global_store, i16>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, global_store, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, global_store, v2i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, global_store, v4i32>;
-
+def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32>;
 
 let Predicates = [HasD16LoadStore] in {
-def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_global_hi16, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_global_hi16, i32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
 }
 
-
-def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, global_store_atomic, i32>;
-def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, global_store_atomic, i64>;
+def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>;
+def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64>;
 
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_add_global, i32>;
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -115,80 +115,109 @@
 defm atomic_dec_global : global_binary_atomic_op<SIatomic_dec>;
 
 //===----------------------------------------------------------------------===//
-// SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
-// to be glued to the memory instructions.
+// SDNode and PatFrag definitions for loads and stores with a glue input.
+// These exist so that s_mov_b32 m0, -1 can be glued to local (LDS)
+// memory instructions.
+//
+// These mirror the regular load/store PatFrags and rely on special
+// processing during Select() to add the glued copy.
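+//
+// For example, a local load that would otherwise select to a lone
+//   ds_read_b32 v0, v1
+// is instead selected as the glued pair
+//   s_mov_b32 m0, -1
+//   ds_read_b32 v0, v1      ; glued to the m0 copy
+// so scheduling cannot separate the M0 initialization from the DS
+// instruction. (The glued copy itself is inserted during instruction
+// selection; at the time of this change that is done by glueCopyToM0()
+// in AMDGPUISelDAGToDAG.cpp.)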
+//
 //===----------------------------------------------------------------------===//
 
-def SIld_local : SDNode <"ISD::LOAD", SDTLoad,
+def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
   [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
 >;
 
-def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
 }]>;
 
-def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
-         cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr), [{
+  return cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
 }]>;
 
-def si_load_local_align8 : Aligned8Bytes <
-  (ops node:$ptr), (si_load_local node:$ptr)
->;
+def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr), [{
+  return cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
+}]>;
 
-def si_sextload_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
+def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr), [{
   return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
 }]>;
 
-def si_az_extload_local : AZExtLoadBase <si_ld_local>;
-multiclass SIExtLoadLocal <PatFrag ld_node> {
+def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr), [{
+  return cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
+}]>;
 
-  def _i8 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
-                     [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;}]
-  >;
+def az_extload_glue : AZExtLoadBase <unindexedload_glue>;
 
-  def _i16 : PatFrag <(ops node:$ptr), (ld_node node:$ptr),
-                      [{return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;}]
-  >;
-}
+def az_extloadi8_glue : PatFrag<(ops node:$ptr), (az_extload_glue node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def az_extloadi16_glue : PatFrag<(ops node:$ptr), (az_extload_glue node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
 
-defm si_sextload_local : SIExtLoadLocal <si_sextload_local>;
-defm si_az_extload_local : SIExtLoadLocal <si_az_extload_local>;
+def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
 
-def SIst_local : SDNode <"ISD::STORE", SDTStore,
+def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def load_glue_align8 : Aligned8Bytes <
+  (ops node:$ptr), (load_glue node:$ptr)
+>;
+
+
+def load_local_m0 : LoadFrag<load_glue>, LocalAddress;
+def sextloadi8_local_m0 : LoadFrag<sextloadi8_glue>, LocalAddress;
+def sextloadi16_local_m0 : LoadFrag<sextloadi16_glue>, LocalAddress;
+def az_extloadi8_local_m0 : LoadFrag<az_extloadi8_glue>, LocalAddress;
+def az_extloadi16_local_m0 : LoadFrag<az_extloadi16_glue>, LocalAddress;
+def load_align8_local_m0 : LoadFrag <load_glue_align8>, LocalAddress;
+
+
+def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
   [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
 >;
 
-def si_st_local : PatFrag <
-  (ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{
-  return cast<StoreSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
+                                  (AMDGPUst_glue node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
 }]>;
 
-def si_store_local : PatFrag <
-  (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
-  return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED &&
-         !cast<StoreSDNode>(N)->isTruncatingStore();
+def store_glue : PatFrag<(ops node:$val, node:$ptr),
+                         (unindexedstore_glue node:$val, node:$ptr), [{
+  return !cast<StoreSDNode>(N)->isTruncatingStore();
 }]>;
 
-def si_store_local_align8 : Aligned8Bytes <
-  (ops node:$val, node:$ptr), (si_store_local node:$val, node:$ptr)
->;
-
-def si_truncstore_local : PatFrag <
-  (ops node:$val, node:$ptr), (si_st_local node:$val, node:$ptr), [{
+def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
+                              (unindexedstore_glue node:$val, node:$ptr), [{
   return cast<StoreSDNode>(N)->isTruncatingStore();
 }]>;
 
-def si_truncstore_local_i8 : PatFrag <
-  (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
+def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
+                                (truncstore_glue node:$val, node:$ptr), [{
   return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8;
 }]>;
 
-def si_truncstore_local_i16 : PatFrag <
-  (ops node:$val, node:$ptr), (si_truncstore_local node:$val, node:$ptr), [{
+def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
+                                 (truncstore_glue node:$val, node:$ptr), [{
   return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16;
 }]>;
 
+def store_glue_align8 : Aligned8Bytes <
+  (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr)
+>;
+
+def store_local_m0 : StoreFrag<store_glue>, LocalAddress;
+def truncstorei8_local_m0 : StoreFrag<truncstorei8_glue>, LocalAddress;
+def truncstorei16_local_m0 : StoreFrag<truncstorei16_glue>, LocalAddress;
+
+def store_align8_local_m0 : StoreFrag<store_glue_align8>, LocalAddress;
+
 def si_setcc_uniform : PatFrag <
   (ops node:$lhs, node:$rhs, node:$cond),
   (setcc node:$lhs, node:$rhs, node:$cond), [{
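The refactor above establishes one composition scheme throughout: LoadFrag/StoreFrag supply the DAG operands, and an address-space CodePatPred mixin supplies the predicate. A minimal sketch of how a new fragment family would be assembled under that scheme follows; the ExampleRegionAddress and example_* names are hypothetical, chosen only to illustrate the pattern, with AMDGPUASI.REGION_ADDRESS as the sample address space.

// Hypothetical predicate, written in the same style as LocalAddress and
// PrivateAddress in AMDGPUInstructions.td.
class ExampleRegionAddress : CodePatPred<[{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.REGION_ADDRESS;
}]>;

// Mixing the predicate into the generic wrappers yields named fragments,
// exactly as load_local/store_local are built from LocalAddress above.
def example_load_region : LoadFrag <load>, ExampleRegionAddress;
def example_store_region : StoreFrag <store>, ExampleRegionAddress;

// An instruction pattern can then match the fragment directly, e.g.:
//   def : Pat <(i32 (example_load_region i32:$ptr)), (SOME_INST $ptr)>;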