Index: lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructions.td
+++ lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -380,12 +380,6 @@
   [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
 >;
 
-class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
-  (ops node:$ptr, node:$value),
-  (atomic_op node:$ptr, node:$value),
-  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
->;
-
 def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
 def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
 def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
@@ -404,19 +398,6 @@
   [{ return SDValue(N, 0).use_empty(); }]
 >;
 
-def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
-def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
-def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
-def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
-def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
-def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
-def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
-def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
-def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
-def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
-
-def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
-
 //===----------------------------------------------------------------------===//
 // Misc Pattern Fragments
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/CIInstructions.td
===================================================================
--- lib/Target/AMDGPU/CIInstructions.td
+++ lib/Target/AMDGPU/CIInstructions.td
@@ -89,244 +89,4 @@
 >;
 }
 
-//===----------------------------------------------------------------------===//
-// Flat Instructions
-//===----------------------------------------------------------------------===//
-
-defm FLAT_LOAD_UBYTE : FLAT_Load_Helper <
-  flat<0x8, 0x10>, "flat_load_ubyte", VGPR_32
->;
-defm FLAT_LOAD_SBYTE : FLAT_Load_Helper <
-  flat<0x9, 0x11>, "flat_load_sbyte", VGPR_32
->;
-defm FLAT_LOAD_USHORT : FLAT_Load_Helper <
-  flat<0xa, 0x12>, "flat_load_ushort", VGPR_32
->;
-defm FLAT_LOAD_SSHORT : FLAT_Load_Helper <
-  flat<0xb, 0x13>, "flat_load_sshort", VGPR_32
->;
-defm FLAT_LOAD_DWORD : FLAT_Load_Helper <
-  flat<0xc, 0x14>, "flat_load_dword", VGPR_32
->;
-defm FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <
-  flat<0xd, 0x15>, "flat_load_dwordx2", VReg_64
->;
-defm FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <
-  flat<0xe, 0x17>, "flat_load_dwordx4", VReg_128
->;
-defm FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <
-  flat<0xf, 0x16>, "flat_load_dwordx3", VReg_96
->;
-defm FLAT_STORE_BYTE : FLAT_Store_Helper <
-  flat<0x18>, "flat_store_byte", VGPR_32
->;
-defm FLAT_STORE_SHORT : FLAT_Store_Helper <
-  flat<0x1a>, "flat_store_short", VGPR_32
->;
-defm FLAT_STORE_DWORD : FLAT_Store_Helper <
-  flat<0x1c>, "flat_store_dword", VGPR_32
->;
-defm FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
-  flat<0x1d>, "flat_store_dwordx2", VReg_64
->;
-defm FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
-  flat<0x1e, 0x1f>, "flat_store_dwordx4", VReg_128
->;
-defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
-  flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96
->;
-defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <
-  flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat
->;
-defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC <
-  flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, i32,
-  atomic_cmp_swap_flat, v2i32, VReg_64
->;
-defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <
-  flat<0x32, 0x42>, "flat_atomic_add", VGPR_32, i32, atomic_add_flat
->;
-defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <
"flat_atomic_sub", VGPR_32, i32, atomic_sub_flat ->; -defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC < - flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32, i32, atomic_min_flat ->; -defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC < - flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32, i32, atomic_umin_flat ->; -defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC < - flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32, i32, atomic_max_flat ->; -defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC < - flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32, i32, atomic_umax_flat ->; -defm FLAT_ATOMIC_AND : FLAT_ATOMIC < - flat<0x39, 0x48>, "flat_atomic_and", VGPR_32, i32, atomic_and_flat ->; -defm FLAT_ATOMIC_OR : FLAT_ATOMIC < - flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32, i32, atomic_or_flat ->; -defm FLAT_ATOMIC_XOR : FLAT_ATOMIC < - flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32, i32, atomic_xor_flat ->; -defm FLAT_ATOMIC_INC : FLAT_ATOMIC < - flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat ->; -defm FLAT_ATOMIC_DEC : FLAT_ATOMIC < - flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat ->; -defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC < - flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat ->; -defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC < - flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, i64, - atomic_cmp_swap_flat, v2i64, VReg_128 ->; -defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC < - flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64, i64, atomic_add_flat ->; -defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC < - flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64, i64, atomic_sub_flat ->; -defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC < - flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64, i64, atomic_min_flat ->; -defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC < - flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64, i64, atomic_umin_flat ->; -defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC < - flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64, i64, atomic_max_flat ->; -defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC < - flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64, i64, atomic_umax_flat ->; -defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC < - flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64, i64, atomic_and_flat ->; -defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC < - flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64, i64, atomic_or_flat ->; -defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC < - flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64, i64, atomic_xor_flat ->; -defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC < - flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat ->; -defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC < - flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat ->; - } // End SubtargetPredicate = isCIVI - -// CI Only flat instructions - -let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1 in { - -defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC < - flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, f32, - null_frag, v2f32, VReg_64 ->; -defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC < - flat<0x3f>, "flat_atomic_fmin", VGPR_32, f32 ->; -defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC < - flat<0x40>, "flat_atomic_fmax", VGPR_32, f32 ->; -defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC < - flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, f64, - null_frag, v2f64, VReg_128 ->; -defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC < - flat<0x5f>, "flat_atomic_fmin_x2", VReg_64, f64 ->; -defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC < - flat<0x60>, "flat_atomic_fmax_x2", VReg_64, f64 ->; - -} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1 - 
-
-//===----------------------------------------------------------------------===//
-// Flat Patterns
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isCIVI] in {
-
-// Patterns for global loads with no offset.
-class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
-  (vt (node i64:$addr)),
-  (inst $addr, 0, 0, 0)
->;
-
-class FlatLoadAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
-  (vt (node i64:$addr)),
-  (inst $addr, 1, 0, 0)
->;
-
-def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
-def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
-def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
-def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
-
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;
-
-
-class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
-  (node vt:$data, i64:$addr),
-  (inst $addr, $data, 0, 0, 0)
->;
-
-class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
-  // atomic store follows atomic binop convention so the address comes
-  // first.
-  (node i64:$addr, vt:$data),
-  (inst $addr, $data, 1, 0, 0)
->;
-
-def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
-def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
-def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
-
-def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;
-
-class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
-                     ValueType data_vt = vt> : Pat <
-  (vt (node i64:$addr, data_vt:$data)),
-  (inst $addr, $data, 0, 0)
->;
-
-def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_flat, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_flat, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_flat, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_flat, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_flat, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_flat, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_flat, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_flat, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_flat, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_flat, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_flat, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_flat, i32, v2i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_flat, i32>;
-
-def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_flat, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_flat, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_flat, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_flat, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_flat, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_flat, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_flat, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_flat, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_flat, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_flat, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_flat, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_flat, i64, v2i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_flat, i64>;
-
-} // End Predicates = [isCIVI]
Index: lib/Target/AMDGPU/FLATInstructions.td
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/FLATInstructions.td
@@ -0,0 +1,523 @@
+//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlat">;
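+
+// FLATAtomic matches a flat address and lets the C++ selector named in its
+// third argument (SelectFlat, in AMDGPUISelDAGToDAG.cpp) split the match
+// into three result operands -- $addr, $slc and $tfe -- which is why the
+// _RTN atomic pattern below writes (FLATAtomic i64:$addr, i1:$slc, i1:$tfe).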
+
+//===----------------------------------------------------------------------===//
+// FLAT classes
+//===----------------------------------------------------------------------===//
+
+class FLAT_Pseudo<string opName, dag outs, dag ins,
+                  string asmOps, list<dag> pattern=[]> :
+  InstSI<outs, ins, "", pattern>,
+  SIMCInstr<opName, SIEncodingFamily.NONE> {
+
+  let isPseudo = 1;
+  let isCodeGenOnly = 1;
+
+  let SubtargetPredicate = isCIVI;
+
+  let FLAT = 1;
+  // Internally, FLAT instructions are executed as both an LDS and a
+  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
+  // and are not considered done until both have been decremented.
+  let VM_CNT = 1;
+  let LGKM_CNT = 1;
+
+  let Uses = [EXEC, FLAT_SCR]; // M0
+
+  let UseNamedOperandTable = 1;
+  let hasSideEffects = 0;
+  let SchedRW = [WriteVMEM];
+
+  string Mnemonic = opName;
+  string AsmOperands = asmOps;
+
+  bits<1> has_vdst = 1;
+  bits<1> has_data = 1;
+  bits<1> has_glc  = 1;
+  bits<1> glcValue = 0;
+}
+
+class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
+  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+  Enc64 {
+
+  let isPseudo = 0;
+  let isCodeGenOnly = 0;
+
+  // copy relevant pseudo op flags
+  let SubtargetPredicate = ps.SubtargetPredicate;
+  let AsmMatchConverter  = ps.AsmMatchConverter;
+
+  // encoding fields
+  bits<8> addr;
+  bits<8> data;
+  bits<8> vdst;
+  bits<1> slc;
+  bits<1> glc;
+  bits<1> tfe;
+
+  // 15-0 is reserved.
+  let Inst{16}    = !if(ps.has_glc, glc, ps.glcValue);
+  let Inst{17}    = slc;
+  let Inst{24-18} = op;
+  let Inst{31-26} = 0x37; // Encoding.
+  let Inst{39-32} = addr;
+  let Inst{47-40} = !if(ps.has_data, data, ?);
+  // 54-48 is reserved.
+  let Inst{55}    = tfe;
+  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
+}
+
+class FLAT_Load_Pseudo <string opName, RegisterClass regClass> : FLAT_Pseudo<
+  opName,
+  (outs regClass:$vdst),
+  (ins VReg_64:$addr, glc:$glc, slc:$slc, tfe:$tfe),
+  " $vdst, $addr$glc$slc$tfe"> {
+  let has_data = 0;
+  let mayLoad = 1;
+}
+
+class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass> : FLAT_Pseudo<
+  opName,
+  (outs),
+  (ins VReg_64:$addr, vdataClass:$data, glc:$glc, slc:$slc, tfe:$tfe),
+  " $addr, $data$glc$slc$tfe"> {
+  let mayLoad  = 0;
+  let mayStore = 1;
+  let has_vdst = 0;
+}
+
+multiclass FLAT_Atomic_Pseudo<
+  string opName,
+  RegisterClass vdst_rc,
+  ValueType vt,
+  SDPatternOperator atomic = null_frag,
+  ValueType data_vt = vt,
+  RegisterClass data_rc = vdst_rc> {
+
+  def "" : FLAT_Pseudo <opName,
+    (outs),
+    (ins VReg_64:$addr, data_rc:$data, slc:$slc, tfe:$tfe),
+    " $addr, $data$slc$tfe",
+    []>,
+    AtomicNoRet <NAME, 0> {
+    let mayLoad  = 1;
+    let mayStore = 1;
+    let has_glc  = 0;
+    let glcValue = 0;
+    let has_vdst = 0;
+    let PseudoInstr = NAME;
+  }
+
+  def _RTN : FLAT_Pseudo <opName,
+    (outs vdst_rc:$vdst),
+    (ins VReg_64:$addr, data_rc:$data, slc:$slc, tfe:$tfe),
+    " $vdst, $addr, $data glc$slc$tfe",
+    [(set vt:$vdst,
+      (atomic (FLATAtomic i64:$addr, i1:$slc, i1:$tfe), data_vt:$data))]>,
+    AtomicNoRet <NAME, 1> {
+    let hasPostISelHook = 1;
+    let has_glc  = 0;
+    let glcValue = 1;
+    let PseudoInstr = NAME # "_RTN";
+  }
+}
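+
+// Illustrative expansion (hand-written, following the multiclass above):
+// "defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add", VGPR_32,
+// i32, atomic_add_flat>" produces two pseudos:
+//   FLAT_ATOMIC_ADD     -- no vdst, glc encoded as 0 (result discarded)
+//   FLAT_ATOMIC_ADD_RTN -- has vdst, glc encoded as 1, and carries the
+//                          ISel pattern, so selection always picks it.
+// AtomicNoRet<NAME, 0/1> pairs the two forms so later passes can swap in
+// the no-return opcode when the atomic's result turns out to be unused.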
+
+class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
+  (ops node:$ptr, node:$value),
+  (atomic_op node:$ptr, node:$value),
+  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
+>;
+
+def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
+def atomic_swap_flat     : flat_binary_atomic_op<atomic_swap>;
+def atomic_add_flat      : flat_binary_atomic_op<atomic_load_add>;
+def atomic_and_flat      : flat_binary_atomic_op<atomic_load_and>;
+def atomic_max_flat      : flat_binary_atomic_op<atomic_load_max>;
+def atomic_min_flat      : flat_binary_atomic_op<atomic_load_min>;
+def atomic_or_flat       : flat_binary_atomic_op<atomic_load_or>;
+def atomic_sub_flat      : flat_binary_atomic_op<atomic_load_sub>;
+def atomic_umax_flat     : flat_binary_atomic_op<atomic_load_umax>;
+def atomic_umin_flat     : flat_binary_atomic_op<atomic_load_umin>;
+def atomic_xor_flat      : flat_binary_atomic_op<atomic_load_xor>;
+def atomic_inc_flat      : flat_binary_atomic_op<SIatomic_inc>;
+def atomic_dec_flat      : flat_binary_atomic_op<SIatomic_dec>;
+
+//===----------------------------------------------------------------------===//
+// Flat Instructions
+//===----------------------------------------------------------------------===//
+
+def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
+def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
+def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
+def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
+def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
+def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
+def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
+def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;
+
+def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
+def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
+def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
+def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
+def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
+def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;
+
+defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
+                             VGPR_32, i32, atomic_cmp_swap_flat,
+                             v2i32, VReg_64>;
+
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
+                             VReg_64, i64, atomic_cmp_swap_flat,
+                             v2i64, VReg_128>;
+
+defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
+                             VGPR_32, i32, atomic_swap_flat>;
+
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
+                             VReg_64, i64, atomic_swap_flat>;
+
+defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
+                             VGPR_32, i32, atomic_add_flat>;
+
+defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
+                             VGPR_32, i32, atomic_sub_flat>;
+
+defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
+                             VGPR_32, i32, atomic_min_flat>;
+
+defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
+                             VGPR_32, i32, atomic_umin_flat>;
+
+defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
+                             VGPR_32, i32, atomic_max_flat>;
+
+defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
+                             VGPR_32, i32, atomic_umax_flat>;
+
+defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
+                             VGPR_32, i32, atomic_and_flat>;
+
+defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
+                             VGPR_32, i32, atomic_or_flat>;
+
+defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
+                             VGPR_32, i32, atomic_xor_flat>;
+
+defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
+                             VGPR_32, i32, atomic_inc_flat>;
+
+defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
+                             VGPR_32, i32, atomic_dec_flat>;
+
+defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
+                             VReg_64, i64, atomic_add_flat>;
+
+defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
+                             VReg_64, i64, atomic_sub_flat>;
+
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
+                             VReg_64, i64, atomic_min_flat>;
+
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
+                             VReg_64, i64, atomic_umin_flat>;
+
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
+                             VReg_64, i64, atomic_max_flat>;
+
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
+                             VReg_64, i64, atomic_umax_flat>;
+
+defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
+                             VReg_64, i64, atomic_and_flat>;
+
+defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
+                             VReg_64, i64, atomic_or_flat>;
+
+defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
+                             VReg_64, i64, atomic_xor_flat>;
+
+defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
+                             VReg_64, i64, atomic_inc_flat>;
+
+defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
+                             VReg_64, i64, atomic_dec_flat>;
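+
+// For reference, hand-written assembly for the two forms of an atomic
+// (derived from the AsmOperands strings in FLAT_Atomic_Pseudo, not tool
+// output):
+//   flat_atomic_add v[2:3], v4          // no-return form
+//   flat_atomic_add v0, v[2:3], v4 glc  // returning form, v0 = old value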
+
+let SubtargetPredicate = isCI in { // CI Only flat instructions : FIXME Only?
+
+defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
+                             VGPR_32, f32, null_frag, v2f32, VReg_64>;
+
+defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
+                             VReg_64, f64, null_frag, v2f64, VReg_128>;
+
+defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
+                             VGPR_32, f32>;
+
+defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
+                             VGPR_32, f32>;
+
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
+                             VReg_64, f64>;
+
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
+                             VReg_64, f64>;
+
+} // End SubtargetPredicate = isCI
+
+//===----------------------------------------------------------------------===//
+// Flat Patterns
+//===----------------------------------------------------------------------===//
+
+class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
+                                               (ld node:$ptr), [{
+  auto const AS = cast<MemSDNode>(N)->getAddressSpace();
+  return AS == AMDGPUAS::FLAT_ADDRESS ||
+         AS == AMDGPUAS::GLOBAL_ADDRESS ||
+         AS == AMDGPUAS::CONSTANT_ADDRESS;
+}]>;
+
+class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
+                                               (st node:$val, node:$ptr), [{
+  auto const AS = cast<MemSDNode>(N)->getAddressSpace();
+  return AS == AMDGPUAS::FLAT_ADDRESS ||
+         AS == AMDGPUAS::GLOBAL_ADDRESS;
+}]>;
+
+def atomic_flat_load   : flat_ld <atomic_load>;
+def flat_load          : flat_ld <load>;
+def flat_az_extloadi8  : flat_ld <az_extloadi8>;
+def flat_sextloadi8    : flat_ld <sextloadi8>;
+def flat_az_extloadi16 : flat_ld <az_extloadi16>;
+def flat_sextloadi16   : flat_ld <sextloadi16>;
+
+def atomic_flat_store  : flat_st <atomic_store>;
+def flat_store         : flat_st <store>;
+def flat_truncstorei8  : flat_st <truncstorei8>;
+def flat_truncstorei16 : flat_st <truncstorei16>;
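+
+// Note the asymmetry between flat_ld and flat_st above: loads also accept
+// CONSTANT_ADDRESS pointers, since constant memory is readable through flat
+// addressing, while constant memory is not writable, so flat_st only
+// accepts FLAT_ADDRESS and GLOBAL_ADDRESS. As a hand-written IR example
+// (assuming the address-space numbering of this era, where flat is 4):
+//   %v = load i32, i32 addrspace(4)* %p
+// matches flat_load and selects to FLAT_LOAD_DWORD via FlatLoadPat below.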
+
+// Patterns for flat loads with no offset.
+class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
+  (vt (node i64:$addr)),
+  (inst $addr, 0, 0, 0)
+>;
+
+class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
+  (vt (node i64:$addr)),
+  (inst $addr, 1, 0, 0)
+>;
+
+class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
+  (node vt:$data, i64:$addr),
+  (inst $addr, $data, 0, 0, 0)
+>;
+
+class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
+  // atomic store follows atomic binop convention so the address comes
+  // first.
+  (node i64:$addr, vt:$data),
+  (inst $addr, $data, 1, 0, 0)
+>;
+
+class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
+                     ValueType data_vt = vt> : Pat <
+  (vt (node i64:$addr, data_vt:$data)),
+  (inst $addr, $data, 0, 0)
+>;
+
+let Predicates = [isCIVI] in {
+
+def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
+def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
+def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
+
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;
+
+def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
+def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
+def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
+
+def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;
+
+def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_flat, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_flat, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_flat, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_flat, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_flat, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_flat, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_flat, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_flat, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_flat, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_flat, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_flat, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_flat, i32, v2i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_flat, i32>;
+
+def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_flat, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_flat, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_flat, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_flat, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_flat, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_flat, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_flat, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_flat, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_flat, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_flat, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_flat, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_flat, i64, v2i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_flat, i64>;
+
+} // End Predicates = [isCIVI]
+
+//===----------------------------------------------------------------------===//
+// Target
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CI
+//===----------------------------------------------------------------------===//
+
+class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
+  FLAT_Real <op, ps>,
+  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
+  let AssemblerPredicate = isCIOnly;
+  let DecoderNamespace="CI";
+}
+
+def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>;
+def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>;
+def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>;
+def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>;
+def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>;
+def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>;
+def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>;
+def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>;
+
+def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
+def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
+def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
+def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
+def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
+def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
+
+multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
+  def _ci     : FLAT_Real_ci <op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
+  def _RTN_ci : FLAT_Real_ci <op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
+}
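+
+// Illustrative expansion: "defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci
+// <0x30, FLAT_ATOMIC_SWAP>" emits FLAT_ATOMIC_SWAP_ci and
+// FLAT_ATOMIC_SWAP_RTN_ci, both with CI opcode 0x30; the encodings differ
+// only in the glc bit (Inst{16}), which FLAT_Real takes from the pseudo's
+// glcValue because the atomic pseudos set has_glc = 0.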
+
+defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
+defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
+defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
+defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
+defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
+defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
+defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
+defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
+defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
+defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
+defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
+defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
+defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
+defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
+defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
+defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
+defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
+defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
+defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
+defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
+
+// CI Only flat instructions
+defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
+defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
+defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
+defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
+  FLAT_Real <op, ps>,
+  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
+  let AssemblerPredicate = isVI;
+  let DecoderNamespace="VI";
+}
+
+def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
+def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
+def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
+def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
+def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
+def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
+def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
+def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
+
+def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
+def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
+def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
+def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
+def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
+def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
+
+multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
+  def _vi     : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
+  def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
+}
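+
+// Worked encoding example (hand-assembled from FLAT_Real above, not
+// disassembler output): VI "flat_load_dword v2, v[0:1]" has op = 0x14, so
+//   Inst{31-26} = 0x37, Inst{24-18} = 0x14, glc = slc = tfe = 0,
+//   Inst{39-32} = 0 (addr base v0), Inst{63-56} = 2 (vdst = v2),
+// i.e. the dwords 0xdc500000 0x02000000.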
+
+defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
+defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
+defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
+defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
+defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
+defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
+defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
+defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
+defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
+defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
+defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
+defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
+defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
+defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
+defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
+defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
+defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
+defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
+defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
+defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
+
Index: lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- lib/Target/AMDGPU/SIInstrFormats.td
+++ lib/Target/AMDGPU/SIInstrFormats.td
@@ -473,26 +473,6 @@
   let Inst{57-53} = ssamp{6-2};
 }
 
-class FLATe <bits<7> op> : Enc64 {
-  bits<8> addr;
-  bits<8> data;
-  bits<8> vdst;
-  bits<1> slc;
-  bits<1> glc;
-  bits<1> tfe;
-
-  // 15-0 is reserved.
-  let Inst{16} = glc;
-  let Inst{17} = slc;
-  let Inst{24-18} = op;
-  let Inst{31-26} = 0x37; // Encoding.
-  let Inst{39-32} = addr;
-  let Inst{47-40} = data;
-  // 54-48 is reserved.
-  let Inst{55} = tfe;
-  let Inst{63-56} = vdst;
-}
-
 class EXPe : Enc64 {
   bits<4> en;
   bits<6> tgt;
@@ -572,22 +552,6 @@
   let SchedRW = [WriteVMEM];
 }
 
-class FLAT <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
-    InstSI<outs, ins, asm, pattern>, FLATe <op> {
-  let FLAT = 1;
-  // Internally, FLAT instruction are executed as both an LDS and a
-  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
-  // and are not considered done until both have been decremented.
-  let VM_CNT = 1;
-  let LGKM_CNT = 1;
-
-  let Uses = [EXEC, FLAT_SCR]; // M0
-
-  let UseNamedOperandTable = 1;
-  let hasSideEffects = 0;
-  let SchedRW = [WriteVMEM];
-}
-
 class MIMG <dag outs, dag ins, string asm, list<dag> pattern> :
     InstSI <outs, ins, asm, pattern> {
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -126,37 +126,6 @@
   SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
 >;
 
-//===----------------------------------------------------------------------===//
-// PatFrags for FLAT instructions
-//===----------------------------------------------------------------------===//
-
-class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
-                                               (ld node:$ptr), [{
-  const MemSDNode *LD = cast<MemSDNode>(N);
-  return LD->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
-         LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
-         LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
-
-def flat_load          : flat_ld <load>;
-def atomic_flat_load   : flat_ld <atomic_load>;
-def flat_az_extloadi8  : flat_ld <az_extloadi8>;
-def flat_sextloadi8    : flat_ld <sextloadi8>;
-def flat_az_extloadi16 : flat_ld <az_extloadi16>;
-def flat_sextloadi16   : flat_ld <sextloadi16>;
-
-class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
-                                               (st node:$val, node:$ptr), [{
-  const MemSDNode *ST = cast<MemSDNode>(N);
-  return ST->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
-         ST->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
-def flat_store         : flat_st <store>;
-def atomic_flat_store  : flat_st <atomic_store>;
-def flat_truncstorei8  : flat_st <truncstorei8>;
-def flat_truncstorei16 : flat_st <truncstorei16>;
-
 class MubufLoad <SDPatternOperator op> : PatFrag <
   (ops node:$ptr),
   (op node:$ptr), [{
@@ -187,9 +156,6 @@
 def atomic_inc_global : global_binary_atomic_op<SIatomic_inc>;
 def atomic_dec_global : global_binary_atomic_op<SIatomic_dec>;
 
-def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
-def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;
-
 //===----------------------------------------------------------------------===//
 // SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
 // to be glued to the memory instructions.
@@ -539,7 +505,6 @@
 def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
 def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
 def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
-def FLATAtomic : ComplexPattern<i64, 3, "SelectFlat">;
 def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
 def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
 def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
@@ -2693,114 +2658,6 @@
 } // End hasSideEffects = 1, mayStore = 1, AsmMatchConverter = ""
 }
 
-//===----------------------------------------------------------------------===//
-// FLAT classes
-//===----------------------------------------------------------------------===//
-
-class flat <bits<7> ci, bits<7> vi = ci> {
-  field bits<7> CI = ci;
-  field bits<7> VI = vi;
-}
-
-class FLAT_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
-  FLAT <0, outs, ins, "", pattern>,
-  SIMCInstr<opName, SIEncodingFamily.NONE> {
-  let isPseudo = 1;
-  let isCodeGenOnly = 1;
-}
-
-class FLAT_Real_ci <bits<7> op, string opName, dag outs, dag ins, string asm> :
-  FLAT <op, outs, ins, asm, []>,
-  SIMCInstr<opName, SIEncodingFamily.SI> {
-  let AssemblerPredicate = isCIOnly;
-  let DecoderNamespace="CI";
-}
-
-class FLAT_Real_vi <bits<7> op, string opName, dag outs, dag ins, string asm> :
-  FLAT <op, outs, ins, asm, []>,
-  SIMCInstr<opName, SIEncodingFamily.VI> {
-  let AssemblerPredicate = VIAssemblerPredicate;
-  let DecoderNamespace="VI";
-  let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass FLAT_AtomicRet_m <flat op, dag outs, dag ins, string asm,
-                             list<dag> pattern> {
-  def "" : FLAT_Pseudo <NAME, outs, ins, pattern>,
-           AtomicNoRet <NAME, 1>;
-
-  def _ci : FLAT_Real_ci <op.CI, NAME, outs, ins, asm>;
-
-  def _vi : FLAT_Real_vi <op.VI, NAME, outs, ins, asm>;
-}
-
-multiclass FLAT_Load_Helper <flat op, string asm_name,
-                             RegisterClass regClass> {
-
-  let data = 0, mayLoad = 1 in {
-
-    def "" : FLAT_Pseudo <NAME, (outs regClass:$vdst),
-                          (ins VReg_64:$addr, glc:$glc, slc:$slc, tfe:$tfe), []>;
-
-    def _ci : FLAT_Real_ci <op.CI, NAME, (outs regClass:$vdst),
-                            (ins VReg_64:$addr, glc:$glc, slc:$slc, tfe:$tfe),
-                            asm_name#" $vdst, $addr$glc$slc$tfe">;
-
-    def _vi : FLAT_Real_vi <op.VI, NAME, (outs regClass:$vdst),
-                            (ins VReg_64:$addr, glc:$glc, slc:$slc, tfe:$tfe),
-                            asm_name#" $vdst, $addr$glc$slc$tfe">;
-  }
-}
-
-multiclass FLAT_Store_Helper <flat op, string asm_name,
-                              RegisterClass vdataClass> {
-
-  let mayLoad = 0, mayStore = 1, vdst = 0 in {
-
-    def "" : FLAT_Pseudo <NAME, (outs),
-                          (ins VReg_64:$addr, vdataClass:$data,
-                               glc:$glc, slc:$slc, tfe:$tfe), []>;
-
-    def _ci : FLAT_Real_ci <op.CI, NAME, (outs),
-                            (ins VReg_64:$addr, vdataClass:$data,
-                                 glc:$glc, slc:$slc, tfe:$tfe),
-                            asm_name#" $addr, $data$glc$slc$tfe">;
-
-    def _vi : FLAT_Real_vi <op.VI, NAME, (outs),
-                            (ins VReg_64:$addr, vdataClass:$data,
-                                 glc:$glc, slc:$slc, tfe:$tfe),
-                            asm_name#" $addr, $data$glc$slc$tfe">;
-  }
-}
-
-multiclass FLAT_ATOMIC <flat op, string asm_name, RegisterClass vdst_rc,
-                        ValueType vt, SDPatternOperator atomic = null_frag,
-                        ValueType data_vt = vt,
-                        RegisterClass data_rc = vdst_rc> {
-
-  let mayLoad = 1, mayStore = 1, glc = 0, vdst = 0 in {
-    def "" : FLAT_Pseudo <NAME, (outs),
-                          (ins VReg_64:$addr, data_rc:$data,
-                               slc:$slc, tfe:$tfe), []>,
-             AtomicNoRet <NAME, 0>;
-
-    def _ci : FLAT_Real_ci <op.CI, NAME, (outs),
-                            (ins VReg_64:$addr, data_rc:$data,
-                                 slc:$slc, tfe:$tfe),
-                            asm_name#" $addr, $data$slc$tfe">;
-
-    def _vi : FLAT_Real_vi <op.VI, NAME, (outs),
-                            (ins VReg_64:$addr, data_rc:$data,
-                                 slc:$slc, tfe:$tfe),
-                            asm_name#" $addr, $data$slc$tfe">;
-  }
-
-  let glc = 1, hasPostISelHook = 1 in {
-    defm _RTN : FLAT_AtomicRet_m <
-      op, (outs vdst_rc:$vdst),
-      (ins VReg_64:$addr, data_rc:$data, slc:$slc, tfe:$tfe),
-      asm_name#" $vdst, $addr, $data glc$slc$tfe",
-      [(set vt:$vdst,
-        (atomic (FLATAtomic i64:$addr, i1:$slc, i1:$tfe), data_vt:$data))]
-    >;
-  }
-}
-
 class MIMG_Mask <string op, int channels> {
   string Op = op;
   int Channels = channels;
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -22,6 +22,7 @@
 def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
 
 include "SOPInstructions.td"
+include "FLATInstructions.td"
 
 let SubtargetPredicate = isGCN in {