[AMDGPU] Add global_load_lds_* and scratch_load_lds_* (assembler/disassembler)

What this patch does, as visible in the hunks below:
  * Adds pseudo classes FLAT_Global_Load_LDS_Pseudo and
    FLAT_Scratch_Load_LDS_Pseudo. They have no outputs and no data/vdst
    operand (has_data = 0, has_vdst = 0), are marked both mayLoad and
    mayStore, list M0 and EXEC in Uses, and are scheduled as
    [WriteVMEM, WriteLDS].
  * The global multiclass emits a VGPR-address form and a _SADDR form; the
    scratch multiclass emits "", _SADDR, _SVS and _ST forms.
  * Defines UBYTE/SBYTE/USHORT/SSHORT/DWORD variants of both, guarded by
    isGFX940Plus, with real encodings at opcodes 0x26..0x2a.
  * Adds MC assembler and disassembler tests for the new instructions.

NOTE(review): this copy of the patch was restored from a text dump in which
line breaks were collapsed and template argument lists had been stripped.
  - The template parameter lists of the two new classes/multiclasses and the
    per-def flag arguments were re-derived from how the bodies use opName,
    EnableSaddr, EnableSVE and EnableVaddr, from the PseudoInstr suffix
    mapping, and from the operand forms in the asm tests.
  - The FlatScratchInst mode strings ("SV", "SS", "SVS", "ST"), the text of
    unchanged context lines that was stripped, indentation, and the position
    of blank context lines (chosen to satisfy each hunk's line counts) are
    reconstructions -- confirm against the tree before applying.
  - The disassembler test lists "scratch_load_lds_dword off, off offset:4"
    twice; that is redundant but harmless and is kept so the hunk count
    (+87,60) stays valid.

Index: llvm/lib/Target/AMDGPU/FLATInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -243,6 +243,33 @@
   }
 }
 
+class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
+  opName,
+  (outs ),
+  !con(
+      !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)),
+      (ins flat_offset:$offset, CPol_0:$cpol)),
+  " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
+  let LGKM_CNT = 1;
+  let is_flat_global = 1;
+  let has_data = 0;
+  let has_vdst = 0;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let has_saddr = 1;
+  let enabled_saddr = EnableSaddr;
+  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+  let Uses = [M0, EXEC];
+  let SchedRW = [WriteVMEM, WriteLDS];
+}
+
+multiclass FLAT_Global_Load_LDS_Pseudo<string opName> {
+  def ""     : FLAT_Global_Load_LDS_Pseudo<opName>,
+    GlobalSaddrTable<0, opName>;
+  def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>,
+    GlobalSaddrTable<1, opName>;
+}
+
 class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
   bit EnableSaddr = 0> : FLAT_Pseudo<
   opName,
@@ -366,6 +393,47 @@
   }
 }
 
+class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0,
+  bit EnableSVE = 0,
+  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo<
+  opName,
+  (outs ),
+  !if(EnableSVE,
+    (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
+    !if(EnableSaddr,
+      (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
+      !if(EnableVaddr,
+        (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol),
+        (ins flat_offset:$offset, CPol:$cpol)))),
+  " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
+
+  let LGKM_CNT = 1;
+  let is_flat_scratch = 1;
+  let has_data = 0;
+  let has_vdst = 0;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let has_saddr = 1;
+  let enabled_saddr = EnableSaddr;
+  let has_vaddr = EnableVaddr;
+  let has_sve = EnableSVE;
+  let sve = EnableVaddr;
+  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
+  let Uses = [M0, EXEC];
+  let SchedRW = [WriteVMEM, WriteLDS];
+}
+
+multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> {
+  def ""     : FLAT_Scratch_Load_LDS_Pseudo<opName>,
+               FlatScratchInst<opName, "SV">;
+  def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>,
+               FlatScratchInst<opName, "SS">;
+  def _SVS   : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>,
+               FlatScratchInst<opName, "SVS">;
+  def _ST    : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>,
+               FlatScratchInst<opName, "ST">;
+}
+
 class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                                string asm, list<dag> pattern = []> :
   FLAT_Pseudo<opName, outs, ins, asm, pattern> {
@@ -772,6 +840,16 @@
 let SubtargetPredicate = HasGFX10_BEncoding in
 defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
                               VGPR_32, i32, int_amdgcn_global_atomic_csub>;
+
+let SubtargetPredicate = isGFX940Plus in {
+
+defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">;
+defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">;
+defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">;
+defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
+defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;
+
+} // End let SubtargetPredicate = isGFX940Plus
 } // End is_flat_global = 1
 
 
@@ -803,6 +881,16 @@
 defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
 defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
 
+let SubtargetPredicate = isGFX940Plus in {
+
+defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">;
+defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">;
+defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">;
+defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">;
+defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;
+
+} // End let SubtargetPredicate = isGFX940Plus
+
 } // End SubtargetPredicate = HasFlatScratchInsts
 
 let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
@@ -1613,6 +1701,13 @@
 defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
 defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
 
+let AssemblerPredicate = isGFX940Plus in {
+defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_vi <0x026>;
+defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_vi <0x027>;
+defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_vi <0x028>;
+defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_vi <0x029>;
+defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_vi <0x02a>;
+} // End let AssemblerPredicate = isGFX940Plus
 
 defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>;
 defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>;
@@ -1641,6 +1736,14 @@
 defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>;
 defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>;
 
+let AssemblerPredicate = isGFX940Plus in {
+defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_SVE_vi <0x026>;
+defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_SVE_vi <0x027>;
+defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_vi <0x028>;
+defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x029>;
+defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_SVE_vi <0x02a>;
+} // End let AssemblerPredicate = isGFX940Plus
+
 defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x10>;
 defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x11>;
 defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_SVE_vi <0x12>;
Index: llvm/test/MC/AMDGPU/gfx940_asm_features.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx940_asm_features.s
+++ llvm/test/MC/AMDGPU/gfx940_asm_features.s
@@ -129,6 +129,74 @@
 // GFX940: ds_pk_add_rtn_bf16 a3, v2, a1 ; encoding: [0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03]
 ds_pk_add_rtn_bf16 a3, v2, a1
 
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_dword v[2:3], off ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off sc0 nt sc1
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_dword v[2:3], off offset:4
+
+// NOT-GFX940: error:
+// GFX940: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00]
+global_load_lds_dword v2, s[4:5] offset:4
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_ubyte v[2:3], off ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_ubyte v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_sbyte v[2:3], off ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_sbyte v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_sshort v[2:3], off ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_sshort v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: global_load_lds_ushort v[2:3], off ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+global_load_lds_ushort v[2:3], off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_dword v2, off ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_dword v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_dword v2, s4 ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+scratch_load_lds_dword v2, s4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword v2, s4 offset:4 ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+scratch_load_lds_dword v2, s4 offset:4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00]
+scratch_load_lds_dword off, s4 offset:4
+
+// NOT-GFX940: error:
+// GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+scratch_load_lds_dword off, off offset:4
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_ubyte v2, off ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_ubyte v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_sbyte v2, off ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_sbyte v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_ushort v2, off ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_ushort v2, off
+
+// NOT-GFX940: error: instruction not supported on this GPU
+// GFX940: scratch_load_lds_sshort v2, off ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+scratch_load_lds_sshort v2, off
+
 // NOT-GFX940: error: instruction not supported on this GPU
 // GFX940: v_mov_b64_e32 v[2:3], v[4:5] ; encoding: [0x04,0x71,0x04,0x7e]
 v_mov_b64 v[2:3], v[4:5]
Index: llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
+++ llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
@@ -87,6 +87,60 @@
 # GFX940: ds_pk_add_rtn_bf16 a3, v2, a1 ; encoding: [0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03]
 0x00,0x00,0x70,0xdb,0x02,0x01,0x00,0x03
 
+# GFX940: global_load_lds_dword v[2:3], off ; encoding: [0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v[2:3], off sc0 nt sc1 ; encoding: [0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xab,0xde,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v[2:3], off offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x04,0x80,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_dword v2, s[4:5] offset:4 ; encoding: [0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x04,0x80,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: global_load_lds_ubyte v[2:3], off ; encoding: [0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0x98,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_sbyte v[2:3], off ; encoding: [0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0x9c,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_sshort v[2:3], off ; encoding: [0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa4,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: global_load_lds_ushort v[2:3], off ; encoding: [0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x80,0xa0,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword v2, off ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa8,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword v2, s4 ; encoding: [0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x00,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword v2, s4 offset:4 ; encoding: [0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00]
+0x04,0x60,0xa8,0xdc,0x02,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword off, s4 offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x04,0x00
+
+# GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_dword off, off offset:4 ; encoding: [0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00]
+0x04,0x40,0xa8,0xdc,0x00,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_ubyte v2, off ; encoding: [0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0x98,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_sbyte v2, off ; encoding: [0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0x9c,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_ushort v2, off ; encoding: [0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa0,0xdc,0x02,0x00,0x7f,0x00
+
+# GFX940: scratch_load_lds_sshort v2, off ; encoding: [0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00]
+0x00,0x60,0xa4,0xdc,0x02,0x00,0x7f,0x00
+
 # GFX940: v_mov_b64_e32 v[2:3], v[4:5] ; encoding: [0x04,0x71,0x04,0x7e]
 0x04,0x71,0x04,0x7e
 