diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -110,10 +110,11 @@ let has_dlc = 1; } -class SM_Store_Pseudo pattern = []> - : SM_Pseudo { - RegisterClass BaseClass; - RegisterClass SrcClass; +class SM_Store_Pseudo + : SM_Pseudo { + RegisterClass BaseClass = baseClass; + RegisterClass SrcClass = srcClass; let mayLoad = 0; let mayStore = 1; let has_glc = 1; @@ -177,23 +178,28 @@ multiclass SM_Pseudo_Stores { - def _IMM : SM_Store_Pseudo { + " $sdata, $sbase, $offset$cpol"> { let has_offset = 1; - let BaseClass = baseClass; - let SrcClass = srcClass; let PseudoInstr = opName # "_IMM"; } - def _SGPR : SM_Store_Pseudo { + " $sdata, $sbase, $soffset$cpol"> { let has_soffset = 1; - let BaseClass = baseClass; - let SrcClass = srcClass; let PseudoInstr = opName # "_SGPR"; } + + def _SGPR_IMM : SM_Store_Pseudo { + let has_offset = 1; + let has_soffset = 1; + let PseudoInstr = opName # "_SGPR_IMM"; + } } multiclass SM_Pseudo_Discards { @@ -948,6 +954,11 @@ def _SGPR_gfx10 : SMEM_Real_Store_gfx10 { let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); } + + def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 (ps#_SGPR_IMM)> { + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, + SReg_32:$soffset, smem_offset_mod:$offset, CPol:$cpol); + } } defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">; diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s @@ -581,6 +581,9 @@ s_store_dword s1, s[4:5], 0x0 // GFX10: encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xfa] +s_store_dword s1, s[4:5], s0 offset:0x12345 +// GFX10: encoding: [0x42,0x00,0x40,0xf4,0x45,0x23,0x01,0x00] + s_store_dword s1, s[4:5], s0 glc // GFX10: encoding: [0x42,0x00,0x41,0xf4,0x00,0x00,0x00,0x00] @@ -593,6 +596,9 @@ s_store_dword s1, s[4:5], 0x1234 glc dlc // GFX10: encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa] +s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00] + s_store_dwordx2 s[2:3], s[4:5], s0 // GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0x00] @@ -632,6 +638,9 @@ s_store_dwordx2 s[2:3], s[4:5], 0x0 // GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xfa] +s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 +// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x45,0x23,0x01,0x00] + s_store_dwordx2 s[2:3], s[4:5], s0 glc // GFX10: encoding: [0x82,0x00,0x45,0xf4,0x00,0x00,0x00,0x00] @@ -644,6 +653,9 @@ s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc // GFX10: encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa] +s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00] + s_store_dwordx4 s[4:7], s[4:5], s0 // GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0x00] @@ -680,6 +692,9 @@ s_store_dwordx4 s[4:7], s[4:5], 0x0 // GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xfa] +s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 +// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x45,0x23,0x01,0x00] + s_store_dwordx4 s[4:7], s[4:5], s0 glc // GFX10: encoding: [0x02,0x01,0x49,0xf4,0x00,0x00,0x00,0x00] @@ -692,6 +707,9 @@ s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc // GFX10: encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa] +s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00] + s_buffer_store_dword s1, s[8:11], s0 // GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0x00] @@ -728,6 +746,9 @@ s_buffer_store_dword s1, s[8:11], 0x0 // GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xfa] +s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 +// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x45,0x23,0x01,0x00] + s_buffer_store_dword s1, s[8:11], s0 glc // GFX10: encoding: [0x44,0x00,0x61,0xf4,0x00,0x00,0x00,0x00] @@ -740,6 +761,9 @@ s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc // GFX10: encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa] +s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00] + s_buffer_store_dwordx2 s[2:3], s[8:11], s0 // GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0x00] @@ -776,6 +800,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], 0x0 // GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xfa] +s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 +// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x45,0x23,0x01,0x00] + s_buffer_store_dwordx2 s[2:3], s[8:11], s0 glc // GFX10: encoding: [0x84,0x00,0x65,0xf4,0x00,0x00,0x00,0x00] @@ -788,6 +815,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc // GFX10: encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa] +s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00] + s_buffer_store_dwordx4 s[4:7], s[8:11], s0 // GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0x00] @@ -821,6 +851,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], 0x0 // GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xfa] +s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 +// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x45,0x23,0x01,0x00] + s_buffer_store_dwordx4 s[4:7], s[8:11], s0 glc // GFX10: encoding: [0x04,0x01,0x69,0xf4,0x00,0x00,0x00,0x00] @@ -833,6 +866,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc // GFX10: encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa] +s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00] + s_memrealtime s[10:11] // GFX10: encoding: [0x80,0x02,0x94,0xf4,0x00,0x00,0x00,0x00] @@ -893,6 +929,9 @@ s_scratch_store_dword s1, s[4:5], 0x123 glc // GFX10: encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa] +s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc +// GFX10: encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00] + s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc // GFX10: encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -11591,6 +11591,9 @@ # GFX10: s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa] 0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00] +0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_buffer_store_dword s1, s[8:11], m0 ; encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8] 0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8 @@ -11639,6 +11642,9 @@ # GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa] 0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00] +0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], m0 ; encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8] 0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8 @@ -11681,6 +11687,9 @@ # GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa] 0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00] +0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], m0 ; encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8] 0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8 @@ -18086,6 +18095,9 @@ # GFX10: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa] 0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa +# GFX10: s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc ; encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00] +0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_scratch_store_dword s101, s[4:5], s0 ; encoding: [0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00] 0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00 @@ -18323,6 +18335,9 @@ # GFX10: s_store_dword s1, s[4:5], 0x1234 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa] 0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00] +0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_store_dword s1, s[4:5], m0 ; encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8] 0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8 @@ -18374,6 +18389,9 @@ # GFX10: s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa] 0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00] +0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_store_dwordx2 s[2:3], s[4:5], m0 ; encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8] 0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8 @@ -18419,6 +18437,9 @@ # GFX10: s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa] 0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00] +0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_store_dwordx4 s[4:7], s[4:5], m0 ; encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8] 0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8