diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -87,18 +87,21 @@ bits<5> cpol; } -class SM_Probe_Pseudo - : SM_Pseudo { +class SM_Probe_Pseudo + : SM_Pseudo { let mayLoad = 0; let mayStore = 0; let has_glc = 0; let LGKM_CNT = 0; let ScalarStore = 0; let hasSideEffects = 1; - let has_offset = isImm; - let has_soffset = !not(isImm); - let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR"); + let has_offset = hasOffset; + let has_soffset = hasSOffset; + let PseudoInstr = opName # variant; } class SM_Load_Pseudo pattern=[]> @@ -235,8 +238,16 @@ } multiclass SM_Pseudo_Probe { - def _IMM : SM_Probe_Pseudo ; - def _SGPR : SM_Probe_Pseudo ; + def _IMM : SM_Probe_Pseudo < + opName, "_IMM", baseClass, (ins smem_offset:$offset), "$offset", + /* hasOffset= */ 1, /* hasSOffset= */ 0>; + def _SGPR : SM_Probe_Pseudo < + opName, "_SGPR", baseClass, (ins SReg_32:$soffset), "$soffset", + /* hasOffset= */ 0, /* hasSOffset= */ 1>; + def _SGPR_IMM : SM_Probe_Pseudo < + opName, "_SGPR_IMM", baseClass, + (ins SReg_32:$soffset, smem_offset_mod:$offset), "$soffset$offset", + /* hasOffset= */ 1, /* hasSOffset= */ 1>; } class SM_WaveId_Pseudo : SM_Pseudo< @@ -635,6 +646,12 @@ multiclass SM_Real_Probe_vi op, string ps> { def _IMM_vi : SMEM_Real_Store_Base_vi (ps#_IMM)>; def _SGPR_vi : SMEM_Real_Store_Base_vi (ps#_SGPR)>; + def _SGPR_alt_gfx9 + : SMEM_Real_Store_Base_vi (ps#_SGPR)>, + SMEM_Real_SGPR_alt_gfx9; + let IsGFX9SpecificEncoding = true in + def _SGPR_IMM_gfx9 + : SMEM_Real_Store_Base_vi (ps#_SGPR_IMM)>; } defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">; @@ -1068,6 +1085,8 @@ multiclass SM_Real_Probe_gfx10 op, string ps> { def _IMM_gfx10 : SMEM_Real_Store_gfx10 (ps#_IMM)>; def _SGPR_gfx10 : SMEM_Real_Store_gfx10 (ps#_SGPR)>; + def _SGPR_IMM_gfx10 + : SMEM_Real_Store_gfx10 (ps#_SGPR_IMM)>; } defm S_ATC_PROBE : SM_Real_Probe_gfx10 
<0x26, "S_ATC_PROBE">; @@ -1227,6 +1246,8 @@ multiclass SM_Real_Probe_gfx11 op, string ps> { def _IMM_gfx11 : SMEM_Real_Store_gfx11 (ps#_IMM)>; def _SGPR_gfx11 : SMEM_Real_Store_gfx11 (ps#_SGPR)>; + def _SGPR_IMM_gfx11 + : SMEM_Real_Store_gfx11 (ps#_SGPR_IMM)>; } defm S_ATC_PROBE : SM_Real_Probe_gfx11 <0x22, "S_ATC_PROBE">; diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s @@ -947,12 +947,18 @@ s_atc_probe 0x7, s[4:5], 100 // GFX10: s_atc_probe 7, s[4:5], 0x64 ; encoding: [0xc2,0x01,0x98,0xf4,0x64,0x00,0x00,0xfa] +s_atc_probe 0x7, s[4:5], s9 offset:100 +// GFX10: s_atc_probe 7, s[4:5], s9 offset:0x64 ; encoding: [0xc2,0x01,0x98,0xf4,0x64,0x00,0x00,0x12] + s_atc_probe_buffer 0x7, s[8:11], s2 // GFX10: s_atc_probe_buffer 7, s[8:11], s2 ; encoding: [0xc4,0x01,0x9c,0xf4,0x00,0x00,0x00,0x04] s_atc_probe_buffer 0x7, s[8:11], 100 // GFX10: s_atc_probe_buffer 7, s[8:11], 0x64 ; encoding: [0xc4,0x01,0x9c,0xf4,0x64,0x00,0x00,0xfa] +s_atc_probe_buffer 0x7, s[8:11], s9 offset:100 +// GFX10: s_atc_probe_buffer 7, s[8:11], s9 offset:0x64 ; encoding: [0xc4,0x01,0x9c,0xf4,0x64,0x00,0x00,0x12] + s_dcache_discard s[2:3], s2 // GFX10: s_dcache_discard s[2:3], s2 ; encoding: [0x01,0x00,0xa0,0xf4,0x00,0x00,0x00,0x04] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_smem.s b/llvm/test/MC/AMDGPU/gfx11_asm_smem.s --- a/llvm/test/MC/AMDGPU/gfx11_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_smem.s @@ -467,12 +467,18 @@ s_atc_probe 7, s[4:5], 0x64 // GFX11: encoding: [0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0xf8] +s_atc_probe 7, s[4:5], s9 offset:0x64 +// GFX11: encoding: [0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0x12] + s_atc_probe_buffer 7, s[8:11], s2 // GFX11: encoding: [0xc4,0x01,0x8c,0xf4,0x00,0x00,0x00,0x04] s_atc_probe_buffer 7, s[8:11], 0x64 // GFX11: encoding: [0xc4,0x01,0x8c,0xf4,0x64,0x00,0x00,0xf8] +s_atc_probe_buffer 7, s[8:11], s9 offset:0x64 +// GFX11: encoding: 
[0xc4,0x01,0x8c,0xf4,0x64,0x00,0x00,0x12] + s_store_dword s1, s[4:5], s0 // GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s --- a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s @@ -1368,6 +1368,9 @@ s_atc_probe 0x0, s[4:5], 0x0 // CHECK: [0x02,0x00,0x9a,0xc0,0x00,0x00,0x00,0x00] +s_atc_probe 0x0, s[4:5], s7 offset:0x12345 +// CHECK: [0x02,0x40,0x9a,0xc0,0x45,0x23,0x01,0x0e] + s_atc_probe_buffer 0x0, s[8:11], s0 // CHECK: [0x04,0x00,0x9c,0xc0,0x00,0x00,0x00,0x00] @@ -1410,6 +1413,9 @@ s_atc_probe_buffer 0x0, s[8:11], 0x0 // CHECK: [0x04,0x00,0x9e,0xc0,0x00,0x00,0x00,0x00] +s_atc_probe_buffer 0x0, s[8:11], s7 offset:0x12345 +// CHECK: [0x04,0x40,0x9e,0xc0,0x45,0x23,0x01,0x0e] + s_dcache_discard s[2:3], s0 // CHECK: [0x01,0x00,0xa0,0xc0,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -9944,12 +9944,18 @@ # GFX10: s_atc_probe 7, s[4:5], 0x64 ; encoding: [0xc2,0x01,0x98,0xf4,0x64,0x00,0x00,0xfa] 0xc2,0x01,0x98,0xf4,0x64,0x00,0x00,0xfa +# GFX10: s_atc_probe 7, s[4:5], s9 offset:0x64 ; encoding: [0xc2,0x01,0x98,0xf4,0x64,0x00,0x00,0x12] +0xc2,0x01,0x98,0xf4,0x64,0x00,0x00,0x12 + # GFX10: s_atc_probe 7, s[4:5], s2 ; encoding: [0xc2,0x01,0x98,0xf4,0x00,0x00,0x00,0x04] 0xc2,0x01,0x98,0xf4,0x00,0x00,0x00,0x04 # GFX10: s_atc_probe_buffer 7, s[8:11], 0x64 ; encoding: [0xc4,0x01,0x9c,0xf4,0x64,0x00,0x00,0xfa] 0xc4,0x01,0x9c,0xf4,0x64,0x00,0x00,0xfa +# GFX10: s_atc_probe_buffer 7, s[8:11], s9 offset:0x64 ; encoding: [0xc4,0x01,0x9c,0xf4,0x64,0x00,0x00,0x12] +0xc4,0x01,0x9c,0xf4,0x64,0x00,0x00,0x12 + # GFX10: s_atc_probe_buffer 7, s[8:11], s2 ; encoding: [0xc4,0x01,0x9c,0xf4,0x00,0x00,0x00,0x04] 
0xc4,0x01,0x9c,0xf4,0x00,0x00,0x00,0x04 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_all.txt @@ -10294,12 +10294,18 @@ # GFX11: s_atc_probe 7, s[4:5], 0x64 ; encoding: [0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0xf8] 0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0xf8 +# GFX11: s_atc_probe 7, s[4:5], s9 offset:0x64 ; encoding: [0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0x12] +0xc2,0x01,0x88,0xf4,0x64,0x00,0x00,0x12 + # GFX11: s_atc_probe 7, s[4:5], s2 ; encoding: [0xc2,0x01,0x88,0xf4,0x00,0x00,0x00,0x04] 0xc2,0x01,0x88,0xf4,0x00,0x00,0x00,0x04 # GFX11: s_atc_probe_buffer 7, s[8:11], 0x64 ; encoding: [0xc4,0x01,0x8c,0xf4,0x64,0x00,0x00,0xf8] 0xc4,0x01,0x8c,0xf4,0x64,0x00,0x00,0xf8 +# GFX11: s_atc_probe_buffer 7, s[8:11], s9 offset:0x64 ; encoding: [0xc4,0x01,0x8c,0xf4,0x64,0x00,0x00,0x12] +0xc4,0x01,0x8c,0xf4,0x64,0x00,0x00,0x12 + # GFX11: s_atc_probe_buffer 7, s[8:11], s2 ; encoding: [0xc4,0x01,0x8c,0xf4,0x00,0x00,0x00,0x04] 0xc4,0x01,0x8c,0xf4,0x00,0x00,0x00,0x04 diff --git a/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt b/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt --- a/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt @@ -316,3 +316,22 @@ # GFX9: s_atomic_add_x2 s[34:35], exec, s11 glc ; encoding: [0xbf,0x08,0x89,0xc2,0x0b,0x00,0x00,0x00] 0xbf,0x08,0x89,0xc2,0x0b,0x00,0x00,0x00 + +#===------------------------------------------------------------------------===# +# s_atc_probe +#===------------------------------------------------------------------------===# + +# GFX9: s_atc_probe 7, s[4:5], s9 ; encoding: [0xc2,0x01,0x98,0xc0,0x09,0x00,0x00,0x00] +0xc2,0x01,0x98,0xc0,0x09,0x00,0x00,0x00 + +# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1 +# and the offset register encoded in the soffset field with the offset +# field being 
disregarded. +# GFX9: s_atc_probe 7, s[4:5], s9 ; encoding: [0xc2,0x41,0x98,0xc0,0x00,0x00,0x00,0x12] +0xc2,0x41,0x98,0xc0,0x2e,0x00,0x00,0x12 + +# GFX9: s_atc_probe 7, s[4:5], s9 offset:0x64 ; encoding: [0xc2,0x41,0x9a,0xc0,0x64,0x00,0x00,0x12] +0xc2,0x41,0x9a,0xc0,0x64,0x00,0x00,0x12 + +# GFX9: s_atc_probe_buffer 7, s[8:11], s9 offset:0x64 ; encoding: [0xc4,0x41,0x9e,0xc0,0x64,0x00,0x00,0x12] +0xc4,0x41,0x9e,0xc0,0x64,0x00,0x00,0x12