diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -87,6 +87,21 @@ bits<5> cpol; } +class OffsetMode { + bit HasOffset = hasOffset; + bit HasSOffset = hasSOffset; + string Variant = variant; + dag Ins = ins; + string Asm = asm; +} + +def IMM_Offset : OffsetMode<1, 0, "_IMM", (ins smem_offset:$offset), "$offset">; +def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">; +def SGPR_IMM_Offset : OffsetMode<1, 1, "_SGPR_IMM", + (ins SReg_32:$soffset, smem_offset_mod:$offset), + "$soffset$offset">; + class SM_Probe_Pseudo @@ -282,23 +297,21 @@ class SM_Pseudo_Atomic : SM_Atomic_Pseudo, AtomicNoRet { - let has_offset = isImm; - let has_soffset = !not(isImm); + let has_offset = offsets.HasOffset; + let has_soffset = offsets.HasSOffset; let PseudoInstr = opNameWithSuffix; let Constraints = !if(isRet, "$sdst = $sdata", ""); @@ -308,10 +321,12 @@ multiclass SM_Pseudo_Atomics { - def _IMM : SM_Pseudo_Atomic ; - def _SGPR : SM_Pseudo_Atomic ; - def _IMM_RTN : SM_Pseudo_Atomic ; - def _SGPR_RTN : SM_Pseudo_Atomic ; + def _IMM : SM_Pseudo_Atomic ; + def _SGPR : SM_Pseudo_Atomic ; + def _SGPR_IMM : SM_Pseudo_Atomic ; + def _IMM_RTN : SM_Pseudo_Atomic ; + def _SGPR_RTN : SM_Pseudo_Atomic ; + def _SGPR_IMM_RTN : SM_Pseudo_Atomic ; } //===----------------------------------------------------------------------===// @@ -705,8 +720,20 @@ multiclass SM_Real_Atomics_vi op, string ps> { def _IMM_vi : SMEM_Atomic_Real_vi (ps#_IMM)>; def _SGPR_vi : SMEM_Atomic_Real_vi (ps#_SGPR)>; + def _SGPR_alt_gfx9 + : SMEM_Atomic_Real_vi (ps#_SGPR)>, + SMEM_Real_SGPR_alt_gfx9; + let IsGFX9SpecificEncoding = true in + def _SGPR_IMM_gfx9 + : SMEM_Atomic_Real_vi (ps#_SGPR_IMM)>; def _IMM_RTN_vi : SMEM_Atomic_Real_vi (ps#_IMM_RTN)>; def _SGPR_RTN_vi : SMEM_Atomic_Real_vi (ps#_SGPR_RTN)>; + def _SGPR_RTN_alt_gfx9 + : SMEM_Atomic_Real_vi 
(ps#_SGPR_RTN)>, + SMEM_Real_SGPR_alt_gfx9; + let IsGFX9SpecificEncoding = true in + def _SGPR_IMM_RTN_gfx9 + : SMEM_Atomic_Real_vi (ps#_SGPR_IMM_RTN)>; } defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">; @@ -1103,8 +1130,10 @@ multiclass SM_Real_Atomics_gfx10 op, string ps> { def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_IMM)>; def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR)>; + def _SGPR_IMM_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR_IMM)>; def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_IMM_RTN)>; def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR_RTN)>; + def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR_IMM_RTN)>; } let SubtargetPredicate = HasScalarAtomics in { diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s @@ -983,6 +983,9 @@ s_atomic_add s5, s[2:3], 0x64 // GFX10: encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa] +s_atomic_add s5, s[2:3], s7 offset:0x64 +// GFX10: encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e] + s_atomic_add_x2 s[10:11], s[2:3], s101 // GFX10: encoding: [0x81,0x02,0x88,0xf6,0x00,0x00,0x00,0xca] @@ -1109,6 +1112,9 @@ s_atomic_add s5, s[2:3], 0x64 glc // GFX10: encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa] +s_atomic_add s5, s[2:3], s7 offset:0x64 glc +// GFX10: encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e] + s_atomic_add_x2 s[10:11], s[2:3], s101 glc // GFX10: encoding: [0x81,0x02,0x89,0xf6,0x00,0x00,0x00,0xca] diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s --- a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s @@ -3123,9 +3123,15 @@ s_atomic_add s5, s[2:3], 0x0 // CHECK: [0x41,0x01,0x0a,0xc2,0x00,0x00,0x00,0x00] +s_atomic_add s5, s[2:3], s7 offset:0x12345 +// CHECK: [0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e] + s_atomic_add s5, s[2:3], s0 glc // CHECK: 
[0x41,0x01,0x09,0xc2,0x00,0x00,0x00,0x00] +s_atomic_add s5, s[2:3], s7 offset:0x12345 glc +// CHECK: [0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e] + s_atomic_sub s5, s[2:3], s0 // CHECK: [0x41,0x01,0x0c,0xc2,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -13253,12 +13253,18 @@ # GFX10: s_atomic_add s5, s[2:3], 0x64 ; encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa] 0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0xfa +# GFX10: s_atomic_add s5, s[2:3], s7 offset:0x64 ; encoding: [0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e] +0x41,0x01,0x08,0xf6,0x64,0x00,0x00,0x0e + # GFX10: s_atomic_add s5, s[2:3], 0x64 dlc ; encoding: [0x41,0x41,0x08,0xf6,0x64,0x00,0x00,0xfa] 0x41,0x41,0x08,0xf6,0x64,0x00,0x00,0xfa # GFX10: s_atomic_add s5, s[2:3], 0x64 glc ; encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa] 0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0xfa +# GFX10: s_atomic_add s5, s[2:3], s7 offset:0x64 glc ; encoding: [0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e] +0x41,0x01,0x09,0xf6,0x64,0x00,0x00,0x0e + # GFX10: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca] 0x41,0x01,0x08,0xf6,0x00,0x00,0x00,0xca diff --git a/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt b/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt --- a/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/smem_gfx9.txt @@ -66,9 +66,27 @@ # GFX9: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00] 0x41,0x01,0x08,0xc2,0x65,0x00,0x00,0x00 +# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1 +# and the offset register encoded in the soffset field with the offset +# field being disregarded.
+# GFX9: s_atomic_add s5, s[2:3], s101 ; encoding: [0x41,0x41,0x08,0xc2,0x00,0x00,0x00,0xca] +0x41,0x41,0x08,0xc2,0x2e,0x00,0x00,0xca + +# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1 +# and the offset register encoded in the soffset field with the offset +# field being disregarded. +# GFX9: s_atomic_add s5, s[2:3], s101 glc ; encoding: [0x41,0x41,0x09,0xc2,0x00,0x00,0x00,0xca] +0x41,0x41,0x09,0xc2,0x2e,0x00,0x00,0xca + # GFX9: s_atomic_add_x2 s[10:11], s[2:3], s101 ; encoding: [0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00] 0x81,0x02,0x88,0xc2,0x65,0x00,0x00,0x00 +# GFX9: s_atomic_add s5, s[2:3], s7 offset:0x12345 ; encoding: [0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e] +0x41,0x41,0x0a,0xc2,0x45,0x23,0x01,0x0e + +# GFX9: s_atomic_add s5, s[2:3], s7 offset:0x12345 glc ; encoding: [0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e] +0x41,0x41,0x0b,0xc2,0x45,0x23,0x01,0x0e + # GFX9: s_atomic_and s101, s[2:3], s0 ; encoding: [0x41,0x19,0x20,0xc2,0x00,0x00,0x00,0x00] 0x41,0x19,0x20,0xc2,0x00,0x00,0x00,0x00