diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1010,19 +1010,36 @@ let SubtargetPredicate = isGFX6 in { // isn't on CI & VI /* defm BUFFER_ATOMIC_RSUB : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">; -defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">; -defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin">; -defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax">; defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub_x2">; -defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap_x2">; -defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin_x2">; -defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax_x2">; */ def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; } +let SubtargetPredicate = isGFX6GFX7GFX10 in { + +defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics < + "buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag +>; +defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmin", VGPR_32, f32, null_frag +>; +defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmax", VGPR_32, f32, null_frag +>; +defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag +>; +defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmin_x2", VReg_64, f64, null_frag +>; +defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics < + "buffer_atomic_fmax_x2", VReg_64, f64, null_frag +>; + +} + let SubtargetPredicate = HasD16LoadStore in { defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Pseudo_Loads < @@ -2025,10 +2042,9 @@ defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>; defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>; defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>; -// FIXME-GFX6-GFX7-GFX10: Add following instructions: -//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>; -//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>; -//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>; +defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>; +defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>; +defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>; defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>; defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>; defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>; @@ -2043,10 +2059,9 @@ defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>; // FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7. -// FIXME-GFX6-GFX7-GFX10: Add following instructions: -//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>; -//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>; -//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>; +defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>; +defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>; +defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>; defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>; defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>; diff --git a/llvm/test/MC/AMDGPU/mubuf-gfx10.s b/llvm/test/MC/AMDGPU/mubuf-gfx10.s --- a/llvm/test/MC/AMDGPU/mubuf-gfx10.s +++ b/llvm/test/MC/AMDGPU/mubuf-gfx10.s @@ -8,3 +8,39 @@ buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc // GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc ; encoding: [0x00,0xc0,0x24,0xe0,0x00,0x05,0x42,0x03] + +buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095 +// GFX10: buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0x00,0x00,0x00] + +buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095 +// GFX10: buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe1,0x00,0x00,0x00,0x00] + +buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095 +// GFX10: buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x78,0xe1,0x00,0x00,0x00,0x00] + +buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095 +// GFX10: buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x01,0x00,0x00] + +buffer_atomic_fmax v0, off, s[0:3], s0 offset:7 +// GFX10: buffer_atomic_fmax v0, off, s[0:3], s0 offset:7 ; encoding: [0x07,0x00,0x00,0xe1,0x00,0x00,0x00,0x00] + +buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc +// GFX10: buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc ; encoding: [0xff,0x4f,0x00,0xe1,0x00,0x00,0x00,0x00] + +buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095 +// GFX10: buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x05,0x00,0x00] + +buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 +// GFX10: buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x80,0xe1,0x00,0x00,0x00,0x00] + +buffer_atomic_fmin v0, off, s[0:3], s0 +// GFX10: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] + +buffer_atomic_fmin v0, off, s[0:3], s0 offset:0 +// GFX10: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] + +buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc +// GFX10: buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc ; encoding: [0xff,0x0f,0x7c,0xe1,0x00,0x00,0x40,0x00] + +buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 +// GFX10: buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x7c,0xe1,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/mubuf.s b/llvm/test/MC/AMDGPU/mubuf.s --- a/llvm/test/MC/AMDGPU/mubuf.s +++ b/llvm/test/MC/AMDGPU/mubuf.s @@ -719,6 +719,62 @@ // NOSICI: error: invalid operand for instruction // VI: buffer_atomic_add v5, off, s[8:11], 0.15915494 offset:4095 glc ; encoding: [0xff,0x4f,0x08,0xe1,0x00,0x05,0x02,0xf8] +buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095 +// SICI: buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0x00,0x00,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fcmpswap v[0:1], v[0:1], s[0:3], s0 addr64 offset:4095 +// SICI: buffer_atomic_fcmpswap v[0:1], v[0:1], s[0:3], s0 addr64 offset:4095 ; encoding: [0xff,0x8f,0xf8,0xe0,0x00,0x00,0x00,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095 +// SICI: buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe1,0x00,0x00,0x00,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095 +// SICI: buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x78,0xe1,0x00,0x00,0x00,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095 +// SICI: buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x01,0x00,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fmax v0, off, s[0:3], s0 offset:7 +// SICI: buffer_atomic_fmax v0, off, s[0:3], s0 offset:7 ; encoding: [0x07,0x00,0x00,0xe1,0x00,0x00,0x00,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc +// SICI: buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc ; encoding: [0xff,0x4f,0x00,0xe1,0x00,0x00,0x00,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095 +// SICI: buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x05,0x00,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 +// SICI: buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x80,0xe1,0x00,0x00,0x00,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fmin v0, v[0:1], s[0:3], s0 addr64 offset:4095 +// SICI: buffer_atomic_fmin v0, v[0:1], s[0:3], s0 addr64 offset:4095 ; encoding: [0xff,0x8f,0xfc,0xe0,0x00,0x00,0x00,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fmin v0, off, s[0:3], s0 +// SICI: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] +// NOVI: error: instruction not supported on this GPU + +buffer_atomic_fmin v0, off, s[0:3], s0 offset:0 +// SICI: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc +// SICI: buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc ; encoding: [0xff,0x0f,0x7c,0xe1,0x00,0x00,0x40,0x00] +// NOVI: error: not a valid operand. + +buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 +// SICI: buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x7c,0xe1,0x00,0x00,0x00,0x00] +// NOVI: error: not a valid operand. + //===----------------------------------------------------------------------===// // Lds support //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/Disassembler/AMDGPU/mubuf_gfx10.txt b/llvm/test/MC/Disassembler/AMDGPU/mubuf_gfx10.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/mubuf_gfx10.txt @@ -0,0 +1,31 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck %s + +# CHECK: buffer_atomic_fcmpswap v[5:6], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0x05,0x02,0x03] +0xff,0x0f,0xf8,0xe0,0x00,0x05,0x02,0x03 + +# CHECK: buffer_atomic_fcmpswap v[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0xfe,0x02,0x03] +0xff,0x0f,0xf8,0xe0,0x00,0xfe,0x02,0x03 + +# CHECK: buffer_atomic_fcmpswap_x2 v[5:8], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x78,0xe1,0x00,0x05,0x02,0x03] +0x07,0x00,0x78,0xe1,0x00,0x05,0x02,0x03 + +# CHECK: buffer_atomic_fcmpswap_x2 v[5:8], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x78,0xe1,0x00,0x05,0x02,0x03] +0xff,0x4f,0x78,0xe1,0x00,0x05,0x02,0x03 + +# CHECK: buffer_atomic_fmax v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x00,0xe1,0x00,0x05,0x02,0x03] +0xff,0x2f,0x00,0xe1,0x00,0x05,0x02,0x03 + +# CHECK: buffer_atomic_fmax_x2 v[5:6], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x80,0xe1,0x00,0x05,0x02,0x03] +0xff,0x4f,0x80,0xe1,0x00,0x05,0x02,0x03 + +# CHECK: buffer_atomic_fmax_x2 v[5:6], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x05,0x42,0x03] +0xff,0x0f,0x80,0xe1,0x00,0x05,0x42,0x03 + +# CHECK: buffer_atomic_fmin v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03] +0x00,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03 + +# CHECK: buffer_atomic_fmin v5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03] +0x07,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03 + +# CHECK: buffer_atomic_fmin_x2 v[5:6], off, ttmp[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe1,0x00,0x05,0x1e,0x03] +0xff,0x0f,0x7c,0xe1,0x00,0x05,0x1e,0x03