NOTE(review): This patch adds GFX9-only base+soffset+imm-offset SMEM load
encodings (_SGPR_IMM_gfx9) plus assembler/disassembler tests. The text was
recovered from a copy with collapsed whitespace and stripped "<...>" spans;
line structure matches the hunk headers, but indentation of context lines and
the restored template arguments are reconstructed -- confirm against upstream
and apply with `git apply --ignore-whitespace` if context fails to match.

diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -523,38 +523,70 @@
 
 
 //===----------------------------------------------------------------------===//
-// VI
+// VI and GFX9.
 //===----------------------------------------------------------------------===//
 
-class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
+class SMEM_Real_vi <bits<8> op, SM_Pseudo ps, bit isGFX9Specific = false>
   : SM_Real<ps>
   , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
   , Enc64 {
-  let AssemblerPredicate = isGFX8GFX9;
+  let AssemblerPredicate = !if(isGFX9Specific, isGFX9Only, isGFX8GFX9);
   let DecoderNamespace = "GFX8";
 
   let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
   let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
 
+  // soffset_en
+  // Note that for GFX9 instructions with immediate offsets, soffset_en
+  // must be defined, whereas in GFX8 it's undefined in all cases,
+  // meaning GFX9 is not perfectly backward-compatible with GFX8, despite
+  // documentation suggesting otherwise.
+  let Inst{14} = !if(isGFX9Specific,
+    !if(ps.has_offset, ps.has_soffset, !if(ps.has_soffset, 0, ?)),
+    ?);
+
   let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
+
+  // imm
+  // TODO: Shall not be defined if the instruction has no offset nor
+  // soffset.
   let Inst{17} = ps.has_offset;
+
   let Inst{25-18} = op;
   let Inst{31-26} = 0x30; //encoding
 
+  // TODO: Support the two other possible GFX9 encodings for the
+  // (no-offset + soffset) case, namely:
+  //   imm=0 soffset_en=1 offset=? soffset=<sgpr>
+  //   imm=1 soffset_en=1 offset=0 soffset=<sgpr>
+
+  // offset
   // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed.
   // Offset value is corrected accordingly when offset is encoded/decoded.
+  // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics.
   let Inst{38-32} = !if(ps.has_offset, offset{6-0}, !if(ps.has_soffset, soffset{6-0}, ?));
   let Inst{52-39} = !if(ps.has_offset, offset{20-7}, ?);
+
+  // soffset
+  let Inst{63-57} = !if(!and(ps.has_offset, ps.has_soffset), soffset{6-0}, ?);
 }
 
-multiclass SM_Real_Loads_vi<bits<8> op, string ps,
-                            SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
-                            SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
-  def _IMM_vi : SMEM_Real_vi <op, immPs> {
-    let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
+class SMEM_Real_Load_vi<bits<8> op, SM_Load_Pseudo ps, bit isGFX9Specific = false>
+    : SMEM_Real_vi<op, ps, isGFX9Specific> {
+  RegisterClass BaseClass = ps.BaseClass;
+}
+
+multiclass SM_Real_Loads_vi<bits<8> op, string ps> {
+  def _IMM_vi : SMEM_Real_Load_vi <op, !cast<SM_Load_Pseudo>(ps#_IMM)> {
+    let InOperandList = (ins BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
   }
-  def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
-    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
+  def _SGPR_vi : SMEM_Real_Load_vi <op, !cast<SM_Load_Pseudo>(ps#_SGPR)> {
+    let InOperandList = (ins BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
+  }
+  def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, !cast<SM_Load_Pseudo>(ps#_SGPR_IMM),
+                                          /* isGFX9Specific= */ true> {
+    let InOperandList = (ins BaseClass:$sbase, SReg_32:$soffset,
+                             smem_offset_mod:$offset, CPol:$cpol);
   }
 }
 
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s
--- a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s
@@ -60,6 +60,9 @@
 s_load_dword s5, s[2:3], 0x0
 // CHECK: [0x41,0x01,0x02,0xc0,0x00,0x00,0x00,0x00]
 
+s_load_dword s5, s[2:3], s7 offset:0x12345
+// CHECK: [0x41,0x41,0x02,0xc0,0x45,0x23,0x01,0x0e]
+
 s_load_dword s5, s[2:3], s0 glc
 // CHECK: [0x41,0x01,0x01,0xc0,0x00,0x00,0x00,0x00]
 
@@ -120,6 +123,9 @@
 s_load_dwordx2 s[10:11], s[2:3], 0x0
 // CHECK: [0x81,0x02,0x06,0xc0,0x00,0x00,0x00,0x00]
 
+s_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345
+// CHECK: [0x81,0x42,0x06,0xc0,0x45,0x23,0x01,0x00]
+
 s_load_dwordx2 s[10:11], s[2:3], s0 glc
 // CHECK: [0x81,0x02,0x05,0xc0,0x00,0x00,0x00,0x00]
 
@@ -174,6 +180,9 @@
 s_load_dwordx4 s[20:23], s[2:3], 0x0
 // CHECK: [0x01,0x05,0x0a,0xc0,0x00,0x00,0x00,0x00]
 
+s_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345
+// CHECK: [0x01,0x45,0x0a,0xc0,0x45,0x23,0x01,0x00]
+
 s_load_dwordx4 s[20:23], s[2:3], s0 glc
 // CHECK: [0x01,0x05,0x09,0xc0,0x00,0x00,0x00,0x00]
 
@@ -228,6 +237,9 @@
 s_load_dwordx8 s[20:27], s[2:3], 0x0
 // CHECK: [0x01,0x05,0x0e,0xc0,0x00,0x00,0x00,0x00]
 
+s_load_dwordx8 s[20:27], s[2:3], s0 offset:0x12345
+// CHECK: [0x01,0x45,0x0e,0xc0,0x45,0x23,0x01,0x00]
+
 s_load_dwordx8 s[20:27], s[2:3], s0 glc
 // CHECK: [0x01,0x05,0x0d,0xc0,0x00,0x00,0x00,0x00]
 
@@ -282,6 +294,9 @@
 s_load_dwordx16 s[20:35], s[2:3], 0x0
 // CHECK: [0x01,0x05,0x12,0xc0,0x00,0x00,0x00,0x00]
 
+s_load_dwordx16 s[20:35], s[2:3], s0 offset:0x12345
+// CHECK: [0x01,0x45,0x12,0xc0,0x45,0x23,0x01,0x00]
+
 s_load_dwordx16 s[20:35], s[2:3], s0 glc
 // CHECK: [0x01,0x05,0x11,0xc0,0x00,0x00,0x00,0x00]
 
@@ -345,6 +360,9 @@
 s_scratch_load_dword s5, s[2:3], 0x0
 // CHECK: [0x41,0x01,0x16,0xc0,0x00,0x00,0x00,0x00]
 
+s_scratch_load_dword s5, s[2:3], s0 offset:0x12345
+// CHECK: [0x41,0x41,0x16,0xc0,0x45,0x23,0x01,0x00]
+
 s_scratch_load_dword s5, s[2:3], s0 glc
 // CHECK: [0x41,0x01,0x15,0xc0,0x00,0x00,0x00,0x00]
 
@@ -405,6 +423,9 @@
 s_scratch_load_dwordx2 s[10:11], s[2:3], 0x0
 // CHECK: [0x81,0x02,0x1a,0xc0,0x00,0x00,0x00,0x00]
 
+s_scratch_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345
+// CHECK: [0x81,0x42,0x1a,0xc0,0x45,0x23,0x01,0x00]
+
 s_scratch_load_dwordx2 s[10:11], s[2:3], s0 glc
 // CHECK: [0x81,0x02,0x19,0xc0,0x00,0x00,0x00,0x00]
 
@@ -459,6 +480,9 @@
 s_scratch_load_dwordx4 s[20:23], s[2:3], 0x0
 // CHECK: [0x01,0x05,0x1e,0xc0,0x00,0x00,0x00,0x00]
 
+s_scratch_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345
+// CHECK: [0x01,0x45,0x1e,0xc0,0x45,0x23,0x01,0x00]
+
 s_scratch_load_dwordx4 s[20:23], s[2:3], s0 glc
 // CHECK: [0x01,0x05,0x1d,0xc0,0x00,0x00,0x00,0x00]
 
@@ -516,6 +540,9 @@
 s_buffer_load_dword s5, s[4:7], 0x0
 // CHECK: [0x42,0x01,0x22,0xc0,0x00,0x00,0x00,0x00]
 
+s_buffer_load_dword s5, s[4:7], s0 offset:0x12345
+// CHECK: [0x42,0x41,0x22,0xc0,0x45,0x23,0x01,0x00]
+
 s_buffer_load_dword s5, s[4:7], s0 glc
 // CHECK: [0x42,0x01,0x21,0xc0,0x00,0x00,0x00,0x00]
 
@@ -570,6 +597,9 @@
 s_buffer_load_dwordx2 s[10:11], s[4:7], 0x0
 // CHECK: [0x82,0x02,0x26,0xc0,0x00,0x00,0x00,0x00]
 
+s_buffer_load_dwordx2 s[10:11], s[4:7], s0 offset:0x12345
+// CHECK: [0x82,0x42,0x26,0xc0,0x45,0x23,0x01,0x00]
+
 s_buffer_load_dwordx2 s[10:11], s[4:7], s0 glc
 // CHECK: [0x82,0x02,0x25,0xc0,0x00,0x00,0x00,0x00]
 
@@ -618,6 +648,9 @@
 s_buffer_load_dwordx4 s[20:23], s[4:7], 0x0
 // CHECK: [0x02,0x05,0x2a,0xc0,0x00,0x00,0x00,0x00]
 
+s_buffer_load_dwordx4 s[20:23], s[4:7], s0 offset:0x12345
+// CHECK: [0x02,0x45,0x2a,0xc0,0x45,0x23,0x01,0x00]
+
 s_buffer_load_dwordx4 s[20:23], s[4:7], s0 glc
 // CHECK: [0x02,0x05,0x29,0xc0,0x00,0x00,0x00,0x00]
 
@@ -666,6 +699,9 @@
 s_buffer_load_dwordx8 s[20:27], s[4:7], 0x0
 // CHECK: [0x02,0x05,0x2e,0xc0,0x00,0x00,0x00,0x00]
 
+s_buffer_load_dwordx8 s[20:27], s[4:7], s0 offset:0x12345
+// CHECK: [0x02,0x45,0x2e,0xc0,0x45,0x23,0x01,0x00]
+
 s_buffer_load_dwordx8 s[20:27], s[4:7], s0 glc
 // CHECK: [0x02,0x05,0x2d,0xc0,0x00,0x00,0x00,0x00]
 
@@ -714,6 +750,9 @@
 s_buffer_load_dwordx16 s[20:35], s[4:7], 0x0
 // CHECK: [0x02,0x05,0x32,0xc0,0x00,0x00,0x00,0x00]
 
+s_buffer_load_dwordx16 s[20:35], s[4:7], s0 offset:0x12345
+// CHECK: [0x02,0x45,0x32,0xc0,0x45,0x23,0x01,0x00]
+
 s_buffer_load_dwordx16 s[20:35], s[4:7], s0 glc
 // CHECK: [0x02,0x05,0x31,0xc0,0x00,0x00,0x00,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s
--- a/llvm/test/MC/AMDGPU/smem.s
+++ b/llvm/test/MC/AMDGPU/smem.s
@@ -675,7 +675,7 @@
 s_load_dword s1, s[2:3], s0 offset:0x1FFFFF
 // NOSICI: error: operands are not valid for this GPU or mode
 // NOVI: error: operands are not valid for this GPU or mode
-// NOGFX9: error: operands are not valid for this GPU or mode
+// NOGFX9: error: expected a 21-bit signed offset
 // NOGFX10: error: expected a 21-bit signed offset
 
 s_store_dword s1, s[2:3], 0x1FFFFF
@@ -686,7 +686,7 @@
 s_buffer_load_dword s10, s[92:95], s0 offset:-1
 // NOSICI: error: operands are not valid for this GPU or mode
 // NOVI: error: operands are not valid for this GPU or mode
-// NOGFX9: error: operands are not valid for this GPU or mode
+// NOGFX9: error: expected a 20-bit unsigned offset
 // NOGFX10: error: expected a 20-bit unsigned offset
 
 s_buffer_store_dword s10, s[92:95], 0x1FFFFF
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
@@ -10074,6 +10074,9 @@
 # CHECK: s_load_dword s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x02,0xc0,0x00,0x00,0x00,0x00]
 0x41,0x01,0x02,0xc0,0x00,0x00,0x00,0x00
 
+# CHECK: s_load_dword s5, s[2:3], s0 offset:0x12345 ; encoding: [0x41,0x41,0x02,0xc0,0x45,0x23,0x01,0x00]
+0x41,0x41,0x02,0xc0,0x45,0x23,0x01,0x00
+
 # CHECK: s_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x01,0xc0,0x00,0x00,0x00,0x00]
 0x41,0x01,0x01,0xc0,0x00,0x00,0x00,0x00
 
@@ -10125,6 +10128,9 @@
 # CHECK: s_load_dwordx2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x06,0xc0,0x00,0x00,0x00,0x00]
 0x81,0x02,0x06,0xc0,0x00,0x00,0x00,0x00
 
+# CHECK: s_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345 ; encoding: [0x81,0x42,0x06,0xc0,0x45,0x23,0x01,0x00]
+0x81,0x42,0x06,0xc0,0x45,0x23,0x01,0x00
+
 # CHECK: s_load_dwordx2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xc0,0x00,0x00,0x00,0x00]
 0x81,0x02,0x05,0xc0,0x00,0x00,0x00,0x00
 
@@ -10170,6 +10176,9 @@
 # CHECK: s_load_dwordx4 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x0a,0xc0,0x00,0x00,0x00,0x00]
 0x01,0x05,0x0a,0xc0,0x00,0x00,0x00,0x00
 
+# CHECK: s_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345 ; encoding: [0x01,0x45,0x0a,0xc0,0x45,0x23,0x01,0x00]
+0x01,0x45,0x0a,0xc0,0x45,0x23,0x01,0x00
+
 # CHECK: s_load_dwordx4 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x09,0xc0,0x00,0x00,0x00,0x00]
 0x01,0x05,0x09,0xc0,0x00,0x00,0x00,0x00
 
@@ -10215,6 +10224,9 @@
 # CHECK: s_load_dwordx8 s[20:27], s[2:3], 0x0 ; encoding: [0x01,0x05,0x0e,0xc0,0x00,0x00,0x00,0x00]
 0x01,0x05,0x0e,0xc0,0x00,0x00,0x00,0x00
 
+# CHECK: s_load_dwordx8 s[20:27], s[2:3], s0 offset:0x12345 ; encoding: [0x01,0x45,0x0e,0xc0,0x45,0x23,0x01,0x00]
+0x01,0x45,0x0e,0xc0,0x45,0x23,0x01,0x00
+
 # CHECK: s_load_dwordx8 s[20:27], s[2:3], s0 glc ; encoding: [0x01,0x05,0x0d,0xc0,0x00,0x00,0x00,0x00]
 0x01,0x05,0x0d,0xc0,0x00,0x00,0x00,0x00
 
@@ -10260,6 +10272,9 @@
 # CHECK: s_load_dwordx16 s[20:35], s[2:3], 0x0 ; encoding: [0x01,0x05,0x12,0xc0,0x00,0x00,0x00,0x00]
 0x01,0x05,0x12,0xc0,0x00,0x00,0x00,0x00
 
+# CHECK: s_load_dwordx16 s[20:35], s[2:3], s0 offset:0x12345 ; encoding: [0x01,0x45,0x12,0xc0,0x45,0x23,0x01,0x00]
+0x01,0x45,0x12,0xc0,0x45,0x23,0x01,0x00
+
 # CHECK: s_load_dwordx16 s[20:35], s[2:3], s0 glc ; encoding: [0x01,0x05,0x11,0xc0,0x00,0x00,0x00,0x00]
 0x01,0x05,0x11,0xc0,0x00,0x00,0x00,0x00
 
@@ -10308,6 +10323,9 @@
 # CHECK: s_buffer_load_dword s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x22,0xc0,0x00,0x00,0x00,0x00]
 0x42,0x01,0x22,0xc0,0x00,0x00,0x00,0x00
 
+# CHECK: s_buffer_load_dword s5, s[4:7], s0 offset:0x12345 ; encoding: [0x42,0x41,0x22,0xc0,0x45,0x23,0x01,0x00]
+0x42,0x41,0x22,0xc0,0x45,0x23,0x01,0x00
+
 # CHECK: s_buffer_load_dword s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x21,0xc0,0x00,0x00,0x00,0x00]
 0x42,0x01,0x21,0xc0,0x00,0x00,0x00,0x00
 
@@ -10353,6 +10371,9 @@
 # CHECK: s_buffer_load_dwordx2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x26,0xc0,0x00,0x00,0x00,0x00]
 0x82,0x02,0x26,0xc0,0x00,0x00,0x00,0x00
 
+# CHECK: s_buffer_load_dwordx2 s[10:11], s[4:7], s0 offset:0x12345 ; encoding: [0x82,0x42,0x26,0xc0,0x45,0x23,0x01,0x00]
+0x82,0x42,0x26,0xc0,0x45,0x23,0x01,0x00
+
 # CHECK: s_buffer_load_dwordx2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x25,0xc0,0x00,0x00,0x00,0x00]
 0x82,0x02,0x25,0xc0,0x00,0x00,0x00,0x00
 
@@ -10392,6 +10413,9 @@
 # CHECK: s_buffer_load_dwordx4 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x2a,0xc0,0x00,0x00,0x00,0x00]
 0x02,0x05,0x2a,0xc0,0x00,0x00,0x00,0x00
 
+# CHECK: s_buffer_load_dwordx4 s[20:23], s[4:7], s0 offset:0x12345 ; encoding: [0x02,0x45,0x2a,0xc0,0x45,0x23,0x01,0x00]
+0x02,0x45,0x2a,0xc0,0x45,0x23,0x01,0x00
+
 # CHECK: s_buffer_load_dwordx4 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x29,0xc0,0x00,0x00,0x00,0x00]
 0x02,0x05,0x29,0xc0,0x00,0x00,0x00,0x00
 
@@ -10431,6 +10455,9 @@
 # CHECK: s_buffer_load_dwordx8 s[20:27], s[4:7], 0x0 ; encoding: [0x02,0x05,0x2e,0xc0,0x00,0x00,0x00,0x00]
 0x02,0x05,0x2e,0xc0,0x00,0x00,0x00,0x00
 
+# CHECK: s_buffer_load_dwordx8 s[20:27], s[4:7], s0 offset:0x12345 ; encoding: [0x02,0x45,0x2e,0xc0,0x45,0x23,0x01,0x00]
+0x02,0x45,0x2e,0xc0,0x45,0x23,0x01,0x00
+
 # CHECK: s_buffer_load_dwordx8 s[20:27], s[4:7], s0 glc ; encoding: [0x02,0x05,0x2d,0xc0,0x00,0x00,0x00,0x00]
 0x02,0x05,0x2d,0xc0,0x00,0x00,0x00,0x00
 
@@ -10470,6 +10497,9 @@
 # CHECK: s_buffer_load_dwordx16 s[20:35], s[4:7], 0x0 ; encoding: [0x02,0x05,0x32,0xc0,0x00,0x00,0x00,0x00]
 0x02,0x05,0x32,0xc0,0x00,0x00,0x00,0x00
 
+# CHECK: s_buffer_load_dwordx16 s[20:35], s[4:7], s0 offset:0x12345 ; encoding: [0x02,0x45,0x32,0xc0,0x45,0x23,0x01,0x00]
+0x02,0x45,0x32,0xc0,0x45,0x23,0x01,0x00
+
 # CHECK: s_buffer_load_dwordx16 s[20:35], s[4:7], s0 glc ; encoding: [0x02,0x05,0x31,0xc0,0x00,0x00,0x00,0x00]
 0x02,0x05,0x31,0xc0,0x00,0x00,0x00,0x00
 