diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -522,38 +522,71 @@ //===----------------------------------------------------------------------===// -// VI +// VI and GFX9. //===----------------------------------------------------------------------===// class SMEM_Real_vi op, SM_Pseudo ps> : SM_Real , SIMCInstr , Enc64 { - let AssemblerPredicate = isGFX8GFX9; + field bit IsGFX9Specific = false; + let AssemblerPredicate = !if(IsGFX9Specific, isGFX9Only, isGFX8GFX9); let DecoderNamespace = "GFX8"; let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); + // Note that for GFX9 instructions with immediate offsets, soffset_en + // must be defined, whereas in GFX8 it's undefined in all cases, + // meaning GFX9 is not perfectly backward-compatible with GFX8, despite + // documentation suggesting otherwise. + field bit SOffsetEn = !if(IsGFX9Specific, + !if(ps.has_offset, ps.has_soffset, !if(ps.has_soffset, 0, ?)), + ?); + let Inst{14} = SOffsetEn; + let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); + + // imm + // TODO: Shall not be defined if the instruction has no offset nor + // soffset. let Inst{17} = ps.has_offset; + let Inst{25-18} = op; let Inst{31-26} = 0x30; //encoding // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed. // Offset value is corrected accordingly when offset is encoded/decoded. - let Inst{38-32} = !if(ps.has_offset, offset{6-0}, !if(ps.has_soffset, soffset{6-0}, ?)); - let Inst{52-39} = !if(ps.has_offset, offset{20-7}, ?); + // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics. 
+ field bits<21> Offset; + let Offset{6-0} = !if(ps.has_offset, offset{6-0}, + !if(ps.has_soffset, soffset{6-0}, ?)); + let Offset{20-7} = !if(ps.has_offset, offset{20-7}, ?); + let Inst{52-32} = Offset; + + // soffset + let Inst{63-57} = !if(!and(IsGFX9Specific, ps.has_soffset), soffset{6-0}, ?); } -multiclass SM_Real_Loads_vi op, string ps, - SM_Load_Pseudo immPs = !cast(ps#_IMM), - SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { - def _IMM_vi : SMEM_Real_vi { - let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); - } - def _SGPR_vi : SMEM_Real_vi { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); +class SMEM_Real_Load_vi op, string ps, dag offsets> + : SMEM_Real_vi(ps)> { + RegisterClass BaseClass = !cast(ps).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol)); +} + +multiclass SM_Real_Loads_vi op, string ps> { + def _IMM_vi : SMEM_Real_Load_vi ; + def _SGPR_vi : SMEM_Real_Load_vi ; + let IsGFX9Specific = true in { + // The alternative GFX9 SGPR encoding using soffset to encode the + // offset register. Not available in assembler and goes to the GFX9 + // encoding family to avoid conflicts with the primary SGPR variant. 
+ let SOffsetEn = 1, Offset = ?, Subtarget = SIEncodingFamily.GFX9, + AsmVariantName = "NonParsable" in + def _SGPR_alt_gfx9 : SMEM_Real_Load_vi ; + def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi < + op, ps#"_SGPR_IMM", (ins SReg_32:$soffset, smem_offset_mod:$offset)>; } } diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s --- a/llvm/test/MC/AMDGPU/gfx9_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_smem.s @@ -60,6 +60,9 @@ s_load_dword s5, s[2:3], 0x0 // CHECK: [0x41,0x01,0x02,0xc0,0x00,0x00,0x00,0x00] +s_load_dword s5, s[2:3], s7 offset:0x12345 +// CHECK: [0x41,0x41,0x02,0xc0,0x45,0x23,0x01,0x0e] + s_load_dword s5, s[2:3], s0 glc // CHECK: [0x41,0x01,0x01,0xc0,0x00,0x00,0x00,0x00] @@ -120,6 +123,9 @@ s_load_dwordx2 s[10:11], s[2:3], 0x0 // CHECK: [0x81,0x02,0x06,0xc0,0x00,0x00,0x00,0x00] +s_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345 +// CHECK: [0x81,0x42,0x06,0xc0,0x45,0x23,0x01,0x00] + s_load_dwordx2 s[10:11], s[2:3], s0 glc // CHECK: [0x81,0x02,0x05,0xc0,0x00,0x00,0x00,0x00] @@ -174,6 +180,9 @@ s_load_dwordx4 s[20:23], s[2:3], 0x0 // CHECK: [0x01,0x05,0x0a,0xc0,0x00,0x00,0x00,0x00] +s_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345 +// CHECK: [0x01,0x45,0x0a,0xc0,0x45,0x23,0x01,0x00] + s_load_dwordx4 s[20:23], s[2:3], s0 glc // CHECK: [0x01,0x05,0x09,0xc0,0x00,0x00,0x00,0x00] @@ -228,6 +237,9 @@ s_load_dwordx8 s[20:27], s[2:3], 0x0 // CHECK: [0x01,0x05,0x0e,0xc0,0x00,0x00,0x00,0x00] +s_load_dwordx8 s[20:27], s[2:3], s0 offset:0x12345 +// CHECK: [0x01,0x45,0x0e,0xc0,0x45,0x23,0x01,0x00] + s_load_dwordx8 s[20:27], s[2:3], s0 glc // CHECK: [0x01,0x05,0x0d,0xc0,0x00,0x00,0x00,0x00] @@ -282,6 +294,9 @@ s_load_dwordx16 s[20:35], s[2:3], 0x0 // CHECK: [0x01,0x05,0x12,0xc0,0x00,0x00,0x00,0x00] +s_load_dwordx16 s[20:35], s[2:3], s0 offset:0x12345 +// CHECK: [0x01,0x45,0x12,0xc0,0x45,0x23,0x01,0x00] + s_load_dwordx16 s[20:35], s[2:3], s0 glc // CHECK: [0x01,0x05,0x11,0xc0,0x00,0x00,0x00,0x00] @@ -345,6 +360,9 @@ s_scratch_load_dword s5, 
s[2:3], 0x0 // CHECK: [0x41,0x01,0x16,0xc0,0x00,0x00,0x00,0x00] +s_scratch_load_dword s5, s[2:3], s0 offset:0x12345 +// CHECK: [0x41,0x41,0x16,0xc0,0x45,0x23,0x01,0x00] + s_scratch_load_dword s5, s[2:3], s0 glc // CHECK: [0x41,0x01,0x15,0xc0,0x00,0x00,0x00,0x00] @@ -405,6 +423,9 @@ s_scratch_load_dwordx2 s[10:11], s[2:3], 0x0 // CHECK: [0x81,0x02,0x1a,0xc0,0x00,0x00,0x00,0x00] +s_scratch_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345 +// CHECK: [0x81,0x42,0x1a,0xc0,0x45,0x23,0x01,0x00] + s_scratch_load_dwordx2 s[10:11], s[2:3], s0 glc // CHECK: [0x81,0x02,0x19,0xc0,0x00,0x00,0x00,0x00] @@ -459,6 +480,9 @@ s_scratch_load_dwordx4 s[20:23], s[2:3], 0x0 // CHECK: [0x01,0x05,0x1e,0xc0,0x00,0x00,0x00,0x00] +s_scratch_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345 +// CHECK: [0x01,0x45,0x1e,0xc0,0x45,0x23,0x01,0x00] + s_scratch_load_dwordx4 s[20:23], s[2:3], s0 glc // CHECK: [0x01,0x05,0x1d,0xc0,0x00,0x00,0x00,0x00] @@ -516,6 +540,9 @@ s_buffer_load_dword s5, s[4:7], 0x0 // CHECK: [0x42,0x01,0x22,0xc0,0x00,0x00,0x00,0x00] +s_buffer_load_dword s5, s[4:7], s0 offset:0x12345 +// CHECK: [0x42,0x41,0x22,0xc0,0x45,0x23,0x01,0x00] + s_buffer_load_dword s5, s[4:7], s0 glc // CHECK: [0x42,0x01,0x21,0xc0,0x00,0x00,0x00,0x00] @@ -570,6 +597,9 @@ s_buffer_load_dwordx2 s[10:11], s[4:7], 0x0 // CHECK: [0x82,0x02,0x26,0xc0,0x00,0x00,0x00,0x00] +s_buffer_load_dwordx2 s[10:11], s[4:7], s0 offset:0x12345 +// CHECK: [0x82,0x42,0x26,0xc0,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx2 s[10:11], s[4:7], s0 glc // CHECK: [0x82,0x02,0x25,0xc0,0x00,0x00,0x00,0x00] @@ -618,6 +648,9 @@ s_buffer_load_dwordx4 s[20:23], s[4:7], 0x0 // CHECK: [0x02,0x05,0x2a,0xc0,0x00,0x00,0x00,0x00] +s_buffer_load_dwordx4 s[20:23], s[4:7], s0 offset:0x12345 +// CHECK: [0x02,0x45,0x2a,0xc0,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx4 s[20:23], s[4:7], s0 glc // CHECK: [0x02,0x05,0x29,0xc0,0x00,0x00,0x00,0x00] @@ -666,6 +699,9 @@ s_buffer_load_dwordx8 s[20:27], s[4:7], 0x0 // CHECK: 
[0x02,0x05,0x2e,0xc0,0x00,0x00,0x00,0x00] +s_buffer_load_dwordx8 s[20:27], s[4:7], s0 offset:0x12345 +// CHECK: [0x02,0x45,0x2e,0xc0,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx8 s[20:27], s[4:7], s0 glc // CHECK: [0x02,0x05,0x2d,0xc0,0x00,0x00,0x00,0x00] @@ -714,6 +750,9 @@ s_buffer_load_dwordx16 s[20:35], s[4:7], 0x0 // CHECK: [0x02,0x05,0x32,0xc0,0x00,0x00,0x00,0x00] +s_buffer_load_dwordx16 s[20:35], s[4:7], s0 offset:0x12345 +// CHECK: [0x02,0x45,0x32,0xc0,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx16 s[20:35], s[4:7], s0 glc // CHECK: [0x02,0x05,0x31,0xc0,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -673,7 +673,7 @@ s_load_dword s1, s[2:3], s0 offset:0x1FFFFF // NOSICI: error: operands are not valid for this GPU or mode // NOVI: error: operands are not valid for this GPU or mode -// NOGFX9: error: operands are not valid for this GPU or mode +// NOGFX9: error: expected a 21-bit signed offset // NOGFX10: error: expected a 21-bit signed offset s_store_dword s1, s[2:3], 0x1FFFFF @@ -684,7 +684,7 @@ s_buffer_load_dword s10, s[92:95], s0 offset:-1 // NOSICI: error: operands are not valid for this GPU or mode // NOVI: error: operands are not valid for this GPU or mode -// NOGFX9: error: operands are not valid for this GPU or mode +// NOGFX9: error: expected a 20-bit unsigned offset // NOGFX10: error: expected a 20-bit unsigned offset s_buffer_store_dword s10, s[92:95], 0x1FFFFF diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt @@ -9588,6 +9588,11 @@ # CHECK: s_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xc0,0x65,0x00,0x00,0x00] 0x41,0x01,0x00,0xc0,0x65,0x00,0x00,0x00 +# Make sure that raising the GFX9 soffset_en bit doesn't affect GFX8 +# decoding. 
+# CHECK: s_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xc0,0x65,0x00,0x00,0x00] +0x41,0x41,0x00,0xc0,0x65,0x00,0x00,0x00 + # CHECK: s_load_dword s5, s[2:3], flat_scratch_lo ; encoding: [0x41,0x01,0x00,0xc0,0x66,0x00,0x00,0x00] 0x41,0x01,0x00,0xc0,0x66,0x00,0x00,0x00 @@ -9621,6 +9626,11 @@ # CHECK: s_load_dword s5, s[2:3], 0x7ffff ; encoding: [0x41,0x01,0x02,0xc0,0xff,0xff,0x07,0x00] 0x41,0x01,0x02,0xc0,0xff,0xff,0x07,0x00 +# Make sure that raising the GFX9 soffset_en bit doesn't affect GFX8 +# decoding. +# CHECK: s_load_dword s5, s[2:3], 0x7ffff ; encoding: [0x41,0x01,0x02,0xc0,0xff,0xff,0x07,0x00] +0x41,0x41,0x02,0xc0,0xff,0xff,0x07,0x00 + # CHECK: s_load_dword s5, s[2:3], s2 glc ; encoding: [0x41,0x01,0x01,0xc0,0x02,0x00,0x00,0x00] 0x41,0x01,0x01,0xc0,0x02,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt @@ -10056,6 +10056,12 @@ # CHECK: s_load_dword s5, s[2:3], s101 ; encoding: [0x41,0x01,0x00,0xc0,0x65,0x00,0x00,0x00] 0x41,0x01,0x00,0xc0,0x65,0x00,0x00,0x00 +# The SGPR variants can alternatively be encoded with imm=0, soffset_en=1 +# and the offset register encoded in the soffset field with the offset +# field being disregarded. 
+# CHECK: s_load_dword s5, s[2:3], s64 ; encoding: [0x41,0x41,0x00,0xc0,0x00,0x00,0x00,0x80] +0x41,0x41,0x00,0xc0,0x65,0x00,0x00,0x80 + # CHECK: s_load_dword s5, s[2:3], flat_scratch_lo ; encoding: [0x41,0x01,0x00,0xc0,0x66,0x00,0x00,0x00] 0x41,0x01,0x00,0xc0,0x66,0x00,0x00,0x00 @@ -10074,6 +10080,14 @@ # CHECK: s_load_dword s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x02,0xc0,0x00,0x00,0x00,0x00] 0x41,0x01,0x02,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_load_dword s5, s[2:3], s0 offset:0x12345 ; encoding: [0x41,0x41,0x02,0xc0,0x45,0x23,0x01,0x00] +0x41,0x41,0x02,0xc0,0x45,0x23,0x01,0x00 + +# SP3 prefers to decode instructions with imm=1 and soffset_en=1 to the +# form with the 'offset:' modifier, even if the offset is 0. +# CHECK: s_load_dword s5, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x41,0x02,0xc0,0x00,0x00,0x00,0x00] +0x41,0x41,0x02,0xc0,0x00,0x00,0x00,0x00 + # CHECK: s_load_dword s5, s[2:3], s0 glc ; encoding: [0x41,0x01,0x01,0xc0,0x00,0x00,0x00,0x00] 0x41,0x01,0x01,0xc0,0x00,0x00,0x00,0x00 @@ -10125,6 +10139,9 @@ # CHECK: s_load_dwordx2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x06,0xc0,0x00,0x00,0x00,0x00] 0x81,0x02,0x06,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345 ; encoding: [0x81,0x42,0x06,0xc0,0x45,0x23,0x01,0x00] +0x81,0x42,0x06,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_load_dwordx2 s[10:11], s[2:3], s0 glc ; encoding: [0x81,0x02,0x05,0xc0,0x00,0x00,0x00,0x00] 0x81,0x02,0x05,0xc0,0x00,0x00,0x00,0x00 @@ -10170,6 +10187,9 @@ # CHECK: s_load_dwordx4 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x0a,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x0a,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345 ; encoding: [0x01,0x45,0x0a,0xc0,0x45,0x23,0x01,0x00] +0x01,0x45,0x0a,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_load_dwordx4 s[20:23], s[2:3], s0 glc ; encoding: [0x01,0x05,0x09,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x09,0xc0,0x00,0x00,0x00,0x00 @@ -10215,6 +10235,9 @@ # CHECK: s_load_dwordx8 s[20:27], s[2:3], 
0x0 ; encoding: [0x01,0x05,0x0e,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x0e,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_load_dwordx8 s[20:27], s[2:3], s0 offset:0x12345 ; encoding: [0x01,0x45,0x0e,0xc0,0x45,0x23,0x01,0x00] +0x01,0x45,0x0e,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_load_dwordx8 s[20:27], s[2:3], s0 glc ; encoding: [0x01,0x05,0x0d,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x0d,0xc0,0x00,0x00,0x00,0x00 @@ -10260,6 +10283,9 @@ # CHECK: s_load_dwordx16 s[20:35], s[2:3], 0x0 ; encoding: [0x01,0x05,0x12,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x12,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_load_dwordx16 s[20:35], s[2:3], s0 offset:0x12345 ; encoding: [0x01,0x45,0x12,0xc0,0x45,0x23,0x01,0x00] +0x01,0x45,0x12,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_load_dwordx16 s[20:35], s[2:3], s0 glc ; encoding: [0x01,0x05,0x11,0xc0,0x00,0x00,0x00,0x00] 0x01,0x05,0x11,0xc0,0x00,0x00,0x00,0x00 @@ -10308,6 +10334,9 @@ # CHECK: s_buffer_load_dword s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x22,0xc0,0x00,0x00,0x00,0x00] 0x42,0x01,0x22,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_buffer_load_dword s5, s[4:7], s0 offset:0x12345 ; encoding: [0x42,0x41,0x22,0xc0,0x45,0x23,0x01,0x00] +0x42,0x41,0x22,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_buffer_load_dword s5, s[4:7], s0 glc ; encoding: [0x42,0x01,0x21,0xc0,0x00,0x00,0x00,0x00] 0x42,0x01,0x21,0xc0,0x00,0x00,0x00,0x00 @@ -10353,6 +10382,9 @@ # CHECK: s_buffer_load_dwordx2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x26,0xc0,0x00,0x00,0x00,0x00] 0x82,0x02,0x26,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_buffer_load_dwordx2 s[10:11], s[4:7], s0 offset:0x12345 ; encoding: [0x82,0x42,0x26,0xc0,0x45,0x23,0x01,0x00] +0x82,0x42,0x26,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_buffer_load_dwordx2 s[10:11], s[4:7], s0 glc ; encoding: [0x82,0x02,0x25,0xc0,0x00,0x00,0x00,0x00] 0x82,0x02,0x25,0xc0,0x00,0x00,0x00,0x00 @@ -10392,6 +10424,9 @@ # CHECK: s_buffer_load_dwordx4 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x2a,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x2a,0xc0,0x00,0x00,0x00,0x00 +# CHECK: 
s_buffer_load_dwordx4 s[20:23], s[4:7], s0 offset:0x12345 ; encoding: [0x02,0x45,0x2a,0xc0,0x45,0x23,0x01,0x00] +0x02,0x45,0x2a,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_buffer_load_dwordx4 s[20:23], s[4:7], s0 glc ; encoding: [0x02,0x05,0x29,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x29,0xc0,0x00,0x00,0x00,0x00 @@ -10431,6 +10466,9 @@ # CHECK: s_buffer_load_dwordx8 s[20:27], s[4:7], 0x0 ; encoding: [0x02,0x05,0x2e,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x2e,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_buffer_load_dwordx8 s[20:27], s[4:7], s0 offset:0x12345 ; encoding: [0x02,0x45,0x2e,0xc0,0x45,0x23,0x01,0x00] +0x02,0x45,0x2e,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_buffer_load_dwordx8 s[20:27], s[4:7], s0 glc ; encoding: [0x02,0x05,0x2d,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x2d,0xc0,0x00,0x00,0x00,0x00 @@ -10470,6 +10508,9 @@ # CHECK: s_buffer_load_dwordx16 s[20:35], s[4:7], 0x0 ; encoding: [0x02,0x05,0x32,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x32,0xc0,0x00,0x00,0x00,0x00 +# CHECK: s_buffer_load_dwordx16 s[20:35], s[4:7], s0 offset:0x12345 ; encoding: [0x02,0x45,0x32,0xc0,0x45,0x23,0x01,0x00] +0x02,0x45,0x32,0xc0,0x45,0x23,0x01,0x00 + # CHECK: s_buffer_load_dwordx16 s[20:35], s[4:7], s0 glc ; encoding: [0x02,0x05,0x31,0xc0,0x00,0x00,0x00,0x00] 0x02,0x05,0x31,0xc0,0x00,0x00,0x00,0x00