diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -64,6 +64,8 @@ const MCSubtargetInfo &STI, raw_ostream &O); void printSMEMOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printSMEMOffsetMod(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -191,6 +191,13 @@ O << formatHex(MI->getOperand(OpNo).getImm()); } +void AMDGPUInstPrinter::printSMEMOffsetMod(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + O << " offset:"; + printSMEMOffset(MI, OpNo, STI, O); +} + void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4471,7 +4471,7 @@ if (MI.mayStore()) { // The register offset form of scalar stores may only use m0 as the // soffset register. - const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff); + const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soffset); if (Soff && Soff->getReg() != AMDGPU::M0) { ErrInfo = "scalar stores must use m0 as offset register"; return false; @@ -5314,7 +5314,7 @@ Register SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); SBase->setReg(SGPR); } - MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soff); + MachineOperand *SOff = getNamedOperand(MI, AMDGPU::OpName::soffset); if (SOff && !RI.isSGPRClass(MRI.getRegClass(SOff->getReg()))) { Register SGPR = readlaneVGPRToSGPR(SOff->getReg(), MI, MRI); SOff->setReg(SGPR); diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -11,13 +11,19 @@ let OperandType = "OPERAND_IMMEDIATE"; } -def smem_offset : NamedOperandU32<"SMEMOffset", - NamedMatchClass<"SMEMOffset">> { +class SMEMOffset : NamedOperandU32<"SMEMOffset", + NamedMatchClass<"SMEMOffset">> { let OperandType = "OPERAND_IMMEDIATE"; let EncoderMethod = "getSMEMOffsetEncoding"; let DecoderMethod = "decodeSMEMOffset"; } +def smem_offset : SMEMOffset; + +def smem_offset_mod : SMEMOffset { + let PrintMethod = "printSMEMOffsetMod"; +} + //===----------------------------------------------------------------------===// // Scalar Memory classes //===----------------------------------------------------------------------===// @@ -43,8 +49,8 @@ bits<1> has_sdst = 1; bit has_glc = 0; bit has_dlc = 0; - bits<1> has_offset = 1; - bits<1> offset_is_imm = 0; + bit has_offset = 0; + bit has_soffset = 0; bit is_buffer = 0; } @@ -77,19 +83,21 @@ bits<7> sbase; bits<7> sdst; bits<32> offset; - bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0); + bits<8> soffset; bits<5> cpol; } class SM_Probe_Pseudo - : SM_Pseudo { + : SM_Pseudo { let mayLoad = 0; let mayStore = 0; let has_glc = 0; let LGKM_CNT = 0; let ScalarStore = 0; let hasSideEffects = 1; - let offset_is_imm = isImm; + let has_offset = isImm; + let has_soffset = !not(isImm); let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR"); } @@ -114,14 +122,16 @@ } class SM_Discard_Pseudo - : SM_Pseudo { + : SM_Pseudo { let mayLoad = 0; let mayStore = 0; let has_glc = 0; let has_sdst = 0; let ScalarStore = 0; let hasSideEffects = 1; - let offset_is_imm = isImm; + let has_offset = isImm; + let has_soffset = !not(isImm); let PseudoInstr = opName # !if(isImm, "_IMM", "_SGPR"); } @@ -132,7 +142,7 @@ (outs dstClass:$sdst), (ins baseClass:$sbase, i32imm:$offset, CPol:$cpol), " $sdst, $sbase, $offset$cpol", []> { - let offset_is_imm = 1; + let has_offset = 1; let BaseClass = baseClass; let PseudoInstr = opName # "_IMM"; let has_glc = 1; @@ -141,13 +151,27 @@ def _SGPR : SM_Load_Pseudo { + (ins baseClass:$sbase, SReg_32:$soffset, CPol:$cpol), + " $sdst, $sbase, $soffset$cpol", []> { + let has_soffset = 1; let BaseClass = baseClass; let PseudoInstr = opName # "_SGPR"; let has_glc = 1; let has_dlc = 1; } + + def _SGPR_IMM : SM_Load_Pseudo { + let has_offset = 1; + let has_soffset = 1; + let BaseClass = baseClass; + let PseudoInstr = opName # "_SGPR_IMM"; + let has_glc = 1; + let has_dlc = 1; + } } multiclass SM_Pseudo_Stores { - let offset_is_imm = 1; + let has_offset = 1; let BaseClass = baseClass; let SrcClass = srcClass; let PseudoInstr = opName # "_IMM"; } def _SGPR : SM_Store_Pseudo { + (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPol:$cpol), + " $sdata, $sbase, $soffset$cpol", []> { + let has_soffset = 1; let BaseClass = baseClass; let SrcClass = srcClass; let PseudoInstr = opName # "_SGPR"; @@ -173,7 +198,7 @@ multiclass SM_Pseudo_Discards { def _IMM : SM_Discard_Pseudo ; - def _SGPR : SM_Discard_Pseudo ; + def _SGPR : SM_Discard_Pseudo ; } class SM_Time_Pseudo : SM_Pseudo< @@ -184,7 +209,6 @@ let mayStore = 0; let mayLoad = 0; let has_sbase = 0; - let has_offset = 0; } class SM_Inval_Pseudo : SM_Pseudo< @@ -193,12 +217,11 @@ let mayStore = 0; let has_sdst = 0; let has_sbase = 0; - let has_offset = 0; } multiclass SM_Pseudo_Probe { def _IMM : SM_Probe_Pseudo ; - def _SGPR : SM_Probe_Pseudo ; + def _SGPR : SM_Probe_Pseudo ; } class SM_WaveId_Pseudo : SM_Pseudo< @@ -208,7 +231,6 @@ let mayStore = 0; let mayLoad = 1; let has_sbase = 0; - let has_offset = 0; } //===----------------------------------------------------------------------===// @@ -225,6 +247,7 @@ let mayStore = 1; let has_glc = 1; let has_dlc = 1; + let has_soffset = 1; // Should these be set? let ScalarStore = 1; @@ -250,11 +273,13 @@ !if(isRet, (outs dataClass:$sdst), (outs)), !if(isImm, (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, CPolTy:$cpol), - (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, CPolTy:$cpol)), - !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset$cpol", + (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPolTy:$cpol)), + !if(isRet, " $sdst", " $sdata") # ", $sbase, " # + !if(isImm, "$offset", "$soffset") # "$cpol", isRet>, AtomicNoRet { - let offset_is_imm = isImm; + let has_offset = isImm; + let has_soffset = !not(isImm); let PseudoInstr = opNameWithSuffix; let Constraints = !if(isRet, "$sdst = $sdata", ""); @@ -452,8 +477,8 @@ let AssemblerPredicate = isGFX6GFX7; let DecoderNamespace = "GFX6GFX7"; - let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?); - let Inst{8} = imm; + let Inst{7-0} = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?)); + let Inst{8} = ps.has_offset; let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?); let Inst{26-22} = op; @@ -470,10 +495,8 @@ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol); } - // FIXME: The operand name $offset is inconsistent with $soff used - // in the pseudo def _SGPR_si : SMRD_Real_si { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); } } @@ -508,14 +531,14 @@ let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); - let Inst{17} = imm; + let Inst{17} = ps.has_offset; let Inst{25-18} = op; let Inst{31-26} = 0x30; //encoding // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed. // Offset value is corrected accordingly when offset is encoded/decoded. - let Inst{38-32} = !if(ps.has_offset, offset{6-0}, ?); - let Inst{52-39} = !if(ps.has_offset, !if(imm, offset{20-7}, ?), ?); + let Inst{38-32} = !if(ps.has_offset, offset{6-0}, !if(ps.has_soffset, soffset{6-0}, ?)); + let Inst{52-39} = !if(ps.has_offset, offset{20-7}, ?); } multiclass SM_Real_Loads_vi op, string ps, @@ -525,7 +548,7 @@ let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); } def _SGPR_vi : SMEM_Real_vi { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); } } @@ -540,14 +563,12 @@ multiclass SM_Real_Stores_vi op, string ps, SM_Store_Pseudo immPs = !cast(ps#_IMM), SM_Store_Pseudo sgprPs = !cast(ps#_SGPR)> { - // FIXME: The operand name $offset is inconsistent with $soff used - // in the pseudo def _IMM_vi : SMEM_Real_Store_vi { let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); } def _SGPR_vi : SMEM_Real_Store_vi { - let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); } } @@ -727,8 +748,8 @@ let AssemblerPredicate = isGFX7Only; let DecoderNamespace = "GFX7"; - let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?); - let Inst{8} = imm; + let Inst{7-0} = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?)); + let Inst{8} = ps.has_offset; let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?); let Inst{26-22} = op; @@ -887,9 +908,12 @@ let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); let Inst{25-18} = op; let Inst{31-26} = 0x3d; - let Inst{52-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{20-0}, ?), ?); - let Inst{63-57} = !if(ps.offset_is_imm, !cast(SGPR_NULL.HWEncoding), - !if(ps.has_offset, offset{6-0}, ?)); + + // There are SMEM instructions that do not employ any of the offset + // fields, in which case we need them to remain undefined. + let Inst{52-32} = !if(ps.has_offset, offset{20-0}, !if(ps.has_soffset, 0, ?)); + let Inst{63-57} = !if(ps.has_soffset, soffset{6-0}, + !if(ps.has_offset, !cast(SGPR_NULL.HWEncoding), ?)); } multiclass SM_Real_Loads_gfx10 op, string ps, @@ -899,7 +923,11 @@ let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); } def _SGPR_gfx10 : SMEM_Real_gfx10 { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); + } + def _SGPR_IMM_gfx10 : SMEM_Real_gfx10(ps#_SGPR_IMM)> { + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, + smem_offset_mod:$offset, CPol:$cpol); } } @@ -913,14 +941,12 @@ multiclass SM_Real_Stores_gfx10 op, string ps, SM_Store_Pseudo immPs = !cast(ps#_IMM), SM_Store_Pseudo sgprPs = !cast(ps#_SGPR)> { - // FIXME: The operand name $offset is inconsistent with $soff used - // in the pseudo def _IMM_gfx10 : SMEM_Real_Store_gfx10 { let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); } def _SGPR_gfx10 : SMEM_Real_Store_gfx10 { - let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); } } diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_smem.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_smem.s @@ -38,9 +38,15 @@ s_load_dword s5, s[2:3], m0 // GFX10: encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8] +s_load_dword s5, s[2:3], null +// GFX10: encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa] + s_load_dword s5, s[2:3], 0x0 // GFX10: encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa] +s_load_dword s5, s[2:3], s0 offset:0x12345 +// GFX10: encoding: [0x41,0x01,0x00,0xf4,0x45,0x23,0x01,0x00] + s_load_dword s5, s[2:3], s0 glc // GFX10: encoding: [0x41,0x01,0x01,0xf4,0x00,0x00,0x00,0x00] @@ -53,6 +59,9 @@ s_load_dword s5, s[2:3], 0x1234 glc dlc // GFX10: encoding: [0x41,0x41,0x01,0xf4,0x34,0x12,0x00,0xfa] +s_load_dword s5, s[2:3], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x41,0x41,0x01,0xf4,0x45,0x23,0x01,0x00] + s_load_dwordx2 s[10:11], s[2:3], s0 // GFX10: encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0x00] @@ -86,9 +95,15 @@ s_load_dwordx2 s[10:11], s[2:3], m0 // GFX10: encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xf8] +s_load_dwordx2 s[10:11], s[2:3], null +// GFX10: encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xfa] + s_load_dwordx2 s[10:11], s[2:3], 0x0 // GFX10: encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xfa] +s_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345 +// GFX10: encoding: [0x81,0x02,0x04,0xf4,0x45,0x23,0x01,0x00] + s_load_dwordx2 s[10:11], s[2:3], s0 glc // GFX10: encoding: [0x81,0x02,0x05,0xf4,0x00,0x00,0x00,0x00] @@ -101,6 +116,9 @@ s_load_dwordx2 s[10:11], s[2:3], 0x1234 glc dlc // GFX10: encoding: [0x81,0x42,0x05,0xf4,0x34,0x12,0x00,0xfa] +s_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x81,0x42,0x05,0xf4,0x45,0x23,0x01,0x00] + s_load_dwordx4 s[20:23], s[2:3], s0 // GFX10: encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0x00] @@ -131,9 +149,15 @@ s_load_dwordx4 s[20:23], s[2:3], m0 // GFX10: encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xf8] +s_load_dwordx4 s[20:23], s[2:3], null +// GFX10: encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xfa] + s_load_dwordx4 s[20:23], s[2:3], 0x0 // GFX10: encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xfa] +s_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345 +// GFX10: encoding: [0x01,0x05,0x08,0xf4,0x45,0x23,0x01,0x00] + s_load_dwordx4 s[20:23], s[2:3], s0 glc // GFX10: encoding: [0x01,0x05,0x09,0xf4,0x00,0x00,0x00,0x00] @@ -146,6 +170,9 @@ s_load_dwordx4 s[20:23], s[2:3], 0x1234 glc dlc // GFX10: encoding: [0x01,0x45,0x09,0xf4,0x34,0x12,0x00,0xfa] +s_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x01,0x45,0x09,0xf4,0x45,0x23,0x01,0x00] + s_load_dwordx8 s[20:27], s[2:3], s0 // GFX10: encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0x00] @@ -176,9 +203,15 @@ s_load_dwordx8 s[20:27], s[2:3], m0 // GFX10: encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xf8] +s_load_dwordx8 s[20:27], s[2:3], null +// GFX10: encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xfa] + s_load_dwordx8 s[20:27], s[2:3], 0x0 // GFX10: encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xfa] +s_load_dwordx8 s[20:27], s[2:3], s0 offset:0x12345 +// GFX10: encoding: [0x01,0x05,0x0c,0xf4,0x45,0x23,0x01,0x00] + s_load_dwordx8 s[20:27], s[2:3], s0 glc // GFX10: encoding: [0x01,0x05,0x0d,0xf4,0x00,0x00,0x00,0x00] @@ -191,6 +224,9 @@ s_load_dwordx8 s[20:27], s[2:3], 0x1234 glc dlc // GFX10: encoding: [0x01,0x45,0x0d,0xf4,0x34,0x12,0x00,0xfa] +s_load_dwordx8 s[20:27], s[2:3], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x01,0x45,0x0d,0xf4,0x45,0x23,0x01,0x00] + s_load_dwordx16 s[20:35], s[2:3], s0 // GFX10: encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0x00] @@ -221,9 +257,15 @@ s_load_dwordx16 s[20:35], s[2:3], m0 // GFX10: encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xf8] +s_load_dwordx16 s[20:35], s[2:3], null +// GFX10: encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xfa] + s_load_dwordx16 s[20:35], s[2:3], 0x0 // GFX10: encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xfa] +s_load_dwordx16 s[20:35], s[2:3], s0 offset:0x12345 +// GFX10: encoding: [0x01,0x05,0x10,0xf4,0x45,0x23,0x01,0x00] + s_load_dwordx16 s[20:35], s[2:3], s0 glc // GFX10: encoding: [0x01,0x05,0x11,0xf4,0x00,0x00,0x00,0x00] @@ -236,6 +278,9 @@ s_load_dwordx16 s[20:35], s[2:3], 0x1234 glc dlc // GFX10: encoding: [0x01,0x45,0x11,0xf4,0x34,0x12,0x00,0xfa] +s_load_dwordx16 s[20:35], s[2:3], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x01,0x45,0x11,0xf4,0x45,0x23,0x01,0x00] + s_buffer_load_dword s5, s[4:7], s0 // GFX10: encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0x00] @@ -266,9 +311,15 @@ s_buffer_load_dword s5, s[4:7], m0 // GFX10: encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xf8] +s_buffer_load_dword s5, s[4:7], null +// GFX10: encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xfa] + s_buffer_load_dword s5, s[4:7], 0x0 // GFX10: encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xfa] +s_buffer_load_dword s5, s[4:7], s0 offset:0x12345 +// GFX10: encoding: [0x42,0x01,0x20,0xf4,0x45,0x23,0x01,0x00] + s_buffer_load_dword s5, s[4:7], s0 glc // GFX10: encoding: [0x42,0x01,0x21,0xf4,0x00,0x00,0x00,0x00] @@ -281,6 +332,9 @@ s_buffer_load_dword s5, s[4:7], 0x1234 glc dlc // GFX10: encoding: [0x42,0x41,0x21,0xf4,0x34,0x12,0x00,0xfa] +s_buffer_load_dword s5, s[4:7], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x42,0x41,0x21,0xf4,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx2 s[10:11], s[4:7], s0 // GFX10: encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0x00] @@ -311,9 +365,15 @@ s_buffer_load_dwordx2 s[10:11], s[4:7], m0 // GFX10: encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xf8] +s_buffer_load_dwordx2 s[10:11], s[4:7], null +// GFX10: encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xfa] + s_buffer_load_dwordx2 s[10:11], s[4:7], 0x0 // GFX10: encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xfa] +s_buffer_load_dwordx2 s[10:11], s[4:7], s0 offset:0x12345 +// GFX10: encoding: [0x82,0x02,0x24,0xf4,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx2 s[10:11], s[4:7], s0 glc // GFX10: encoding: [0x82,0x02,0x25,0xf4,0x00,0x00,0x00,0x00] @@ -326,6 +386,9 @@ s_buffer_load_dwordx2 s[10:11], s[4:7], 0x1234 glc dlc // GFX10: encoding: [0x82,0x42,0x25,0xf4,0x34,0x12,0x00,0xfa] +s_buffer_load_dwordx2 s[10:11], s[4:7], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x82,0x42,0x25,0xf4,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx4 s[20:23], s[4:7], s0 // GFX10: encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0x00] @@ -353,9 +416,15 @@ s_buffer_load_dwordx4 s[20:23], s[4:7], m0 // GFX10: encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xf8] +s_buffer_load_dwordx4 s[20:23], s[4:7], null +// GFX10: encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xfa] + s_buffer_load_dwordx4 s[20:23], s[4:7], 0x0 // GFX10: encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xfa] +s_buffer_load_dwordx4 s[20:23], s[4:7], s0 offset:0x12345 +// GFX10: encoding: [0x02,0x05,0x28,0xf4,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx4 s[20:23], s[4:7], s0 glc // GFX10: encoding: [0x02,0x05,0x29,0xf4,0x00,0x00,0x00,0x00] @@ -368,6 +437,9 @@ s_buffer_load_dwordx4 s[20:23], s[4:7], 0x1234 glc dlc // GFX10: encoding: [0x02,0x45,0x29,0xf4,0x34,0x12,0x00,0xfa] +s_buffer_load_dwordx4 s[20:23], s[4:7], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x02,0x45,0x29,0xf4,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx8 s[20:27], s[4:7], s0 // GFX10: encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0x00] @@ -395,9 +467,15 @@ s_buffer_load_dwordx8 s[20:27], s[4:7], m0 // GFX10: encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xf8] +s_buffer_load_dwordx8 s[20:27], s[4:7], null +// GFX10: encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xfa] + s_buffer_load_dwordx8 s[20:27], s[4:7], 0x0 // GFX10: encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xfa] +s_buffer_load_dwordx8 s[20:27], s[4:7], s0 offset:0x12345 +// GFX10: encoding: [0x02,0x05,0x2c,0xf4,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx8 s[20:27], s[4:7], s0 glc // GFX10: encoding: [0x02,0x05,0x2d,0xf4,0x00,0x00,0x00,0x00] @@ -410,6 +488,9 @@ s_buffer_load_dwordx8 s[20:27], s[4:7], 0x1234 glc dlc // GFX10: encoding: [0x02,0x45,0x2d,0xf4,0x34,0x12,0x00,0xfa] +s_buffer_load_dwordx8 s[20:27], s[4:7], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x02,0x45,0x2d,0xf4,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx16 s[20:35], s[4:7], s0 // GFX10: encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0x00] @@ -437,9 +518,15 @@ s_buffer_load_dwordx16 s[20:35], s[4:7], m0 // GFX10: encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xf8] +s_buffer_load_dwordx16 s[20:35], s[4:7], null +// GFX10: encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xfa] + s_buffer_load_dwordx16 s[20:35], s[4:7], 0x0 // GFX10: encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xfa] +s_buffer_load_dwordx16 s[20:35], s[4:7], s0 offset:0x12345 +// GFX10: encoding: [0x02,0x05,0x30,0xf4,0x45,0x23,0x01,0x00] + s_buffer_load_dwordx16 s[20:35], s[4:7], s0 glc // GFX10: encoding: [0x02,0x05,0x31,0xf4,0x00,0x00,0x00,0x00] @@ -452,6 +539,9 @@ s_buffer_load_dwordx16 s[20:35], s[4:7], 0x1234 glc dlc // GFX10: encoding: [0x02,0x45,0x31,0xf4,0x34,0x12,0x00,0xfa] +s_buffer_load_dwordx16 s[20:35], s[4:7], s0 offset:0x12345 glc dlc +// GFX10: encoding: [0x02,0x45,0x31,0xf4,0x45,0x23,0x01,0x00] + s_store_dword s1, s[4:5], s0 // GFX10: encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0x00] @@ -485,6 +575,9 @@ s_store_dword s1, s[4:5], m0 // GFX10: encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8] +s_store_dword s1, s[4:5], null +// GFX10: encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xfa] + s_store_dword s1, s[4:5], 0x0 // GFX10: encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xfa] @@ -533,6 +626,9 @@ s_store_dwordx2 s[2:3], s[4:5], m0 // GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8] +s_store_dwordx2 s[2:3], s[4:5], null +// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xfa] + s_store_dwordx2 s[2:3], s[4:5], 0x0 // GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xfa] @@ -578,6 +674,9 @@ s_store_dwordx4 s[4:7], s[4:5], m0 // GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8] +s_store_dwordx4 s[4:7], s[4:5], null +// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xfa] + s_store_dwordx4 s[4:7], s[4:5], 0x0 // GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xfa] @@ -623,6 +722,9 @@ s_buffer_store_dword s1, s[8:11], m0 // GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8] +s_buffer_store_dword s1, s[8:11], null +// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xfa] + s_buffer_store_dword s1, s[8:11], 0x0 // GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xfa] @@ -668,6 +770,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], m0 // GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8] +s_buffer_store_dwordx2 s[2:3], s[8:11], null +// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xfa] + s_buffer_store_dwordx2 s[2:3], s[8:11], 0x0 // GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xfa] @@ -710,6 +815,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], m0 // GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8] +s_buffer_store_dwordx4 s[4:7], s[8:11], null +// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xfa] + s_buffer_store_dwordx4 s[4:7], s[8:11], 0x0 // GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xfa] diff --git a/llvm/test/MC/AMDGPU/smem.s b/llvm/test/MC/AMDGPU/smem.s --- a/llvm/test/MC/AMDGPU/smem.s +++ b/llvm/test/MC/AMDGPU/smem.s @@ -9,9 +9,9 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck --check-prefixes=NOSICI,NOSICIGFX10,NOSICIGFX1030,NOSICIVIGFX1030 --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=kaveri %s 2>&1 | FileCheck --check-prefixes=NOSICI,NOSICIGFX10,NOSICIGFX1030,NOSICIVIGFX1030 --implicit-check-not=error: %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck --check-prefixes=NOVI,NOSICIVIGFX1030 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefixes=NOGFX9,NOGFX9GFX1012 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=NOSICIGFX10,NOGFX9,NOGFX9GFX1012 --implicit-check-not=error: %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck --check-prefixes=NOSICIGFX1030,NOSICIVIGFX1030,NOSICIGFX10,NOGFX9,NOGFX1030 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefixes=NOGFX9GFX10,NOGFX9GFX1012,NOGFX9 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=NOSICIGFX10,NOGFX9GFX10,NOGFX9GFX1012,NOGFX10 --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1030 %s 2>&1 | FileCheck --check-prefixes=NOSICIGFX1030,NOSICIVIGFX1030,NOSICIGFX10,NOGFX9GFX10,NOGFX1030,NOGFX10 --implicit-check-not=error: %s s_dcache_wb // GFX89: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] @@ -50,12 +50,12 @@ s_memrealtime tba // VI: s_memrealtime tba ; encoding: [0x00,0x1b,0x94,0xc0,0x00,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX10: error: register not available on this GPU s_memrealtime tma // VI: s_memrealtime tma ; encoding: [0x80,0x1b,0x94,0xc0,0x00,0x00,0x00,0x00] // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX10: error: register not available on this GPU s_memrealtime ttmp[0:1] // VI: s_memrealtime ttmp[0:1] ; encoding: [0x00,0x1c,0x94,0xc0,0x00,0x00,0x00,0x00] @@ -177,22 +177,22 @@ s_buffer_load_dword tba_lo, s[92:95], m0 // VI: s_buffer_load_dword tba_lo, s[92:95], m0 ; encoding: [0x2e,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dword tba_lo, s[92:95], m0 ; encoding: [0x7c,0x5c,0x36,0xc2] -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX10: error: register not available on this GPU s_buffer_load_dword tba_hi, s[92:95], m0 // VI: s_buffer_load_dword tba_hi, s[92:95], m0 ; encoding: [0x6e,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dword tba_hi, s[92:95], m0 ; encoding: [0x7c,0xdc,0x36,0xc2] -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX10: error: register not available on this GPU s_buffer_load_dword tma_lo, s[92:95], m0 // VI: s_buffer_load_dword tma_lo, s[92:95], m0 ; encoding: [0xae,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dword tma_lo, s[92:95], m0 ; encoding: [0x7c,0x5c,0x37,0xc2] -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX10: error: register not available on this GPU s_buffer_load_dword tma_hi, s[92:95], m0 // VI: s_buffer_load_dword tma_hi, s[92:95], m0 ; encoding: [0xee,0x1b,0x20,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dword tma_hi, s[92:95], m0 ; encoding: [0x7c,0xdc,0x37,0xc2] -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX10: error: register not available on this GPU s_buffer_load_dword ttmp0, s[92:95], m0 // VI: s_buffer_load_dword ttmp0, s[92:95], m0 ; encoding: [0x2e,0x1c,0x20,0xc0,0x7c,0x00,0x00,0x00] @@ -208,12 +208,12 @@ s_buffer_load_dwordx2 tba, s[92:95], m0 // VI: s_buffer_load_dwordx2 tba, s[92:95], m0 ; encoding: [0x2e,0x1b,0x24,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dwordx2 tba, s[92:95], m0 ; encoding: [0x7c,0x5c,0x76,0xc2] -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX10: error: register not available on this GPU s_buffer_load_dwordx2 tma, s[92:95], m0 // VI: s_buffer_load_dwordx2 tma, s[92:95], m0 ; encoding: [0xae,0x1b,0x24,0xc0,0x7c,0x00,0x00,0x00] // SICI: s_buffer_load_dwordx2 tma, s[92:95], m0 ; encoding: [0x7c,0x5c,0x77,0xc2] -// NOGFX9: error: register not available on this GPU +// NOGFX9GFX10: error: register not available on this GPU s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 // VI: s_buffer_load_dwordx2 ttmp[0:1], s[92:95], m0 ; encoding: [0x2e,0x1c,0x24,0xc0,0x7c,0x00,0x00,0x00] @@ -664,19 +664,31 @@ s_atc_probe 0x7, s[4:5], 0x1FFFFF // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: expected a 21-bit signed offset +// NOGFX9GFX10: error: expected a 21-bit signed offset // NOVI: error: expected a 20-bit unsigned offset s_atc_probe_buffer 0x1, s[8:11], 0x1FFFFF // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: expected a 20-bit unsigned offset +// NOGFX9GFX10: error: expected a 20-bit unsigned offset // NOVI: error: expected a 20-bit unsigned offset +s_load_dword s1, s[2:3], s0 offset:0x1FFFFF +// NOSICI: error: operands are not valid for this GPU or mode +// NOVI: error: operands are not valid for this GPU or mode +// NOGFX9: error: operands are not valid for this GPU or mode +// NOGFX10: error: expected a 21-bit signed offset + s_store_dword s1, s[2:3], 0x1FFFFF // NOSICIGFX1030: error: instruction not supported on this GPU // NOGFX9GFX1012: error: expected a 21-bit signed offset // NOVI: error: expected a 20-bit unsigned offset +s_buffer_load_dword s10, s[92:95], s0 offset:-1 +// NOSICI: error: operands are not valid for this GPU or mode +// NOVI: error: operands are not valid for this GPU or mode +// NOGFX9: error: operands are not valid for this GPU or mode +// NOGFX10: error: expected a 20-bit unsigned offset + s_buffer_store_dword s10, s[92:95], 0x1FFFFF // NOSICIGFX1030: error: instruction not supported on this GPU // NOGFX9GFX1012: error: expected a 20-bit unsigned offset @@ -703,7 +715,7 @@ s_atc_probe_buffer 0x1, s[8:11], -1 // NOVI: error: expected a 20-bit unsigned offset // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: expected a 20-bit unsigned offset +// NOGFX9GFX10: error: expected a 20-bit unsigned offset s_store_dword s1, s[2:3], -1 // NOVI: error: expected a 20-bit unsigned offset @@ -725,7 +737,7 @@ s_buffer_load_dword s10, s[92:95], -1 // NOVI: error: expected a 20-bit unsigned offset // NOSICI: error: operands are not valid for this GPU or mode -// NOGFX9: error: expected a 20-bit unsigned offset +// NOGFX9GFX10: error: expected a 20-bit unsigned offset s_atomic_swap s5, s[2:3], -1 // NOVI: error: instruction not supported on this GPU @@ -746,7 +758,7 @@ s_atc_probe_buffer 0x1, s[8:11], 0xFFFFFFFFFFF00000 // NOSICI: error: instruction not supported on this GPU -// NOGFX9: error: expected a 20-bit unsigned offset +// NOGFX9GFX10: error: expected a 20-bit unsigned offset // NOVI: error: expected a 20-bit unsigned offset s_store_dword s1, s[2:3], 0xFFFFFFFFFFF00000 @@ -768,7 +780,7 @@ s_buffer_load_dword s10, s[92:95], 0xFFFFFFFFFFF00000 // NOSICI: error: operands are not valid for this GPU or mode -// NOGFX9: error: expected a 20-bit unsigned offset +// NOGFX9GFX10: error: expected a 20-bit unsigned offset // NOVI: error: expected a 20-bit unsigned offset s_atomic_swap s5, s[2:3], 0xFFFFFFFFFFF00000 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -11354,12 +11354,15 @@ # GFX10: s_buffer_load_dword s101, s[4:7], s0 ; encoding: [0x42,0x19,0x20,0xf4,0x00,0x00,0x00,0x00] 0x42,0x19,0x20,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_buffer_load_dword s5, s[4:7], 0x0 ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_buffer_load_dword s5, s[4:7], null ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xfa] 0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_buffer_load_dword s5, s[4:7], 0x1234 glc dlc ; encoding: [0x42,0x41,0x21,0xf4,0x34,0x12,0x00,0xfa] 0x42,0x41,0x21,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_buffer_load_dword s5, s[4:7], s0 offset:0x12345 glc dlc ; encoding: [0x42,0x41,0x21,0xf4,0x45,0x23,0x01,0x00] +0x42,0x41,0x21,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_buffer_load_dword s5, s[4:7], m0 ; encoding: [0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xf8] 0x42,0x01,0x20,0xf4,0x00,0x00,0x00,0xf8 @@ -11396,12 +11399,15 @@ # GFX10: s_buffer_load_dword vcc_lo, s[4:7], s0 ; encoding: [0x82,0x1a,0x20,0xf4,0x00,0x00,0x00,0x00] 0x82,0x1a,0x20,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_buffer_load_dwordx16 s[20:35], s[4:7], 0x0 ; encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_buffer_load_dwordx16 s[20:35], s[4:7], null ; encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xfa] 0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_buffer_load_dwordx16 s[20:35], s[4:7], 0x1234 glc dlc ; encoding: [0x02,0x45,0x31,0xf4,0x34,0x12,0x00,0xfa] 0x02,0x45,0x31,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_buffer_load_dwordx16 s[20:35], s[4:7], s0 offset:0x12345 glc dlc ; encoding: [0x02,0x45,0x31,0xf4,0x45,0x23,0x01,0x00] +0x02,0x45,0x31,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_buffer_load_dwordx16 s[20:35], s[4:7], m0 ; encoding: [0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xf8] 0x02,0x05,0x30,0xf4,0x00,0x00,0x00,0xf8 @@ -11441,12 +11447,15 @@ # GFX10: s_buffer_load_dwordx2 s[100:101], s[4:7], s0 ; encoding: [0x02,0x19,0x24,0xf4,0x00,0x00,0x00,0x00] 0x02,0x19,0x24,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_buffer_load_dwordx2 s[10:11], s[4:7], 0x0 ; encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_buffer_load_dwordx2 s[10:11], s[4:7], null ; encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xfa] 0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_buffer_load_dwordx2 s[10:11], s[4:7], 0x1234 glc dlc ; encoding: [0x82,0x42,0x25,0xf4,0x34,0x12,0x00,0xfa] 0x82,0x42,0x25,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_buffer_load_dwordx2 s[10:11], s[4:7], s0 offset:0x12345 glc dlc ; encoding: [0x82,0x42,0x25,0xf4,0x45,0x23,0x01,0x00] +0x82,0x42,0x25,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_buffer_load_dwordx2 s[10:11], s[4:7], m0 ; encoding: [0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xf8] 0x82,0x02,0x24,0xf4,0x00,0x00,0x00,0xf8 @@ -11483,12 +11492,15 @@ # GFX10: s_buffer_load_dwordx2 vcc, s[4:7], s0 ; encoding: [0x82,0x1a,0x24,0xf4,0x00,0x00,0x00,0x00] 0x82,0x1a,0x24,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_buffer_load_dwordx4 s[20:23], s[4:7], 0x0 ; encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_buffer_load_dwordx4 s[20:23], s[4:7], null ; encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xfa] 0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_buffer_load_dwordx4 s[20:23], s[4:7], 0x1234 glc dlc ; encoding: [0x02,0x45,0x29,0xf4,0x34,0x12,0x00,0xfa] 0x02,0x45,0x29,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_buffer_load_dwordx4 s[20:23], s[4:7], s0 offset:0x12345 glc dlc ; encoding: [0x02,0x45,0x29,0xf4,0x45,0x23,0x01,0x00] +0x02,0x45,0x29,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_buffer_load_dwordx4 s[20:23], s[4:7], m0 ; encoding: [0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xf8] 0x02,0x05,0x28,0xf4,0x00,0x00,0x00,0xf8 @@ -11525,12 +11537,15 @@ # GFX10: s_buffer_load_dwordx4 s[96:99], s[4:7], s0 ; encoding: [0x02,0x18,0x28,0xf4,0x00,0x00,0x00,0x00] 0x02,0x18,0x28,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_buffer_load_dwordx8 s[20:27], s[4:7], 0x0 ; encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_buffer_load_dwordx8 s[20:27], s[4:7], null ; encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xfa] 0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_buffer_load_dwordx8 s[20:27], s[4:7], 0x1234 glc dlc ; encoding: [0x02,0x45,0x2d,0xf4,0x34,0x12,0x00,0xfa] 0x02,0x45,0x2d,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_buffer_load_dwordx8 s[20:27], s[4:7], s0 offset:0x12345 glc dlc ; encoding: [0x02,0x45,0x2d,0xf4,0x45,0x23,0x01,0x00] +0x02,0x45,0x2d,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_buffer_load_dwordx8 s[20:27], s[4:7], m0 ; encoding: [0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xf8] 0x02,0x05,0x2c,0xf4,0x00,0x00,0x00,0xf8 @@ -11570,7 +11585,7 @@ # GFX10: s_buffer_store_dword s1, s[12:15], s0 ; encoding: [0x46,0x00,0x60,0xf4,0x00,0x00,0x00,0x00] 0x46,0x00,0x60,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_buffer_store_dword s1, s[8:11], 0x0 ; encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_buffer_store_dword s1, s[8:11], null ; encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xfa] 0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa] @@ -11618,7 +11633,7 @@ # GFX10: s_buffer_store_dwordx2 s[2:3], s[12:15], s0 ; encoding: [0x86,0x00,0x64,0xf4,0x00,0x00,0x00,0x00] 0x86,0x00,0x64,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], 0x0 ; encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], null ; encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xfa] 0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa] @@ -11660,7 +11675,7 @@ # GFX10: s_buffer_store_dwordx4 s[4:7], s[12:15], s0 ; encoding: [0x06,0x01,0x68,0xf4,0x00,0x00,0x00,0x00] 0x06,0x01,0x68,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], 0x0 ; encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], null ; encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xfa] 0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa] @@ -14159,12 +14174,15 @@ # GFX10: s_load_dword s5, s[100:101], s0 ; encoding: [0x72,0x01,0x00,0xf4,0x00,0x00,0x00,0x00] 0x72,0x01,0x00,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_load_dword s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_load_dword s5, s[2:3], null ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa] 0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_load_dword s5, s[2:3], 0x1234 glc dlc ; encoding: [0x41,0x41,0x01,0xf4,0x34,0x12,0x00,0xfa] 0x41,0x41,0x01,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_load_dword s5, s[2:3], s0 offset:0x12345 glc dlc ; encoding: [0x41,0x41,0x01,0xf4,0x45,0x23,0x01,0x00] +0x41,0x41,0x01,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_load_dword s5, s[2:3], m0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8] 0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8 @@ -14204,12 +14222,15 @@ # GFX10: s_load_dwordx16 s[20:35], s[100:101], s0 ; encoding: [0x32,0x05,0x10,0xf4,0x00,0x00,0x00,0x00] 0x32,0x05,0x10,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_load_dwordx16 s[20:35], s[2:3], 0x0 ; encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_load_dwordx16 s[20:35], s[2:3], null ; encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xfa] 0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_load_dwordx16 s[20:35], s[2:3], 0x1234 glc dlc ; encoding: [0x01,0x45,0x11,0xf4,0x34,0x12,0x00,0xfa] 0x01,0x45,0x11,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_load_dwordx16 s[20:35], s[2:3], s0 offset:0x12345 glc dlc ; encoding: [0x01,0x45,0x11,0xf4,0x45,0x23,0x01,0x00] +0x01,0x45,0x11,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_load_dwordx16 s[20:35], s[2:3], m0 ; encoding: [0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xf8] 0x01,0x05,0x10,0xf4,0x00,0x00,0x00,0xf8 @@ -14252,12 +14273,15 @@ # GFX10: s_load_dwordx2 s[10:11], s[100:101], s0 ; encoding: [0xb2,0x02,0x04,0xf4,0x00,0x00,0x00,0x00] 0xb2,0x02,0x04,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_load_dwordx2 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_load_dwordx2 s[10:11], s[2:3], null ; encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xfa] 0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_load_dwordx2 s[10:11], s[2:3], 0x1234 glc dlc ; encoding: [0x81,0x42,0x05,0xf4,0x34,0x12,0x00,0xfa] 0x81,0x42,0x05,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_load_dwordx2 s[10:11], s[2:3], s0 offset:0x12345 glc dlc ; encoding: [0x81,0x42,0x05,0xf4,0x45,0x23,0x01,0x00] +0x81,0x42,0x05,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_load_dwordx2 s[10:11], s[2:3], m0 ; encoding: [0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xf8] 0x81,0x02,0x04,0xf4,0x00,0x00,0x00,0xf8 @@ -14297,12 +14321,15 @@ # GFX10: s_load_dwordx4 s[20:23], s[100:101], s0 ; encoding: [0x32,0x05,0x08,0xf4,0x00,0x00,0x00,0x00] 0x32,0x05,0x08,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_load_dwordx4 s[20:23], s[2:3], 0x0 ; encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_load_dwordx4 s[20:23], s[2:3], null ; encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xfa] 0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_load_dwordx4 s[20:23], s[2:3], 0x1234 glc dlc ; encoding: [0x01,0x45,0x09,0xf4,0x34,0x12,0x00,0xfa] 0x01,0x45,0x09,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_load_dwordx4 s[20:23], s[2:3], s0 offset:0x12345 glc dlc ; encoding: [0x01,0x45,0x09,0xf4,0x45,0x23,0x01,0x00] +0x01,0x45,0x09,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_load_dwordx4 s[20:23], s[2:3], m0 ; encoding: [0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xf8] 0x01,0x05,0x08,0xf4,0x00,0x00,0x00,0xf8 @@ -14342,12 +14369,15 @@ # GFX10: s_load_dwordx8 s[20:27], s[100:101], s0 ; encoding: [0x32,0x05,0x0c,0xf4,0x00,0x00,0x00,0x00] 0x32,0x05,0x0c,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_load_dwordx8 s[20:27], s[2:3], 0x0 ; encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_load_dwordx8 s[20:27], s[2:3], null ; encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xfa] 0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_load_dwordx8 s[20:27], s[2:3], 0x1234 glc dlc ; encoding: [0x01,0x45,0x0d,0xf4,0x34,0x12,0x00,0xfa] 0x01,0x45,0x0d,0xf4,0x34,0x12,0x00,0xfa +# GFX10: s_load_dwordx8 s[20:27], s[2:3], s0 offset:0x12345 glc dlc ; encoding: [0x01,0x45,0x0d,0xf4,0x45,0x23,0x01,0x00] +0x01,0x45,0x0d,0xf4,0x45,0x23,0x01,0x00 + # GFX10: s_load_dwordx8 s[20:27], s[2:3], m0 ; encoding: [0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xf8] 0x01,0x05,0x0c,0xf4,0x00,0x00,0x00,0xf8 @@ -18287,7 +18317,7 @@ # GFX10: s_store_dword s1, s[100:101], s0 ; encoding: [0x72,0x00,0x40,0xf4,0x00,0x00,0x00,0x00] 0x72,0x00,0x40,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_store_dword s1, s[4:5], 0x0 ; encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_store_dword s1, s[4:5], null ; encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xfa] 0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_store_dword s1, s[4:5], 0x1234 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa] @@ -18338,7 +18368,7 @@ # GFX10: s_store_dwordx2 s[2:3], s[100:101], s0 ; encoding: [0xb2,0x00,0x44,0xf4,0x00,0x00,0x00,0x00] 0xb2,0x00,0x44,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_store_dwordx2 s[2:3], s[4:5], 0x0 ; encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_store_dwordx2 s[2:3], s[4:5], null ; encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xfa] 0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa] @@ -18383,7 +18413,7 @@ # GFX10: s_store_dwordx4 s[4:7], s[100:101], s0 ; encoding: [0x32,0x01,0x48,0xf4,0x00,0x00,0x00,0x00] 0x32,0x01,0x48,0xf4,0x00,0x00,0x00,0x00 -# GFX10: s_store_dwordx4 s[4:7], s[4:5], 0x0 ; encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xfa] +# GFX10: s_store_dwordx4 s[4:7], s[4:5], null ; encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xfa] 0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xfa # GFX10: s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa]