diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -494,6 +494,12 @@ "Support NSA encoding for image instructions" >; +def FeaturePartialNSAEncoding : SubtargetFeature<"partial-nsa-encoding", + "HasPartialNSAEncoding", + "true", + "Support partial NSA encoding for image instructions" +>; + def FeatureImageInsts : SubtargetFeature<"image-insts", "HasImageInsts", "true", @@ -724,15 +730,6 @@ "GFX11 with 50% more physical VGPRs and 50% larger allocation granule than GFX10" >; -class SubtargetFeatureNSAMaxSize : SubtargetFeature < - "nsa-max-size-"#Value, - "NSAMaxSize", - !cast(Value), - "The maximum non-sequential address size in VGPRs." ->; - -def FeatureNSAMaxSize5 : SubtargetFeatureNSAMaxSize<5>; -def FeatureNSAMaxSize13 : SubtargetFeatureNSAMaxSize<13>; def FeatureVOPD : SubtargetFeature<"vopd", "HasVOPDInsts", @@ -1227,7 +1224,6 @@ FeatureLDSBankCount32, FeatureDLInsts, FeatureNSAEncoding, - FeatureNSAMaxSize5, FeatureWavefrontSize32, FeatureScalarStores, FeatureScalarAtomics, @@ -1251,7 +1247,6 @@ FeatureDot7Insts, FeatureDot10Insts, FeatureNSAEncoding, - FeatureNSAMaxSize5, FeatureWavefrontSize32, FeatureScalarStores, FeatureScalarAtomics, @@ -1275,7 +1270,6 @@ FeatureDot7Insts, FeatureDot10Insts, FeatureNSAEncoding, - FeatureNSAMaxSize5, FeatureWavefrontSize32, FeatureScalarStores, FeatureScalarAtomics, @@ -1294,7 +1288,6 @@ FeatureLDSBankCount32, FeatureDLInsts, FeatureNSAEncoding, - FeatureNSAMaxSize5, FeatureWavefrontSize32, FeatureScalarStores, FeatureScalarAtomics, @@ -1320,7 +1313,6 @@ FeatureDot7Insts, FeatureDot10Insts, FeatureNSAEncoding, - FeatureNSAMaxSize13, FeatureWavefrontSize32, FeatureShaderCyclesRegister, FeatureBackOffBarrier]>; @@ -1335,7 +1327,7 @@ FeatureDot9Insts, FeatureDot10Insts, FeatureNSAEncoding, - FeatureNSAMaxSize5, + FeaturePartialNSAEncoding, FeatureWavefrontSize32, FeatureShaderCyclesRegister, FeatureArchitectedFlatScratch, diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1483,6 +1483,14 @@ return getFeatureBits()[AMDGPU::FeatureIntClamp]; } + bool hasPartialNSAEncoding() const { + return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding]; + } + + unsigned getNSAMaxSize() const { + return AMDGPU::getNSAMaxSize(getSTI()); + } + AMDGPUTargetStreamer &getTargetStreamer() { MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast(TS); @@ -3683,7 +3691,15 @@ unsigned ExpectedAddrSize = AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); - if (!IsNSA) { + if (IsNSA) { + if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) { + int VAddrLastIdx = SrsrcIdx - 1; + unsigned VAddrLastSize = + AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4; + + return VAddrLastIdx - VAddr0Idx + VAddrLastSize == ExpectedAddrSize; + } + } else { if (ExpectedAddrSize > 12) ExpectedAddrSize = 16; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -860,6 +860,8 @@ AMDGPU::OpName::vdata); int VAddr0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); + int RsrcIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dmask); @@ -883,6 +885,7 @@ bool IsAtomic = (VDstIdx != -1); bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4; bool IsNSA = false; + bool IsPartialNSA = false; unsigned AddrSize = Info->VAddrDwords; if (isGFX10Plus()) { @@ -904,9 +907,12 @@ AddrSize = 16; } else { if (AddrSize > Info->VAddrDwords) { - // The NSA encoding does not contain enough operands for the combination - // of base opcode / dimension. Should this be an error? - return MCDisassembler::Success; + if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) { + // The NSA encoding does not contain enough operands for the + // combination of base opcode / dimension. Should this be an error? + return MCDisassembler::Success; + } + IsPartialNSA = true; } } } @@ -949,17 +955,20 @@ } } - // If not using NSA on GFX10+, widen address register to correct size. - unsigned NewVAddr0 = AMDGPU::NoRegister; - if (isGFX10Plus() && !IsNSA && AddrSize != Info->VAddrDwords) { - unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg(); - unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0); - VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0; - - auto AddrRCID = MCII->get(NewOpcode).operands()[VAddr0Idx].RegClass; - NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0, + // If not using NSA on GFX10+, widen vaddr0 address register to correct size. + // If using partial NSA on GFX11+ widen last address register. + int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx; + unsigned NewVAddrSA = AMDGPU::NoRegister; + if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) && + AddrSize != Info->VAddrDwords) { + unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg(); + unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0); + VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA; + + auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass; + NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &MRI.getRegClass(AddrRCID)); - if (NewVAddr0 == AMDGPU::NoRegister) + if (!NewVAddrSA) return MCDisassembler::Success; } @@ -974,8 +983,8 @@ } } - if (NewVAddr0 != AMDGPU::NoRegister) { - MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0); + if (NewVAddrSA) { + MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA); } else if (IsNSA) { assert(AddrSize <= Info->VAddrDwords); MI.erase(MI.begin() + VAddr0Idx + AddrSize, diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -20,6 +20,7 @@ #include "SIFrameLowering.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #define GET_SUBTARGETINFO_HEADER @@ -133,7 +134,7 @@ bool HasA16 = false; bool HasG16 = false; bool HasNSAEncoding = false; - unsigned NSAMaxSize = 0; + bool HasPartialNSAEncoding = false; bool GFX10_AEncoding = false; bool GFX10_BEncoding = false; bool HasDLInsts = false; @@ -931,7 +932,9 @@ bool hasNSAEncoding() const { return HasNSAEncoding; } - unsigned getNSAMaxSize() const { return NSAMaxSize; } + bool hasPartialNSAEncoding() const { return HasPartialNSAEncoding; } + + unsigned getNSAMaxSize() const { return AMDGPU::getNSAMaxSize(*this); } bool hasGFX10_AEncoding() const { return GFX10_AEncoding; diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -235,22 +235,47 @@ let Key = ["Opcode"]; } +class NSAHelper { + dag AddrIns; + string AddrAsm; + int NSA; +} + // This class used to use !foldl to memoize the AddrAsmNames list. // It turned out that that was much slower than using !filter. class MIMGNSAHelper addr_types=!listsplat(VGPR_32, num_addrs)> { + list addr_types=!listsplat(VGPR_32, num_addrs)> + : NSAHelper<> { list AddrAsmNames = !foreach(i, !filter(i, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], !lt(i, num_addrs)), "vaddr" # i); - dag AddrIns = !dag(ins, addr_types, AddrAsmNames); - string AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]"; + let AddrIns = !dag(ins, addr_types, AddrAsmNames); + let AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]"; - int NSA = !if(!le(num_addrs, 1), ?, + let NSA = !if(!le(num_addrs, 1), ?, !if(!le(num_addrs, 5), 1, !if(!le(num_addrs, 9), 2, !if(!le(num_addrs, 13), 3, ?)))); } +class PartialNSAHelper + : NSAHelper<> { + + list addr_types = + !if(!ge(num_addrs, max_addr), + !listconcat(!listsplat(VGPR_32, !sub(max_addr, 1)), [LastAddrRC]), + !listsplat(VGPR_32, num_addrs)); + + int VAddrCount = !if(!gt(num_addrs, max_addr), max_addr, num_addrs); + list AddrAsmNames = + !foreach(i, !filter(i, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], + !lt(i, VAddrCount)), "vaddr" # i); + + let AddrIns = !dag(ins, addr_types, AddrAsmNames); + let AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]"; + let NSA = 1; +} + // Base class of all pre-gfx10 MIMG instructions. class MIMG_gfx6789 op, dag outs, string dns = ""> : MIMG, MIMGe_gfx6789 { @@ -321,7 +346,8 @@ // Base class for all NSA MIMG instructions. // Note that 1-dword addresses always use non-NSA variants. class MIMG_nsa_gfx11 addr_types=[]> + list addr_types=[], + RegisterClass LastAddrRC = VGPR_32> : MIMG, MIMGe_gfx11 { let SubtargetPredicate = isGFX11Plus; let AssemblerPredicate = isGFX11Plus; @@ -329,9 +355,9 @@ let MIMGEncoding = MIMGEncGfx11NSA; let VAddrOperands = num_addrs; - MIMGNSAHelper nsah = !if(!empty(addr_types), - MIMGNSAHelper, - MIMGNSAHelper); + NSAHelper nsah = !if(!empty(addr_types), + PartialNSAHelper, + MIMGNSAHelper); dag AddrIns = nsah.AddrIns; string AddrAsm = nsah.AddrAsm; @@ -934,8 +960,9 @@ class MIMG_Sampler_nsa_gfx11 - : MIMG_nsa_gfx11 { + RegisterClass LastVAddrSize, string dns=""> + : MIMG_nsa_gfx11 { let InOperandList = !con(AddrIns, (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, @@ -946,29 +973,34 @@ #!if(BaseOpcode.HasD16, "$d16", ""); } -class MIMGAddrSize { +class MIMGAddrSize { int NumWords = dw; - RegisterClass RegClass = !if(!le(NumWords, 0), ?, - !if(!eq(NumWords, 1), VGPR_32, - !if(!eq(NumWords, 2), VReg_64, - !if(!eq(NumWords, 3), VReg_96, - !if(!eq(NumWords, 4), VReg_128, - !if(!eq(NumWords, 5), VReg_160, - !if(!eq(NumWords, 6), VReg_192, - !if(!eq(NumWords, 7), VReg_224, - !if(!le(NumWords, 8), VReg_256, - !if(!le(NumWords, 9), VReg_288, - !if(!le(NumWords, 10), VReg_320, - !if(!le(NumWords, 11), VReg_352, - !if(!le(NumWords, 12), VReg_384, - !if(!le(NumWords, 16), VReg_512, ?)))))))))))))); + RegisterClass RegClass = !if(!le(AddrDW, 0), ?, + !if(!eq(AddrDW, 1), VGPR_32, + !if(!eq(AddrDW, 2), VReg_64, + !if(!eq(AddrDW, 3), VReg_96, + !if(!eq(AddrDW, 4), VReg_128, + !if(!eq(AddrDW, 5), VReg_160, + !if(!eq(AddrDW, 6), VReg_192, + !if(!eq(AddrDW, 7), VReg_224, + !if(!eq(AddrDW, 8), VReg_256, + !if(!eq(AddrDW, 9), VReg_288, + !if(!eq(AddrDW, 10), VReg_320, + !if(!eq(AddrDW, 11), VReg_352, + !if(!eq(AddrDW, 12), VReg_384, + !if(!le(AddrDW, 16), VReg_512, ?)))))))))))))); // Whether the instruction variant with this vaddr size should be enabled for // the auto-generated disassembler. bit Disassemble = enable_disasm; } +// Returns the MIMGAddrSize with the size of last VAddr for partial NSA +class LastVAddrSize + : MIMGAddrSize; + // Return whether x is in lst. class isIntInList lst> { bit ret = !foldl(0, lst, lhs, y, !or(lhs, !eq(x, y))); @@ -985,7 +1017,8 @@ int Max = !if(!empty(!tail(range)), Min, !head(!tail(range))); } -class MIMG_Sampler_AddrSizes { +class MIMG_Sampler_AddrSizes { // List of all possible numbers of address words, taking all combinations of // A16 and image dimension into account (note: no MSAA, since this is for // sample/gather ops). @@ -1031,6 +1064,21 @@ !if(isIntInList.ret, !listconcat(lhs, [MIMGAddrSize]), lhs)))); + + // In NSA format if there is a requirement for more VGPRs than the format + // supports, then the rest are sequential after the last one. Generate + // machine instructions for all possible number of words. The disassembler + // defaults to the largest number of arguments but no larger than max nsa + // size. List is generated with the register class needed for last vaddr since + // it is the only one that could have a register other than VGPR32. + int EnableDisasmNum = !foldl(!head(AllNumAddrWords), !tail(AllNumAddrWords), + acc, var, !if(!le(var, nsa_max_addr), var, acc)); + list PartialNSAInstrs = + !foldl([], [12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2], lhs, dw, + !if(isIntInList.ret, + !listconcat(lhs, [LastVAddrSize]), + lhs)); } multiclass MIMG_Sampler_Src_Helper ; } - if !and(op.HAS_GFX11, !le(addr.NumWords, 5)) then { + } + } + + foreach addr = MIMG_Sampler_AddrSizes.PartialNSAInstrs in { + let VAddrDwords = addr.NumWords in { + if op.HAS_GFX11 then { def _V # addr.NumWords # _nsa_gfx11 - : MIMG_Sampler_nsa_gfx11; } } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1118,6 +1118,7 @@ bool hasA16(const MCSubtargetInfo &STI); bool hasG16(const MCSubtargetInfo &STI); bool hasPackedD16(const MCSubtargetInfo &STI); +unsigned getNSAMaxSize(const MCSubtargetInfo &STI); bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1945,6 +1945,15 @@ !isSI(STI); } +unsigned getNSAMaxSize(const MCSubtargetInfo &STI) { + auto Version = getIsaVersion(STI.getCPU()); + if (Version.Major == 10) + return Version.Minor >= 3 ? 13 : 5; + if (Version.Major == 11) + return 5; + return 0; +} + bool isSI(const MCSubtargetInfo &STI) { return STI.hasFeature(AMDGPU::FeatureSouthernIslands); } diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s --- a/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s @@ -217,6 +217,42 @@ image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D // GFX11: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00] +image_sample_d v64, [v32, v16, v8, v0, v[36:37]], s[4:11], s[4:7] dmask:0x8 dim:SQ_RSRC_IMG_2D +// GFX11: image_sample_d v64, [v32, v16, v8, v0, v[36:37]], s[4:11], s[4:7] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x08,0x70,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + +image_sample_d v[64:65], [v32, v16, v8, v0, v[36:38]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE +// GFX11: image_sample_d v[64:65], [v32, v16, v8, v0, v[36:38]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x0c,0x70,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + +image_sample_d_o v[64:65], [v32, v16, v8, v0, v[36:39]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE +// GFX11: image_sample_d_o v[64:65], [v32, v16, v8, v0, v[36:39]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x0c,0x98,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + +image_sample_d v[64:65], [v32, v16, v8, v0, v[36:40]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D +// GFX11: image_sample_d v[64:65], [v32, v16, v8, v0, v[36:40]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0x70,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + +image_sample_d_o v[64:65], [v32, v16, v8, v0, v[36:41]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D +// GFX11: image_sample_d_o v[64:65], [v32, v16, v8, v0, v[36:41]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0x98,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + +image_sample_c_d_o v[64:65], [v32, v16, v8, v0, v[36:42]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D +// GFX11: image_sample_c_d_o v[64:65], [v32, v16, v8, v0, v[36:42]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0xac,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + +image_sample_c_d_cl_o v[64:65], [v32, v16, v8, v0, v[36:43]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D +// GFX11: image_sample_c_d_cl_o v[64:65], [v32, v16, v8, v0, v[36:43]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0x28,0xf1,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + +image_sample_d_o_g16 v[64:66], [v32, v16, v8, v0, v36], s[4:11], s[4:7] dmask:0xb dim:SQ_RSRC_IMG_1D_ARRAY +// GFX11: image_sample_d_o_g16 v[64:66], [v32, v16, v8, v0, v36], s[4:11], s[4:7] dmask:0xb dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x0b,0xec,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + +image_sample_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:37]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE +// GFX11: image_sample_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:37]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x0c,0xec,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + +image_sample_c_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:38]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE +// GFX11: image_sample_c_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:38]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x0c,0xf0,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + +image_sample_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:39]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D +// GFX11: image_sample_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:39]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0xec,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + +image_sample_c_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:40]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D +// GFX11: image_sample_c_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:40]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0xf0,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] + image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D // GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt @@ -216,6 +216,42 @@ # GFX11: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00] 0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00 +# GFX11: image_sample_d v64, [v32, v16, v8, v0, v[36:37]], s[4:11], s[4:7] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x08,0x70,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x05,0x08,0x70,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + +# GFX11: image_sample_d v[64:65], [v32, v16, v8, v0, v[36:38]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x0c,0x70,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x0d,0x0c,0x70,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + +# GFX11: image_sample_d_o v[64:65], [v32, v16, v8, v0, v[36:39]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x0c,0x98,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x0d,0x0c,0x98,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + +# GFX11: image_sample_d v[64:65], [v32, v16, v8, v0, v[36:40]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0x70,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x09,0x03,0x70,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + +# GFX11: image_sample_d_o v[64:65], [v32, v16, v8, v0, v[36:41]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0x98,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x09,0x03,0x98,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + +# GFX11: image_sample_c_d_o v[64:65], [v32, v16, v8, v0, v[36:42]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0xac,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x09,0x03,0xac,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + +# GFX11: image_sample_c_d_cl_o v[64:65], [v32, v16, v8, v0, v[36:43]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0x28,0xf1,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x09,0x03,0x28,0xf1,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + +# GFX11: image_sample_d_o_g16 v[64:66], [v32, v16, v8, v0, v36], s[4:11], s[4:7] dmask:0xb dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x0b,0xec,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x11,0x0b,0xec,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + +# GFX11: image_sample_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:37]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x0c,0xec,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x0d,0x0c,0xec,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + +# GFX11: image_sample_c_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:38]], s[4:11], s[4:7] dmask:0xc dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0d,0x0c,0xf0,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x0d,0x0c,0xf0,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + +# GFX11: image_sample_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:39]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0xec,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x09,0x03,0xec,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + +# GFX11: image_sample_c_d_o_g16 v[64:65], [v32, v16, v8, v0, v[36:40]], s[4:11], s[4:7] dmask:0x3 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x03,0xf0,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24] +0x09,0x03,0xf0,0xf0,0x20,0x40,0x01,0x04,0x10,0x08,0x00,0x24 + # GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64] 0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64