Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1053,6 +1053,7 @@ bool validateEarlyClobberLimitations(const MCInst &Inst); bool validateIntClampSupported(const MCInst &Inst); bool validateMIMGAtomicDMask(const MCInst &Inst); + bool validateMIMGGatherDMask(const MCInst &Inst); bool validateMIMGDataSize(const MCInst &Inst); bool validateMIMGR128(const MCInst &Inst); bool validateMIMGD16(const MCInst &Inst); @@ -2299,7 +2300,7 @@ if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) return true; - // Gather4 instructions seem to have special rules not described in spec. + // Gather4 instructions do not need validation: dst size is hardcoded. if (Desc.TSFlags & SIInstrFlags::Gather4) return true; @@ -2345,6 +2346,20 @@ return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; } +bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { + + const unsigned Opc = Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opc); + + if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) + return true; + + int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); + unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; + + return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; +} + bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) { const unsigned Opc = Inst.getOpcode(); @@ -2412,6 +2427,11 @@ "invalid atomic image dmask"); return false; } + if (!validateMIMGGatherDMask(Inst)) { + Error(IDLoc, + "invalid image_gather dmask: only one bit must be set"); + return false; + } return true; } Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -273,6 +273,11 @@ // Consequently, decoded instructions always show address // as if it has 1 dword, which could be not really so. DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { + + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4) { + return MCDisassembler::Success; + } + int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst); @@ -289,7 +294,7 @@ assert(DMaskIdx != -1); assert(TFEIdx != -1); - bool isAtomic = (VDstIdx != -1); + bool IsAtomic = (VDstIdx != -1); unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf; if (DMask == 0) @@ -310,7 +315,7 @@ int NewOpcode = -1; - if (isAtomic) { + if (IsAtomic) { if (DMask == 0x1 || DMask == 0x3 || DMask == 0xF) { NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), DstSize); } @@ -342,7 +347,7 @@ // in the instruction encoding. MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata); - if (isAtomic) { + if (IsAtomic) { // Atomic operations have an additional operand (a copy of data) MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata); } Index: lib/Target/AMDGPU/MIMGInstructions.td =================================================================== --- lib/Target/AMDGPU/MIMGInstructions.td +++ lib/Target/AMDGPU/MIMGInstructions.td @@ -283,7 +283,10 @@ class MIMG_Gather_Helper op, string asm, RegisterClass dst_rc, - RegisterClass src_rc, bit wqm, bit d16_bit=0> : MIMG < + RegisterClass src_rc, + bit wqm, + bit d16_bit=0, + string dns=""> : MIMG < (outs dst_rc:$vdata), (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, @@ -305,49 +308,37 @@ let WQM = wqm; let D16 = d16; - let isAsmParserOnly = 1; // TBD: fix it later + let DecoderNamespace = dns; + let isAsmParserOnly = !if(!eq(dns,""), 1, 0); } -multiclass MIMG_Gather_Src_Helper_Helper op, string asm, +multiclass MIMG_Gather_Src_Helper op, string asm, RegisterClass dst_rc, - int channels, bit wqm, - bit d16_bit, string suffix> { - def _V1 # suffix : MIMG_Gather_Helper , - MIMG_Mask; - def _V2 # suffix : MIMG_Gather_Helper , - MIMG_Mask; - def _V4 # suffix : MIMG_Gather_Helper , - MIMG_Mask; - def _V8 # suffix : MIMG_Gather_Helper , - MIMG_Mask; - def _V16 # suffix : MIMG_Gather_Helper , - MIMG_Mask; + bit wqm, bit d16_bit, + string prefix, + string suffix> { + def prefix # _V1 # suffix : MIMG_Gather_Helper ; + def prefix # _V2 # suffix : MIMG_Gather_Helper ; + def prefix # _V4 # suffix : MIMG_Gather_Helper ; + def prefix # _V8 # suffix : MIMG_Gather_Helper ; + def prefix # _V16 # suffix : MIMG_Gather_Helper ; } -multiclass MIMG_Gather_Src_Helper op, string asm, - RegisterClass dst_rc, - int channels, bit wqm> { - defm : MIMG_Gather_Src_Helper_Helper; +multiclass MIMG_Gather op, string asm, bit wqm=0> { + defm : MIMG_Gather_Src_Helper; let d16 = 1 in { - let SubtargetPredicate = HasPackedD16VMem in { - defm : MIMG_Gather_Src_Helper_Helper; + let AssemblerPredicate = HasPackedD16VMem in { + defm : MIMG_Gather_Src_Helper; } // End HasPackedD16VMem. - let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in { - defm : MIMG_Gather_Src_Helper_Helper; + let AssemblerPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in { + defm : MIMG_Gather_Src_Helper; } // End HasUnpackedD16VMem. } // End d16 = 1. } -multiclass MIMG_Gather op, string asm, bit wqm=0> { - defm _V1 : MIMG_Gather_Src_Helper; - defm _V2 : MIMG_Gather_Src_Helper; - defm _V3 : MIMG_Gather_Src_Helper; - defm _V4 : MIMG_Gather_Src_Helper; -} - multiclass MIMG_Gather_WQM op, string asm> : MIMG_Gather; //===----------------------------------------------------------------------===// @@ -514,6 +505,22 @@ } // End HasPackedD16VMem. } +// ImageGather4 patterns. +multiclass ImageGather4Patterns { + defm : ImageSampleDataPatterns(opcode # _V4), v4f32>; +} + +// ImageGather4 alternative patterns for illegal vector half Types. +multiclass ImageGather4AltPatterns { + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageSampleDataPatterns(opcode # _V4), v4i32, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageSampleDataPatterns(opcode # _V2), v2i32, "_D16">; + } // End HasPackedD16VMem. +} + // ImageLoad for amdgcn. multiclass ImageLoadPattern { def : GCNPat < @@ -695,36 +702,36 @@ defm : ImageSamplePatterns; // Basic gather4. -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; // Gather4 with comparison. -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; // Gather4 with offsets. -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; // Gather4 with comparison and offsets. -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; -defm : ImageSamplePatterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; +defm : ImageGather4Patterns; // Basic sample alternative. defm : ImageSampleAltPatterns; @@ -775,36 +782,36 @@ defm : ImageSampleAltPatterns; // Basic gather4 alternative. -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; // Gather4 with comparison alternative. -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; // Gather4 with offsets alternative. -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; // Gather4 with comparison and offsets alternative. -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; -defm : ImageSampleAltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; +defm : ImageGather4AltPatterns; defm : ImageSamplePatterns; Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll @@ -3,39 +3,6 @@ ; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s -; GCN-LABEL: {{^}}image_gather4_f16: -; GCN: image_gather4 v[[HALF:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 d16 - -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] - -; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off -define amdgpu_kernel void @image_gather4_f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call half @llvm.amdgcn.image.gather4.f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 1, i1 0, i1 0, i1 0, i1 0, i1 0) - store half %tex, half addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}image_gather4_v2f16: -; UNPACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16 -; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; PACKED: image_gather4 v[[DATA:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16 - -; GFX81: v_lshrrev_b32_e32 v[[HI:[0-9]+]], 16, v[[DATA]] -; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] - -; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[DATA]], off -define amdgpu_kernel void @image_gather4_v2f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { -main_body: - %tex = call <2 x half> @llvm.amdgcn.image.gather4.v2f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 3, i1 0, i1 0, i1 0, i1 0, i1 0) - %elt = extractelement <2 x half> %tex, i32 1 - store half %elt, half addrspace(1)* %out - ret void -} - ; GCN-LABEL: {{^}}image_gather4_v4f16: ; UNPACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 ; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] @@ -126,8 +93,6 @@ ret void } -declare half @llvm.amdgcn.image.gather4.f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) -declare <2 x half> @llvm.amdgcn.image.gather4.v2f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) declare <4 x half> @llvm.amdgcn.image.gather4.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) Index: test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.ll @@ -320,24 +320,6 @@ ret void } -; GCN-LABEL: {{^}}gather4_f32: -; GCN: image_gather4 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da -define amdgpu_kernel void @gather4_f32(float addrspace(1)* %out) { -main_body: - %r = call float @llvm.amdgcn.image.gather4.f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 0, i1 0, i1 0, i1 0, i1 1) - store float %r, float addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}gather4_v2f32: -; GCN: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da -define amdgpu_kernel void @gather4_v2f32(<2 x float> addrspace(1)* %out) { -main_body: - %r = call <2 x float> @llvm.amdgcn.image.gather4.v2f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 0, i1 0, i1 0, i1 0, i1 1) - store <2 x float> %r, <2 x float> addrspace(1)* %out - ret void -} - declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 declare <4 x float> @llvm.amdgcn.image.gather4.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 declare <4 x float> @llvm.amdgcn.image.gather4.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 @@ -377,7 +359,4 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare float @llvm.amdgcn.image.gather4.f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 -declare <2 x float> @llvm.amdgcn.image.gather4.v2f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0 - attributes #0 = { nounwind readnone } Index: test/MC/AMDGPU/mimg-err.s =================================================================== --- test/MC/AMDGPU/mimg-err.s +++ test/MC/AMDGPU/mimg-err.s @@ -58,3 +58,10 @@ image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xe tfe // NOGCN: error: invalid atomic image dmask + +//===----------------------------------------------------------------------===// +// Image Gather +//===----------------------------------------------------------------------===// + +image_gather4_cl v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x3 +// NOGCN: error: invalid image_gather dmask: only one bit must be set Index: test/MC/AMDGPU/mimg.s =================================================================== --- test/MC/AMDGPU/mimg.s +++ test/MC/AMDGPU/mimg.s @@ -234,3 +234,38 @@ // SICI: image_atomic_add v10, v6, s[8:15] dmask:0x1 r128 ; encoding: [0x00,0x81,0x44,0xf0,0x06,0x0a,0x02,0x00] // VI: image_atomic_add v10, v6, s[8:15] dmask:0x1 r128 ; encoding: [0x00,0x81,0x48,0xf0,0x06,0x0a,0x02,0x00] // NOGFX9: error: r128 modifier is not supported on this GPU + +//===----------------------------------------------------------------------===// +// Image Gather4 +//===----------------------------------------------------------------------===// + +image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 +// GCN: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x00] + +image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x2 +// GCN: image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x2 ; encoding: [0x00,0x02,0x00,0xf1,0x01,0x05,0x62,0x00] + +image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x4 +// GCN: image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x4 ; encoding: [0x00,0x04,0x00,0xf1,0x01,0x05,0x62,0x00] + +image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x8 +// GCN: image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x8 ; encoding: [0x00,0x08,0x00,0xf1,0x01,0x05,0x62,0x00] + +image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16 +// NOSICI: error: instruction not supported on this GPU +// GFX8_0: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80] +// NOGFX8_1: error: instruction not supported on this GPU +// NOGFX9: error: instruction not supported on this GPU + +image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// NOGFX8_0: error: instruction not supported on this GPU +// GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80] +// GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80] + +// FIXME: d16 is handled as an optional modifier, should it be corrected? +image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 +// NOSICI: error: d16 modifier is not supported on this GPU +// NOGFX8_0: error: instruction not supported on this GPU +// GFX8_1: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80] +// GFX9: image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80] Index: test/MC/Disassembler/AMDGPU/mimg_vi.txt =================================================================== --- test/MC/Disassembler/AMDGPU/mimg_vi.txt +++ test/MC/Disassembler/AMDGPU/mimg_vi.txt @@ -155,3 +155,34 @@ # VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00] 0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00 + +#===------------------------------------------------------------------------===# +# Image gather +#===------------------------------------------------------------------------===# + +# VI: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x1 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0xfc,0x62,0x00] +0x00,0x01,0x00,0xf1,0x01,0xfc,0x62,0x00 + +# VI: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x2 ; encoding: [0x00,0x02,0x00,0xf1,0x01,0x05,0x62,0x00] +0x00,0x02,0x00,0xf1,0x01,0x05,0x62,0x00 + +# VI: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x4 ; encoding: [0x00,0x04,0x00,0xf1,0x01,0x05,0x62,0x00] +0x00,0x04,0x00,0xf1,0x01,0x05,0x62,0x00 + +# VI: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x8 ; encoding: [0x00,0x08,0x00,0xf1,0x01,0x05,0x62,0x00] +0x00,0x08,0x00,0xf1,0x01,0x05,0x62,0x00 + +# GFX80: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x1 unorm glc slc lwe da d16 ; encoding: [0x00,0x71,0x02,0xf3,0x01,0xfc,0x62,0x80] +# GFX81: image_gather4 v[252:253], v1, s[8:15], s[12:15] dmask:0x1 unorm glc slc lwe da d16 ; encoding: [0x00,0x71,0x02,0xf3,0x01,0xfc,0x62,0x80] +0x00,0x71,0x02,0xf3,0x01,0xfc,0x62,0x80 + +#===------------------------------------------------------------------------===# +# Invalid image gather (incorrect dmask value or tfe). +# Disassembler may produce a partially incorrect instruction but should not fail. +#===------------------------------------------------------------------------===# + +# VI: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x3 ; encoding: [0x00,0x03,0x00,0xf1,0x01,0xfc,0x62,0x00] +0x00,0x03,0x00,0xf1,0x01,0xfc,0x62,0x00 + +# VI: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x1 unorm glc slc tfe lwe da ; encoding: [0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00] +0x00,0x71,0x03,0xf3,0x01,0xfc,0x62,0x00