diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -715,15 +715,31 @@ if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { unsigned DimIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim); + int A16Idx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16); const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); const AMDGPU::MIMGDimInfo *Dim = AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm()); + const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm()) ? 1 : 0; + + // This mimics the calculation of AddrSize in + // SIInstrInfo::verifyInstruction. + AddrSize = BaseOpcode->NumExtraArgs; + unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + + (BaseOpcode->LodOrClampOrMip ? 1 : 0); + if (IsA16) + AddrComponents = divideCeil(AddrComponents, 2); + + AddrSize += AddrComponents; + + if (BaseOpcode->Gradients) { + if (IsA16 || BaseOpcode->G16) + AddrSize += alignTo<2>(Dim->NumGradients / 2); + else + AddrSize += Dim->NumGradients; + } - AddrSize = BaseOpcode->NumExtraArgs + - (BaseOpcode->Gradients ? Dim->NumGradients : 0) + - (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + - (BaseOpcode->LodOrClampOrMip ? 1 : 0); IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA; if (!IsNSA) { if (AddrSize > 8) diff --git a/llvm/test/MC/Disassembler/AMDGPU/mimg_gfx10.txt b/llvm/test/MC/Disassembler/AMDGPU/mimg_gfx10.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/mimg_gfx10.txt @@ -0,0 +1,109 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX10 + +# GFX10: image_load v[4:6], v238, s[28:35] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x17,0x00,0xf0,0xee,0x04,0x07,0x00] +0x00,0x17,0x00,0xf0,0xee,0x04,0x07,0x00 + +# GFX10: image_load_pck v5, v0, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x08,0xf0,0x00,0x05,0x02,0x00] +0x00,0x21,0x08,0xf0,0x00,0x05,0x02,0x00 + +# GFX10: image_load_pck_sgn v5, v0, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D lwe ; encoding: [0x00,0x01,0x0e,0xf0,0x00,0x05,0x02,0x00] +0x00,0x01,0x0e,0xf0,0x00,0x05,0x02,0x00 + +# GFX10: image_load_mip v5, v[0:1], s[8:15] dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x00,0x04,0xf0,0x00,0x05,0x02,0x00] +0x00,0x00,0x04,0xf0,0x00,0x05,0x02,0x00 + +# GFX10: image_load_mip_pck v5, v[1:2], s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x10,0xf0,0x01,0x05,0x02,0x00] +0x00,0x01,0x10,0xf0,0x01,0x05,0x02,0x00 + +# GFX10: image_load_mip_pck_sgn v[4:5], v[0:1], s[8:15] dmask:0x5 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x05,0x14,0xf0,0x00,0x04,0x02,0x00] +0x00,0x05,0x14,0xf0,0x00,0x04,0x02,0x00 + +# GFX10: image_store v[192:194], v238, s[28:35] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x17,0x20,0xf0,0xee,0xc0,0x07,0x00] +0x00,0x17,0x20,0xf0,0xee,0xc0,0x07,0x00 + +# GFX10: image_store_pck v1, v2, s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x28,0xf0,0x02,0x01,0x03,0x00] +0x00,0x51,0x28,0xf0,0x02,0x01,0x03,0x00 + +# GFX10: image_store_mip v1, v[2:3], s[12:19] dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x00,0x24,0xf0,0x02,0x01,0x03,0x00] +0x00,0x00,0x24,0xf0,0x02,0x01,0x03,0x00 + +# GFX10: image_store_mip_pck v252, v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_1D r128 ; encoding: [0x00,0x81,0x2c,0xf0,0x02,0xfc,0x03,0x00] +0x00,0x81,0x2c,0xf0,0x02,0xfc,0x03,0x00 + +# GFX10: image_atomic_sub v4, v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x31,0x48,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_and v4, v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x60,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x11,0x60,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_cmpswap v[4:5], v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x31,0x40,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_add v[4:5], v192, s[28:35] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x33,0x44,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x33,0x44,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_or v4, v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x64,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x11,0x64,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_xor v4, v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x68,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x11,0x68,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_sub v4, v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x48,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x11,0x48,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_smin v4, v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x50,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x11,0x50,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_smax v4, v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x58,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x11,0x58,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_umin v4, v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x54,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x11,0x54,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_umax v4, v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x5c,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x11,0x5c,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_inc v4, v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x6c,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x11,0x6c,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_atomic_dec v4, v192, s[28:35] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x70,0xf0,0xc0,0x04,0x07,0x00] +0x00,0x11,0x70,0xf0,0xc0,0x04,0x07,0x00 + +# GFX10: image_get_resinfo v5, v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x38,0xf0,0x01,0x05,0x02,0x00] +0x00,0x01,0x38,0xf0,0x01,0x05,0x02,0x00 + +# GFX10: image_sample v5, v0, s[8:15], s[12:15] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x80,0xf0,0x00,0x05,0x62,0x00] +0x00,0x01,0x80,0xf0,0x00,0x05,0x62,0x00 + +# GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x08,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40] +0x08,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40 + +# GFX10: image_load v[0:4], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x1f,0x01,0xf0,0x00,0x00,0x00,0x00] +0x08,0x1f,0x01,0xf0,0x00,0x00,0x00,0x00 + +# GFX10: image_load v[0:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 tfe ; encoding: [0x08,0x1f,0x01,0xf0,0x00,0x00,0x00,0x40] +0x08,0x1f,0x01,0xf0,0x00,0x00,0x00,0x40 + +# GFX10: image_load v1, v1, s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x01,0x00,0xf0,0x01,0x01,0x04,0x40] +0x08,0x01,0x00,0xf0,0x01,0x01,0x04,0x40 + +# GFX10: image_load v[1:2], v1, s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x08,0x01,0x01,0xf0,0x01,0x01,0x04,0x40] +0x08,0x01,0x01,0xf0,0x01,0x01,0x04,0x40 + +# GFX10: image_load v1, v1, s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 lwe ; encoding: [0x08,0x01,0x02,0xf0,0x01,0x01,0x04,0x40] +0x08,0x01,0x02,0xf0,0x01,0x01,0x04,0x40 + +# GFX10: image_load v[1:2], v1, s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 tfe lwe ; encoding: [0x08,0x01,0x03,0xf0,0x01,0x01,0x04,0x40] +0x08,0x01,0x03,0xf0,0x01,0x01,0x04,0x40 + +# GFX10: image_load v[1:2], v1, s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x03,0x00,0xf0,0x01,0x01,0x04,0x40] +0x08,0x03,0x00,0xf0,0x01,0x01,0x04,0x40 + +# GFX10: image_load v[1:4], v1, s[16:23] dmask:0x7 dim:SQ_RSRC_IMG_2D a16 tfe ; encoding: [0x08,0x07,0x01,0xf0,0x01,0x01,0x04,0x40] +0x08,0x07,0x01,0xf0,0x01,0x01,0x04,0x40 + +# GFX10: image_load v[1:4], v1, s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D a16 lwe ; encoding: [0x08,0x0f,0x02,0xf0,0x01,0x01,0x04,0x40] +0x08,0x0f,0x02,0xf0,0x01,0x01,0x04,0x40 + +# GFX10: image_load v[1:3], v1, s[16:23] dmask:0x5 dim:SQ_RSRC_IMG_2D a16 tfe lwe ; encoding: [0x08,0x05,0x03,0xf0,0x01,0x01,0x04,0x40] +0x08,0x05,0x03,0xf0,0x01,0x01,0x04,0x40