Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -911,6 +911,10 @@ return AMDGPU::hasMIMG_R128(getSTI()); } + bool hasPackedD16() const { + return AMDGPU::hasPackedD16(getSTI()); + } + bool isSI() const { return AMDGPU::isSI(getSTI()); } @@ -2309,7 +2313,12 @@ if (DMask == 0) DMask = 1; - return (VDataSize / 4) == countPopulation(DMask) + TFESize; + unsigned DataSize = countPopulation(DMask); + if ((Desc.TSFlags & SIInstrFlags::D16) != 0 && hasPackedD16()) { + DataSize = (DataSize + 1) / 2; + } + + return (VDataSize / 4) == DataSize + TFESize; } bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { @@ -2378,25 +2387,25 @@ "integer clamping is not supported on this GPU"); return false; } - if (!validateMIMGDataSize(Inst)) { + if (!validateMIMGR128(Inst)) { Error(IDLoc, - "image data size does not match dmask and tfe"); + "r128 modifier is not supported on this GPU"); return false; } - if (!validateMIMGAtomicDMask(Inst)) { + // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. + if (!validateMIMGD16(Inst)) { Error(IDLoc, - "invalid atomic image dmask"); + "d16 modifier is not supported on this GPU"); return false; } - if (!validateMIMGR128(Inst)) { + if (!validateMIMGDataSize(Inst)) { Error(IDLoc, - "r128 modifier is not supported on this GPU"); + "image data size does not match dmask and tfe"); return false; } - // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
- if (!validateMIMGD16(Inst)) { + if (!validateMIMGAtomicDMask(Inst)) { Error(IDLoc, - "d16 modifier is not supported on this GPU"); + "invalid atomic image dmask"); return false; } Index: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -269,6 +269,9 @@ return MCDisassembler::Success; } +// Note that the MIMG format provides no information about VADDR size. +// Consequently, decoded instructions always show the address +// as if it has 1 dword, which may not be the case. DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst); @@ -279,8 +282,12 @@ int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dmask); + int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::tfe); + assert(VDataIdx != -1); assert(DMaskIdx != -1); + assert(TFEIdx != -1); bool isAtomic = (VDstIdx != -1); @@ -288,19 +295,28 @@ if (DMask == 0) return MCDisassembler::Success; - unsigned ChannelCount = countPopulation(DMask); - if (ChannelCount == 1) + unsigned DstSize = countPopulation(DMask); + if (DstSize == 1) + return MCDisassembler::Success; + + bool D16 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::D16; + if (D16 && AMDGPU::hasPackedD16(STI)) { + DstSize = (DstSize + 1) / 2; + } + + // FIXME: Add tfe support + if (MI.getOperand(TFEIdx).getImm()) return MCDisassembler::Success; int NewOpcode = -1; if (isAtomic) { if (DMask == 0x1 || DMask == 0x3 || DMask == 0xF) { - NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), ChannelCount); + NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), DstSize); } if (NewOpcode == -1) return MCDisassembler::Success; } else { - NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, 
MI.getOpcode(), ChannelCount); + NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), DstSize); assert(NewOpcode != -1 && "could not find matching mimg channel instruction"); } Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -284,8 +284,8 @@ } bool hasXNACK(const MCSubtargetInfo &STI); - bool hasMIMG_R128(const MCSubtargetInfo &STI); +bool hasPackedD16(const MCSubtargetInfo &STI); bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI); Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -628,6 +628,10 @@ return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128]; } +bool hasPackedD16(const MCSubtargetInfo &STI) { + return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]; +} + bool isSI(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands]; } Index: llvm/trunk/test/MC/AMDGPU/mimg.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/mimg.s +++ llvm/trunk/test/MC/AMDGPU/mimg.s @@ -1,13 +1,15 @@ // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_0 +// 
RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_1 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=GFX89 -// RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 - // RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI +// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_0 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_1 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 //===----------------------------------------------------------------------===// @@ -64,6 +66,80 @@ // NOGFX9: error: r128 modifier is not supported on this GPU //===----------------------------------------------------------------------===// +// Image Load/Store: d16 unpacked +//===----------------------------------------------------------------------===// + +image_load v[5:6], v[1:4], s[8:15] dmask:0x3 d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// GFX8_0: image_load v[5:6], v[1:4], s[8:15] dmask:0x3 d16 ; encoding: [0x00,0x03,0x00,0xf0,0x01,0x05,0x02,0x80] +// NOGFX8_1: error: image data size does not match dmask and tfe +// NOGFX9: error: image data size does not match dmask and tfe + +image_load v[5:7], v[1:4], s[8:15] dmask:0x7 d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// GFX8_0: image_load 
v[5:7], v[1:4], s[8:15] dmask:0x7 d16 ; encoding: [0x00,0x07,0x00,0xf0,0x01,0x05,0x02,0x80] +// NOGFX8_1: error: image data size does not match dmask and tfe +// NOGFX9: error: image data size does not match dmask and tfe + +image_load v[5:8], v[1:4], s[8:15] dmask:0xf d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// GFX8_0: image_load v[5:8], v[1:4], s[8:15] dmask:0xf d16 ; encoding: [0x00,0x0f,0x00,0xf0,0x01,0x05,0x02,0x80] +// NOGFX8_1: error: image data size does not match dmask and tfe +// NOGFX9: error: image data size does not match dmask and tfe + +image_load v[5:7], v[1:4], s[8:15] dmask:0x3 tfe d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// GFX8_0: image_load v[5:7], v[1:4], s[8:15] dmask:0x3 tfe d16 ; encoding: [0x00,0x03,0x01,0xf0,0x01,0x05,0x02,0x80] +// NOGFX8_1: error: image data size does not match dmask and tfe +// NOGFX9: error: image data size does not match dmask and tfe + +image_load v[5:8], v[1:4], s[8:15] dmask:0x7 tfe d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// GFX8_0: image_load v[5:8], v[1:4], s[8:15] dmask:0x7 tfe d16 ; encoding: [0x00,0x07,0x01,0xf0,0x01,0x05,0x02,0x80] +// NOGFX8_1: error: image data size does not match dmask and tfe +// NOGFX9: error: image data size does not match dmask and tfe + +//===----------------------------------------------------------------------===// +// Image Load/Store: d16 packed +//===----------------------------------------------------------------------===// + +image_load v5, v[1:4], s[8:15] dmask:0x3 d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// NOGFX8_0: error: image data size does not match dmask and tfe +// GFX8_1: image_load v5, v[1:4], s[8:15] dmask:0x3 d16 ; encoding: [0x00,0x03,0x00,0xf0,0x01,0x05,0x02,0x80] +// GFX9: image_load v5, v[1:4], s[8:15] dmask:0x3 d16 ; encoding: [0x00,0x03,0x00,0xf0,0x01,0x05,0x02,0x80] + +image_load v[5:6], v[1:4], s[8:15] dmask:0x7 d16 +// NOSICI: error: d16 modifier is not 
supported on this GPU +// NOGFX8_0: error: image data size does not match dmask and tfe +// GFX8_1: image_load v[5:6], v[1:4], s[8:15] dmask:0x7 d16 ; encoding: [0x00,0x07,0x00,0xf0,0x01,0x05,0x02,0x80] +// GFX9: image_load v[5:6], v[1:4], s[8:15] dmask:0x7 d16 ; encoding: [0x00,0x07,0x00,0xf0,0x01,0x05,0x02,0x80] + +image_load v[5:6], v[1:4], s[8:15] dmask:0xf d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// NOGFX8_0: error: image data size does not match dmask and tfe +// GFX8_1: image_load v[5:6], v[1:4], s[8:15] dmask:0xf d16 ; encoding: [0x00,0x0f,0x00,0xf0,0x01,0x05,0x02,0x80] +// GFX9: image_load v[5:6], v[1:4], s[8:15] dmask:0xf d16 ; encoding: [0x00,0x0f,0x00,0xf0,0x01,0x05,0x02,0x80] + +image_load v[5:6], v[1:4], s[8:15] dmask:0x3 tfe d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// NOGFX8_0: error: image data size does not match dmask and tfe +// GFX8_1: image_load v[5:6], v[1:4], s[8:15] dmask:0x3 tfe d16 ; encoding: [0x00,0x03,0x01,0xf0,0x01,0x05,0x02,0x80] +// GFX9: image_load v[5:6], v[1:4], s[8:15] dmask:0x3 tfe d16 ; encoding: [0x00,0x03,0x01,0xf0,0x01,0x05,0x02,0x80] + +image_load v[5:7], v[1:4], s[8:15] dmask:0x7 tfe d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// NOGFX8_0: error: image data size does not match dmask and tfe +// GFX8_1: image_load v[5:7], v[1:4], s[8:15] dmask:0x7 tfe d16 ; encoding: [0x00,0x07,0x01,0xf0,0x01,0x05,0x02,0x80] +// GFX9: image_load v[5:7], v[1:4], s[8:15] dmask:0x7 tfe d16 ; encoding: [0x00,0x07,0x01,0xf0,0x01,0x05,0x02,0x80] + +image_load v[5:7], v[1:4], s[8:15] dmask:0xf tfe d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// NOGFX8_0: error: image data size does not match dmask and tfe +// GFX8_1: image_load v[5:7], v[1:4], s[8:15] dmask:0xf tfe d16 ; encoding: [0x00,0x0f,0x01,0xf0,0x01,0x05,0x02,0x80] +// GFX9: image_load v[5:7], v[1:4], s[8:15] dmask:0xf tfe d16 ; encoding: [0x00,0x0f,0x01,0xf0,0x01,0x05,0x02,0x80] + 
+//===----------------------------------------------------------------------===// // Image Sample //===----------------------------------------------------------------------===// @@ -86,6 +162,26 @@ // GFX89: image_sample v193, v[237:240], s[28:35], s[4:7] d16 ; encoding: [0x00,0x00,0x80,0xf0,0xed,0xc1,0x27,0x80] //===----------------------------------------------------------------------===// +// Image Sample: d16 unpacked +//===----------------------------------------------------------------------===// + +image_sample v[193:195], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// GFX8_0: image_sample v[193:195], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80] +// NOGFX8_1: error: image data size does not match dmask and tfe +// NOGFX9: error: image data size does not match dmask and tfe + +//===----------------------------------------------------------------------===// +// Image Sample: d16 packed +//===----------------------------------------------------------------------===// + +image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 +// NOSICI: error: d16 modifier is not supported on this GPU +// NOGFX8_0: error: image data size does not match dmask and tfe +// GFX8_1: image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80] +// GFX9: image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80] + +//===----------------------------------------------------------------------===// // Image Atomics //===----------------------------------------------------------------------===// Index: llvm/trunk/test/MC/Disassembler/AMDGPU/mimg_vi.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/AMDGPU/mimg_vi.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/mimg_vi.txt @@ -1,4 +1,5 @@ 
-# RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck -check-prefix=VI %s +# RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s -check-prefix=VI -check-prefix=GFX80 +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx810 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=VI -check-prefix=GFX81 #===------------------------------------------------------------------------===# # Image load/store @@ -49,6 +50,53 @@ 0x00 0x13 0x00 0xf0 0x00 0xff 0x00 0x00 #===------------------------------------------------------------------------===# +# Image load/store: packed/unpacked d16 +#===------------------------------------------------------------------------===# + +# GFX80: image_load v[0:1], v4, s[8:15] dmask:0x3 d16 ; encoding: [0x00,0x03,0x00,0xf0,0x04,0x00,0x02,0x80] +# GFX81: image_load v0, v4, s[8:15] dmask:0x3 d16 ; encoding: [0x00,0x03,0x00,0xf0,0x04,0x00,0x02,0x80] +0x00,0x03,0x00,0xf0,0x04,0x00,0x02,0x80 + +# GFX80: image_load v[0:2], v4, s[8:15] dmask:0x7 d16 ; encoding: [0x00,0x07,0x00,0xf0,0x04,0x00,0x02,0x80] +# GFX81: image_load v[0:1], v4, s[8:15] dmask:0x7 d16 ; encoding: [0x00,0x07,0x00,0xf0,0x04,0x00,0x02,0x80] +0x00,0x07,0x00,0xf0,0x04,0x00,0x02,0x80 + +# GFX80: image_load v[0:3], v4, s[8:15] dmask:0xf d16 ; encoding: [0x00,0x0f,0x00,0xf0,0x04,0x00,0x02,0x80] +# GFX81: image_load v[0:1], v4, s[8:15] dmask:0xf d16 ; encoding: [0x00,0x0f,0x00,0xf0,0x04,0x00,0x02,0x80] +0x00,0x0f,0x00,0xf0,0x04,0x00,0x02,0x80 + +# GFX80: image_store v[0:1], v4, s[8:15] dmask:0x3 d16 ; encoding: [0x00,0x03,0x20,0xf0,0x04,0x00,0x02,0x80] +# GFX81: image_store v0, v4, s[8:15] dmask:0x3 d16 ; encoding: [0x00,0x03,0x20,0xf0,0x04,0x00,0x02,0x80] +0x00,0x03,0x20,0xf0,0x04,0x00,0x02,0x80 + +# GFX80: image_store v[0:2], v4, s[8:15] dmask:0x7 d16 ; encoding: [0x00,0x07,0x20,0xf0,0x04,0x00,0x02,0x80] +# GFX81: image_store v[0:1], v4, s[8:15] dmask:0x7 d16 ; encoding: [0x00,0x07,0x20,0xf0,0x04,0x00,0x02,0x80] 
+0x00,0x07,0x20,0xf0,0x04,0x00,0x02,0x80 + +# GFX80: image_store v[0:3], v4, s[8:15] dmask:0xf d16 ; encoding: [0x00,0x0f,0x20,0xf0,0x04,0x00,0x02,0x80] +# GFX81: image_store v[0:1], v4, s[8:15] dmask:0xf d16 ; encoding: [0x00,0x0f,0x20,0xf0,0x04,0x00,0x02,0x80] +0x00,0x0f,0x20,0xf0,0x04,0x00,0x02,0x80 + +#===------------------------------------------------------------------------===# +# Image sample +#===------------------------------------------------------------------------===# + +# VI: image_sample v[193:195], v237, s[28:35], s[4:7] dmask:0x7 unorm ; encoding: [0x00,0x17,0x80,0xf0,0xed,0xc1,0x27,0x00] +0x00,0x17,0x80,0xf0,0xed,0xc1,0x27,0x00 + +# GFX80: image_sample v[193:194], v237, s[28:35], s[4:7] dmask:0x3 d16 ; encoding: [0x00,0x03,0x80,0xf0,0xed,0xc1,0x27,0x80] +# GFX81: image_sample v193, v237, s[28:35], s[4:7] dmask:0x3 d16 ; encoding: [0x00,0x03,0x80,0xf0,0xed,0xc1,0x27,0x80] +0x00,0x03,0x80,0xf0,0xed,0xc1,0x27,0x80 + +# GFX80: image_sample v[193:195], v237, s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80] +# GFX81: image_sample v[193:194], v237, s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80] +0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80 + +# GFX80: image_sample v[193:196], v237, s[28:35], s[4:7] dmask:0xf d16 ; encoding: [0x00,0x0f,0x80,0xf0,0xed,0xc1,0x27,0x80] +# GFX81: image_sample v[193:194], v237, s[28:35], s[4:7] dmask:0xf d16 ; encoding: [0x00,0x0f,0x80,0xf0,0xed,0xc1,0x27,0x80] +0x00,0x0f,0x80,0xf0,0xed,0xc1,0x27,0x80 + +#===------------------------------------------------------------------------===# # Image atomics #===------------------------------------------------------------------------===#