Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1037,6 +1037,8 @@
   bool validateConstantBusLimitations(const MCInst &Inst);
   bool validateEarlyClobberLimitations(const MCInst &Inst);
   bool validateIntClampSupported(const MCInst &Inst);
+  bool validateMIMGAtomicDMask(const MCInst &Inst);
+  bool validateMIMGDataSize(const MCInst &Inst);
   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
@@ -2271,6 +2273,68 @@
   return true;
 }
 
+/// Check that the vdata operand of a MIMG instruction is sized to match its
+/// dmask and tfe operands: one data register per enabled dmask channel (a
+/// zero dmask counts as one channel) plus one more when tfe is set.
+///
+/// \returns true for non-MIMG and Gather4 opcodes, which are not checked here.
+bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+    return true;
+
+  // Gather4 instructions seem to have special rules not described in spec.
+  if (Desc.TSFlags & SIInstrFlags::Gather4)
+    return true;
+
+  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
+  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
+  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
+
+  assert(VDataIdx != -1);
+  assert(DMaskIdx != -1);
+  assert(TFEIdx != -1);
+
+  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
+  unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
+  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
+  // A zero dmask is sized as if exactly one channel were enabled.
+  if (DMask == 0)
+    DMask = 1;
+
+  // VDataSize is presumably in bytes with 4 bytes per data register, so
+  // VDataSize / 4 is the register count -- confirm against getRegOperandSize.
+  return (VDataSize / 4) == countPopulation(DMask) + TFESize;
+}
+
+/// Check that an atomic MIMG instruction uses one of the dmask values accepted
+/// for image atomics: 0x1, 0x3 or 0xf.
+///
+/// An opcode is treated as atomic when it may both load and store.
+/// \returns true for non-MIMG and non-atomic opcodes, which are not checked.
+bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+    return true;
+  if (!Desc.mayLoad() || !Desc.mayStore())
+    return true; // Not atomic
+
+  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
+  // Mirror validateMIMGDataSize: never index operand -1 on a missing dmask.
+  assert(DMaskIdx != -1);
+  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
+
+  // This is an incomplete check because image_atomic_cmpswap
+  // may only use 0x3 and 0xf while other atomic operations
+  // may use 0x1 and 0x3. However these limitations are
+  // verified when we check that dmask matches dst size.
+  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
+}
+
 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
   if (!validateConstantBusLimitations(Inst)) {
@@ -2288,6 +2352,16 @@
       "integer clamping is not supported on this GPU");
     return false;
   }
+  if (!validateMIMGDataSize(Inst)) {
+    Error(IDLoc,
+      "image data size does not match dmask and tfe");
+    return false;
+  }
+  if (!validateMIMGAtomicDMask(Inst)) {
+    Error(IDLoc,
+      "invalid atomic image dmask");
+    return false;
+  }
   return true;
 }
 
Index: test/MC/AMDGPU/mimg-err.s
===================================================================
--- /dev/null
+++ test/MC/AMDGPU/mimg-err.s
@@ -0,0 +1,61 @@
+// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN
+// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOGCN
+
+//===----------------------------------------------------------------------===//
+// Image Load/Store
+//===----------------------------------------------------------------------===//
+
+image_load v[4:6], v[237:240], s[28:35] dmask:0x7 tfe
+// NOGCN: error: image data size does not match dmask and tfe
+
+image_load v[4:5], v[237:240], s[28:35] dmask:0x7
+// NOGCN: error: image data size does not match dmask and tfe
+
+image_store v[4:7], v[237:240], s[28:35] dmask:0x7
+// NOGCN: error: image data size does not match dmask and tfe
+
+image_store v[4:7], v[237:240], s[28:35] dmask:0xe
+// NOGCN: error: image data size does not match dmask and tfe
+
+image_load v4, v[237:240], s[28:35] tfe
+// NOGCN: error: image data size does not match dmask and tfe
+
+//===----------------------------------------------------------------------===//
+// Image Sample: vdata register count differs from popcount(dmask) + tfe
+//===----------------------------------------------------------------------===//
+
+image_sample v[193:195], v[237:240], s[28:35], s[4:7] dmask:0x7 tfe
+// NOGCN: error: image data size does not match dmask and tfe
+
+image_sample v[193:195], v[237:240], s[28:35], s[4:7] dmask:0x3
+// NOGCN: error: image data size does not match dmask and tfe
+
+image_sample v[193:195], v[237:240], s[28:35], s[4:7] dmask:0xf
+// NOGCN: error: image data size does not match dmask and tfe
+
+//===----------------------------------------------------------------------===//
+// Image Atomics: data-size mismatches first, then dmask not 0x1, 0x3 or 0xf
+//===----------------------------------------------------------------------===//
+
+image_atomic_add v252, v2, s[8:15] dmask:0x1 tfe
+// NOGCN: error: image data size does not match dmask and tfe
+
+image_atomic_add v[6:7], v255, s[8:15] dmask:0x2
+// NOGCN: error: image data size does not match dmask and tfe
+
+image_atomic_add v[6:7], v255, s[8:15] dmask:0xf
+// NOGCN: error: image data size does not match dmask and tfe
+
+image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf tfe
+// NOGCN: error: image data size does not match dmask and tfe
+
+image_atomic_add v252, v2, s[8:15]
+// NOGCN: error: invalid atomic image dmask
+
+image_atomic_add v[6:7], v255, s[8:15] dmask:0x2 tfe
+// NOGCN: error: invalid atomic image dmask
+
+image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xe tfe
+// NOGCN: error: invalid atomic image dmask
+
Index: test/MC/AMDGPU/mimg.s
===================================================================
--- test/MC/AMDGPU/mimg.s
+++ test/MC/AMDGPU/mimg.s
@@ -10,10 +10,26 @@
 // SICI: image_load v[4:6], v[237:240], s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x00,0xf0,0xed,0x04,0x07,0x00]
 // VI: image_load v[4:6], v[237:240], s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x00,0xf0,0xed,0x04,0x07,0x00]
 
+image_load v4, v[237:240], s[28:35]
+// SICI: image_load v4, v[237:240], s[28:35] ; encoding: [0x00,0x00,0x00,0xf0,0xed,0x04,0x07,0x00]
+// VI: image_load v4, v[237:240], s[28:35] ; encoding: [0x00,0x00,0x00,0xf0,0xed,0x04,0x07,0x00]
+
+image_load v[4:7], v[237:240], s[28:35] dmask:0x7 tfe
+// SICI: image_load v[4:7], v[237:240], s[28:35] dmask:0x7 tfe ; encoding: [0x00,0x07,0x01,0xf0,0xed,0x04,0x07,0x00]
+// VI: image_load v[4:7], v[237:240], s[28:35] dmask:0x7 tfe ; encoding: [0x00,0x07,0x01,0xf0,0xed,0x04,0x07,0x00]
+
 image_store v[193:195], v[237:240], s[28:35] dmask:0x7 unorm
 // SICI: image_store v[193:195], v[237:240], s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x20,0xf0,0xed,0xc1,0x07,0x00]
 // VI: image_store v[193:195], v[237:240], s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x20,0xf0,0xed,0xc1,0x07,0x00]
 
+image_store v193, v[237:240], s[28:35]
+// SICI: image_store v193, v[237:240], s[28:35] ; encoding: [0x00,0x00,0x20,0xf0,0xed,0xc1,0x07,0x00]
+// VI: image_store v193, v[237:240], s[28:35] ; encoding: [0x00,0x00,0x20,0xf0,0xed,0xc1,0x07,0x00]
+
+image_store v[193:194], v[237:240], s[28:35] tfe
+// SICI: image_store v[193:194], v[237:240], s[28:35] tfe ; encoding: [0x00,0x00,0x21,0xf0,0xed,0xc1,0x07,0x00]
+// VI: image_store v[193:194], v[237:240], s[28:35] tfe ; encoding: [0x00,0x00,0x21,0xf0,0xed,0xc1,0x07,0x00]
+
 //===----------------------------------------------------------------------===//
 // Image Sample
 //===----------------------------------------------------------------------===//
@@ -22,6 +38,14 @@
 // SICI: image_sample v[193:195], v[237:240], s[28:35], s[4:7] dmask:0x7 unorm ; encoding: [0x00,0x17,0x80,0xf0,0xed,0xc1,0x27,0x00]
 // VI: image_sample v[193:195], v[237:240], s[28:35], s[4:7] dmask:0x7 unorm ; encoding: [0x00,0x17,0x80,0xf0,0xed,0xc1,0x27,0x00]
 
+image_sample v193, v[237:240], s[28:35], s[4:7]
+// SICI: image_sample v193, v[237:240], s[28:35], s[4:7] ; encoding: [0x00,0x00,0x80,0xf0,0xed,0xc1,0x27,0x00]
+// VI: image_sample v193, v[237:240], s[28:35], s[4:7] ; encoding: [0x00,0x00,0x80,0xf0,0xed,0xc1,0x27,0x00]
+
+image_sample v[193:194], v[237:240], s[28:35], s[4:7] tfe
+// SICI: image_sample v[193:194], v[237:240], s[28:35], s[4:7] tfe ; encoding: [0x00,0x00,0x81,0xf0,0xed,0xc1,0x27,0x00]
+// VI: image_sample v[193:194], v[237:240], s[28:35], s[4:7] tfe ; encoding: [0x00,0x00,0x81,0xf0,0xed,0xc1,0x27,0x00]
+
 //===----------------------------------------------------------------------===//
 // Image Atomics
 //===----------------------------------------------------------------------===//