Index: lib/Target/AMDGPU/AMDGPUInstrInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -22,6 +22,7 @@ #define GET_INSTRINFO_HEADER #include "AMDGPUGenInstrInfo.inc" +#undef GET_INSTRINFO_HEADER namespace llvm { @@ -49,10 +50,6 @@ /// Return -1 if the target-specific opcode for the pseudo instruction does /// not exist. If Opcode is not a pseudo instruction, this is identity. int pseudoToMCOpcode(int Opcode) const; - - /// \brief Given a MIMG \p MI that writes any number of channels, return the - /// equivalent opcode that writes \p NewChannels Channels. - int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) const; }; } // End llvm namespace Index: lib/Target/AMDGPU/AMDGPUInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -23,7 +23,6 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR -#define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" // Pin the vtable to this file. @@ -56,59 +55,6 @@ return (NumLoads <= 16 && (Offset1 - Offset0) < 64); } -static AMDGPU::Channels indexToChannel(unsigned Channel) { - switch (Channel) { - case 1: - return AMDGPU::Channels_1; - case 2: - return AMDGPU::Channels_2; - case 3: - return AMDGPU::Channels_3; - case 4: - return AMDGPU::Channels_4; - default: - llvm_unreachable("invalid MIMG channel"); - } -} - -// FIXME: Need to handle d16 images correctly. 
-static unsigned rcToChannels(unsigned RCID) { - switch (RCID) { - case AMDGPU::VGPR_32RegClassID: - return 1; - case AMDGPU::VReg_64RegClassID: - return 2; - case AMDGPU::VReg_96RegClassID: - return 3; - case AMDGPU::VReg_128RegClassID: - return 4; - default: - llvm_unreachable("invalid MIMG register class"); - } -} - -int AMDGPUInstrInfo::getMaskedMIMGOp(unsigned Opc, - unsigned NewChannels) const { - AMDGPU::Channels Channel = indexToChannel(NewChannels); - unsigned OrigChannels = rcToChannels(get(Opc).OpInfo[0].RegClass); - if (NewChannels == OrigChannels) - return Opc; - - switch (OrigChannels) { - case 1: - return AMDGPU::getMaskedMIMGOp1(Opc, Channel); - case 2: - return AMDGPU::getMaskedMIMGOp2(Opc, Channel); - case 3: - return AMDGPU::getMaskedMIMGOp3(Opc, Channel); - case 4: - return AMDGPU::getMaskedMIMGOp4(Opc, Channel); - default: - llvm_unreachable("invalid MIMG channel"); - } -} - - // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td enum SIEncodingFamily { SI = 0, @@ -118,17 +64,6 @@ GFX9 = 4 }; -// Wrapper for Tablegen'd function. enum Subtarget is not defined in any -// header files, so we need to wrap it in a function that takes unsigned -// instead. 
-namespace llvm { -namespace AMDGPU { -static int getMCOpcode(uint16_t Opcode, unsigned Gen) { - return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); -} -} -} - static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) { switch (ST.getGeneration()) { case AMDGPUSubtarget::SOUTHERN_ISLANDS: Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -17,16 +17,18 @@ #define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" + #include #include #include namespace llvm { -class MCContext; class MCInst; class MCOperand; class MCSubtargetInfo; @@ -38,13 +40,16 @@ class AMDGPUDisassembler : public MCDisassembler { private: + std::unique_ptr<MCInstrInfo const> const MCII; + const MCRegisterInfo &MRI; mutable ArrayRef<uint8_t> Bytes; mutable uint32_t Literal; mutable bool HasLiteral; public: - AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : - MCDisassembler(STI, Ctx) {} + AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + MCInstrInfo const *MCII) : + MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()) {} ~AMDGPUDisassembler() override = default; @@ -64,6 +69,7 @@ uint64_t Address) const; DecodeStatus convertSDWAInst(MCInst &MI) const; + DecodeStatus convertMIMGInst(MCInst &MI) const; MCOperand decodeOperand_VGPR_32(unsigned Val) const; MCOperand decodeOperand_VS_32(unsigned Val) const; Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ 
lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -234,6 +234,10 @@ AMDGPU::OpName::src2_modifiers); } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) { + Res = convertMIMGInst(MI); + } + if (Res && IsSDWA) Res = convertSDWAInst(MI); @@ -260,6 +264,42 @@ return MCDisassembler::Success; } +DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { + int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::vdata); + + int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::dmask); + unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf; + if (DMask == 0) + return MCDisassembler::Success; + + unsigned ChannelCount = countPopulation(DMask); + if (ChannelCount == 1) + return MCDisassembler::Success; + + int NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount); + assert(NewOpcode != -1 && "could not find matching mimg channel instruction"); + auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass; + + // Widen the register to the correct number of enabled channels. + unsigned Vdata0 = MI.getOperand(VDataIdx).getReg(); + auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, + &MRI.getRegClass(RCID)); + if (NewVdata == AMDGPU::NoRegister) { + // It's possible to encode this such that the low register + enabled + // components exceed the register count. + return MCDisassembler::Success; + } + + MI.setOpcode(NewOpcode); + // vaddr will always appear as a single VGPR. This will look different than + // how it is usually emitted because the number of register components is not + // in the instruction encoding. 
+ MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata); + return MCDisassembler::Success; +} + const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const { return getContext().getRegisterInfo()-> getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]); @@ -786,7 +826,7 @@ static MCDisassembler *createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new AMDGPUDisassembler(STI, Ctx); + return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo()); } extern "C" void LLVMInitializeAMDGPUDisassembler() { Index: lib/Target/AMDGPU/MIMGInstructions.td =================================================================== --- lib/Target/AMDGPU/MIMGInstructions.td +++ lib/Target/AMDGPU/MIMGInstructions.td @@ -63,13 +63,13 @@ class MIMG_Store_Helper op, string asm, RegisterClass data_rc, - RegisterClass addr_rc> : MIMG_Helper < + RegisterClass addr_rc, + string dns = ""> : MIMG_Helper < (outs), (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da" - >, MIMGe { + asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", dns>, MIMGe { let ssamp = 0; let mayLoad = 0; let mayStore = 1; @@ -81,7 +81,8 @@ multiclass MIMG_Store_Addr_Helper op, string asm, RegisterClass data_rc, int channels> { - def _V1 : MIMG_Store_Helper , + def _V1 : MIMG_Store_Helper , MIMG_Mask; def _V2 : MIMG_Store_Helper , MIMG_Mask; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -6793,7 +6793,8 @@ unsigned BitsSet = countPopulation(NewDmask); const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); - int NewOpcode = TII->getMaskedMIMGOp(Node->getMachineOpcode(), BitsSet); + int NewOpcode = AMDGPU::getMaskedMIMGOp(*TII, + 
Node->getMachineOpcode(), BitsSet); assert(NewOpcode != -1 && NewOpcode != static_cast<int>(Node->getMachineOpcode()) && "failed to find equivalent MIMG op"); Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -156,6 +156,12 @@ LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); +LLVM_READONLY +int getMaskedMIMGOp(const MCInstrInfo &MII, + unsigned Opc, unsigned NewChannels); +LLVM_READONLY +int getMCOpcode(uint16_t Opcode, unsigned Gen); + void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -39,7 +40,9 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #define GET_INSTRINFO_NAMED_OPS +#define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" +#undef GET_INSTRMAP_INFO #undef GET_INSTRINFO_NAMED_OPS namespace { @@ -100,6 +103,66 @@ namespace AMDGPU { +LLVM_READNONE +static inline Channels indexToChannel(unsigned Channel) { + switch (Channel) { + case 1: + return AMDGPU::Channels_1; + case 2: + return AMDGPU::Channels_2; + case 3: + return AMDGPU::Channels_3; + case 4: + return AMDGPU::Channels_4; + default: + llvm_unreachable("invalid MIMG channel"); + } +} + + +// FIXME: Need to handle d16 images correctly. 
+static unsigned rcToChannels(unsigned RCID) { + switch (RCID) { + case AMDGPU::VGPR_32RegClassID: + return 1; + case AMDGPU::VReg_64RegClassID: + return 2; + case AMDGPU::VReg_96RegClassID: + return 3; + case AMDGPU::VReg_128RegClassID: + return 4; + default: + llvm_unreachable("invalid MIMG register class"); + } +} + +int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) { + AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels); + unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass); + if (NewChannels == OrigChannels) + return Opc; + + switch (OrigChannels) { + case 1: + return AMDGPU::getMaskedMIMGOp1(Opc, Channel); + case 2: + return AMDGPU::getMaskedMIMGOp2(Opc, Channel); + case 3: + return AMDGPU::getMaskedMIMGOp3(Opc, Channel); + case 4: + return AMDGPU::getMaskedMIMGOp4(Opc, Channel); + default: + llvm_unreachable("invalid MIMG channel"); + } +} + +// Wrapper for Tablegen'd function. enum Subtarget is not defined in any +// header files, so we need to wrap it in a function that takes unsigned +// instead. +int getMCOpcode(uint16_t Opcode, unsigned Gen) { + return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); +} + namespace IsaInfo { IsaVersion getIsaVersion(const FeatureBitset &Features) { @@ -833,6 +896,7 @@ return isGCN3Encoding(ST) ? 
isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset); } + } // end namespace AMDGPU } // end namespace llvm Index: test/MC/Disassembler/AMDGPU/mimg_vi.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AMDGPU/mimg_vi.txt @@ -0,0 +1,39 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck -check-prefix=VI %s + +# VI: image_load v[0:3], v4, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00] +0x00 0x1f 0x00 0xf0 0x04 0x00 0x02 0x00 + +# VI: image_load v[0:2], v4, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x00,0xf0,0x04,0x00,0x02,0x00] +0x00 0x1e 0x00 0xf0 0x04 0x00 0x02 0x00 + +# VI: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm ; encoding: [0x00,0x13,0x00,0xf0,0x00,0x00,0x00,0x00] +0x00 0x13 0x00 0xf0 0x00 0x00 0x00 0x00 + +# VI: image_load v0, v0, s[0:7] dmask:0x1 unorm ; encoding: [0x00,0x11,0x00,0xf0,0x00,0x00,0x00,0x00] +0x00 0x11 0x00 0xf0 0x00 0x00 0x00 0x00 + +# VI: image_store v[0:3], v4, s[0:7] dmask:0xf unorm ; encoding: [0x00,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00] +0x00 0x1f 0x20 0xf0 0x04 0x00 0x00 0x00 + +# VI: image_store v[0:2], v4, s[0:7] dmask:0xe unorm ; encoding: [0x00,0x1e,0x20,0xf0,0x04,0x00,0x00,0x00] +0x00 0x1e 0x20 0xf0 0x04 0x00 0x00 0x00 + +# VI: image_store v[0:1], v2, s[0:7] dmask:0x3 unorm ; encoding: [0x00,0x13,0x20,0xf0,0x02,0x00,0x00,0x00] +0x00 0x13 0x20 0xf0 0x02 0x00 0x00 0x00 + +# VI: image_store v0, v1, s[0:7] dmask:0x1 unorm ; encoding: [0x00,0x11,0x20,0xf0,0x01,0x00,0x00,0x00] +0x00 0x11 0x20 0xf0 0x01 0x00 0x00 0x00 + +# Test dmask == 0 +# VI: image_load v0, v4, s[8:15] unorm ; encoding: [0x00,0x10,0x00,0xf0,0x04,0x00,0x02,0x00] +0x00 0x10 0x00 0xf0 0x04 0x00 0x02 0x00 + +# Test out of bounds register width +# VI: image_load v254, v0, s[0:7] dmask:0x7 unorm ; encoding: [0x00,0x17,0x00,0xf0,0x00,0xfe,0x00,0x00] +0x00 0x17 0x00 0xf0 0x00 0xfe 0x00 0x00 + +# VI: image_load v255, v0, s[0:7] dmask:0x1 unorm ; 
encoding: [0x00,0x11,0x00,0xf0,0x00,0xff,0x00,0x00] +0x00 0x11 0x00 0xf0 0x00 0xff 0x00 0x00 + +# VI: image_load v255, v0, s[0:7] dmask:0x3 unorm ; encoding: [0x00,0x13,0x00,0xf0,0x00,0xff,0x00,0x00] +0x00 0x13 0x00 0xf0 0x00 0xff 0x00 0x00