Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -265,11 +265,20 @@ } DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { + int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::vdst); + int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dmask); + + assert(VDataIdx != -1); + assert(DMaskIdx != -1); + + bool isAtomic = (VDstIdx != -1); + unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf; if (DMask == 0) return MCDisassembler::Success; @@ -278,12 +287,26 @@ if (ChannelCount == 1) return MCDisassembler::Success; - int NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount); - assert(NewOpcode != -1 && "could not find matching mimg channel instruction"); + int NewOpcode = -1; + + if (isAtomic) { + if (DMask == 0x1 || DMask == 0x3 || DMask == 0xF) { + NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), ChannelCount); + } + if (NewOpcode == -1) return MCDisassembler::Success; + } else { + NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount); + assert(NewOpcode != -1 && "could not find matching mimg channel instruction"); + } + auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass; - // Widen the register to the correct number of enabled channels. + // Get first subregister of VData unsigned Vdata0 = MI.getOperand(VDataIdx).getReg(); + unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0); + Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0; + + // Widen the register to the correct number of enabled channels. 
auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &MRI.getRegClass(RCID)); if (NewVdata == AMDGPU::NoRegister) { @@ -297,6 +320,12 @@ // how it is usually emitted because the number of register components is not // in the instruction encoding. MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata); + + if (isAtomic) { + // Atomic operations have an additional operand (a copy of data) + MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata); + } + return MCDisassembler::Success; } Index: lib/Target/AMDGPU/MIMGInstructions.td =================================================================== --- lib/Target/AMDGPU/MIMGInstructions.td +++ lib/Target/AMDGPU/MIMGInstructions.td @@ -12,6 +12,11 @@ int Channels = channels; } +class MIMG_Atomic_Size { + string Op = op; + int AtomicSize = !if(is32Bit, 1, 2); +} + class mimg si, bits<7> vi = si> { field bits<7> SI = si; field bits<7> VI = vi; @@ -136,12 +141,14 @@ } class MIMG_Atomic_Helper : MIMG_Helper < + RegisterClass addr_rc, string dns="", + bit enableDasm = 0> : MIMG_Helper < (outs data_rc:$vdst), (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"> { + asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", + !if(enableDasm, dns, "")> { let mayLoad = 1; let mayStore = 1; let hasSideEffects = 1; // FIXME: Remove this @@ -152,45 +159,68 @@ } class MIMG_Atomic_Real_si : - MIMG_Atomic_Helper, + RegisterClass data_rc, RegisterClass addr_rc, bit enableDasm> : + MIMG_Atomic_Helper, SIMCInstr, MIMGe { let isCodeGenOnly = 0; let AssemblerPredicates = [isSICI]; - let DecoderNamespace = "SICI"; let DisableDecoder = DisableSIDecoder; } class MIMG_Atomic_Real_vi : - MIMG_Atomic_Helper, + RegisterClass data_rc, RegisterClass addr_rc, bit enableDasm> : + MIMG_Atomic_Helper, SIMCInstr, MIMGe { let isCodeGenOnly = 0; let AssemblerPredicates = [isVI]; - 
let DecoderNamespace = "VI"; let DisableDecoder = DisableVIDecoder; } -multiclass MIMG_Atomic_Helper_m { +multiclass MIMG_Atomic_Helper_m { let isPseudo = 1, isCodeGenOnly = 1 in { def "" : MIMG_Atomic_Helper, SIMCInstr; } let ssamp = 0 in { - def _si : MIMG_Atomic_Real_si; + def _si : MIMG_Atomic_Real_si, + MIMG_Atomic_Size; - def _vi : MIMG_Atomic_Real_vi; + def _vi : MIMG_Atomic_Real_vi, + MIMG_Atomic_Size; } } -multiclass MIMG_Atomic { - defm _V1 : MIMG_Atomic_Helper_m ; - defm _V2 : MIMG_Atomic_Helper_m ; - defm _V4 : MIMG_Atomic_Helper_m ; +multiclass MIMG_Atomic_Addr_Helper_m { + // _V* variants have different address size, but the size is not encoded. + // So only one variant can be disassembled. V1 looks the safest to decode. + defm _V1 : MIMG_Atomic_Helper_m ; + defm _V2 : MIMG_Atomic_Helper_m ; + defm _V4 : MIMG_Atomic_Helper_m ; +} + +multiclass MIMG_Atomic { // 64-bit atomics + // _V* variants have different dst size, but the size is encoded implicitly, + // using dmask and tfe. Only 32-bit variant is registered with disassembler. + // Other variants are reconstructed by disassembler using dmask and tfe. + defm _V1 : MIMG_Atomic_Addr_Helper_m ; + defm _V2 : MIMG_Atomic_Addr_Helper_m ; } class MIMG_Sampler_Helper op, string asm, @@ -340,7 +370,7 @@ } defm IMAGE_ATOMIC_SWAP : MIMG_Atomic , "image_atomic_swap">; -defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic , "image_atomic_cmpswap", VReg_64>; +defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic , "image_atomic_cmpswap", VReg_64, VReg_128>; defm IMAGE_ATOMIC_ADD : MIMG_Atomic , "image_atomic_add">; defm IMAGE_ATOMIC_SUB : MIMG_Atomic , "image_atomic_sub">; //def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>; -- not on VI @@ -586,9 +616,9 @@ // ImageAtomic patterns. 
multiclass ImageAtomicPatterns { - def : ImageAtomicPattern(opcode # _V1), i32>; - def : ImageAtomicPattern(opcode # _V2), v2i32>; - def : ImageAtomicPattern(opcode # _V4), v4i32>; + def : ImageAtomicPattern(opcode # _V1_V1), i32>; + def : ImageAtomicPattern(opcode # _V1_V2), v2i32>; + def : ImageAtomicPattern(opcode # _V1_V4), v4i32>; } // ImageAtomicCmpSwap for amdgcn. @@ -780,9 +810,9 @@ // Image atomics defm : ImageAtomicPatterns; -def : ImageAtomicCmpSwapPattern; -def : ImageAtomicCmpSwapPattern; -def : ImageAtomicCmpSwapPattern; +def : ImageAtomicCmpSwapPattern; +def : ImageAtomicCmpSwapPattern; +def : ImageAtomicCmpSwapPattern; defm : ImageAtomicPatterns; defm : ImageAtomicPatterns; defm : ImageAtomicPatterns; Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -2040,6 +2040,22 @@ let ValueCols = [["1"], ["2"], ["3"] ]; } +def getMIMGAtomicOp1 : InstrMapping { + let FilterClass = "MIMG_Atomic_Size"; + let RowFields = ["Op"]; + let ColFields = ["AtomicSize"]; + let KeyCol = ["1"]; + let ValueCols = [["2"]]; +} + +def getMIMGAtomicOp2 : InstrMapping { + let FilterClass = "MIMG_Atomic_Size"; + let RowFields = ["Op"]; + let ColFields = ["AtomicSize"]; + let KeyCol = ["2"]; + let ValueCols = [["1"]]; +} + // Maps an commuted opcode to its original version def getCommuteOrig : InstrMapping { let FilterClass = "Commutable_REV"; Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -159,6 +159,11 @@ LLVM_READONLY int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels); + +LLVM_READONLY +int getMaskedMIMGAtomicOp(const MCInstrInfo &MII, + unsigned Opc, unsigned NewChannels); + LLVM_READONLY int getMCOpcode(uint16_t Opcode, unsigned Gen); Index: 
lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -156,6 +156,38 @@
   }
 }
 
+// Returns the opcode of the image atomic variant of Opc whose operand 0
+// register class has NewChannels channels. Returns Opc unchanged when it
+// already has that many channels, and -1 for a channel pairing that is not
+// handled here; otherwise the result comes from getMIMGAtomicOp1/2.
+int getMaskedMIMGAtomicOp(const MCInstrInfo &MII, unsigned Opc,
+                          unsigned NewChannels) {
+  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst) != -1);
+  assert(NewChannels == 1 || NewChannels == 2 || NewChannels == 4);
+
+  const unsigned OldChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
+  assert(OldChannels == 1 || OldChannels == 2 || OldChannels == 4);
+
+  // Opc already has the requested channel count.
+  if (OldChannels == NewChannels)
+    return Opc;
+
+  // Only 1 <-> 2 channels (an ordinary atomic, not an atomic_cmpswap) and
+  // 2 <-> 4 channels (an atomic_cmpswap) are supported; any other
+  // channel pairing is invalid.
+  const bool IsOrdinaryAtomic = OldChannels <= 2 && NewChannels <= 2;
+  const bool IsCmpSwap = OldChannels >= 2 && NewChannels >= 2;
+  if (!IsOrdinaryAtomic && !IsCmpSwap)
+    return -1;
+
+  // Use getMIMGAtomicOp1 when Opc is the narrower variant of its pair and
+  // getMIMGAtomicOp2 when it is the wider one.
+  const unsigned NarrowChannels = IsOrdinaryAtomic ? 1 : 2;
+  if (OldChannels == NarrowChannels)
+    return AMDGPU::getMIMGAtomicOp1(Opc);
+  return AMDGPU::getMIMGAtomicOp2(Opc);
+}
+
 // Wrapper for Tablegen'd function. enum Subtarget is not defined in any
 // header files, so we need to wrap it in a function that takes unsigned
 // instead.
Index: test/MC/AMDGPU/mimg.s =================================================================== --- test/MC/AMDGPU/mimg.s +++ test/MC/AMDGPU/mimg.s @@ -30,25 +30,21 @@ // SICI: image_atomic_add v4, v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x44,0xf0,0xc0,0x04,0x07,0x00] // VI: image_atomic_add v4, v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0xc0,0x04,0x07,0x00] -image_atomic_add v5, v1, s[8:15] -// SICI: image_atomic_add v5, v1, s[8:15] ; encoding: [0x00,0x00,0x44,0xf0,0x01,0x05,0x02,0x00] -// VI: image_atomic_add v5, v1, s[8:15] ; encoding: [0x00,0x00,0x48,0xf0,0x01,0x05,0x02,0x00] +image_atomic_add v252, v2, s[8:15] dmask:0x1 unorm +// SICI: image_atomic_add v252, v2, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x44,0xf0,0x02,0xfc,0x02,0x00] +// VI: image_atomic_add v252, v2, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x48,0xf0,0x02,0xfc,0x02,0x00] -image_atomic_add v252, v2, s[8:15] unorm -// SICI: image_atomic_add v252, v2, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x02,0xfc,0x02,0x00] -// VI: image_atomic_add v252, v2, s[8:15] unorm ; encoding: [0x00,0x10,0x48,0xf0,0x02,0xfc,0x02,0x00] +image_atomic_add v[6:7], v255, s[8:15] dmask:0x3 +// SICI: image_atomic_add v[6:7], v255, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x44,0xf0,0xff,0x06,0x02,0x00] +// VI: image_atomic_add v[6:7], v255, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x48,0xf0,0xff,0x06,0x02,0x00] -image_atomic_add v6, v255, s[8:15] dmask:0x1 -// SICI: image_atomic_add v6, v255, s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x44,0xf0,0xff,0x06,0x02,0x00] -// VI: image_atomic_add v6, v255, s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x48,0xf0,0xff,0x06,0x02,0x00] +image_atomic_add v7, v3, s[0:7] dmask:0x1 glc +// SICI: image_atomic_add v7, v3, s[0:7] dmask:0x1 glc ; encoding: [0x00,0x21,0x44,0xf0,0x03,0x07,0x00,0x00] +// VI: image_atomic_add v7, v3, s[0:7] dmask:0x1 glc ; encoding: [0x00,0x21,0x48,0xf0,0x03,0x07,0x00,0x00] -image_atomic_add v7, v3, 
s[0:7] glc -// SICI: image_atomic_add v7, v3, s[0:7] glc ; encoding: [0x00,0x20,0x44,0xf0,0x03,0x07,0x00,0x00] -// VI: image_atomic_add v7, v3, s[0:7] glc ; encoding: [0x00,0x20,0x48,0xf0,0x03,0x07,0x00,0x00] - -image_atomic_add v8, v4, s[8:15] slc -// SICI: image_atomic_add v8, v4, s[8:15] slc ; encoding: [0x00,0x00,0x44,0xf2,0x04,0x08,0x02,0x00] -// VI: image_atomic_add v8, v4, s[8:15] slc ; encoding: [0x00,0x00,0x48,0xf2,0x04,0x08,0x02,0x00] +image_atomic_add v8, v4, s[8:15] dmask:0x1 slc +// SICI: image_atomic_add v8, v4, s[8:15] dmask:0x1 slc ; encoding: [0x00,0x01,0x44,0xf2,0x04,0x08,0x02,0x00] +// VI: image_atomic_add v8, v4, s[8:15] dmask:0x1 slc ; encoding: [0x00,0x01,0x48,0xf2,0x04,0x08,0x02,0x00] image_atomic_add v9, v5, s[8:15] dmask:0x1 unorm glc slc lwe da // SICI: image_atomic_add v9, v5, s[8:15] dmask:0x1 unorm glc slc lwe da ; encoding: [0x00,0x71,0x46,0xf2,0x05,0x09,0x02,0x00] @@ -66,6 +62,10 @@ // SICI: image_atomic_swap v4, v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0xc0,0x04,0x07,0x00] // VI: image_atomic_swap v4, v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0xc0,0x04,0x07,0x00] -image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x1 unorm glc -// SIIC: image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0xc0,0x04,0x07,0x00] -// VI: image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x44,0xf0,0xc0,0x04,0x07,0x00] +image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 unorm glc +// SICI: image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x40,0xf0,0xc0,0x04,0x07,0x00] +// VI: image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x44,0xf0,0xc0,0x04,0x07,0x00] + +image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm glc +// SICI: image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm 
glc ; encoding: [0x00,0x3f,0x40,0xf0,0xc0,0x04,0x07,0x00] +// VI: image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm glc ; encoding: [0x00,0x3f,0x44,0xf0,0xc0,0x04,0x07,0x00] Index: test/MC/Disassembler/AMDGPU/mimg_vi.txt =================================================================== --- test/MC/Disassembler/AMDGPU/mimg_vi.txt +++ test/MC/Disassembler/AMDGPU/mimg_vi.txt @@ -1,5 +1,9 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck -check-prefix=VI %s +#===------------------------------------------------------------------------===# +# Image load/store +#===------------------------------------------------------------------------===# + # VI: image_load v[0:3], v4, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00] 0x00 0x1f 0x00 0xf0 0x04 0x00 0x02 0x00 @@ -37,3 +41,63 @@ # VI: image_load v255, v0, s[0:7] dmask:0x3 unorm ; encoding: [0x00,0x13,0x00,0xf0,0x00,0xff,0x00,0x00] 0x00 0x13 0x00 0xf0 0x00 0xff 0x00 0x00 + +#===------------------------------------------------------------------------===# +# Image atomics +#===------------------------------------------------------------------------===# + +# VI: image_atomic_add v5, v1, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x48,0xf0,0x01,0x05,0x02,0x00] +0x00,0x11,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_add v252, v1, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x48,0xf0,0x01,0xfc,0x02,0x00] +0x00,0x11,0x48,0xf0,0x01,0xfc,0x02,0x00 + +# VI: image_atomic_add v5, v255, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x48,0xf0,0xff,0x05,0x02,0x00] +0x00,0x11,0x48,0xf0,0xff,0x05,0x02,0x00 + +# VI: image_atomic_add v5, v1, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x48,0xf0,0x01,0x05,0x17,0x00] +0x00,0x11,0x48,0xf0,0x01,0x05,0x17,0x00 + +# VI: image_atomic_add v5, v1, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0x01,0x05,0x02,0x00] +0x00,0x31,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_add v5, 
v1, s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x48,0xf2,0x01,0x05,0x02,0x00] +0x00,0x11,0x48,0xf2,0x01,0x05,0x02,0x00 + +# VI: image_atomic_add v5, v1, s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x4a,0xf0,0x01,0x05,0x02,0x00] +0x00,0x11,0x4a,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_add v5, v1, s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x48,0xf0,0x01,0x05,0x02,0x00] +0x00,0x51,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_add v[5:6], v1, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x48,0xf0,0x01,0x05,0x02,0x00] +0x00,0x13,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x44,0xf0,0x01,0x05,0x02,0x00] +0x00,0x13,0x44,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:8], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00] +0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00 + +#===------------------------------------------------------------------------===# +# Invalid image atomics (incorrect dmask value). +# Disassembler may produce a partially incorrect instruction but should not fail. 
+#===------------------------------------------------------------------------===# + +# VI: image_atomic_add v5, v1, s[8:15] dmask:0x2 unorm ; encoding: [0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00] +0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_add v5, v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00] +0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00] +0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00] +0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00] +0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00] +0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00