Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -87,6 +87,17 @@ using namespace llvm; +// In some cases (e.g. buffer atomic instructions) MatchOperandParserImpl() +// may invoke tryCustomParseOperand() multiple times with the same MCK value. +// That leads to adding the same "default" operand multiple times in a row, +// which is wrong. The workaround adds only the 1st default operand, while for +// the rest "dummy" operands are added. The reason for dummies is that if +// we just skip adding an operand, the parser would get stuck in an endless loop. +// Dummies shall be removed prior to matching & emitting MCInsts. +// +// Comment out this macro to disable the workaround. +#define WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MUTIPLE_DEFAULT_OPERANDS + namespace { struct OptionalOperand; @@ -99,6 +110,9 @@ Immediate, Register, Expression +#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MUTIPLE_DEFAULT_OPERANDS + ,Dummy +#endif } Kind; SMLoc StartLoc, EndLoc; @@ -204,6 +218,12 @@ } } +#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MUTIPLE_DEFAULT_OPERANDS + bool isDummy() const { + return Kind == Dummy; + } +#endif + bool isToken() const override { return Kind == Token; } @@ -440,6 +460,11 @@ case Expression: OS << "'; break; +#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MUTIPLE_DEFAULT_OPERANDS + case Dummy: + OS << ""; + break; +#endif } } @@ -490,6 +515,15 @@ return Op; } +#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MUTIPLE_DEFAULT_OPERANDS + static AMDGPUOperand::Ptr CreateDummy(SMLoc S) { + auto Op = llvm::make_unique(Dummy); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } +#endif + bool isSWaitCnt() const; bool isHwreg() const; bool isSendMsg() const; @@ -545,6 +579,7 @@ bool ParseSectionDirectiveHSARodataReadonlyAgent(); bool AddNextRegisterToList(unsigned& Reg, unsigned& 
RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum); bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth); + void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn); public: enum AMDGPUMatchResultTy { @@ -633,8 +668,9 @@ OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); AMDGPUOperand::Ptr defaultHwreg() const; - - void cvtMubuf(MCInst &Inst, const OperandVector &Operands); + void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } + void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } + void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } AMDGPUOperand::Ptr defaultMubufOffset() const; AMDGPUOperand::Ptr defaultGLC() const; AMDGPUOperand::Ptr defaultSLC() const; @@ -926,6 +962,17 @@ bool MatchingInlineAsm) { MCInst Inst; +#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MUTIPLE_DEFAULT_OPERANDS + // Remove dummies prior to matching. Iterate backwards because vector::erase() + // invalidates all iterators that refer to elements after the erase point. + for (auto I = Operands.rbegin(), E = Operands.rend(); I != E; ) { + auto X = I++; + if (static_cast(X->get())->isDummy()) { + Operands.erase(X.base() -1); + } + } +#endif + switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) { default: break; case Match_Success: @@ -1430,6 +1477,25 @@ } Operands.push_back(AMDGPUOperand::CreateImm(Value, S, ImmTy)); + +#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MUTIPLE_DEFAULT_OPERANDS + if (Value == Default && AddDefault) { + // Reverse lookup in previously added operands (skip just added one) + // for the first non-dummy operand. If it is of the same type, + // then replace just added default operand with dummy. 
+ for (auto I = Operands.rbegin(), E = Operands.rend(); I != E; ++I) { + if (I == Operands.rbegin()) + continue; + if (static_cast(I->get())->isDummy()) + continue; + if (static_cast(I->get())->isImmTy(ImmTy)) { + Operands.pop_back(); + Operands.push_back(AMDGPUOperand::CreateDummy(S)); // invalidates iterators + break; + } + } + } +#endif return MatchOperand_Success; } @@ -2047,9 +2113,11 @@ return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyTFE); } -void AMDGPUAsmParser::cvtMubuf(MCInst &Inst, - const OperandVector &Operands) { +void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, + const OperandVector &Operands, + bool IsAtomic, bool IsAtomicReturn) { OptionalImmIndexMap OptionalIdx; + assert(IsAtomicReturn ? IsAtomic : true); for (unsigned i = 1, e = Operands.size(); i != e; ++i) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); @@ -2076,9 +2144,17 @@ // Handle optional arguments OptionalIdx[Op.getImmTy()] = i; } + + // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns. + if (IsAtomicReturn) { + MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning. + Inst.insert(I, *I); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); + if (!IsAtomic) { // glc is hard-coded. 
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); } Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -2836,7 +2836,7 @@ let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1 in { // No return variants - let glc = 0 in { + let glc = 0, AsmMatchConverter = "cvtMubufAtomic" in { defm _ADDR64 : MUBUFAtomicAddr64_m < op, name#"_addr64", (outs), @@ -2883,13 +2883,14 @@ // Variant that return values let glc = 1, Constraints = "$vdata = $vdata_in", + AsmMatchConverter = "cvtMubufAtomicReturn", DisableEncoding = "$vdata_in" in { defm _RTN_ADDR64 : MUBUFAtomicAddr64_m < op, name#"_rtn_addr64", (outs rc:$vdata), (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc", + name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset glc$slc", [(set vt:$vdata, (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc), vt:$vdata_in))], 1 @@ -2899,7 +2900,7 @@ op, name#"_rtn_offset", (outs rc:$vdata), (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset, offset:$offset, slc:$slc), - name#" $vdata, off, $srsrc, $soffset $offset glc$slc", + name#" $vdata, off, $srsrc, $soffset$offset glc$slc", [(set vt:$vdata, (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc), vt:$vdata_in))], 1 @@ -2910,7 +2911,7 @@ op, name#"_rtn_offen", (outs rc:$vdata), (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#" glc"#"$slc", + name#" $vdata, $vaddr, $srsrc, $soffset offen$offset glc$slc", [], 1 >; } 
@@ -2920,7 +2921,7 @@ op, name#"_rtn_idxen", (outs rc:$vdata), (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#" glc"#"$slc", + name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset glc$slc", [], 1 >; } @@ -2930,7 +2931,7 @@ op, name#"_rtn_bothen", (outs rc:$vdata), (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#" glc"#"$slc", + name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset glc$slc", [], 1 >; } Index: test/MC/AMDGPU/mubuf.s =================================================================== --- test/MC/AMDGPU/mubuf.s +++ test/MC/AMDGPU/mubuf.s @@ -500,4 +500,205 @@ // VI: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] // NOSI: error: instruction not supported on this GPU -// TODO: Atomics +//===----------------------------------------------------------------------===// +// Atomics +//===----------------------------------------------------------------------===// +buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 ; encoding: [0x00,0x80,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// NOVI: error: instruction not supported on this GPU + +buffer_atomic_inc v1, v[2:3], s[8:11], s4 addr64 +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], s4 addr64 ; encoding: [0x00,0x80,0xf0,0xe0,0x02,0x01,0x02,0x04] +// NOVI: error: instruction not supported on this GPU + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 slc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 slc ; encoding: [0x00,0x80,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// NOVI: error: instruction not supported on this GPU + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 ; encoding: [0x04,0x80,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// NOVI: 
error: instruction not supported on this GPU + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 slc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 slc ; encoding: [0x04,0x80,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// NOVI: error: instruction not supported on this GPU + +buffer_atomic_inc v1, off, s[8:11], 56 +// SICI: buffer_atomic_inc v1, off, s[8:11], 56 ; encoding: [0x00,0x00,0xf0,0xe0,0x00,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, off, s[8:11], 56 ; encoding: [0x00,0x00,0x2c,0xe1,0x00,0x01,0x02,0xb8] + +buffer_atomic_inc v1, off, s[8:11], 56 slc +// SICI: buffer_atomic_inc v1, off, s[8:11], 56 slc ; encoding: [0x00,0x00,0xf0,0xe0,0x00,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, off, s[8:11], 56 slc ; encoding: [0x00,0x00,0x2e,0xe1,0x00,0x01,0x02,0xb8] + +buffer_atomic_inc v1, off, s[8:11], s4 slc +// SICI: buffer_atomic_inc v1, off, s[8:11], s4 slc ; encoding: [0x00,0x00,0xf0,0xe0,0x00,0x01,0x42,0x04] +// VI: buffer_atomic_inc v1, off, s[8:11], s4 slc ; encoding: [0x00,0x00,0x2e,0xe1,0x00,0x01,0x02,0x04] + +buffer_atomic_inc v1, off, s[8:11], 56 offset:4 +// SICI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 ; encoding: [0x04,0x00,0xf0,0xe0,0x00,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 ; encoding: [0x04,0x00,0x2c,0xe1,0x00,0x01,0x02,0xb8] + +buffer_atomic_inc v1, off, s[8:11], 56 offset:4 slc +// SICI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 slc ; encoding: [0x04,0x00,0xf0,0xe0,0x00,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 slc ; encoding: [0x04,0x00,0x2e,0xe1,0x00,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 offen +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 offen ; encoding: [0x00,0x10,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen ; encoding: [0x00,0x10,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 offen slc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 offen slc ; encoding: 
[0x00,0x10,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen slc ; encoding: [0x00,0x10,0x2e,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 ; encoding: [0x04,0x10,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 ; encoding: [0x04,0x10,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], s4 offen offset:4 +// SICI: buffer_atomic_inc v1, v2, s[8:11], s4 offen offset:4 ; encoding: [0x04,0x10,0xf0,0xe0,0x02,0x01,0x02,0x04] +// VI: buffer_atomic_inc v1, v2, s[8:11], s4 offen offset:4 ; encoding: [0x04,0x10,0x2c,0xe1,0x02,0x01,0x02,0x04] + +buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 slc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 slc ; encoding: [0x04,0x10,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 slc ; encoding: [0x04,0x10,0x2e,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 idxen +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen ; encoding: [0x00,0x20,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen ; encoding: [0x00,0x20,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 idxen slc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen slc ; encoding: [0x00,0x20,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen slc ; encoding: [0x00,0x20,0x2e,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 ; encoding: [0x04,0x20,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 ; encoding: [0x04,0x20,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 slc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 slc ; encoding: 
[0x04,0x20,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 slc ; encoding: [0x04,0x20,0x2e,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], s4 idxen offset:4 slc +// SICI: buffer_atomic_inc v1, v2, s[8:11], s4 idxen offset:4 slc ; encoding: [0x04,0x20,0xf0,0xe0,0x02,0x01,0x42,0x04] +// VI: buffer_atomic_inc v1, v2, s[8:11], s4 idxen offset:4 slc ; encoding: [0x04,0x20,0x2e,0xe1,0x02,0x01,0x02,0x04] + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen ; encoding: [0x00,0x30,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen ; encoding: [0x00,0x30,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v[2:3], s[8:11], s4 idxen offen +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], s4 idxen offen ; encoding: [0x00,0x30,0xf0,0xe0,0x02,0x01,0x02,0x04] +// VI: buffer_atomic_inc v1, v[2:3], s[8:11], s4 idxen offen ; encoding: [0x00,0x30,0x2c,0xe1,0x02,0x01,0x02,0x04] + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen slc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen slc ; encoding: [0x00,0x30,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen slc ; encoding: [0x00,0x30,0x2e,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 ; encoding: [0x04,0x30,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 ; encoding: [0x04,0x30,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 slc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 slc ; encoding: [0x04,0x30,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x2e,0xe1,0x02,0x01,0x02,0xb8] + 
+buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 glc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 glc ; encoding: [0x00,0xc0,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// NOVI: error: instruction not supported on this GPU + +buffer_atomic_inc v1, v[2:3], s[8:11], s4 addr64 glc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], s4 addr64 glc ; encoding: [0x00,0xc0,0xf0,0xe0,0x02,0x01,0x02,0x04] +// NOVI: error: instruction not supported on this GPU + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 glc slc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 glc slc ; encoding: [0x00,0xc0,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// NOVI: error: instruction not supported on this GPU + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 glc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 glc ; encoding: [0x04,0xc0,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// NOVI: error: instruction not supported on this GPU + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 glc slc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 addr64 offset:4 glc slc ; encoding: [0x04,0xc0,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// NOVI: error: instruction not supported on this GPU + +buffer_atomic_inc v1, off, s[8:11], 56 glc +// SICI: buffer_atomic_inc v1, off, s[8:11], 56 glc ; encoding: [0x00,0x40,0xf0,0xe0,0x00,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, off, s[8:11], 56 glc ; encoding: [0x00,0x40,0x2c,0xe1,0x00,0x01,0x02,0xb8] + +buffer_atomic_inc v1, off, s[8:11], 56 glc slc +// SICI: buffer_atomic_inc v1, off, s[8:11], 56 glc slc ; encoding: [0x00,0x40,0xf0,0xe0,0x00,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, off, s[8:11], 56 glc slc ; encoding: [0x00,0x40,0x2e,0xe1,0x00,0x01,0x02,0xb8] + +buffer_atomic_inc v1, off, s[8:11], s4 glc slc +// SICI: buffer_atomic_inc v1, off, s[8:11], s4 glc slc ; encoding: [0x00,0x40,0xf0,0xe0,0x00,0x01,0x42,0x04] +// VI: buffer_atomic_inc v1, off, s[8:11], s4 glc slc ; encoding: [0x00,0x40,0x2e,0xe1,0x00,0x01,0x02,0x04] + 
+buffer_atomic_inc v1, off, s[8:11], 56 offset:4 glc +// SICI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 glc ; encoding: [0x04,0x40,0xf0,0xe0,0x00,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 glc ; encoding: [0x04,0x40,0x2c,0xe1,0x00,0x01,0x02,0xb8] + +buffer_atomic_inc v1, off, s[8:11], 56 offset:4 glc slc +// SICI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 glc slc ; encoding: [0x04,0x40,0xf0,0xe0,0x00,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 glc slc ; encoding: [0x04,0x40,0x2e,0xe1,0x00,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 offen glc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 offen glc ; encoding: [0x00,0x50,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen glc ; encoding: [0x00,0x50,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 offen glc slc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 offen glc slc ; encoding: [0x00,0x50,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen glc slc ; encoding: [0x00,0x50,0x2e,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 glc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 glc ; encoding: [0x04,0x50,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 glc ; encoding: [0x04,0x50,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], s4 offen offset:4 glc +// SICI: buffer_atomic_inc v1, v2, s[8:11], s4 offen offset:4 glc ; encoding: [0x04,0x50,0xf0,0xe0,0x02,0x01,0x02,0x04] +// VI: buffer_atomic_inc v1, v2, s[8:11], s4 offen offset:4 glc ; encoding: [0x04,0x50,0x2c,0xe1,0x02,0x01,0x02,0x04] + +buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 glc slc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 glc slc ; encoding: [0x04,0x50,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 glc slc ; 
encoding: [0x04,0x50,0x2e,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 idxen glc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen glc ; encoding: [0x00,0x60,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen glc ; encoding: [0x00,0x60,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 idxen glc slc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen glc slc ; encoding: [0x00,0x60,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen glc slc ; encoding: [0x00,0x60,0x2e,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 glc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 glc ; encoding: [0x04,0x60,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 glc ; encoding: [0x04,0x60,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 glc slc +// SICI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 glc slc ; encoding: [0x04,0x60,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 glc slc ; encoding: [0x04,0x60,0x2e,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v2, s[8:11], s4 idxen offset:4 glc slc +// SICI: buffer_atomic_inc v1, v2, s[8:11], s4 idxen offset:4 glc slc ; encoding: [0x04,0x60,0xf0,0xe0,0x02,0x01,0x42,0x04] +// VI: buffer_atomic_inc v1, v2, s[8:11], s4 idxen offset:4 glc slc ; encoding: [0x04,0x60,0x2e,0xe1,0x02,0x01,0x02,0x04] + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen glc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen glc ; encoding: [0x00,0x70,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen glc ; encoding: [0x00,0x70,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v[2:3], s[8:11], s4 idxen offen glc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], s4 idxen offen glc ; encoding: 
[0x00,0x70,0xf0,0xe0,0x02,0x01,0x02,0x04] +// VI: buffer_atomic_inc v1, v[2:3], s[8:11], s4 idxen offen glc ; encoding: [0x00,0x70,0x2c,0xe1,0x02,0x01,0x02,0x04] + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen glc slc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen glc slc ; encoding: [0x00,0x70,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen glc slc ; encoding: [0x00,0x70,0x2e,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc ; encoding: [0x04,0x70,0xf0,0xe0,0x02,0x01,0x02,0xb8] +// VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x2c,0xe1,0x02,0x01,0x02,0xb8] + +buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc slc +// SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc slc ; encoding: [0x04,0x70,0xf0,0xe0,0x02,0x01,0x42,0xb8] +// VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc slc ; encoding: [0x04,0x70,0x2e,0xe1,0x02,0x01,0x02,0xb8] Index: test/MC/AMDGPU/trap.s =================================================================== --- test/MC/AMDGPU/trap.s +++ test/MC/AMDGPU/trap.s @@ -138,3 +138,7 @@ v_readfirstlane_b32 ttmp8, v1 // SICI: v_readfirstlane_b32 ttmp8, v1 ; encoding: [0x01,0x05,0xf0,0x7e] // VI: v_readfirstlane_b32 ttmp8, v1 ; encoding: [0x01,0x05,0xf0,0x7e] + +buffer_atomic_inc v1, off, ttmp[8:11], 56 glc +// SICI: buffer_atomic_inc v1, off, ttmp[8:11], 56 glc ; encoding: [0x00,0x40,0xf0,0xe0,0x00,0x01,0x1e,0xb8] +// VI: buffer_atomic_inc v1, off, ttmp[8:11], 56 glc ; encoding: [0x00,0x40,0x2c,0xe1,0x00,0x01,0x1e,0xb8] Index: test/MC/Disassembler/AMDGPU/mubuf_vi.txt =================================================================== --- test/MC/Disassembler/AMDGPU/mubuf_vi.txt +++ test/MC/Disassembler/AMDGPU/mubuf_vi.txt @@ -234,3 +234,122 @@ # VI: 
buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] 0x00 0x00 0xfc 0xe0 0x00 0x00 0x00 0x00 +# VI: buffer_atomic_inc v1, off, s[8:11], 56 ; encoding: [0x00,0x00,0x2c,0xe1,0x00,0x01,0x02,0xb8] +0x00 0x00 0x2c 0xe1 0x00 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, off, s[8:11], 56 slc ; encoding: [0x00,0x00,0x2e,0xe1,0x00,0x01,0x02,0xb8] +0x00 0x00 0x2e 0xe1 0x00 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, off, s[8:11], s4 slc ; encoding: [0x00,0x00,0x2e,0xe1,0x00,0x01,0x02,0x04] +0x00 0x00 0x2e 0xe1 0x00 0x01 0x02 0x04 + +# VI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 ; encoding: [0x04,0x00,0x2c,0xe1,0x00,0x01,0x02,0xb8] +0x04 0x00 0x2c 0xe1 0x00 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 slc ; encoding: [0x04,0x00,0x2e,0xe1,0x00,0x01,0x02,0xb8] +0x04 0x00 0x2e 0xe1 0x00 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen ; encoding: [0x00,0x10,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x10 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen slc ; encoding: [0x00,0x10,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x10 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 ; encoding: [0x04,0x10,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x10 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], s4 offen offset:4 ; encoding: [0x04,0x10,0x2c,0xe1,0x02,0x01,0x02,0x04] +0x04 0x10 0x2c 0xe1 0x02 0x01 0x02 0x04 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 slc ; encoding: [0x04,0x10,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x10 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen ; encoding: [0x00,0x20,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x20 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen slc ; encoding: [0x00,0x20,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x20 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 ; 
encoding: [0x04,0x20,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x20 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 slc ; encoding: [0x04,0x20,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x20 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], s4 idxen offset:4 slc ; encoding: [0x04,0x20,0x2e,0xe1,0x02,0x01,0x02,0x04] +0x04 0x20 0x2e 0xe1 0x02 0x01 0x02 0x04 + +# VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen ; encoding: [0x00,0x30,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x30 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v[2:3], s[8:11], s4 idxen offen ; encoding: [0x00,0x30,0x2c,0xe1,0x02,0x01,0x02,0x04] +0x00 0x30 0x2c 0xe1 0x02 0x01 0x02 0x04 + +# VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen slc ; encoding: [0x00,0x30,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x30 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 ; encoding: [0x04,0x30,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x30 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x30 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, off, s[8:11], 56 glc ; encoding: [0x00,0x40,0x2c,0xe1,0x00,0x01,0x02,0xb8] +0x00 0x40 0x2c 0xe1 0x00 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, off, s[8:11], 56 glc slc ; encoding: [0x00,0x40,0x2e,0xe1,0x00,0x01,0x02,0xb8] +0x00 0x40 0x2e 0xe1 0x00 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, off, s[8:11], s4 glc slc ; encoding: [0x00,0x40,0x2e,0xe1,0x00,0x01,0x02,0x04] +0x00 0x40 0x2e 0xe1 0x00 0x01 0x02 0x04 + +# VI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 glc ; encoding: [0x04,0x40,0x2c,0xe1,0x00,0x01,0x02,0xb8] +0x04 0x40 0x2c 0xe1 0x00 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, off, s[8:11], 56 offset:4 glc slc ; encoding: [0x04,0x40,0x2e,0xe1,0x00,0x01,0x02,0xb8] +0x04 0x40 0x2e 0xe1 0x00 0x01 0x02 0xb8 + 
+# VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen glc ; encoding: [0x00,0x50,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x50 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen glc slc ; encoding: [0x00,0x50,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x50 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 glc ; encoding: [0x04,0x50,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x50 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], s4 offen offset:4 glc ; encoding: [0x04,0x50,0x2c,0xe1,0x02,0x01,0x02,0x04] +0x04 0x50 0x2c 0xe1 0x02 0x01 0x02 0x04 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 offen offset:4 glc slc ; encoding: [0x04,0x50,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x50 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen glc ; encoding: [0x00,0x60,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x60 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen glc slc ; encoding: [0x00,0x60,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x60 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 glc ; encoding: [0x04,0x60,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x60 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], 56 idxen offset:4 glc slc ; encoding: [0x04,0x60,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x60 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v2, s[8:11], s4 idxen offset:4 glc slc ; encoding: [0x04,0x60,0x2e,0xe1,0x02,0x01,0x02,0x04] +0x04 0x60 0x2e 0xe1 0x02 0x01 0x02 0x04 + +# VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen glc ; encoding: [0x00,0x70,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x70 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v[2:3], s[8:11], s4 idxen offen glc ; encoding: [0x00,0x70,0x2c,0xe1,0x02,0x01,0x02,0x04] +0x00 0x70 0x2c 0xe1 0x02 0x01 0x02 0x04 + +# VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen glc slc ; encoding: 
[0x00,0x70,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x00 0x70 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x2c,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x70 0x2c 0xe1 0x02 0x01 0x02 0xb8 + +# VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc slc ; encoding: [0x04,0x70,0x2e,0xe1,0x02,0x01,0x02,0xb8] +0x04 0x70 0x2e 0xe1 0x02 0x01 0x02 0xb8