Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -87,6 +87,17 @@
 using namespace llvm;
 
+// In some cases (e.g. buffer atomic instructions) MatchOperandParserImpl()
+// may invoke tryCustomParseOperand() multiple times with the same MCK value.
+// That leads to the same "default" operand being added multiple times in a
+// row, which is wrong. The workaround adds only the 1st default operand and
+// adds "dummy" operands for the rest. Dummies are needed because simply
+// skipping the operand would make the parser loop forever.
+// Dummies must be removed prior to matching and emitting MCInsts.
+//
+// Comment out this macro to disable the workaround.
+#define WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MULTIPLE_DEFAULT_OPERANDS
+
 namespace {
 
 struct OptionalOperand;
@@ -99,6 +110,9 @@
     Immediate,
     Register,
     Expression
+#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MULTIPLE_DEFAULT_OPERANDS
+    ,Dummy
+#endif
   } Kind;
 
   SMLoc StartLoc, EndLoc;
@@ -204,6 +218,12 @@
     }
   }
 
+#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MULTIPLE_DEFAULT_OPERANDS
+  bool isDummy() const {
+    return Kind == Dummy;
+  }
+#endif
+
   bool isToken() const override {
     return Kind == Token;
   }
@@ -440,6 +460,11 @@
   case Expression:
     OS << "<expr " << *Expr << '>';
     break;
+#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MULTIPLE_DEFAULT_OPERANDS
+  case Dummy:
+    OS << "<dummy>";
+    break;
+#endif
   }
 }
@@ -490,6 +515,15 @@
     return Op;
   }
 
+#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MULTIPLE_DEFAULT_OPERANDS
+  static AMDGPUOperand::Ptr CreateDummy(SMLoc S) {
+    auto Op = llvm::make_unique<AMDGPUOperand>(Dummy);
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+#endif
+
   bool isSWaitCnt() const;
   bool isHwreg() const;
   bool isSendMsg() const;
@@ -926,6 +960,17 @@
                                               bool MatchingInlineAsm) {
   MCInst Inst;
 
+#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MULTIPLE_DEFAULT_OPERANDS
+  // Remove dummies prior to matching. Iterate backwards because
+  // vector::erase() invalidates iterators at and after the erase point.
+  for (auto I = Operands.rbegin(), E = Operands.rend(); I != E; ) {
+    auto X = I++;
+    if (static_cast<AMDGPUOperand *>(X->get())->isDummy()) {
+      Operands.erase(X.base() - 1);
+    }
+  }
+#endif
+
   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
     default: break;
     case Match_Success:
@@ -1430,6 +1475,23 @@
   }
 
   Operands.push_back(AMDGPUOperand::CreateImm(Value, S, ImmTy));
+
+#ifdef WORKAROUND_USE_DUMMY_OPERANDS_INSTEAD_MULTIPLE_DEFAULT_OPERANDS
+  if (Value == Default && AddDefault) {
+    // Reverse lookup in the previously added operands for the first non-dummy
+    // operand. If it is of the same type, replace the just-added default
+    // operand with a dummy.
+    for (auto I = Operands.rbegin(), E = Operands.rend(); I != E; ++I) {
+      if (static_cast<AMDGPUOperand *>(I->get())->isDummy())
+        continue;
+      if (static_cast<AMDGPUOperand *>(I->get())->isImmTy(ImmTy)) {
+        Operands.pop_back();
+        Operands.push_back(AMDGPUOperand::CreateDummy(S)); // invalidates iterators
+        break;
+      }
+    }
+  }
+#endif
   return MatchOperand_Success;
 }
@@ -2050,6 +2112,7 @@
 void AMDGPUAsmParser::cvtMubuf(MCInst &Inst, const OperandVector &Operands) {
   OptionalImmIndexMap OptionalIdx;
+  bool IsBufferAtomicRtn = false;
 
   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
@@ -2069,6 +2132,10 @@
     // Handle tokens like 'offen' which are sometimes hard-coded into the
     // asm string. There are no MCInst operands for these.
     if (Op.isToken()) {
+      // Detect MUBUF_Atomic RTN insns; only those have a hard-coded 'glc'.
+      if (Op.getToken() == "glc") {
+        IsBufferAtomicRtn = true;
+      }
       continue;
     }
     assert(Op.isImm());
@@ -2076,6 +2143,12 @@
     // Handle optional arguments
     OptionalIdx[Op.getImmTy()] = i;
   }
+
+  // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
+  if (IsBufferAtomicRtn) {
+    MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
+    Inst.insert(I, *I);
+  }
 
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -2894,7 +2894,7 @@
     op, name#"_rtn_addr64", (outs rc:$vdata),
     (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
          offset:$offset, slc:$slc),
-    name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
+    name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset glc$slc",
     [(set vt:$vdata,
      (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                 i16:$offset, i1:$slc), vt:$vdata_in))], 1
@@ -2904,7 +2904,7 @@
     op, name#"_rtn_offset", (outs rc:$vdata),
     (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
          offset:$offset, slc:$slc),
-    name#" $vdata, off, $srsrc, $soffset $offset glc$slc",
+    name#" $vdata, off, $srsrc, $soffset$offset glc$slc",
     [(set vt:$vdata,
      (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
                                 i1:$slc), vt:$vdata_in))], 1
@@ -2915,7 +2915,7 @@
     op, name#"_rtn_offen", (outs rc:$vdata),
     (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
          offset:$offset, slc:$slc),
-    name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#" glc"#"$slc",
+    name#" $vdata, $vaddr, $srsrc, $soffset offen$offset glc$slc",
     [], 1
   >;
 }
@@ -2925,7 +2925,7 @@
     op, name#"_rtn_idxen", (outs rc:$vdata),
     (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
          offset:$offset, slc:$slc),
-    name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#" glc"#"$slc",
+    name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset glc$slc",
     [], 1
   >;
 }
@@ -2935,7 +2935,7 @@
     op, name#"_rtn_bothen", (outs rc:$vdata),
     (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
          offset:$offset, slc:$slc),
-    name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#" glc"#"$slc",
+    name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset glc$slc",
     [], 1
   >;
 }
Index: test/MC/AMDGPU/trap.s
===================================================================
--- test/MC/AMDGPU/trap.s
+++ test/MC/AMDGPU/trap.s
@@ -138,3 +138,7 @@
 v_readfirstlane_b32 ttmp8, v1
 // SICI: v_readfirstlane_b32 ttmp8, v1 ; encoding: [0x01,0x05,0xf0,0x7e]
 // VI: v_readfirstlane_b32 ttmp8, v1 ; encoding: [0x01,0x05,0xf0,0x7e]
+
+buffer_atomic_inc v1, off, ttmp[8:11], 56 glc
+// SICI: buffer_atomic_inc v1, off, ttmp[8:11], 56 glc ; encoding: [0x00,0x40,0xf0,0xe0,0x00,0x01,0x1e,0xb8]
+// VI: buffer_atomic_inc v1, off, ttmp[8:11], 56 glc ; encoding: [0x00,0x40,0x2c,0xe1,0x00,0x01,0x1e,0xb8]
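
Note (not part of the patch): the MatchAndEmitInstruction() hunk above strips
the workaround's dummy operands by walking the operand vector in reverse and
erasing through reverse iterators. The standalone C++ sketch below shows the
same net effect using the standard erase-remove idiom instead; the Operand
struct and its IsDummy flag are placeholders standing in for AMDGPUOperand and
its isDummy() predicate, not actual LLVM types.

// Illustrative sketch only -- not part of the patch.
#include <algorithm>
#include <cassert>
#include <memory>
#include <vector>

struct Operand {
  bool IsDummy = false; // stands in for AMDGPUOperand::isDummy()
};

int main() {
  std::vector<std::unique_ptr<Operand>> Operands;
  Operands.push_back(std::make_unique<Operand>()); // real operand
  Operands.push_back(std::make_unique<Operand>()); // dummy operand
  Operands.back()->IsDummy = true;
  Operands.push_back(std::make_unique<Operand>()); // real operand

  // Drop every dummy before matching: remove_if compacts the kept elements to
  // the front, erase() then trims the moved-from tail.
  Operands.erase(std::remove_if(Operands.begin(), Operands.end(),
                                [](const std::unique_ptr<Operand> &Op) {
                                  return Op->IsDummy;
                                }),
                 Operands.end());

  assert(Operands.size() == 2);
  return 0;
}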