Index: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -114,10 +114,7 @@ ImmTyInstOffset, ImmTyOffset0, ImmTyOffset1, - ImmTyDLC, - ImmTySCCB, - ImmTyGLC, - ImmTySLC, + ImmTyCPol, ImmTySWZ, ImmTyTFE, ImmTyD16, @@ -340,13 +337,10 @@ bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } bool isGDS() const { return isImmTy(ImmTyGDS); } bool isLDS() const { return isImmTy(ImmTyLDS); } - bool isDLC() const { return isImmTy(ImmTyDLC); } - bool isSCCB() const { return isImmTy(ImmTySCCB); } - bool isGLC() const { return isImmTy(ImmTyGLC); } - // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced - // value of the GLC operand. - bool isGLC_1() const { return isImmTy(ImmTyGLC); } - bool isSLC() const { return isImmTy(ImmTySLC); } + bool isCPol() const { return isImmTy(ImmTyCPol); } + // "CPol_GLC1" is a MatchClass of the CPOL_GLC1 operand with the default and + // forced value of the GLC operand. 
+ bool isCPol_GLC1() const { return isImmTy(ImmTyCPol); } bool isSWZ() const { return isImmTy(ImmTySWZ); } bool isTFE() const { return isImmTy(ImmTyTFE); } bool isD16() const { return isImmTy(ImmTyD16); } @@ -998,10 +992,7 @@ case ImmTyInstOffset: OS << "InstOffset"; break; case ImmTyOffset0: OS << "Offset0"; break; case ImmTyOffset1: OS << "Offset1"; break; - case ImmTyDLC: OS << "DLC"; break; - case ImmTySCCB: OS << "SCCB"; break; - case ImmTyGLC: OS << "GLC"; break; - case ImmTySLC: OS << "SLC"; break; + case ImmTyCPol: OS << "CPol"; break; case ImmTySWZ: OS << "SWZ"; break; case ImmTyTFE: OS << "TFE"; break; case ImmTyD16: OS << "D16"; break; @@ -1267,7 +1258,7 @@ bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth); void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, - bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); + bool IsAtomic, bool IsLds = false); void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, bool IsGdsHardcoded); @@ -1462,6 +1453,7 @@ OperandMatchResultTy parseNamedBit(StringRef Name, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); + OperandMatchResultTy parseCPol(OperandVector &Operands); OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, StringRef &Value, SMLoc &StringLoc); @@ -1620,17 +1612,13 @@ OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); int64_t parseGPRIdxMacro(); - void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } - void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } - void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } - void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } + void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 
+ void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } + void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); - AMDGPUOperand::Ptr defaultDLC() const; - AMDGPUOperand::Ptr defaultSCCB() const; - AMDGPUOperand::Ptr defaultGLC() const; - AMDGPUOperand::Ptr defaultGLC_1() const; - AMDGPUOperand::Ptr defaultSLC() const; + AMDGPUOperand::Ptr defaultCPol() const; + AMDGPUOperand::Ptr defaultCPol_GLC1() const; AMDGPUOperand::Ptr defaultSMRDOffset8() const; AMDGPUOperand::Ptr defaultSMEMOffset() const; @@ -1652,6 +1640,8 @@ void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); + void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); + bool parseDimId(unsigned &Encoding); OperandMatchResultTy parseDim(OperandVector &Operands); OperandMatchResultTy parseDPP8(OperandVector &Operands); @@ -4072,15 +4062,29 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, const SMLoc &IDLoc) { - int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::glc1); - if (GLCPos != -1) { - // -1 is set by GLC_1 default operand. In all cases "glc" must be present - // in the asm string, and the default value means it is not present. 
- if (Inst.getOperand(GLCPos).getImm() == -1) { + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) + return true; + + int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::cpol); + if (CPolPos == -1) + return true; + + if (TSFlags & SIInstrFlags::IsAtomicRet) { + if (!(TSFlags & SIInstrFlags::MIMG) && + !(Inst.getOperand(CPolPos).getImm() & CPol::GLC)) { Error(IDLoc, "instruction must use glc"); return false; } + } else { + if (Inst.getOperand(CPolPos).getImm() & CPol::GLC) { + SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); + StringRef CStr(S.getPointer()); + S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); + Error(S, "instruction must not use glc"); + return false; + } } return true; @@ -5375,17 +5379,60 @@ Error(S, "a16 modifier is not supported on this GPU"); return MatchOperand_ParseFail; } - if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) { + + if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) + ImmTy = AMDGPUOperand::ImmTyR128A16; + + Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); + return MatchOperand_Success; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseCPol(OperandVector &Operands) { + unsigned CPolOn = 0; + unsigned CPolOff = 0; + SMLoc S = getLoc(); + + if (trySkipId("glc")) + CPolOn = AMDGPU::CPol::GLC; + else if (trySkipId("noglc")) + CPolOff = AMDGPU::CPol::GLC; + else if (trySkipId("slc")) + CPolOn = AMDGPU::CPol::SLC; + else if (trySkipId("noslc")) + CPolOff = AMDGPU::CPol::SLC; + else if (trySkipId("dlc")) + CPolOn = AMDGPU::CPol::DLC; + else if (trySkipId("nodlc")) + CPolOff = AMDGPU::CPol::DLC; + else if (trySkipId("scc")) + CPolOn = AMDGPU::CPol::SCC; + else if (trySkipId("noscc")) + CPolOff = AMDGPU::CPol::SCC; + else + return MatchOperand_NoMatch; + + if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { Error(S, "dlc modifier is not supported on this GPU"); return 
MatchOperand_ParseFail; } - if (!isGFX90A() && ImmTy == AMDGPUOperand::ImmTySCCB) + + if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { + Error(S, "scc modifier is not supported on this GPU"); return MatchOperand_ParseFail; + } - if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) - ImmTy = AMDGPUOperand::ImmTyR128A16; + for (unsigned I = 1; I != Operands.size(); ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (Op.isCPol()) { + Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); + return MatchOperand_Success; + } + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, + AMDGPUOperand::ImmTyCPol)); - Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); return MatchOperand_Success; } @@ -6806,36 +6853,43 @@ // mubuf //===----------------------------------------------------------------------===// -AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { - return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); -} - -AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSCCB() const { - return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySCCB); -} - -AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { - return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); } -AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { - return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); -} - -AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { - return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol_GLC1() const { + return AMDGPUOperand::CreateImm(this, CPol::GLC, SMLoc(), + AMDGPUOperand::ImmTyCPol); } void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, - const OperandVector &Operands, - bool IsAtomic, - bool IsAtomicReturn, - bool IsLds) { + const 
OperandVector &Operands, + bool IsAtomic, + bool IsLds) { bool IsLdsOpcode = IsLds; bool HasLdsModifier = false; OptionalImmIndexMap OptionalIdx; - assert(IsAtomicReturn ? IsAtomic : true); unsigned FirstOperandIdx = 1; + bool IsAtomicReturn = false; + + if (IsAtomic) { + for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + if (!Op.isCPol()) + continue; + IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; + break; + } + + if (!IsAtomicReturn) { + int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); + if (NewOpc != -1) + Inst.setOpcode(NewOpc); + } + + IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & + SIInstrFlags::IsAtomicRet; + } for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); @@ -6886,19 +6940,12 @@ } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); - if (!IsAtomic || IsAtomicReturn) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC, - IsAtomicReturn ? 
-1 : 0); - } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); if (!IsLdsOpcode) { // tfe is not legal with lds opcodes addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); } - - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB); } void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { @@ -6933,12 +6980,9 @@ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB); } //===----------------------------------------------------------------------===// @@ -6980,16 +7024,7 @@ if (IsGFX10Plus) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); - - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::sccb) != -1) - addOptionalImmOperand(Inst, Operands, OptionalIdx, - AMDGPUOperand::ImmTySCCB); - - if (IsGFX10Plus) - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); - - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); - 
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); @@ -7005,6 +7040,61 @@ cvtMIMG(Inst, Operands, true); } +void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { + OptionalImmIndexMap OptionalIdx; + bool IsAtomicReturn = false; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + if (!Op.isCPol()) + continue; + IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; + break; + } + + if (!IsAtomicReturn) { + int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); + if (NewOpc != -1) + Inst.setOpcode(NewOpc); + } + + IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & + SIInstrFlags::IsAtomicRet; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + if (IsAtomicReturn && i == 1) + Op.addRegOperands(Inst, 1); + continue; + } + + // Handle the case where soffset is an immediate + if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { + Op.addImmOperands(Inst, 1); + continue; + } + + // Handle tokens like 'offen' which are sometimes hard-coded into the + // asm string. There are no MCInst operands for these. 
+ if (Op.isToken()) { + continue; + } + assert(Op.isImm()); + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + if ((int)Inst.getNumOperands() <= + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); +} + void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, const OperandVector &Operands) { for (unsigned I = 1; I < Operands.size(); ++I) { @@ -7101,10 +7191,7 @@ {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, - {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, - {"scc", AMDGPUOperand::ImmTySCCB, true, nullptr}, - {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, - {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, + {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, @@ -7187,6 +7274,8 @@ Op.ConvertResult); } else if (Op.Type == AMDGPUOperand::ImmTyDim) { res = parseDim(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { + res = parseCPol(Operands); } else { res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); } @@ -8052,8 +8141,6 @@ return Operand.isGDS() ? Match_Success : Match_InvalidOperand; case MCK_lds: return Operand.isLDS() ? Match_Success : Match_InvalidOperand; - case MCK_glc: - return Operand.isGLC() ? Match_Success : Match_InvalidOperand; case MCK_idxen: return Operand.isIdxen() ? 
Match_Success : Match_InvalidOperand; case MCK_offen: Index: llvm/lib/Target/AMDGPU/BUFInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/BUFInstructions.td +++ llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -124,20 +124,17 @@ let TSFlags = ps.TSFlags; bits<12> offset; - bits<1> glc; - bits<1> dlc; + bits<5> cpol; bits<7> format; bits<8> vaddr; bits<10> vdata; bits<7> srsrc; - bits<1> slc; bits<1> tfe; bits<8> soffset; bits<4> dfmt = format{3-0}; bits<3> nfmt = format{6-4}; - bits<1> sccb; // GFX90A+ only: instruction uses AccVGPR for data // Bit superceedes tfe. bits<1> acc = !if(ps.has_vdata, vdata{9}, 0); @@ -150,17 +147,17 @@ RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret; dag InsNoData = !if(!empty(vaddrList), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb), + offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz), (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb) + offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz) ); dag InsData = !if(!empty(vaddrList), (ins vdata_op:$vdata, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb), + SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol, + TFE:$tfe, SWZ:$swz), (ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb) + SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol, + TFE:$tfe, SWZ:$swz) ); dag ret = !if(!empty(vdataList), InsNoData, InsData); } @@ -212,7 +209,7 @@ : MTBUF_Pseudo<opName, (outs getLdStRegisterOperand<vdataClass>.ret:$vdata), getMTBUFIns<addrKindCopy>.ret, - " $vdata, " # 
getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb", + " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz", pattern>, MTBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; @@ -228,13 +225,13 @@ def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems, [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format, - i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>, + CPol:$cpol, i1:$tfe, i1:$swz)))]>, MTBUFAddr64Table<0, NAME>; def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems, [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, - i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>, + i8:$format, CPol:$cpol, i1:$tfe, i1:$swz)))]>, MTBUFAddr64Table<1, NAME>; def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>; @@ -260,7 +257,7 @@ : MTBUF_Pseudo<opName, (outs), getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret, - " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb", + " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz", pattern>, MTBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; @@ -275,14 +272,14 @@ def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems, [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i8:$format, i1:$glc, - i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, + i16:$offset, i8:$format, CPol:$cpol, + i1:$tfe, i1:$swz))]>, MTBUFAddr64Table<0, NAME>; def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems, [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i8:$format, i1:$glc, - i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, + i16:$offset, i8:$format, CPol:$cpol, + i1:$tfe, i1:$swz))]>, MTBUFAddr64Table<1, NAME>; def _OFFEN : MTBUF_Store_Pseudo <opName, 
BUFAddrKind.OffEn, vdataClass, elems>; @@ -369,16 +366,13 @@ let UseNamedOperandTable = ps.UseNamedOperandTable; bits<12> offset; - bits<1> glc; - bits<1> dlc; + bits<5> cpol; bits<8> vaddr; bits<10> vdata; bits<7> srsrc; - bits<1> slc; bits<1> tfe; bits<8> soffset; - bits<1> sccb; // GFX90A+ only: instruction uses AccVGPR for data // Bit superceedes tfe. bits<1> acc = !if(ps.has_vdata, vdata{9}, 0); @@ -422,19 +416,19 @@ RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret; dag InsNoData = !if(!empty(vaddrList), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, GLC:$glc, SLC:$slc), + offset:$offset, CPol:$cpol), (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, GLC:$glc, SLC:$slc) + offset:$offset, CPol:$cpol) ); dag InsData = !if(!empty(vaddrList), (ins vdata_op:$vdata, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc), + SCSrc_b32:$soffset, offset:$offset, CPol:$cpol), (ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc) + SCSrc_b32:$soffset, offset:$offset, CPol:$cpol) ); dag ret = !con( !if(!empty(vdataList), InsNoData, InsData), - !if(isLds, (ins DLC:$dlc, SWZ:$swz, SCCB_0:$sccb), (ins TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb)) + !if(isLds, (ins SWZ:$swz), (ins TFE:$tfe, SWZ:$swz)) ); } @@ -507,8 +501,8 @@ (outs vdata_op:$vdata), !con(getMUBUFIns<addrKindCopy, [], isLds>.ret, !if(HasTiedDest, (ins vdata_op:$vdata_in), (ins))), - " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" # - !if(isLds, " lds", "$tfe") # "$dlc$swz$sccb", + " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol" # + !if(isLds, " lds", "$tfe") # "$swz", pattern>, MUBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # !if(isLds, "_lds", "") # @@ -526,15 +520,15 @@ } class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat < - (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, 
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) + (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))), + (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz)) >; class MUBUF_Addr64_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat < - (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) + (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))), + (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz)) >; multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> { @@ -587,7 +581,7 @@ : MUBUF_Pseudo<opName, (outs), getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret]>.ret, - " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb", + " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz", pattern>, MUBUF_SetupAddr<addrKindCopy> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; @@ -605,12 +599,12 @@ def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt, [(st legal_store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, + i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>, MUBUFAddr64Table<0, NAME>; def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, legal_store_vt, [(st legal_store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>, + i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>, 
MUBUFAddr64Table<1, NAME>; def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt>; @@ -628,8 +622,8 @@ class MUBUF_Pseudo_Store_Lds<string opName> : MUBUF_Pseudo<opName, (outs), - (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc, SWZ:$swz, SCCB_0:$sccb), - " $srsrc, $soffset$offset lds$glc$slc$swz$sccb"> { + (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, SWZ:$swz), + " $srsrc, $soffset$offset lds$cpol$swz"> { let mayLoad = 0; let mayStore = 1; let maybeAtomic = 1; @@ -650,15 +644,15 @@ dag ret = !if(vdata_in, !if(!empty(vaddrList), (ins vdata_op:$vdata_in, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc), + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol), (ins vdata_op:$vdata_in, vaddrClass:$vaddr, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc) + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol) ), !if(!empty(vaddrList), (ins vdata_op:$vdata, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc), + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol), (ins vdata_op:$vdata, vaddrClass:$vaddr, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc) + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol) )); } @@ -701,6 +695,7 @@ let has_tfe = 0; let has_sccb = 0; let maybeAtomic = 1; + let AsmMatchConverter = "cvtMubufAtomic"; } class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind, @@ -712,7 +707,7 @@ : MUBUF_Atomic_Pseudo<opName, addrKindCopy, (outs), getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 0>.ret, - " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$slc", + " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol", pattern>, AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> { let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret; @@ -720,7 +715,6 @@ let dlc_value = 0; let sccb_value = 0; let IsAtomicNoRet = 1; - let 
AsmMatchConverter = "cvtMubufAtomic"; } class MUBUF_AtomicRet_Pseudo<string opName, int addrKind, @@ -733,7 +727,7 @@ : MUBUF_Atomic_Pseudo<opName, addrKindCopy, (outs vdata_op:$vdata), getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 1>.ret, - " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc1$slc", + " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol", pattern>, AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> { let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret; @@ -743,7 +737,6 @@ let IsAtomicRet = 1; let Constraints = "$vdata = $vdata_in"; let DisableEncoding = "$vdata_in"; - let AsmMatchConverter = "cvtMubufAtomicReturn"; } multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName, @@ -778,15 +771,15 @@ let FPAtomic = isFP in def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass, [(set vdataType:$vdata, - (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc), + (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, CPol_GLC1:$cpol), vdataType:$vdata_in))]>, MUBUFAddr64Table <0, NAME # "_RTN">; let FPAtomic = isFP in def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, [(set vdataType:$vdata, - (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc), - vdataType:$vdata_in))]>, + (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, + CPol_GLC1:$cpol), vdataType:$vdata_in))]>, MUBUFAddr64Table <1, NAME # "_RTN">; let FPAtomic = isFP in @@ -1222,24 +1215,21 @@ (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$auxiliary, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (vt (st v4i32:$rsrc, 0, i32:$voffset, 
i32:$soffset, timm:$offset, timm:$auxiliary, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$auxiliary, timm)), (!cast<MUBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1248,8 +1238,7 @@ (!cast<MUBUF_Pseudo>(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; } @@ -1308,26 +1297,21 @@ (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$auxiliary, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$auxiliary, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_glc $auxiliary), - (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), 
(extract_sccb $auxiliary)) + (as_i16timm $offset), (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$auxiliary, timm), (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_glc $auxiliary), - (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (as_i16timm $offset), (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1336,9 +1320,8 @@ (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact) getVregSrcForVT<vt>.ret:$vdata, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary), - (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_cpol $auxiliary), + 0, (extract_swz $auxiliary)) >; } @@ -1567,21 +1550,21 @@ class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt, PatFrag constant_ld> : GCNPat < (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) + i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz) >; multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET, ValueType vt, PatFrag atomic_ld> { def : GCNPat < (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$slc))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0, 0) + i16:$offset, CPol:$cpol))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $cpol, 0) >; def : GCNPat < (vt 
(atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))), - (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0, 0) + (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0) >; } @@ -1602,8 +1585,8 @@ def : GCNPat < (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) + i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))), + (Instr_OFFSET $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz) >; } @@ -1626,12 +1609,12 @@ def : GCNPat < (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset))), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; def : GCNPat < (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0) + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0) >; } @@ -1641,12 +1624,12 @@ ValueType vt, PatFrag ld_frag> { def : GCNPat < (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in) >; def : GCNPat < (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in) >; } @@ -1692,13 +1675,13 @@ // Store follows atomic op convention so address is first def : GCNPat < (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$slc), vt:$val), - (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0, 0) + i16:$offset, CPol:$cpol), vt:$val), + (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $cpol, 0) >; def : GCNPat < (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, 
i16:$offset), vt:$val), - (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0, 0) + (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0) >; } let SubtargetPredicate = isGFX6GFX7 in { @@ -1712,8 +1695,8 @@ def : GCNPat < (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)), - (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) + i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz)), + (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz) >; } @@ -1727,13 +1710,13 @@ def : GCNPat < (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)), - (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0) + (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), - (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0) + (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0) >; } @@ -1779,8 +1762,7 @@ timm:$format, timm:$auxiliary, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1788,8 +1770,7 @@ timm:$format, timm:$auxiliary, timm)), (!cast<MTBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1797,8 +1778,7 @@ timm:$format, timm:$auxiliary, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFEN) 
VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1808,8 +1788,7 @@ (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; } @@ -1847,8 +1826,7 @@ timm:$format, timm:$auxiliary, 0), (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1856,8 +1834,7 @@ timm:$format, timm:$auxiliary, timm), (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1865,8 +1842,7 @@ timm:$format, timm:$auxiliary, 0), (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ 
-1876,8 +1852,7 @@ getVregSrcForVT<vt>.ret:$vdata, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary), (extract_sccb $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; } @@ -1919,21 +1894,21 @@ let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{16} = ps.lds; let Inst{24-18} = op; let Inst{31-26} = 0x38; let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> : Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> { - let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value); + let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value); let Inst{25} = op{7}; } @@ -1985,16 +1960,33 @@ } multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> { def _BOTHEN_RTN_gfx10 : - MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>; + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>, + AtomicNoRet<NAME # "_BOTHEN_gfx10", 1>; def _IDXEN_RTN_gfx10 : - MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>; + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>, + AtomicNoRet<NAME # "_IDXEN_gfx10", 1>; def _OFFEN_RTN_gfx10 : - MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>; + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>, + AtomicNoRet<NAME # "_OFFEN_gfx10", 1>; def 
_OFFSET_RTN_gfx10 : - MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>; + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>, + AtomicNoRet<NAME # "_OFFSET_gfx10", 1>; } multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> : - MUBUF_Real_AllAddr_gfx10<op>, MUBUF_Real_Atomics_RTN_gfx10<op>; + MUBUF_Real_Atomics_RTN_gfx10<op> { + def _BOTHEN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, + AtomicNoRet<NAME # "_BOTHEN_gfx10", 0>; + def _IDXEN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, + AtomicNoRet<NAME # "_IDXEN_gfx10", 0>; + def _OFFEN_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, + AtomicNoRet<NAME # "_OFFEN_gfx10", 0>; + def _OFFSET_gfx10 : + MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, + AtomicNoRet<NAME # "_OFFSET_gfx10", 0>; + } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>; @@ -2074,18 +2066,38 @@ def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">; } - multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> : - MUBUF_Real_AllAddr_gfx6_gfx7<op> { + multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> { + def _ADDR64_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>, + AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 0>; + def _BOTHEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, + AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 0>; + def _IDXEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, + AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 0>; + def _OFFEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, + AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 0>; + def _OFFSET_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, + AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 0>; + def 
_ADDR64_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>; + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>, + AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 1>; def _BOTHEN_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>; + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>, + AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 1>; def _IDXEN_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>; + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>, + AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 1>; def _OFFEN_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>; + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>, + AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 1>; def _OFFSET_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>; + MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>, + AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 1>; } } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" @@ -2174,13 +2186,13 @@ let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{18-16} = op; let Inst{31-26} = 0x3a; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } @@ -2191,7 +2203,7 @@ class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> : Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> { - let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value); + let Inst{15} = !if(ps.has_dlc, 
cpol{CPolBit.DLC}, ps.dlc_value); let Inst{25-19} = format; let Inst{53} = op{3}; } @@ -2263,15 +2275,17 @@ class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc> : MUBUF_Real<ps>, Enc64, - SIMCInstr<ps.PseudoInstr, Enc> { + SIMCInstr<ps.PseudoInstr, Enc>, + AtomicNoRet<!subst("_RTN","",NAME), !if(ps.IsAtomicNoRet, 0, + !if(ps.IsAtomicRet, 1, ?))> { let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); - let Inst{15} = !if(ps.has_sccb, sccb, ps.sccb_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); + let Inst{15} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value); let Inst{16} = ps.lds; - let Inst{17} = !if(ps.has_slc, slc, ?); + let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); @@ -2302,7 +2316,7 @@ def _gfx90a : MUBUF_Real_gfx90a<op, ps>; } -multiclass MUBUF_Real_AllAddr_vi<bits<7> op> { +multiclass MUBUF_Real_AllAddr_vi<bits<7> op, bit isAtomic = 0, bit isAtomicRet = 0> { defm _OFFSET : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; defm _OFFEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; defm _IDXEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; @@ -2358,9 +2372,9 @@ let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{16} = ps.lds; - let Inst{17} = !if(ps.has_slc, slc, ?); + let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); @@ -2378,7 +2392,7 @@ } multiclass MUBUF_Real_Atomic_vi<bits<7> op> : - MUBUF_Real_AllAddr_vi<op> { + MUBUF_Real_AllAddr_vi<op, 1, 0> { defm _OFFSET_RTN : 
MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>; defm _OFFEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>; defm _IDXEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>; @@ -2500,7 +2514,7 @@ let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{18-15} = op; let Inst{22-19} = dfmt; let Inst{25-23} = nfmt; @@ -2508,8 +2522,9 @@ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{53} = !if(ps.has_sccb, sccb, ps.sccb_value); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{53} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); + let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } @@ -2552,7 +2567,7 @@ let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{18-15} = op; let Inst{22-19} = dfmt; let Inst{25-23} = nfmt; @@ -2560,7 +2575,7 @@ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } Index: llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -541,9 +541,20 @@ } if 
(Res && (MCII->get(MI.getOpcode()).TSFlags & - (SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) && - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) { - insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1); + (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) { + int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::cpol); + if (CPolPos != -1) { + unsigned CPol = + (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ? + AMDGPU::CPol::GLC : 0; + if (MI.getNumOperands() <= (unsigned)CPolPos) { + insertNamedMCOperand(MI, MCOperand::createImm(CPol), + AMDGPU::OpName::cpol); + } else if (CPol) { + MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol); + } + } } if (Res && (MCII->get(MI.getOpcode()).TSFlags & @@ -559,20 +570,6 @@ } } - if (Res && (MCII->get(MI.getOpcode()).TSFlags & - (SIInstrFlags::FLAT | - SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) { - if (!isGFX10()) { - int DLCOpIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dlc); - if (DLCOpIdx != -1) { - auto DLCIter = MI.begin(); - std::advance(DLCIter, DLCOpIdx); - MI.insert(DLCIter, MCOperand::createImm(0)); - } - } - } - if (Res && (MCII->get(MI.getOpcode()).TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) { int SWZOpIdx = Index: llvm/lib/Target/AMDGPU/FLATInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/FLATInstructions.td +++ llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -94,12 +94,7 @@ bits<7> saddr; bits<10> vdst; - bits<1> slc; - bits<1> glc; - bits<1> dlc; - - // Only valid on gfx90a+ - bits<1> sccb; + bits<5> cpol; // Only valid on gfx9 bits<1> lds = 0; // XXX - What does this actually do? 
@@ -122,8 +117,8 @@ let Inst{13} = lds; let Inst{15-14} = seg; - let Inst{16} = !if(ps.has_glc, glc, ps.glcValue); - let Inst{17} = slc; + let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); + let Inst{17} = cpol{CPolBit.SLC}; let Inst{24-18} = op; let Inst{31-26} = 0x37; // Encoding. let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); @@ -156,9 +151,9 @@ (ins VReg_64:$vaddr)), (ins flat_offset:$offset)), // FIXME: Operands with default values do not work with following non-optional operands. - !if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, SCCB:$sccb, vdata_op:$vdst_in), - (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))), - " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc$sccb"> { + !if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in), + (ins CPol_0:$cpol))), + " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { let has_data = 0; let mayLoad = 1; let has_saddr = HasSaddr; @@ -178,8 +173,8 @@ !if(EnableSaddr, (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr), (ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)), - (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb)), - " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc$sccb"> { + (ins flat_offset:$offset, CPol_0:$cpol)), + " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -203,9 +198,9 @@ opName, (outs regClass:$vdst), !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)), - (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb), + (ins flat_offset:$offset, CPol_0:$cpol), !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))), - " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> { + " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { let 
is_flat_global = 1; let has_data = 0; let mayLoad = 1; @@ -241,8 +236,8 @@ opName, (outs), !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)), - (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc, SCCB_0:$sccb)), - " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> { + (ins flat_offset:$offset, CPol:$cpol)), + " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { let is_flat_global = 1; let mayLoad = 0; let mayStore = 1; @@ -280,9 +275,9 @@ !if(EnableVaddr, (ins VGPR_32:$vaddr, flat_offset:$offset), (ins flat_offset:$offset))), - !if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, SCCB:$sccb, getLdStRegisterOperand<regClass>.ret:$vdst_in), - (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))), - " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> { + !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in), + (ins CPol_0:$cpol))), + " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { let has_data = 0; let mayLoad = 1; let has_saddr = 1; @@ -301,11 +296,11 @@ opName, (outs), !if(EnableSaddr, - (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb), + (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), !if(EnableVaddr, - (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb), - (ins vdata_op:$vdata, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))), - " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> { + (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol), + (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol))), + " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", 
"off")#"$offset$cpol"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -382,8 +377,8 @@ RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { def "" : FLAT_AtomicNoRet_Pseudo <opName, (outs), - (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb), - " $vaddr, $vdata$offset$slc$sccb">, + (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), + " $vaddr, $vdata$offset$cpol">, GlobalSaddrTable<0, opName>, AtomicNoRet <opName, 0> { let PseudoInstr = NAME; @@ -393,8 +388,8 @@ def _RTN : FLAT_AtomicRet_Pseudo <opName, (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst), - (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb), - " $vdst, $vaddr, $vdata$offset$glc1$slc$sccb", + (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), + " $vdst, $vaddr, $vdata$offset$cpol", [(set vt:$vdst, (atomic (FLATOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>, GlobalSaddrTable<0, opName#"_rtn">, @@ -416,8 +411,8 @@ def "" : FLAT_AtomicNoRet_Pseudo <opName, (outs), - (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb), - " $vaddr, $vdata, off$offset$slc$sccb">, + (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), + " $vaddr, $vdata, off$offset$cpol">, GlobalSaddrTable<0, opName>, AtomicNoRet <opName, 0> { let has_saddr = 1; @@ -427,8 +422,8 @@ def _SADDR : FLAT_AtomicNoRet_Pseudo <opName, (outs), - (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb), - " $vaddr, $vdata, $saddr$offset$slc$sccb">, + (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol), + " $vaddr, $vdata, $saddr$offset$cpol">, GlobalSaddrTable<1, opName>, AtomicNoRet <opName#"_saddr", 0> { let has_saddr = 1; @@ -451,8 +446,8 @@ def _RTN : FLAT_AtomicRet_Pseudo <opName, (outs vdst_op:$vdst), - (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, 
GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb), - " $vdst, $vaddr, $vdata, off$offset$glc1$slc$sccb", + (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), + " $vdst, $vaddr, $vdata, off$offset$cpol", [(set vt:$vdst, (atomic (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$vdata))]>, GlobalSaddrTable<0, opName#"_rtn">, @@ -463,8 +458,8 @@ def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName, (outs vdst_op:$vdst), - (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb), - " $vdst, $vaddr, $vdata, $saddr$offset$glc1$slc$sccb">, + (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol), + " $vdst, $vaddr, $vdata, $saddr$offset$cpol">, GlobalSaddrTable<1, opName#"_rtn">, AtomicNoRet <opName#"_saddr", 1> { let has_saddr = 1; @@ -827,17 +822,17 @@ class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in), - (inst $vaddr, $offset, 0, 0, 0, 0, $in) + (inst $vaddr, $offset, 0, $in) >; class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset), vt:$in), - (inst $vaddr, $offset, 0, 0, 0, 0, $in) + (inst $vaddr, $offset, 0, $in) >; class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$in)), - (inst $saddr, $voffset, $offset, 0, 0, 0, 0, $in) + (inst $saddr, $voffset, $offset, 0, $in) >; class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < @@ -847,7 +842,7 @@ class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))), - (inst $saddr, $voffset, $offset, 0, 0, 0) + (inst $saddr, $voffset, $offset, 0) >; 
class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, @@ -928,7 +923,7 @@ class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in), - (inst $vaddr, $offset, 0, 0, 0, 0, $in) + (inst $vaddr, $offset, 0, $in) >; class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < @@ -943,7 +938,7 @@ class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)), - (inst $saddr, $offset, 0, 0, 0, 0, $in) + (inst $saddr, $offset, 0, $in) >; class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, @@ -1390,7 +1385,7 @@ let AssemblerPredicate = isGFX8GFX9; let DecoderNamespace = "GFX8"; - let Inst{25} = !if(ps.has_sccb, sccb, ps.sccbValue); + let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); } multiclass FLAT_Real_AllAddr_vi<bits<7> op> { @@ -1557,7 +1552,7 @@ let DecoderNamespace = "GFX10"; let Inst{11-0} = offset{11-0}; - let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue); + let Inst{12} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d); let Inst{55} = 0; } Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h =================================================================== --- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -68,14 +68,8 @@ const MCSubtargetInfo &STI, raw_ostream &O); void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, - raw_ostream &O); - void printSCCB(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, - raw_ostream &O); - void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo 
&STI, - raw_ostream &O); - void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, - raw_ostream &O); + void printCPol(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -202,26 +202,19 @@ printNamedBit(MI, OpNo, O, "gds"); } -void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - if (AMDGPU::isGFX10Plus(STI)) - printNamedBit(MI, OpNo, O, "dlc"); -} - -void AMDGPUInstPrinter::printSCCB(const MCInst *MI, unsigned OpNo, +void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - if (AMDGPU::isGFX90A(STI)) - printNamedBit(MI, OpNo, O, "scc"); -} - -void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - printNamedBit(MI, OpNo, O, "glc"); -} - -void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O) { - printNamedBit(MI, OpNo, O, "slc"); + auto Imm = MI->getOperand(OpNo).getImm(); + if (Imm & CPol::GLC) + O << " glc"; + if (Imm & CPol::SLC) + O << " slc"; + if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI)) + O << " dlc"; + if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI)) + O << " scc"; + if (Imm & ~CPol::ALL) + O << " /* unexpected cache policy bit */"; } void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo, Index: llvm/lib/Target/AMDGPU/MIMGInstructions.td =================================================================== --- 
llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -262,10 +262,10 @@ string dns=""> : MIMG_gfx6789 <op.BASE, (outs dst_rc:$vdata), dns> { let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc, - DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc, + DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da" + let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -275,10 +275,10 @@ string dns=""> : MIMG_gfx90a <op.BASE, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> { let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc, - DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc, + DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, LWE:$lwe, DA:$da), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da" + let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -287,10 +287,10 @@ string dns=""> : MIMG_gfx10<op.BASE, (outs DataRC:$vdata), dns> { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask, - Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc, - SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe" + let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -300,10 +300,10 @@ : MIMG_nsa_gfx10<op.BASE, (outs DataRC:$vdata), num_addrs, dns> { let InOperandList = !con(AddrIns, (ins 
SReg_256:$srsrc, DMask:$dmask, - Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc, - SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe" + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -387,10 +387,10 @@ string dns = ""> : MIMG_gfx6789<op.BASE, (outs), dns> { let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, - DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc, + DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da" + let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -401,10 +401,10 @@ : MIMG_gfx90a<op.BASE, (outs), dns> { let InOperandList = !con((ins getLdStRegisterOperand<data_rc>.ret:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, - DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc, + DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, LWE:$lwe, DA:$da), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da" + let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -413,10 +413,10 @@ string dns=""> : MIMG_gfx10<op.BASE, (outs), dns> { let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc, - DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc, - GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, 
TFE:$tfe, LWE:$lwe), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe" + let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -427,10 +427,10 @@ let InOperandList = !con((ins DataRC:$vdata), AddrIns, (ins SReg_256:$srsrc, DMask:$dmask, - Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc, - SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe" + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -505,9 +505,9 @@ let AsmMatchConverter = "cvtMIMGAtomic"; let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, - DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc, + DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da); - let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da"; + let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da"; } class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc, @@ -518,9 +518,9 @@ let InOperandList = (ins getLdStRegisterOperand<data_rc>.ret:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, - DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc, + DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, LWE:$lwe, DA:$da); - let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da"; + let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da"; } class MIMG_Atomic_si<mimgopc op, string asm, RegisterClass data_rc, @@ -553,9 +553,9 @@ let AsmMatchConverter = "cvtMIMGAtomic"; let InOperandList = 
(ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc, - DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc, - GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe); - let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"; + DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe); + let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"; } class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode, @@ -569,9 +569,9 @@ let InOperandList = !con((ins DataRC:$vdata), AddrIns, (ins SReg_256:$srsrc, DMask:$dmask, - Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc, - SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe)); - let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"; + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe)); + let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"; } multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm, @@ -658,10 +658,10 @@ RegisterClass src_rc, string dns=""> : MIMG_gfx6789 <op.BASE, (outs dst_rc:$vdata), dns> { let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, - DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc, + DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da" + let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$cpol$r128$tfe$lwe$da" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -669,10 +669,10 @@ RegisterClass src_rc, string dns=""> : MIMG_gfx90a<op.BASE, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> { let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, - DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc, + 
DMask:$dmask, UNorm:$unorm, CPol:$cpol, R128A16:$r128, LWE:$lwe, DA:$da), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); - let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$sccb$glc$slc$r128$lwe$da" + let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$cpol$r128$lwe$da" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -681,11 +681,11 @@ string dns=""> : MIMG_gfx10<op.BASE, (outs DataRC:$vdata), dns> { let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp, - DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc, - GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm" - #"$dlc$glc$slc$r128$a16$tfe$lwe" + #"$cpol$r128$a16$tfe$lwe" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -695,11 +695,11 @@ : MIMG_nsa_gfx10<op.BASE, (outs DataRC:$vdata), num_addrs, dns> { let InOperandList = !con(AddrIns, (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask, - Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc, - SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), + Dim:$dim, UNorm:$unorm, CPol:$cpol, + R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe), !if(BaseOpcode.HasD16, (ins D16:$d16), (ins))); let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm" - #"$dlc$glc$slc$r128$a16$tfe$lwe" + #"$cpol$r128$a16$tfe$lwe" #!if(BaseOpcode.HasD16, "$d16", ""); } @@ -888,9 +888,7 @@ dmask = 0xf, unorm = 1, d16 = 0, - glc = 0, - slc = 0, - dlc = 0, + cpol = 0, tfe = 0, lwe = 0, r128 = 1, Index: llvm/lib/Target/AMDGPU/SIDefines.h =================================================================== --- llvm/lib/Target/AMDGPU/SIDefines.h +++ llvm/lib/Target/AMDGPU/SIDefines.h @@ -276,6 +276,18 @@ } // namespace AMDGPU namespace AMDGPU { +namespace CPol { + +enum CPol { + GLC = 1, + SLC = 2, + DLC 
= 4, + SCC = 16, + ALL = GLC | SLC | DLC | SCC +}; + +} // namespace CPol + namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns. enum Id { // Message ID, width(4) [3:0]. Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -11222,10 +11222,13 @@ int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode()); if (NoRetAtomicOp != -1) { if (!Node->hasAnyUseOfValue(0)) { - int Glc1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), - AMDGPU::OpName::glc1); - if (Glc1Idx != -1) - MI.RemoveOperand(Glc1Idx); + int CPolIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::cpol); + if (CPolIdx != -1) { + // The no-return form must not ask for the pre-op value: clear GLC. + MachineOperand &CPol = MI.getOperand(CPolIdx); + CPol.setImm(CPol.getImm() & ~AMDGPU::CPol::GLC); + } MI.RemoveOperand(0); MI.setDesc(TII->get(NoRetAtomicOp)); return; Index: llvm/lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -261,6 +261,13 @@ int Size = 8; } +def CPolBit { + int GLC = 0; + int SLC = 1; + int DLC = 2; + int SCC = 4; +} + class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">; class VINTRPe <bits<2> op> : Enc32 { @@ -281,21 +288,20 @@ bits<10> vdata; bits<4> dmask; bits<1> unorm; - bits<1> glc; + bits<5> cpol; bits<1> r128; bits<1> tfe; bits<1> lwe; - bits<1> slc; bit d16; bits<7> srsrc; bits<7> ssamp; let Inst{11-8} = dmask; let Inst{12} = unorm; - let Inst{13} = glc; + let Inst{13} = cpol{CPolBit.GLC}; let Inst{15} = r128; let Inst{17} = lwe; - let Inst{25} = slc; + let Inst{25} = cpol{CPolBit.SLC}; let Inst{31-26} = 0x3c; let Inst{47-40} = vdata{7-0}; let Inst{52-48} = srsrc{6-2}; @@ -306,10 +312,9 @@ class MIMGe_gfx6789 <bits<8> op> : MIMGe { bits<8> vaddr; bits<1> da; - bits<1> sccb; let Inst{0} = op{7};
- let Inst{7} = sccb; + let Inst{7} = cpol{CPolBit.SCC}; let Inst{14} = da; let Inst{16} = tfe; let Inst{24-18} = op{6-0}; @@ -319,10 +324,9 @@ class MIMGe_gfx90a <bits<8> op> : MIMGe { bits<8> vaddr; bits<1> da; - bits<1> sccb; let Inst{0} = op{7}; - let Inst{7} = sccb; + let Inst{7} = cpol{CPolBit.SCC}; let Inst{14} = da; let Inst{16} = vdata{9}; // ACC bit let Inst{24-18} = op{6-0}; @@ -333,13 +337,12 @@ bits<8> vaddr0; bits<3> dim; bits<2> nsa; - bits<1> dlc; bits<1> a16; let Inst{0} = op{7}; let Inst{2-1} = nsa; let Inst{5-3} = dim; - let Inst{7} = dlc; + let Inst{7} = cpol{CPolBit.DLC}; let Inst{16} = tfe; let Inst{24-18} = op{6-0}; let Inst{39-32} = vaddr0; Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5573,21 +5573,10 @@ .add(*SOffset) .add(*Offset); - // Atomics do not have this operand. - if (const MachineOperand *GLC = - getNamedOperand(MI, AMDGPU::OpName::glc)) { - MIB.addImm(GLC->getImm()); + if (const MachineOperand *CPol = + getNamedOperand(MI, AMDGPU::OpName::cpol)) { + MIB.addImm(CPol->getImm()); } - if (const MachineOperand *DLC = - getNamedOperand(MI, AMDGPU::OpName::dlc)) { - MIB.addImm(DLC->getImm()); - } - if (const MachineOperand *SCCB = - getNamedOperand(MI, AMDGPU::OpName::sccb)) { - MIB.addImm(SCCB->getImm()); - } - - MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)); if (const MachineOperand *TFE = getNamedOperand(MI, AMDGPU::OpName::tfe)) { @@ -5607,7 +5596,7 @@ .addReg(NewSRsrc) .add(*SOffset) .add(*Offset) - .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)) + .addImm(getNamedImmOperand(MI, AMDGPU::OpName::cpol)) .cloneMemRefs(MI); } Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -833,6 +833,10 @@ return 
CurDAG->getTargetConstant((N->getZExtValue() >> 4) & 1, SDLoc(N), MVT::i8); }]>; +def extract_cpol : SDNodeXForm<timm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & AMDGPU::CPol::ALL, SDLoc(N), MVT::i8); +}]>; + //===----------------------------------------------------------------------===// // Custom Operands //===----------------------------------------------------------------------===// @@ -1085,6 +1089,12 @@ let ParserMatchClass = MatchClass; } +class NamedOperandU32Default1<string Name, AsmOperandClass MatchClass> : + OperandWithDefaultOps<i32, (ops (i32 1))> { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; +} + let OperandType = "OPERAND_IMMEDIATE" in { def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>; @@ -1108,18 +1118,9 @@ def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>; def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>; -def SCCB : NamedOperandBit<"SCCB", NamedMatchClass<"SCCB">>; -def SCCB_0 : NamedOperandBit_0<"SCCB", NamedMatchClass<"SCCB">>; - -def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>; -def DLC_0 : NamedOperandBit_0<"DLC", NamedMatchClass<"DLC">>; - -def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>; -def GLC_0 : NamedOperandBit_0<"GLC", NamedMatchClass<"GLC">>; -def GLC_1 : NamedOperandBit_1<"GLC", NamedMatchClass<"GLC_1">>; - -def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; -def SLC_0 : NamedOperandBit_0<"SLC", NamedMatchClass<"SLC">>; +def CPol : NamedOperandU32<"CPol", NamedMatchClass<"CPol">>; +def CPol_0 : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>; +def CPol_GLC1 : NamedOperandU32Default1<"CPol", NamedMatchClass<"CPol">>; def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>; Index: llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ 
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -104,10 +104,7 @@ unsigned BaseOff; unsigned DMask; InstClassEnum InstClass; - bool GLC = 0; - bool SLC = 0; - bool DLC = 0; - bool SCCB = 0; // vmem only. + unsigned CPol = 0; bool UseST64; int AddrIdx[MaxAddressRegs]; const MachineOperand *AddrReg[MaxAddressRegs]; @@ -533,14 +530,7 @@ if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) { Offset &= 0xffff; } else if (InstClass != MIMG) { - GLC = TII.getNamedOperand(*I, AMDGPU::OpName::glc)->getImm(); - if (InstClass != S_BUFFER_LOAD_IMM) { - SLC = TII.getNamedOperand(*I, AMDGPU::OpName::slc)->getImm(); - } - DLC = TII.getNamedOperand(*I, AMDGPU::OpName::dlc)->getImm(); - if (InstClass != S_BUFFER_LOAD_IMM) { - SCCB = TII.getNamedOperand(*I, AMDGPU::OpName::sccb)->getImm(); - } + CPol = TII.getNamedOperand(*I, AMDGPU::OpName::cpol)->getImm(); } AddressRegs Regs = getRegs(Opc, TII); @@ -690,10 +680,9 @@ return false; // Check other optional immediate operands for equality. - unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc, - AMDGPU::OpName::d16, AMDGPU::OpName::unorm, - AMDGPU::OpName::da, AMDGPU::OpName::r128, - AMDGPU::OpName::a16, AMDGPU::OpName::dlc}; + unsigned OperandsToMatch[] = {AMDGPU::OpName::cpol, AMDGPU::OpName::d16, + AMDGPU::OpName::unorm, AMDGPU::OpName::da, + AMDGPU::OpName::r128, AMDGPU::OpName::a16}; for (auto op : OperandsToMatch) { int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op); @@ -798,9 +787,8 @@ if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) { return (EltOffset0 + CI.Width == EltOffset1 || EltOffset1 + Paired.Width == EltOffset0) && - CI.GLC == Paired.GLC && CI.DLC == Paired.DLC && - (CI.InstClass == S_BUFFER_LOAD_IMM || - (CI.SLC == Paired.SLC && CI.SCCB == Paired.SCCB)); + CI.CPol == Paired.CPol && + (CI.InstClass == S_BUFFER_LOAD_IMM || CI.CPol == Paired.CPol); } // If the offset in elements doesn't fit in 8-bits, we might be able to use @@ -1301,8 +1289,7 @@ BuildMI(*MBB, Paired.I, DL,
TII->get(Opcode), DestReg) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase)) .addImm(MergedOffset) // offset - .addImm(CI.GLC) // glc - .addImm(CI.DLC) // dlc + .addImm(CI.CPol) // cpol .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired); @@ -1361,12 +1348,9 @@ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) .addImm(MergedOffset) // offset - .addImm(CI.GLC) // glc - .addImm(CI.SLC) // slc + .addImm(CI.CPol) // cpol .addImm(0) // tfe - .addImm(CI.DLC) // dlc .addImm(0) // swz - .addImm(CI.SCCB) // scc .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired); @@ -1429,12 +1413,9 @@ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) .addImm(MergedOffset) // offset .addImm(JoinedFormat) // format - .addImm(CI.GLC) // glc - .addImm(CI.SLC) // slc + .addImm(CI.CPol) // cpol .addImm(0) // tfe - .addImm(CI.DLC) // dlc .addImm(0) // swz - .addImm(CI.SCCB) // scc .addMemOperand( combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); @@ -1510,12 +1491,9 @@ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) .addImm(std::min(CI.Offset, Paired.Offset)) // offset .addImm(JoinedFormat) // format - .addImm(CI.GLC) // glc - .addImm(CI.SLC) // slc + .addImm(CI.CPol) // cpol .addImm(0) // tfe - .addImm(CI.DLC) // dlc .addImm(0) // swz - .addImm(CI.SCCB) // scc .addMemOperand( combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); @@ -1680,12 +1658,9 @@ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) .addImm(std::min(CI.Offset, Paired.Offset)) // offset - .addImm(CI.GLC) // glc - .addImm(CI.SLC) // slc + .addImm(CI.CPol) // cpol .addImm(0) // tfe - .addImm(CI.DLC) // dlc .addImm(0) // swz - .addImm(CI.SCCB) // scc 
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); moveInstsAfter(MIB, InstsToMove); Index: llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -84,22 +84,6 @@ LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL) }; -/// Sets named bit \p BitName to "true" if present in instruction \p MI. -/// \returns Returns true if \p MI is modified, false otherwise. -template <uint16_t BitName> -bool enableNamedBit(const MachineBasicBlock::iterator &MI) { - int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName); - if (BitIdx == -1) - return false; - - MachineOperand &Bit = MI->getOperand(BitIdx); - if (Bit.getImm() != 0) - return false; - - Bit.setImm(1); - return true; -} - class SIMemOpInfo final { private: @@ -288,6 +272,11 @@ SICacheControl(const GCNSubtarget &ST); + /// Sets cache policy bit \p Bit to "true" if present in instruction \p MI. + /// \returns Returns true if \p MI is modified, false otherwise. + bool enableNamedBit(const MachineBasicBlock::iterator MI, + AMDGPU::CPol::CPol Bit) const; + public: /// Create a cache control for the subtarget \p ST. @@ -369,13 +358,13 @@ /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI /// is modified, false otherwise. bool enableGLCBit(const MachineBasicBlock::iterator &MI) const { - return enableNamedBit<AMDGPU::OpName::glc>(MI); + return enableNamedBit(MI, AMDGPU::CPol::GLC); } /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI /// is modified, false otherwise. bool enableSLCBit(const MachineBasicBlock::iterator &MI) const { - return enableNamedBit<AMDGPU::OpName::slc>(MI); + return enableNamedBit(MI, AMDGPU::CPol::SLC); } public: @@ -436,7 +425,7 @@ /// Sets SCC bit to "true" if present in \p MI. Returns true if \p MI /// is modified, false otherwise.
bool enableSCCBit(const MachineBasicBlock::iterator &MI) const { - return enableNamedBit<AMDGPU::OpName::sccb>(MI); + return enableNamedBit(MI, AMDGPU::CPol::SCC); } public: @@ -485,7 +474,7 @@ /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI /// is modified, false otherwise. bool enableDLCBit(const MachineBasicBlock::iterator &MI) const { - return enableNamedBit<AMDGPU::OpName::dlc>(MI); + return enableNamedBit(MI, AMDGPU::CPol::DLC); } public: @@ -785,6 +774,17 @@ InsertCacheInv = !AmdgcnSkipCacheInvalidations; } +bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI, + AMDGPU::CPol::CPol Bit) const { + MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol); + // Nothing to do without a cpol operand or when the bit is already set. + if (!CPol || (CPol->getImm() & Bit)) + return false; + + CPol->setImm(CPol->getImm() | Bit); + return true; +} + /* static */ std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) { GCNSubtarget::Generation Generation = ST.getGeneration(); Index: llvm/lib/Target/AMDGPU/SMInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SMInstructions.td +++ llvm/lib/Target/AMDGPU/SMInstructions.td @@ -71,6 +71,7 @@ bits<7> sdst; bits<32> offset; bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0); + bits<5> cpol; } class SM_Probe_Pseudo <string opName, dag ins, bit isImm> @@ -122,8 +123,8 @@ RegisterClass dstClass> { def _IMM : SM_Load_Pseudo <opName, (outs dstClass:$sdst), - (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc), - " $sdst, $sbase, $offset$glc$dlc", []> { + (ins baseClass:$sbase, i32imm:$offset, CPol:$cpol), + " $sdst, $sbase, $offset$cpol", []> { let offset_is_imm = 1; let BaseClass = baseClass; let PseudoInstr = opName # "_IMM"; @@ -133,8 +134,8 @@ def _SGPR : SM_Load_Pseudo <opName, (outs dstClass:$sdst), - (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc), - " $sdst, $sbase, $offset$glc$dlc", []> { + (ins baseClass:$sbase, SReg_32:$soff, CPol:$cpol), + " $sdst,
$sbase, $offset$cpol", []> { let BaseClass = baseClass; let PseudoInstr = opName # "_SGPR"; let has_glc = 1; @@ -146,8 +147,8 @@ RegisterClass baseClass, RegisterClass srcClass> { def _IMM : SM_Store_Pseudo <opName, - (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc), - " $sdata, $sbase, $offset$glc$dlc", []> { + (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, CPol:$cpol), + " $sdata, $sbase, $offset$cpol", []> { let offset_is_imm = 1; let BaseClass = baseClass; let SrcClass = srcClass; @@ -155,8 +156,8 @@ } def _SGPR : SM_Store_Pseudo <opName, - (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc), - " $sdata, $sbase, $offset$glc$dlc", []> { + (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, CPol:$cpol), + " $sdata, $sbase, $offset$cpol", []> { let BaseClass = baseClass; let SrcClass = srcClass; let PseudoInstr = opName # "_SGPR"; @@ -232,6 +233,8 @@ let IsAtomicNoRet = !not(isRet); let IsAtomicRet = isRet; + + let AsmMatchConverter = "cvtSMEMAtomic"; } class SM_Pseudo_Atomic<string opName, @@ -241,13 +244,14 @@ bit isRet, string opNameWithSuffix = opName # !if(isImm, !if(isRet, "_IMM_RTN", "_IMM"), - !if(isRet, "_SGPR_RTN", "_SGPR"))> : + !if(isRet, "_SGPR_RTN", "_SGPR")), + Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> : SM_Atomic_Pseudo<opName, !if(isRet, (outs dataClass:$sdst), (outs)), !if(isImm, - (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, DLC:$dlc), - (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)), - !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc", + (ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, CPolTy:$cpol), + (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, CPolTy:$cpol)), + !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset$cpol", isRet>, AtomicNoRet <opNameWithSuffix, isRet> { let offset_is_imm = isImm; @@ -463,13 +467,13 @@ SM_Load_Pseudo sgprPs = 
!cast<SM_Load_Pseudo>(ps#_SGPR)> { def _IMM_si : SMRD_Real_si <op, immPs> { - let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol); } // FIXME: The operand name $offset is inconsistent with $soff used // in the pseudo def _SGPR_si : SMRD_Real_si <op, sgprPs> { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); } } @@ -497,15 +501,13 @@ : SM_Real<ps> , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> , Enc64 { - bit glc; - let AssemblerPredicate = isGFX8GFX9; let DecoderNamespace = "GFX8"; let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); - let Inst{16} = !if(ps.has_glc, glc, ?); + let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); let Inst{17} = imm; let Inst{25-18} = op; let Inst{31-26} = 0x30; //encoding @@ -519,10 +521,10 @@ SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM), SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> { def _IMM_vi : SMEM_Real_vi <op, immPs> { - let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); } def _SGPR_vi : SMEM_Real_vi <op, sgprPs> { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); } } @@ -540,11 +542,11 @@ // FIXME: The operand name $offset is inconsistent with $soff used // in the pseudo def _IMM_vi : SMEM_Real_Store_vi <op, immPs> { - let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); } def _SGPR_vi : 
SMEM_Real_Store_vi <op, sgprPs> { - let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); } } @@ -604,8 +606,8 @@ let Constraints = ps.Constraints; let DisableEncoding = ps.DisableEncoding; - let glc = ps.glc; - let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0}); + let cpol{CPolBit.GLC} = ps.glc; + let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0}); } multiclass SM_Real_Atomics_vi<bits<8> op, string ps> { @@ -694,7 +696,7 @@ let AssemblerPredicate = isGFX7Only; let DecoderNamespace = "GFX7"; - let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol); let LGKM_CNT = ps.LGKM_CNT; let mayLoad = ps.mayLoad; @@ -772,26 +774,26 @@ // 1. IMM offset def : GCNPat < (smrd_load (SMRDImm i64:$sbase, i32:$offset)), - (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0, 0)) + (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0)) >; // 2. 32-bit IMM offset on CI def : GCNPat < (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), - (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> { + (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> { let OtherPredicates = [isGFX7Only]; } // 3. SGPR offset def : GCNPat < (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), - (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0, 0)) + (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0)) >; // 4. No offset def : GCNPat < (vt (smrd_load (i64 SReg_64:$sbase))), - (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0, 0)) + (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0)) >; } @@ -799,8 +801,7 @@ // 1. 
Offset as an immediate def : GCNPat < (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy), - (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_glc $cachepolicy), - (extract_dlc $cachepolicy)))> { + (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> { let AddedComplexity = 2; } @@ -808,7 +809,7 @@ def : GCNPat < (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)), (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset, - (extract_glc $cachepolicy), (extract_dlc $cachepolicy))> { + (extract_cpol $cachepolicy))> { let OtherPredicates = [isGFX7Only]; let AddedComplexity = 1; } @@ -816,8 +817,7 @@ // 3. Offset loaded in an 32bit SGPR def : GCNPat < (SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy), - (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_glc $cachepolicy), - (extract_dlc $cachepolicy))) + (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_cpol $cachepolicy))) >; } @@ -881,16 +881,13 @@ class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> : SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 { - bit glc; - bit dlc; - let AssemblerPredicate = isGFX10Plus; let DecoderNamespace = "GFX10"; let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); - let Inst{14} = !if(ps.has_dlc, dlc, ?); - let Inst{16} = !if(ps.has_glc, glc, ?); + let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?); + let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); let Inst{25-18} = op; let Inst{31-26} = 0x3d; let Inst{52-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{20-0}, ?), ?); @@ -902,10 +899,10 @@ SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM), SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> { def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> { - let InOperandList = (ins 
immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); } def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); } } @@ -922,11 +919,11 @@ // FIXME: The operand name $offset is inconsistent with $soff used // in the pseudo def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> { - let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); } def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> { - let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol); } } @@ -985,15 +982,14 @@ AtomicNoRet <!subst("_RTN","",NAME), ps.glc> { bits<7> sdata; - bit dlc; let Constraints = ps.Constraints; let DisableEncoding = ps.DisableEncoding; - let glc = ps.glc; + let cpol{CPolBit.GLC} = ps.glc; - let Inst{14} = !if(ps.has_dlc, dlc, 0); - let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0}); + let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0); + let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0}); } multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> { Index: llvm/test/MC/AMDGPU/atomic-fadd-insts.s =================================================================== --- llvm/test/MC/AMDGPU/atomic-fadd-insts.s +++ llvm/test/MC/AMDGPU/atomic-fadd-insts.s @@ -41,7 +41,7 @@ // GFX908: encoding: [0x07,0x00,0x34,0xe1,0x00,0x05,0x02,0x03] buffer_atomic_add_f32 v5, off, s[8:11], s3 offset:4095 glc -// GFX908-ERR: error: operands are not valid for this GPU or mode +// GFX908-ERR: error: instruction must not use glc 
buffer_atomic_add_f32 v5, off, s[8:11], s3 offset:4095 slc // GFX908: encoding: [0xff,0x0f,0x36,0xe1,0x00,0x05,0x02,0x03] @@ -86,7 +86,7 @@ // GFX908: encoding: [0x07,0x00,0x38,0xe1,0x00,0x05,0x02,0x03] buffer_atomic_pk_add_f16 v5, off, s[8:11], s3 offset:4095 glc -// GFX908-ERR: error: operands are not valid for this GPU or mode +// GFX908-ERR: error: instruction must not use glc buffer_atomic_pk_add_f16 v5, off, s[8:11], s3 offset:4095 slc // GFX908: encoding: [0xff,0x0f,0x3a,0xe1,0x00,0x05,0x02,0x03] Index: llvm/test/MC/AMDGPU/cpol-err.s =================================================================== --- /dev/null +++ llvm/test/MC/AMDGPU/cpol-err.s @@ -0,0 +1,21 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace + +scratch_load_ubyte v1, v2, off cpol:2 +// CHECK: error: not a valid operand. +// CHECK-NEXT:{{^}}scratch_load_ubyte v1, v2, off cpol:2 +// CHECK-NEXT:{{^}} ^ + +scratch_load_ubyte v1, v2, off glc slc dlc +// CHECK: error: dlc modifier is not supported on this GPU +// CHECK-NEXT:{{^}}scratch_load_ubyte v1, v2, off glc slc dlc +// CHECK-NEXT:{{^}} ^ + +global_atomic_add v[3:4], v5, off slc glc +// CHECK: error: instruction must not use glc +// CHECK-NEXT:{{^}}global_atomic_add v[3:4], v5, off slc glc +// CHECK-NEXT:{{^}} ^ + +global_atomic_add v0, v[1:2], v2, off glc 1 +// CHECK: error: invalid operand for instruction +// CHECK-NEXT:{{^}}global_atomic_add v0, v[1:2], v2, off glc 1 +// CHECK-NEXT:{{^}} ^ Index: llvm/test/MC/AMDGPU/flat-gfx10.s =================================================================== --- llvm/test/MC/AMDGPU/flat-gfx10.s +++ llvm/test/MC/AMDGPU/flat-gfx10.s @@ -38,10 +38,10 @@ // GFX10: encoding: [0x00,0x00,0xc6,0xdc,0x01,0x03,0x7d,0x00] flat_atomic_cmpswap v[1:2], v[3:4] offset:2047 glc -// GFX10-ERR: error: invalid operand for instruction +// GFX10-ERR: error: instruction must not use glc flat_atomic_cmpswap v[1:2], v[3:4] glc -// GFX10-ERR: error: 
invalid operand for instruction +// GFX10-ERR: error: instruction must not use glc flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 glc // GFX10: encoding: [0xff,0x07,0xc5,0xdc,0x01,0x03,0x7d,0x00] Index: llvm/test/MC/AMDGPU/flat-gfx9.s =================================================================== --- llvm/test/MC/AMDGPU/flat-gfx9.s +++ llvm/test/MC/AMDGPU/flat-gfx9.s @@ -53,10 +53,11 @@ // VI: flat_atomic_cmpswap v[1:2], v[3:4] slc ; encoding: [0x00,0x00,0x06,0xdd,0x01,0x03,0x00,0x00] flat_atomic_cmpswap v[1:2], v[3:4] offset:4095 glc -// GCNERR: error: invalid operand for instruction +// GFX9-ERR: error: instruction must not use glc +// VI-ERR: error: flat offset modifier is not supported on this GPU flat_atomic_cmpswap v[1:2], v[3:4] glc -// GCNERR: error: invalid operand for instruction +// GCNERR: error: instruction must not use glc flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:4095 glc // GFX9: flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:4095 glc ; encoding: [0xff,0x0f,0x05,0xdd,0x01,0x03,0x00,0x00] Index: llvm/test/MC/AMDGPU/gfx90a_asm_features.s =================================================================== --- llvm/test/MC/AMDGPU/gfx90a_asm_features.s +++ llvm/test/MC/AMDGPU/gfx90a_asm_features.s @@ -329,19 +329,19 @@ // GFX90A: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 scc ; encoding: [0x00,0x80,0x09,0xe8,0x00,0x04,0x20,0x80] // NOT-GFX1010: error: not a valid operand. -// NOT-GFX908: error: failed parsing operand. +// NOT-GFX908: error: scc modifier is not supported on this GPU tbuffer_load_format_xyzw v[4:7], off, s[0:3], dfmt:1, nfmt:0, 0 scc // GFX90A: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 glc scc ; encoding: [0x00,0xc0,0x09,0xe8,0x00,0x04,0x20,0x80] // NOT-GFX1010: error: not a valid operand. -// NOT-GFX908: error: failed parsing operand. 
+// NOT-GFX908: error: scc modifier is not supported on this GPU tbuffer_load_format_xyzw v[4:7], off, s[0:3], dfmt:1, nfmt:0, 0 glc scc -// NOT-GFX90A: error: failed parsing operand +// NOT-GFX90A: error: scc modifier is not supported on this GPU // GFX90A: buffer_load_dword v5, off, s[8:11], s3 offset:4095 scc ; encoding: [0xff,0x8f,0x50,0xe0,0x00,0x05,0x02,0x03] buffer_load_dword v5, off, s[8:11], s3 offset:4095 scc -// NOT-GFX90A: error: failed parsing operand +// NOT-GFX90A: error: scc modifier is not supported on this GPU // GFX90A: buffer_load_dword v5, off, s[8:11], s3 offset:4095 glc scc ; encoding: [0xff,0xcf,0x50,0xe0,0x00,0x05,0x02,0x03] buffer_load_dword v5, off, s[8:11], s3 offset:4095 glc scc @@ -569,11 +569,11 @@ // GFX90A: ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 gds ; encoding: [0xff,0xff,0xf9,0xd8,0x01,0x02,0x00,0x04] ds_add_rtn_f64 v[4:5], v1, v[2:3] offset:65535 gds -// NOT-GFX90A: error: failed parsing operand +// NOT-GFX90A: error: scc modifier is not supported on this GPU // GFX90A: flat_load_dword v0, v[0:1] scc ; encoding: [0x00,0x00,0x50,0xde,0x00,0x00,0x00,0x00] flat_load_dword v0, v[0:1] scc -// NOT-GFX90A: error: failed parsing operand +// NOT-GFX90A: error: scc modifier is not supported on this GPU // GFX90A: flat_load_dword v0, v[0:1] glc scc ; encoding: [0x00,0x00,0x51,0xde,0x00,0x00,0x00,0x00] flat_load_dword v0, v[0:1] glc scc @@ -689,7 +689,7 @@ // GFX90A: global_atomic_max_f64 v[0:1], v[2:3], off ; encoding: [0x00,0x80,0x44,0xdd,0x00,0x02,0x7f,0x00] global_atomic_max_f64 v[0:1], v[2:3], off -// NOT-GFX90A: error: failed parsing operand +// NOT-GFX90A: error: scc modifier is not supported on this GPU // GFX90A: image_load v[0:4], v2, s[0:7] dmask:0xf unorm scc ; encoding: [0x80,0x1f,0x00,0xf0,0x02,0x00,0x00,0x00] image_load v[0:4], v2, s[0:7] dmask:0xf unorm scc @@ -982,17 +982,17 @@ // GFX90A: buffer_atomic_add_f32 v0, v2, s[4:7], 0 idxen glc ; encoding: [0x00,0x60,0x34,0xe1,0x02,0x00,0x01,0x80] // NOT-GFX1010: error: 
instruction not supported on this GPU -// NOT-GFX908: error: operands are not valid for this GPU or mode +// NOT-GFX908: error: instruction must not use glc buffer_atomic_add_f32 v0, v2, s[4:7], 0 idxen glc // GFX90A: buffer_atomic_add_f32 v0, v2, s[4:7], 0 idxen glc ; encoding: [0x00,0x60,0x34,0xe1,0x02,0x00,0x01,0x80] // NOT-GFX1010: error: instruction not supported on this GPU -// NOT-GFX908: error: operands are not valid for this GPU or mode +// NOT-GFX908: error: instruction must not use glc buffer_atomic_add_f32 v0, v2, s[4:7], 0 idxen glc // GFX90A: buffer_atomic_pk_add_f16 v0, v2, s[4:7], 0 idxen glc ; encoding: [0x00,0x60,0x38,0xe1,0x02,0x00,0x01,0x80] // NOT-GFX1010: error: instruction not supported on this GPU -// NOT-GFX908: error: operands are not valid for this GPU or mode +// NOT-GFX908: error: instruction must not use glc buffer_atomic_pk_add_f16 v0, v2, s[4:7], 0 idxen glc // GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc ; encoding: [0x00,0x80,0x35,0xdd,0x00,0x02,0x7f,0x00] Index: llvm/test/MC/AMDGPU/mubuf-gfx10.s =================================================================== --- llvm/test/MC/AMDGPU/mubuf-gfx10.s +++ llvm/test/MC/AMDGPU/mubuf-gfx10.s @@ -4,7 +4,7 @@ // GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds ; encoding: [0x00,0x40,0x25,0xe0,0x00,0x05,0x42,0x03] buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds dlc -// GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds dlc ; encoding: [0x00,0xc0,0x25,0xe0,0x00,0x05,0x42,0x03] +// GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc lds ; encoding: [0x00,0xc0,0x25,0xe0,0x00,0x05,0x42,0x03] buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc // GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc ; encoding: [0x00,0xc0,0x24,0xe0,0x00,0x05,0x42,0x03]