Index: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -116,6 +116,7 @@ ImmTyDLC, ImmTyGLC, ImmTySLC, + ImmTyCPol, ImmTySWZ, ImmTyTFE, ImmTyD16, @@ -338,10 +339,11 @@ bool isLDS() const { return isImmTy(ImmTyLDS); } bool isDLC() const { return isImmTy(ImmTyDLC); } bool isGLC() const { return isImmTy(ImmTyGLC); } - // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced - // value of the GLC operand. - bool isGLC_1() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } + bool isCPol() const { return isImmTy(ImmTyCPol); } + // "CPol_GLC1" is a MatchClass of the CPOL_GLC1 operand with the default and + // forced value of the GLC operand. + bool isCPol_GLC1() const { return isImmTy(ImmTyCPol); } bool isSWZ() const { return isImmTy(ImmTySWZ); } bool isTFE() const { return isImmTy(ImmTyTFE); } bool isD16() const { return isImmTy(ImmTyD16); } @@ -840,6 +842,7 @@ case ImmTyDLC: OS << "DLC"; break; case ImmTyGLC: OS << "GLC"; break; case ImmTySLC: OS << "SLC"; break; + case ImmTyCPol: OS << "CPol"; break; case ImmTySWZ: OS << "SWZ"; break; case ImmTyTFE: OS << "TFE"; break; case ImmTyD16: OS << "D16"; break; @@ -1105,7 +1108,7 @@ bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth); void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, - bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); + bool IsAtomic, bool IsLds = false); void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, bool IsGdsHardcoded); @@ -1452,16 +1455,16 @@ OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); int64_t parseGPRIdxMacro(); - void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } - void cvtMubufAtomic(MCInst &Inst, const OperandVector 
&Operands) { cvtMubufImpl(Inst, Operands, true, false); } - void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } - void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } + void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } + void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } + void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); AMDGPUOperand::Ptr defaultDLC() const; AMDGPUOperand::Ptr defaultGLC() const; - AMDGPUOperand::Ptr defaultGLC_1() const; AMDGPUOperand::Ptr defaultSLC() const; + AMDGPUOperand::Ptr defaultCPol() const; + AMDGPUOperand::Ptr defaultCPol_GLC1() const; AMDGPUOperand::Ptr defaultSMRDOffset8() const; AMDGPUOperand::Ptr defaultSMEMOffset() const; @@ -3807,15 +3810,28 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, const SMLoc &IDLoc) { - int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::glc1); - if (GLCPos != -1) { - // -1 is set by GLC_1 default operand. In all cases "glc" must be present - // in the asm string, and the default value means it is not present. 
- if (Inst.getOperand(GLCPos).getImm() == -1) { + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) + return true; + + int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::cpol); + if (CPolPos == -1) + return true; + + if (TSFlags & SIInstrFlags::IsAtomicRet) { + if (!(Inst.getOperand(CPolPos).getImm() & CPol::GLC)) { Error(IDLoc, "instruction must use glc"); return false; } + } else { + if (Inst.getOperand(CPolPos).getImm() & CPol::GLC) { + SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); + StringRef CStr(S.getPointer()); + S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); + Error(S, "instruction must not use glc"); + return false; + } } return true; @@ -4867,6 +4883,45 @@ // Try to parse with a custom parser OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + // This is a hack to combine cache policy bits into a single operand + // since parseOptionalOperand has just consumed all of the individual bits. 
+ if ((ResTy == MatchOperand_Success || ResTy == MatchOperand_NoMatch) && + (Mnemonic.startswith("scratch_") || Mnemonic.startswith("flat_") || + Mnemonic.startswith("global_") || Mnemonic.startswith("buffer_") || + Mnemonic.startswith("tbuffer_"))) { + unsigned CPPos = 0; + unsigned CPol = 0; + + for (unsigned I = 1; I != Operands.size(); ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (Op.isGLC()) + CPol |= CPol::GLC; + else if (Op.isSLC()) + CPol |= CPol::SLC; + else if (Op.isDLC()) { + CPol |= CPol::DLC; + if (!isGFX10Plus()) { + Error(Op.getStartLoc(), "dlc modifier is not supported on this GPU"); + return MatchOperand_ParseFail; + } + } else + continue; + + if (!CPPos) { + CPPos = I; + } else { + Operands.erase(&Operands[I]); + --I; + } + } + + if (CPol) { + SMLoc S = ((AMDGPUOperand &)*Operands[CPPos]).getStartLoc(); + Operands[CPPos] = AMDGPUOperand::CreateImm(this, CPol, S, + AMDGPUOperand::ImmTyCPol); + } + } + // If we successfully parsed the operand or if there as an error parsing, // we are done. 
// @@ -6508,24 +6563,47 @@ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); } -AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { - return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); -} - AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); } +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); +} + +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol_GLC1() const { + return AMDGPUOperand::CreateImm(this, CPol::GLC, SMLoc(), + AMDGPUOperand::ImmTyCPol); +} + void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, - const OperandVector &Operands, - bool IsAtomic, - bool IsAtomicReturn, - bool IsLds) { + const OperandVector &Operands, + bool IsAtomic, + bool IsLds) { bool IsLdsOpcode = IsLds; bool HasLdsModifier = false; OptionalImmIndexMap OptionalIdx; - assert(IsAtomicReturn ? IsAtomic : true); unsigned FirstOperandIdx = 1; + bool IsAtomicReturn = false; + + if (IsAtomic) { + for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + if (!Op.isCPol()) + continue; + IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; + break; + } + + if (!IsAtomicReturn) { + int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); + if (NewOpc != -1) + Inst.setOpcode(NewOpc); + } + + IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & + SIInstrFlags::IsAtomicRet; + } for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); @@ -6576,18 +6654,11 @@ } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); - if (!IsAtomic || IsAtomicReturn) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC, - IsAtomicReturn ? 
-1 : 0); - } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); if (!IsLdsOpcode) { // tfe is not legal with lds opcodes addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); } - - if (isGFX10Plus()) - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); } void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { @@ -6622,12 +6693,8 @@ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); - - if (isGFX10Plus()) - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); } //===----------------------------------------------------------------------===// @@ -6786,6 +6853,7 @@ {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, + {"cpol", AMDGPUOperand::ImmTyCPol, false, nullptr}, {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, @@ -6868,6 +6936,8 @@ Op.ConvertResult); } else if (Op.Type == AMDGPUOperand::ImmTyDim) { res = parseDim(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { + continue; } else { res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); } Index: llvm/lib/Target/AMDGPU/BUFInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/BUFInstructions.td +++ 
llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -122,13 +122,11 @@ let TSFlags = ps.TSFlags; bits<12> offset; - bits<1> glc; - bits<1> dlc; + bits<3> cpol; bits<7> format; bits<8> vaddr; bits<8> vdata; bits<7> srsrc; - bits<1> slc; bits<1> tfe; bits<8> soffset; @@ -142,17 +140,17 @@ RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); dag InsNoData = !if(!empty(vaddrList), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz), + offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz), (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz) + offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz) ); dag InsData = !if(!empty(vaddrList), (ins vdataClass:$vdata, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz), + SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol, + TFE:$tfe, SWZ:$swz), (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc, - SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz) + SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol, + TFE:$tfe, SWZ:$swz) ); dag ret = !if(!empty(vdataList), InsNoData, InsData); } @@ -204,7 +202,7 @@ : MTBUF_Pseudo.ret, - " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc$swz", + " $vdata, " # getMTBUFAsmOps.ret # "$cpol$tfe$swz", pattern>, MTBUF_SetupAddr { let PseudoInstr = opName # "_" # getAddrName.ret; @@ -220,13 +218,13 @@ def _OFFSET : MTBUF_Load_Pseudo , + CPol:$cpol, i1:$tfe, i1:$swz)))]>, MTBUFAddr64Table<0, NAME>; def _ADDR64 : MTBUF_Load_Pseudo , + i8:$format, CPol:$cpol, i1:$tfe, i1:$swz)))]>, MTBUFAddr64Table<1, NAME>; def _OFFEN : MTBUF_Load_Pseudo ; @@ -252,7 +250,7 @@ : MTBUF_Pseudo.ret, - " $vdata, " # getMTBUFAsmOps.ret # "$glc$slc$tfe$dlc$swz", + " $vdata, " # 
getMTBUFAsmOps.ret # "$cpol$tfe$swz", pattern>, MTBUF_SetupAddr { let PseudoInstr = opName # "_" # getAddrName.ret; @@ -267,14 +265,14 @@ def _OFFSET : MTBUF_Store_Pseudo , + i16:$offset, i8:$format, CPol:$cpol, + i1:$tfe, i1:$swz))]>, MTBUFAddr64Table<0, NAME>; def _ADDR64 : MTBUF_Store_Pseudo , + i16:$offset, i8:$format, CPol:$cpol, + i1:$tfe, i1:$swz))]>, MTBUFAddr64Table<1, NAME>; def _OFFEN : MTBUF_Store_Pseudo ; @@ -359,12 +357,10 @@ let UseNamedOperandTable = ps.UseNamedOperandTable; bits<12> offset; - bits<1> glc; - bits<1> dlc; + bits<3> cpol; bits<8> vaddr; bits<8> vdata; bits<7> srsrc; - bits<1> slc; bits<1> tfe; bits<8> soffset; } @@ -404,19 +400,19 @@ RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); dag InsNoData = !if(!empty(vaddrList), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, GLC:$glc, SLC:$slc), + offset:$offset, CPol:$cpol), (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, GLC:$glc, SLC:$slc) + offset:$offset, CPol:$cpol) ); dag InsData = !if(!empty(vaddrList), (ins vdataClass:$vdata, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc), + SCSrc_b32:$soffset, offset:$offset, CPol:$cpol), (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc) + SCSrc_b32:$soffset, offset:$offset, CPol:$cpol) ); dag ret = !con( !if(!empty(vdataList), InsNoData, InsData), - !if(isLds, (ins DLC:$dlc, SWZ:$swz), (ins TFE:$tfe, DLC:$dlc,SWZ:$swz)) + !if(isLds, (ins SWZ:$swz), (ins TFE:$tfe, SWZ:$swz)) ); } @@ -487,8 +483,8 @@ (outs getVregSrcForVT.ret:$vdata), !con(getMUBUFIns.ret, !if(HasTiedDest, (ins getVregSrcForVT.ret:$vdata_in), (ins))), - " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc" # - !if(isLds, " lds", "$tfe") # "$dlc$swz", + " $vdata, " # getMUBUFAsmOps.ret # "$cpol" # + !if(isLds, " lds", "$tfe") # "$swz", pattern>, MUBUF_SetupAddr { let PseudoInstr = opName # !if(isLds, "_lds", "") # @@ -506,15 +502,15 
@@ } class MUBUF_Offset_Load_Pat : Pat < - (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) + (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))), + (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz)) >; class MUBUF_Addr64_Load_Pat : Pat < - (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)) + (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))), + (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz)) >; multiclass MUBUF_Pseudo_Load_Pats { @@ -567,7 +563,7 @@ : MUBUF_Pseudo.ret]>.ret, - " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc$tfe$dlc$swz", + " $vdata, " # getMUBUFAsmOps.ret # "$cpol$tfe$swz", pattern>, MUBUF_SetupAddr { let PseudoInstr = opName # "_" # getAddrName.ret; @@ -585,12 +581,12 @@ def _OFFSET : MUBUF_Store_Pseudo , + i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>, MUBUFAddr64Table<0, NAME>; def _ADDR64 : MUBUF_Store_Pseudo , + i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>, MUBUFAddr64Table<1, NAME>; def _OFFEN : MUBUF_Store_Pseudo ; @@ -608,8 +604,8 @@ class MUBUF_Pseudo_Store_Lds : MUBUF_Pseudo { + (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, SWZ:$swz), + " $srsrc, $soffset$offset lds$cpol$swz"> { let mayLoad = 0; let mayStore = 1; let maybeAtomic = 1; @@ -629,15 +625,15 @@ dag ret = !if(vdata_in, !if(!empty(vaddrList), (ins vdataClass:$vdata_in, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc), + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, 
CPol_GLC1:$cpol), (ins vdataClass:$vdata_in, vaddrClass:$vaddr, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc) + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol) ), !if(!empty(vaddrList), (ins vdataClass:$vdata, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc), + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol), (ins vdataClass:$vdata, vaddrClass:$vaddr, - SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc) + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol) )); } @@ -679,6 +675,7 @@ let has_dlc = 0; let has_tfe = 0; let maybeAtomic = 1; + let AsmMatchConverter = "cvtMubufAtomic"; } class MUBUF_AtomicNoRet_Pseudo.ret, - " $vdata, " # getMUBUFAsmOps.ret # "$slc", + " $vdata, " # getMUBUFAsmOps.ret # "$cpol", pattern>, AtomicNoRet.ret, 0> { let PseudoInstr = opName # "_" # getAddrName.ret; let glc_value = 0; let dlc_value = 0; let IsAtomicNoRet = 1; - let AsmMatchConverter = "cvtMubufAtomic"; } class MUBUF_AtomicRet_Pseudo.ret, - " $vdata, " # getMUBUFAsmOps.ret # "$glc1$slc", + " $vdata, " # getMUBUFAsmOps.ret # "$cpol", pattern>, AtomicNoRet.ret, 1> { let PseudoInstr = opName # "_rtn_" # getAddrName.ret; @@ -718,7 +714,6 @@ let IsAtomicRet = 1; let Constraints = "$vdata = $vdata_in"; let DisableEncoding = "$vdata_in"; - let AsmMatchConverter = "cvtMubufAtomicReturn"; } multiclass MUBUF_Pseudo_Atomics_NO_RTN , MUBUFAddr64Table <0, NAME # "_RTN">; let FPAtomic = isFP in def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo , + (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, + CPol_GLC1:$cpol), vdataType:$vdata_in))]>, MUBUFAddr64Table <1, NAME # "_RTN">; let FPAtomic = isFP in @@ -1177,24 +1172,21 @@ (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$auxiliary, 0)), (!cast(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc 
$auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$auxiliary, 0)), (!cast(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$auxiliary, timm)), (!cast(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1203,8 +1195,7 @@ (!cast(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; } @@ -1263,26 +1254,21 @@ (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$auxiliary, 0), (!cast(opcode # _OFFSET_exact) getVregSrcForVT.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$auxiliary, 0), (!cast(opcode # _OFFEN_exact) getVregSrcForVT.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_glc $auxiliary), - (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (as_i16timm $offset), 
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$auxiliary, timm), (!cast(opcode # _IDXEN_exact) getVregSrcForVT.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (extract_glc $auxiliary), - (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (as_i16timm $offset), (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1291,9 +1277,8 @@ (!cast(opcode # _BOTHEN_exact) getVregSrcForVT.ret:$vdata, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary), - (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_cpol $auxiliary), + 0, (extract_swz $auxiliary)) >; } @@ -1513,21 +1498,21 @@ class MUBUFLoad_PatternADDR64 : GCNPat < (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) + i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz) >; multiclass MUBUFLoad_Atomic_Pattern { def : GCNPat < (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$slc))), - (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0) + i16:$offset, CPol:$cpol))), + (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $cpol, 0) >; def : GCNPat < (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))), - (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0) + (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0) >; } @@ -1548,8 +1533,8 @@ def : GCNPat < (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, - 
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))), - (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) + i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))), + (Instr_OFFSET $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz) >; } @@ -1572,12 +1557,12 @@ def : GCNPat < (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset))), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; def : GCNPat < (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0) >; } @@ -1587,12 +1572,12 @@ ValueType vt, PatFrag ld_frag> { def : GCNPat < (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in) >; def : GCNPat < (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in), - (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in) + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in) >; } @@ -1638,13 +1623,13 @@ // Store follows atomic op convention so address is first def : GCNPat < (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, - i16:$offset, i1:$slc), vt:$val), - (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0) + i16:$offset, CPol:$cpol), vt:$val), + (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $cpol, 0) >; def : GCNPat < (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val), - (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0) + (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0) >; } let SubtargetPredicate = isGFX6GFX7 in { @@ -1658,8 +1643,8 @@ def : GCNPat < (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, - 
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)), - (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz) + i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz)), + (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz) >; } @@ -1673,13 +1658,13 @@ def : GCNPat < (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)), - (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) + (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), - (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0) + (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0) >; } @@ -1725,8 +1710,7 @@ timm:$format, timm:$auxiliary, 0)), (!cast(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1734,8 +1718,7 @@ timm:$format, timm:$auxiliary, timm)), (!cast(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1743,8 +1726,7 @@ timm:$format, timm:$auxiliary, 0)), (!cast(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1754,8 +1736,7 @@ (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm 
$offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; } @@ -1793,8 +1774,7 @@ timm:$format, timm:$auxiliary, 0), (!cast(opcode # _OFFSET_exact) getVregSrcForVT.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1802,8 +1782,7 @@ timm:$format, timm:$auxiliary, timm), (!cast(opcode # _IDXEN_exact) getVregSrcForVT.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1811,8 +1790,7 @@ timm:$format, timm:$auxiliary, 0), (!cast(opcode # _OFFEN_exact) getVregSrcForVT.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; def : GCNPat< @@ -1822,8 +1800,7 @@ getVregSrcForVT.ret:$vdata, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format), - (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary), - (extract_swz $auxiliary)) + (extract_cpol $auxiliary), 0, (extract_swz $auxiliary)) >; } @@ -1865,21 +1842,21 @@ let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); 
let Inst{16} = ps.lds; let Inst{24-18} = op; let Inst{31-26} = 0x38; let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } class MUBUF_Real_gfx10 op, MUBUF_Pseudo ps> : Base_MUBUF_Real_gfx6_gfx7_gfx10 { - let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value); + let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value); let Inst{25} = op{7}; } @@ -1931,16 +1908,33 @@ } multiclass MUBUF_Real_Atomics_RTN_gfx10 op> { def _BOTHEN_RTN_gfx10 : - MUBUF_Real_gfx10(NAME#"_BOTHEN_RTN")>; + MUBUF_Real_gfx10(NAME#"_BOTHEN_RTN")>, + AtomicNoRet; def _IDXEN_RTN_gfx10 : - MUBUF_Real_gfx10(NAME#"_IDXEN_RTN")>; + MUBUF_Real_gfx10(NAME#"_IDXEN_RTN")>, + AtomicNoRet; def _OFFEN_RTN_gfx10 : - MUBUF_Real_gfx10(NAME#"_OFFEN_RTN")>; + MUBUF_Real_gfx10(NAME#"_OFFEN_RTN")>, + AtomicNoRet; def _OFFSET_RTN_gfx10 : - MUBUF_Real_gfx10(NAME#"_OFFSET_RTN")>; + MUBUF_Real_gfx10(NAME#"_OFFSET_RTN")>, + AtomicNoRet; } multiclass MUBUF_Real_Atomics_gfx10 op> : - MUBUF_Real_AllAddr_gfx10, MUBUF_Real_Atomics_RTN_gfx10; + MUBUF_Real_Atomics_RTN_gfx10 { + def _BOTHEN_gfx10 : + MUBUF_Real_gfx10(NAME#"_BOTHEN")>, + AtomicNoRet; + def _IDXEN_gfx10 : + MUBUF_Real_gfx10(NAME#"_IDXEN")>, + AtomicNoRet; + def _OFFEN_gfx10 : + MUBUF_Real_gfx10(NAME#"_OFFEN")>, + AtomicNoRet; + def _OFFSET_gfx10 : + MUBUF_Real_gfx10(NAME#"_OFFSET")>, + AtomicNoRet; + } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>; @@ -2020,18 +2014,38 @@ def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7(NAME#"_LDS_BOTHEN")>, MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">; } - multiclass MUBUF_Real_Atomics_gfx6_gfx7 op> : - MUBUF_Real_AllAddr_gfx6_gfx7 { + multiclass 
MUBUF_Real_Atomics_gfx6_gfx7 op> { + def _ADDR64_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7(NAME#"_ADDR64")>, + AtomicNoRet; + def _BOTHEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7(NAME#"_BOTHEN")>, + AtomicNoRet; + def _IDXEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7(NAME#"_IDXEN")>, + AtomicNoRet; + def _OFFEN_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7(NAME#"_OFFEN")>, + AtomicNoRet; + def _OFFSET_gfx6_gfx7 : + MUBUF_Real_gfx6_gfx7(NAME#"_OFFSET")>, + AtomicNoRet; + def _ADDR64_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7(NAME#"_ADDR64_RTN")>; + MUBUF_Real_gfx6_gfx7(NAME#"_ADDR64_RTN")>, + AtomicNoRet; def _BOTHEN_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7(NAME#"_BOTHEN_RTN")>; + MUBUF_Real_gfx6_gfx7(NAME#"_BOTHEN_RTN")>, + AtomicNoRet; def _IDXEN_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7(NAME#"_IDXEN_RTN")>; + MUBUF_Real_gfx6_gfx7(NAME#"_IDXEN_RTN")>, + AtomicNoRet; def _OFFEN_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7(NAME#"_OFFEN_RTN")>; + MUBUF_Real_gfx6_gfx7(NAME#"_OFFEN_RTN")>, + AtomicNoRet; def _OFFSET_RTN_gfx6_gfx7 : - MUBUF_Real_gfx6_gfx7(NAME#"_OFFSET_RTN")>; + MUBUF_Real_gfx6_gfx7(NAME#"_OFFSET_RTN")>, + AtomicNoRet; } } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" @@ -2120,13 +2134,13 @@ let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{18-16} = op; let Inst{31-26} = 0x3a; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } @@ -2137,7 +2151,7 @@ class MTBUF_Real_gfx10 op, MTBUF_Pseudo ps> : Base_MTBUF_Real_gfx6_gfx7_gfx10 { - let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value); + let Inst{15} = !if(ps.has_dlc, 
cpol{CPolBit.DLC}, ps.dlc_value); let Inst{25-19} = format; let Inst{53} = op{3}; } @@ -2216,9 +2230,9 @@ let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{16} = ps.lds; - let Inst{17} = !if(ps.has_slc, slc, ?); + let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); @@ -2266,9 +2280,9 @@ let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{16} = ps.lds; - let Inst{17} = !if(ps.has_slc, slc, ?); + let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); @@ -2285,12 +2299,23 @@ def _BOTHEN_gfx80 : MUBUF_Real_gfx80 (NAME#"_BOTHEN")>; } -multiclass MUBUF_Real_Atomic_vi op> : - MUBUF_Real_AllAddr_vi { - def _OFFSET_RTN_vi : MUBUF_Real_vi (NAME#"_OFFSET_RTN")>; - def _OFFEN_RTN_vi : MUBUF_Real_vi (NAME#"_OFFEN_RTN")>; - def _IDXEN_RTN_vi : MUBUF_Real_vi (NAME#"_IDXEN_RTN")>; - def _BOTHEN_RTN_vi : MUBUF_Real_vi (NAME#"_BOTHEN_RTN")>; +multiclass MUBUF_Real_Atomic_vi op> { + def _OFFSET_vi : MUBUF_Real_vi (NAME#"_OFFSET")>, + AtomicNoRet; + def _OFFEN_vi : MUBUF_Real_vi (NAME#"_OFFEN")>, + AtomicNoRet; + def _IDXEN_vi : MUBUF_Real_vi (NAME#"_IDXEN")>, + AtomicNoRet; + def _BOTHEN_vi : MUBUF_Real_vi (NAME#"_BOTHEN")>, + AtomicNoRet; + def _OFFSET_RTN_vi : MUBUF_Real_vi (NAME#"_OFFSET_RTN")>, + AtomicNoRet; + def _OFFEN_RTN_vi : MUBUF_Real_vi (NAME#"_OFFEN_RTN")>, + AtomicNoRet; + def _IDXEN_RTN_vi : MUBUF_Real_vi (NAME#"_IDXEN_RTN")>, + AtomicNoRet; + def _BOTHEN_RTN_vi : MUBUF_Real_vi (NAME#"_BOTHEN_RTN")>, + AtomicNoRet; } 
defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_vi <0x00>; @@ -2398,7 +2423,7 @@ let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{18-15} = op; let Inst{22-19} = dfmt; let Inst{25-23} = nfmt; @@ -2406,7 +2431,7 @@ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } @@ -2428,7 +2453,7 @@ let Inst{11-0} = !if(ps.has_offset, offset, ?); let Inst{12} = ps.offen; let Inst{13} = ps.idxen; - let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{18-15} = op; let Inst{22-19} = dfmt; let Inst{25-23} = nfmt; @@ -2436,7 +2461,7 @@ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); - let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } Index: llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -379,9 +379,20 @@ } if (Res && (MCII->get(MI.getOpcode()).TSFlags & - (SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) && - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) { - insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1); + (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet))) { + 
int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::cpol); + if (CPolPos != -1) { + unsigned CPol = + (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ? + AMDGPU::CPol::GLC : 0; + if (MI.getNumOperands() <= (unsigned)CPolPos) { + insertNamedMCOperand(MI, MCOperand::createImm(CPol), + AMDGPU::OpName::cpol); + } else if (CPol) { + MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol); + } + } } if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) { Index: llvm/lib/Target/AMDGPU/FLATInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/FLATInstructions.td +++ llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -92,9 +92,7 @@ bits<7> saddr; bits<8> vdst; - bits<1> slc; - bits<1> glc; - bits<1> dlc; + bits<3> cpol; // Only valid on gfx9 bits<1> lds = 0; // XXX - What does this actually do? @@ -116,8 +114,8 @@ let Inst{13} = lds; let Inst{15-14} = seg; - let Inst{16} = !if(ps.has_glc, glc, ps.glcValue); - let Inst{17} = slc; + let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); + let Inst{17} = cpol{CPolBit.SLC}; let Inst{24-18} = op; let Inst{31-26} = 0x37; // Encoding. let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); @@ -149,9 +147,9 @@ (ins VReg_64:$vaddr)), (ins flat_offset:$offset)), // FIXME: Operands with default values do not work with following non-optional operands. 
- !if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, regClass:$vdst_in), - (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))), - " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> { + !if(HasTiedOutput, (ins CPol:$cpol, regClass:$vdst_in), + (ins CPol_0:$cpol))), + " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { let has_data = 0; let mayLoad = 1; let has_saddr = HasSaddr; @@ -171,8 +169,8 @@ !if(EnableSaddr, (ins VGPR_32:$vaddr, vdataClass:$vdata, SReg_64:$saddr), (ins VReg_64:$vaddr, vdataClass:$vdata)), - (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc)), - " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> { + (ins flat_offset:$offset, CPol_0:$cpol)), + " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -196,9 +194,9 @@ opName, (outs regClass:$vdst), !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)), - (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc), + (ins flat_offset:$offset, CPol_0:$cpol), !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))), - " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> { + " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { let is_flat_global = 1; let has_data = 0; let mayLoad = 1; @@ -234,8 +232,8 @@ opName, (outs), !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)), - (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)), - " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> { + (ins flat_offset:$offset, CPol:$cpol)), + " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { let is_flat_global = 1; let mayLoad = 0; let mayStore = 1; @@ -273,9 +271,9 @@ !if(EnableVaddr, (ins VGPR_32:$vaddr, flat_offset:$offset), (ins flat_offset:$offset))), - !if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, 
DLC:$dlc, regClass:$vdst_in), - (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))), - " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> { + !if(HasTiedOutput, (ins CPol:$cpol, regClass:$vdst_in), + (ins CPol_0:$cpol))), + " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { let has_data = 0; let mayLoad = 1; let has_saddr = 1; @@ -293,11 +291,11 @@ opName, (outs), !if(EnableSaddr, - (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc), + (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), !if(EnableVaddr, - (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc), - (ins vdataClass:$vdata, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))), - " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> { + (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol), + (ins vdataClass:$vdata, flat_offset:$offset, CPol_0:$cpol))), + " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -370,8 +368,8 @@ bit isFP = isFloatType.ret> { def "" : FLAT_AtomicNoRet_Pseudo , + (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, CPol_0:$cpol), + " $vaddr, $vdata$offset$cpol">, GlobalSaddrTable<0, opName>, AtomicNoRet { let PseudoInstr = NAME; @@ -381,8 +379,8 @@ def _RTN : FLAT_AtomicRet_Pseudo , GlobalSaddrTable<0, opName#"_rtn">, @@ -403,8 +401,8 @@ def "" : FLAT_AtomicNoRet_Pseudo , + (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, CPol_0:$cpol), + " $vaddr, $vdata, off$offset$cpol">, GlobalSaddrTable<0, opName>, AtomicNoRet { let has_saddr = 1; @@ -414,8 +412,8 @@ def _SADDR : FLAT_AtomicNoRet_Pseudo , + (ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, 
flat_offset:$offset, CPol_0:$cpol), + " $vaddr, $vdata, $saddr$offset$cpol">, GlobalSaddrTable<1, opName>, AtomicNoRet { let has_saddr = 1; @@ -436,8 +434,8 @@ def _RTN : FLAT_AtomicRet_Pseudo , GlobalSaddrTable<0, opName#"_rtn">, @@ -448,8 +446,8 @@ def _SADDR_RTN : FLAT_AtomicRet_Pseudo , + (ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol), + " $vdst, $vaddr, $vdata, $saddr$offset$cpol">, GlobalSaddrTable<1, opName#"_rtn">, AtomicNoRet { let has_saddr = 1; @@ -794,17 +792,17 @@ class FlatLoadPat_D16 : GCNPat < (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in), - (inst $vaddr, $offset, 0, 0, 0, $in) + (inst $vaddr, $offset, 0, $in) >; class FlatSignedLoadPat_D16 : GCNPat < (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset), vt:$in), - (inst $vaddr, $offset, 0, 0, 0, $in) + (inst $vaddr, $offset, 0, $in) >; class GlobalLoadSaddrPat_D16 : GCNPat < (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$in)), - (inst $saddr, $voffset, $offset, 0, 0, 0, $in) + (inst $saddr, $voffset, $offset, 0, $in) >; class FlatLoadSignedPat : GCNPat < @@ -814,7 +812,7 @@ class GlobalLoadSaddrPat : GCNPat < (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))), - (inst $saddr, $voffset, $offset, 0, 0, 0) + (inst $saddr, $voffset, $offset, 0) >; class GlobalStoreSaddrPat : GCNPat < (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in), - (inst $vaddr, $offset, 0, 0, 0, $in) + (inst $vaddr, $offset, 0, $in) >; class ScratchStoreSignedPat : GCNPat < @@ -910,7 +908,7 @@ class ScratchLoadSaddrPat_D16 : GCNPat < (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)), - (inst $saddr, $offset, 0, 0, 0, $in) + (inst $saddr, $offset, 0, $in) >; class ScratchStoreSaddrPat getOperand(OpNo).getImm(); + if (Imm & CPol::GLC) + O << " glc"; + if (Imm & CPol::SLC) + O << " slc"; + if (Imm & CPol::DLC) + O << " dlc"; + if (Imm & ~CPol::ALL) + O << " /* 
unexpected cache policy bit */"; +} + void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { } Index: llvm/lib/Target/AMDGPU/SIDefines.h =================================================================== --- llvm/lib/Target/AMDGPU/SIDefines.h +++ llvm/lib/Target/AMDGPU/SIDefines.h @@ -269,6 +269,17 @@ } // namespace AMDGPU namespace AMDGPU { +namespace CPol { + +enum CPol { + GLC = 1, + SLC = 2, + DLC = 4, + ALL = GLC | SLC | DLC +}; + +} // namespace CPol + namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns. enum Id { // Message ID, width(4) [3:0]. Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -11134,10 +11134,12 @@ int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode()); if (NoRetAtomicOp != -1) { if (!Node->hasAnyUseOfValue(0)) { - int Glc1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), - AMDGPU::OpName::glc1); - if (Glc1Idx != -1) - MI.RemoveOperand(Glc1Idx); + int CPolIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::cpol); + if (CPolIdx != -1) { + MachineOperand &CPol = MI.getOperand(CPolIdx); + CPol.setImm(CPol.getImm() & ~AMDGPU::CPol::GLC); + } MI.RemoveOperand(0); MI.setDesc(TII->get(NoRetAtomicOp)); return; Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -818,6 +818,10 @@ return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8); }]>; +def extract_cpol : SDNodeXFormgetTargetConstant(N->getZExtValue() & AMDGPU::CPol::ALL, SDLoc(N), MVT::i8); +}]>; + def extract_swz : SDNodeXFormgetTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8); }]>; @@ -1074,6 +1078,12 @@ let ParserMatchClass = MatchClass; } +class 
NamedOperandU32Default1 : + OperandWithDefaultOps { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; +} + let OperandType = "OPERAND_IMMEDIATE" in { def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>; @@ -1102,11 +1112,14 @@ def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>; def GLC_0 : NamedOperandBit_0<"GLC", NamedMatchClass<"GLC">>; -def GLC_1 : NamedOperandBit_1<"GLC", NamedMatchClass<"GLC_1">>; def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; def SLC_0 : NamedOperandBit_0<"SLC", NamedMatchClass<"SLC">>; +def CPol : NamedOperandU32<"CPol", NamedMatchClass<"CPol">>; +def CPol_0 : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>; +def CPol_GLC1 : NamedOperandU32Default1<"CPol", NamedMatchClass<"CPol">>; + def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>; def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>; @@ -1363,6 +1376,12 @@ int NONE = 0; } +def CPolBit { + int GLC = 0; + int SLC = 1; + int DLC = 2; +} + def TRAPID{ int LLVM_TRAP = 2; int LLVM_DEBUG_TRAP = 3; Index: llvm/test/MC/AMDGPU/atomic-fadd-insts.s =================================================================== --- llvm/test/MC/AMDGPU/atomic-fadd-insts.s +++ llvm/test/MC/AMDGPU/atomic-fadd-insts.s @@ -41,7 +41,7 @@ // GFX908: encoding: [0x07,0x00,0x34,0xe1,0x00,0x05,0x02,0x03] buffer_atomic_add_f32 v5, off, s[8:11], s3 offset:4095 glc -// GFX908-ERR: error: invalid operand for instruction +// GFX908-ERR: error: instruction must not use glc buffer_atomic_add_f32 v5, off, s[8:11], s3 offset:4095 slc // GFX908: encoding: [0xff,0x0f,0x36,0xe1,0x00,0x05,0x02,0x03] @@ -86,7 +86,7 @@ // GFX908: encoding: [0x07,0x00,0x38,0xe1,0x00,0x05,0x02,0x03] buffer_atomic_pk_add_f16 v5, off, s[8:11], s3 offset:4095 glc -// GFX908-ERR: error: invalid operand for instruction +// GFX908-ERR: error: instruction must not use glc buffer_atomic_pk_add_f16 v5, off, s[8:11], s3 offset:4095 slc 
// GFX908: encoding: [0xff,0x0f,0x3a,0xe1,0x00,0x05,0x02,0x03] Index: llvm/test/MC/AMDGPU/cpol-err.s =================================================================== --- /dev/null +++ llvm/test/MC/AMDGPU/cpol-err.s @@ -0,0 +1,16 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace + +scratch_load_ubyte v1, v2, off cpol:2 +// CHECK: error: not a valid operand. +// CHECK-NEXT:{{^}}scratch_load_ubyte v1, v2, off cpol:2 +// CHECK-NEXT:{{^}} ^ + +scratch_load_ubyte v1, v2, off glc slc dlc +// CHECK: error: dlc modifier is not supported on this GPU +// CHECK-NEXT:{{^}}scratch_load_ubyte v1, v2, off glc slc dlc +// CHECK-NEXT:{{^}} ^ + +global_atomic_add v[3:4], v5, off slc glc +// CHECK: error: instruction must not use glc +// CHECK-NEXT:{{^}}global_atomic_add v[3:4], v5, off slc glc +// CHECK-NEXT:{{^}} ^ Index: llvm/test/MC/AMDGPU/flat-gfx10.s =================================================================== --- llvm/test/MC/AMDGPU/flat-gfx10.s +++ llvm/test/MC/AMDGPU/flat-gfx10.s @@ -38,10 +38,10 @@ // GFX10: encoding: [0x00,0x00,0xc6,0xdc,0x01,0x03,0x7d,0x00] flat_atomic_cmpswap v[1:2], v[3:4] offset:2047 glc -// GFX10-ERR: error: invalid operand for instruction +// GFX10-ERR: error: instruction must not use glc flat_atomic_cmpswap v[1:2], v[3:4] glc -// GFX10-ERR: error: invalid operand for instruction +// GFX10-ERR: error: instruction must not use glc flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 glc // GFX10: encoding: [0xff,0x07,0xc5,0xdc,0x01,0x03,0x7d,0x00] Index: llvm/test/MC/AMDGPU/flat-gfx9.s =================================================================== --- llvm/test/MC/AMDGPU/flat-gfx9.s +++ llvm/test/MC/AMDGPU/flat-gfx9.s @@ -53,10 +53,11 @@ // VI: flat_atomic_cmpswap v[1:2], v[3:4] slc ; encoding: [0x00,0x00,0x06,0xdd,0x01,0x03,0x00,0x00] flat_atomic_cmpswap v[1:2], v[3:4] offset:4095 glc -// GCNERR: error: invalid operand for instruction +// GFX9-ERR: error: instruction 
must not use glc +// VI-ERR: error: flat offset modifier is not supported on this GPU flat_atomic_cmpswap v[1:2], v[3:4] glc -// GCNERR: error: invalid operand for instruction +// GCNERR: error: instruction must not use glc flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:4095 glc // GFX9: flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:4095 glc ; encoding: [0xff,0x0f,0x05,0xdd,0x01,0x03,0x00,0x00] Index: llvm/test/MC/AMDGPU/mubuf-gfx10.s =================================================================== --- llvm/test/MC/AMDGPU/mubuf-gfx10.s +++ llvm/test/MC/AMDGPU/mubuf-gfx10.s @@ -4,7 +4,7 @@ // GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds ; encoding: [0x00,0x40,0x25,0xe0,0x00,0x05,0x42,0x03] buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds dlc -// GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds dlc ; encoding: [0x00,0xc0,0x25,0xe0,0x00,0x05,0x42,0x03] +// GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc lds ; encoding: [0x00,0xc0,0x25,0xe0,0x00,0x05,0x42,0x03] buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc // GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc ; encoding: [0x00,0xc0,0x24,0xe0,0x00,0x05,0x42,0x03]