Index: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -116,6 +116,7 @@ ImmTyDLC, ImmTyGLC, ImmTySLC, + ImmTyCPol, ImmTySWZ, ImmTyTFE, ImmTyD16, @@ -342,6 +343,7 @@ // value of the GLC operand. bool isGLC_1() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } + bool isCPol() const { return isImmTy(ImmTyCPol); } bool isSWZ() const { return isImmTy(ImmTySWZ); } bool isTFE() const { return isImmTy(ImmTyTFE); } bool isD16() const { return isImmTy(ImmTyD16); } @@ -840,6 +842,7 @@ case ImmTyDLC: OS << "DLC"; break; case ImmTyGLC: OS << "GLC"; break; case ImmTySLC: OS << "SLC"; break; + case ImmTyCPol: OS << "CPol"; break; case ImmTySWZ: OS << "SWZ"; break; case ImmTyTFE: OS << "TFE"; break; case ImmTyD16: OS << "D16"; break; @@ -1462,6 +1465,7 @@ AMDGPUOperand::Ptr defaultGLC() const; AMDGPUOperand::Ptr defaultGLC_1() const; AMDGPUOperand::Ptr defaultSLC() const; + AMDGPUOperand::Ptr defaultCPol() const; AMDGPUOperand::Ptr defaultSMRDOffset8() const; AMDGPUOperand::Ptr defaultSMEMOffset() const; @@ -4867,6 +4871,42 @@ // Try to parse with a custom parser OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + // This is hack to combine cache policy bits into a single operand + // since parseOptionalOperand just consumed all of the individual bits. + if (ResTy == MatchOperand_Success && Mnemonic.startswith("scratch_")) { + unsigned CPPos = 0; + unsigned CPol = 0; + + for (unsigned I = 1; I != Operands.size(); ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (Op.isGLC()) + CPol |= CPol::GLC; + else if (Op.isSLC()) + CPol |= CPol::SLC; + else if (Op.isDLC()) { + CPol |= CPol::DLC; + if (!isGFX10Plus()) { + Error(Op.getStartLoc(), "dlc modifier is not supported on this GPU"); + return MatchOperand_ParseFail; + } + } else + continue; + + if (!CPPos) { + CPPos = I; + } else { + Operands.erase(&Operands[I]); + --I; + } + } + + if (CPol) { + SMLoc S = ((AMDGPUOperand &)*Operands[CPPos]).getStartLoc(); + Operands[CPPos] = AMDGPUOperand::CreateImm(this, CPol, S, + AMDGPUOperand::ImmTyCPol); + } + } + // If we successfully parsed the operand or if there as an error parsing, // we are done. // @@ -6516,6 +6556,10 @@ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); } +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); +} + void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, @@ -6786,6 +6830,7 @@ {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, + {"cpol", AMDGPUOperand::ImmTyCPol, false, nullptr}, {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, @@ -6868,6 +6913,8 @@ Op.ConvertResult); } else if (Op.Type == AMDGPUOperand::ImmTyDim) { res = parseDim(Operands); + } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { + continue; } else { res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); } Index: llvm/lib/Target/AMDGPU/FLATInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/FLATInstructions.td +++ llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -54,6 +54,9 @@ bits<1> glcValue = 0; bits<1> has_dlc = 1; bits<1> dlcValue = 0; + // Temporary flag for instructions using cpol operand and + // not individual bits. + bits<1> has_cpol = 0; let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts, !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace)); @@ -95,6 +98,7 @@ bits<1> slc; bits<1> glc; bits<1> dlc; + bits<3> cpol; // Only valid on gfx9 bits<1> lds = 0; // XXX - What does this actually do? @@ -116,8 +120,8 @@ let Inst{13} = lds; let Inst{15-14} = seg; - let Inst{16} = !if(ps.has_glc, glc, ps.glcValue); - let Inst{17} = slc; + let Inst{16} = !if(ps.has_glc, !if(ps.has_cpol, cpol{CPolBit.GLC}, glc), ps.glcValue); + let Inst{17} = !if(ps.has_cpol, cpol{CPolBit.SLC}, slc); let Inst{24-18} = op; let Inst{31-26} = 0x37; // Encoding. let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); @@ -273,9 +277,9 @@ !if(EnableVaddr, (ins VGPR_32:$vaddr, flat_offset:$offset), (ins flat_offset:$offset))), - !if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, regClass:$vdst_in), - (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))), - " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> { + !if(HasTiedOutput, (ins CPol:$cpol, regClass:$vdst_in), + (ins CPol_0:$cpol))), + " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { let has_data = 0; let mayLoad = 1; let has_saddr = 1; @@ -283,6 +287,7 @@ let has_vaddr = EnableVaddr; let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")); let maybeAtomic = 1; + let has_cpol = 1; let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); @@ -293,11 +298,11 @@ opName, (outs), !if(EnableSaddr, - (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc), + (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), !if(EnableVaddr, - (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc), - (ins vdataClass:$vdata, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))), - " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> { + (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol), + (ins vdataClass:$vdata, flat_offset:$offset, CPol_0:$cpol))), + " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -306,6 +311,7 @@ let has_vaddr = EnableVaddr; let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")); let maybeAtomic = 1; + let has_cpol = 1; } multiclass FLAT_Scratch_Load_Pseudo { @@ -892,7 +898,7 @@ class ScratchLoadSignedPat_D16 : GCNPat < (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in), - (inst $vaddr, $offset, 0, 0, 0, $in) + (inst $vaddr, $offset, 0, $in) >; class ScratchStoreSignedPat : GCNPat < @@ -907,7 +913,7 @@ class ScratchLoadSaddrPat_D16 : GCNPat < (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)), - (inst $saddr, $offset, 0, 0, 0, $in) + (inst $saddr, $offset, 0, $in) >; class ScratchStoreSaddrPat getOperand(OpNo).getImm(); + if (Imm & CPol::GLC) + O << " glc"; + if (Imm & CPol::SLC) + O << " slc"; + if (Imm & CPol::DLC) + O << " dlc"; + if (Imm & ~CPol::ALL) + O << " /* unexpected cache policy bit */"; +} + void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { } Index: llvm/lib/Target/AMDGPU/SIDefines.h =================================================================== --- llvm/lib/Target/AMDGPU/SIDefines.h +++ llvm/lib/Target/AMDGPU/SIDefines.h @@ -263,6 +263,17 @@ } // namespace AMDGPU namespace AMDGPU { +namespace CPol { + +enum CPol { + GLC = 1, + SLC = 2, + DLC = 4, + ALL = GLC | SLC | DLC +}; + +} // namespace CPol + namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns. enum Id { // Message ID, width(4) [3:0]. Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1107,6 +1107,9 @@ def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; def SLC_0 : NamedOperandBit_0<"SLC", NamedMatchClass<"SLC">>; +def CPol : NamedOperandU32<"CPol", NamedMatchClass<"CPol">>; +def CPol_0 : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>; + def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>; def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>; @@ -1363,6 +1366,12 @@ int NONE = 0; } +def CPolBit { + int GLC = 0; + int SLC = 1; + int DLC = 2; +} + def TRAPID{ int LLVM_TRAP = 2; int LLVM_DEBUG_TRAP = 3; Index: llvm/test/MC/AMDGPU/cpol-err.s =================================================================== --- /dev/null +++ llvm/test/MC/AMDGPU/cpol-err.s @@ -0,0 +1,11 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace + +scratch_load_ubyte v1, v2, off cpol:2 +// CHECK: error: not a valid operand. +// CHECK-NEXT:{{^}}scratch_load_ubyte v1, v2, off cpol:2 +// CHECK-NEXT:{{^}} ^ + +scratch_load_ubyte v1, v2, off glc slc dlc' +// CHECK: error: dlc modifier is not supported on this GPU +// CHECK-NEXT:{{^}}scratch_load_ubyte v1, v2, off glc slc dlc +// CHECK-NEXT:{{^}} ^