Index: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -116,6 +116,7 @@
     ImmTyDLC,
     ImmTyGLC,
     ImmTySLC,
+    ImmTyCPol,
     ImmTySWZ,
     ImmTyTFE,
     ImmTyD16,
@@ -342,6 +343,7 @@
   // value of the GLC operand.
   bool isGLC_1() const { return isImmTy(ImmTyGLC); }
   bool isSLC() const { return isImmTy(ImmTySLC); }
+  bool isCPol() const { return isImmTy(ImmTyCPol); }
   bool isSWZ() const { return isImmTy(ImmTySWZ); }
   bool isTFE() const { return isImmTy(ImmTyTFE); }
   bool isD16() const { return isImmTy(ImmTyD16); }
@@ -840,6 +842,7 @@
     case ImmTyDLC: OS << "DLC"; break;
     case ImmTyGLC: OS << "GLC"; break;
     case ImmTySLC: OS << "SLC"; break;
+    case ImmTyCPol: OS << "CPol"; break;
     case ImmTySWZ: OS << "SWZ"; break;
     case ImmTyTFE: OS << "TFE"; break;
     case ImmTyD16: OS << "D16"; break;
@@ -1462,6 +1465,7 @@
   AMDGPUOperand::Ptr defaultGLC() const;
   AMDGPUOperand::Ptr defaultGLC_1() const;
   AMDGPUOperand::Ptr defaultSLC() const;
+  AMDGPUOperand::Ptr defaultCPol() const;
 
   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
   AMDGPUOperand::Ptr defaultSMEMOffset() const;
@@ -4867,6 +4871,42 @@
   // Try to parse with a custom parser
   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
 
+  // This is hack to combine cache policy bits into a single operand
+  // since parseOptionalOperand just consumed all of the individual bits.
+  if (ResTy == MatchOperand_Success && Mnemonic.startswith("scratch_")) {
+    unsigned CPPos = 0;
+    unsigned CPol = 0;
+
+    for (unsigned I = 1; I != Operands.size(); ++I) {
+      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+      if (Op.isGLC())
+        CPol |= CPol::GLC;
+      else if (Op.isSLC())
+        CPol |= CPol::SLC;
+      else if (Op.isDLC()) {
+        CPol |= CPol::DLC;
+        if (!isGFX10Plus()) {
+          Error(Op.getStartLoc(), "dlc modifier is not supported on this GPU");
+          return MatchOperand_ParseFail;
+        }
+      } else
+        continue;
+
+      if (!CPPos) {
+        CPPos = I;
+      } else {
+        Operands.erase(&Operands[I]);
+        --I;
+      }
+    }
+
+    if (CPol) {
+      SMLoc S = ((AMDGPUOperand &)*Operands[CPPos]).getStartLoc();
+      Operands[CPPos] = AMDGPUOperand::CreateImm(this, CPol, S,
+                                                 AMDGPUOperand::ImmTyCPol);
+    }
+  }
+
   // If we successfully parsed the operand or if there as an error parsing,
   // we are done.
   //
@@ -6516,6 +6556,10 @@
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
 }
 
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
+}
+
 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                const OperandVector &Operands,
                                bool IsAtomic,
@@ -6786,6 +6830,7 @@
   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
+  {"cpol",    AMDGPUOperand::ImmTyCPol, false, nullptr},
   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
@@ -6868,6 +6913,8 @@
                                         Op.ConvertResult);
     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
       res = parseDim(Operands);
+    } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
+      continue;
     } else {
       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
     }
Index: llvm/lib/Target/AMDGPU/FLATInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -54,6 +54,9 @@
   bits<1> glcValue = 0;
   bits<1> has_dlc  = 1;
   bits<1> dlcValue = 0;
+  // Temporary flag for instructions using cpol operand and
+  // not individual bits.
+  bits<1> has_cpol = 0;
 
   let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
     !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
@@ -95,6 +98,7 @@
   bits<1> slc;
   bits<1> glc;
   bits<1> dlc;
+  bits<3> cpol;
 
   // Only valid on gfx9
   bits<1> lds = 0; // XXX - What does this actually do?
@@ -116,8 +120,8 @@
   let Inst{13} = lds;
   let Inst{15-14} = seg;
 
-  let Inst{16}    = !if(ps.has_glc, glc, ps.glcValue);
-  let Inst{17}    = slc;
+  let Inst{16}    = !if(ps.has_glc, !if(ps.has_cpol, cpol{CPolBit.GLC}, glc), ps.glcValue);
+  let Inst{17}    = !if(ps.has_cpol, cpol{CPolBit.SLC}, slc);
   let Inst{24-18} = op;
   let Inst{31-26} = 0x37; // Encoding.
   let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
@@ -273,9 +277,9 @@
        !if(EnableVaddr,
          (ins VGPR_32:$vaddr, flat_offset:$offset),
          (ins flat_offset:$offset))),
-     !if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, regClass:$vdst_in),
-                        (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))),
-  " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
+     !if(HasTiedOutput, (ins CPol:$cpol, regClass:$vdst_in),
+                        (ins CPol_0:$cpol))),
+  " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
   let has_data = 0;
   let mayLoad = 1;
   let has_saddr = 1;
@@ -283,6 +287,7 @@
   let has_vaddr = EnableVaddr;
   let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
   let maybeAtomic = 1;
+  let has_cpol = 1;
 
   let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
   let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
@@ -293,11 +298,11 @@
   opName,
   (outs),
   !if(EnableSaddr,
-    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
+    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
     !if(EnableVaddr,
-      (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
-      (ins vdataClass:$vdata, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))),
-  " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
+      (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
+      (ins vdataClass:$vdata, flat_offset:$offset, CPol_0:$cpol))),
+  " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
   let mayLoad  = 0;
   let mayStore = 1;
   let has_vdst = 0;
@@ -306,6 +311,7 @@
   let has_vaddr = EnableVaddr;
   let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
   let maybeAtomic = 1;
+  let has_cpol = 1;
 }
 
 multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> {
@@ -892,7 +898,7 @@
 
 class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in),
-  (inst $vaddr, $offset, 0, 0, 0, $in)
+  (inst $vaddr, $offset, 0, $in)
 >;
 
 class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@@ -907,7 +913,7 @@
 
 class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
-  (inst $saddr, $offset, 0, 0, 0, $in)
+  (inst $saddr, $offset, 0, $in)
 >;
 
 class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
@@ -1500,7 +1506,7 @@
   let DecoderNamespace = "GFX10";
 
   let Inst{11-0}  = offset{11-0};
-  let Inst{12}    = !if(ps.has_dlc, dlc, ps.dlcValue);
+  let Inst{12}    = !if(ps.has_dlc, !if(ps.has_cpol, cpol{CPolBit.DLC}, dlc), ps.dlcValue);
   let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
   let Inst{55}    = 0;
 }
Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
===================================================================
--- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -74,6 +74,8 @@
                 raw_ostream &O);
   void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                 raw_ostream &O);
+  void printCPol(const MCInst *MI, unsigned OpNo,
+                 const MCSubtargetInfo &STI, raw_ostream &O);
   void printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                 raw_ostream &O);
   void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -217,6 +217,19 @@
   printNamedBit(MI, OpNo, O, "slc");
 }
 
+void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI, raw_ostream &O) {
+  auto Imm = MI->getOperand(OpNo).getImm();
+  if (Imm & CPol::GLC)
+    O << " glc";
+  if (Imm & CPol::SLC)
+    O << " slc";
+  if (Imm & CPol::DLC)
+    O << " dlc";
+  if (Imm & ~CPol::ALL)
+    O << " /* unexpected cache policy bit */";
+}
+
 void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo,
                                  const MCSubtargetInfo &STI, raw_ostream &O) {
 }
Index: llvm/lib/Target/AMDGPU/SIDefines.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIDefines.h
+++ llvm/lib/Target/AMDGPU/SIDefines.h
@@ -263,6 +263,17 @@
 } // namespace AMDGPU
 
 namespace AMDGPU {
+namespace CPol {
+
+enum CPol {
+  GLC = 1,
+  SLC = 2,
+  DLC = 4,
+  ALL = GLC | SLC | DLC
+};
+
+} // namespace CPol
+
 namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.
 
 enum Id { // Message ID, width(4) [3:0].
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1107,6 +1107,9 @@
 def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
 def SLC_0 : NamedOperandBit_0<"SLC", NamedMatchClass<"SLC">>;
 
+def CPol : NamedOperandU32<"CPol", NamedMatchClass<"CPol">>;
+def CPol_0 : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>;
+
 def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
 def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
 def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
@@ -1363,6 +1366,12 @@
   int NONE = 0;
 }
 
+def CPolBit {
+  int GLC = 0;
+  int SLC = 1;
+  int DLC = 2;
+}
+
 def TRAPID{
   int LLVM_TRAP = 2;
   int LLVM_DEBUG_TRAP = 3;
Index: llvm/test/MC/AMDGPU/cpol-err.s
===================================================================
--- /dev/null
+++ llvm/test/MC/AMDGPU/cpol-err.s
@@ -0,0 +1,11 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace
+
+scratch_load_ubyte v1, v2, off cpol:2
+// CHECK: error: not a valid operand.
+// CHECK-NEXT:{{^}}scratch_load_ubyte v1, v2, off cpol:2
+// CHECK-NEXT:{{^}}                               ^
+
+scratch_load_ubyte v1, v2, off glc slc dlc'
+// CHECK: error: dlc modifier is not supported on this GPU
+// CHECK-NEXT:{{^}}scratch_load_ubyte v1, v2, off glc slc dlc
+// CHECK-NEXT:{{^}}                                       ^