diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -136,13 +136,17 @@
 
 // Compute program resource register 3 for GFX10+. Must match hardware
 // definition.
-#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
-  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH)
+#define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
 enum : int32_t {
-  COMPUTE_PGM_RSRC3_GFX10(SHARED_VGPR_COUNT, 0, 4), // GFX10+
-  COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 28),
+  COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
+  COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6),    // GFX11+
+  COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1),    // GFX11+
+  COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1),      // GFX11+
+  COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
+  COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1),         // GFX11+
 };
-#undef COMPUTE_PGM_RSRC3_GFX10
+#undef COMPUTE_PGM_RSRC3_GFX10_PLUS
 
 // Kernel code properties. Must be kept backwards compatible.
 #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -695,7 +695,7 @@
   ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
 
   const uint64_t MaxScratchPerWorkitem =
-      GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize();
+      STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
   if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
     DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
                                           ProgInfo.ScratchSize,
@@ -879,15 +879,14 @@
   ProgInfo.LDSBlocks =
       alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
 
-  // Scratch is allocated in 256 dword blocks.
-  unsigned ScratchAlignShift = 10;
+  // Scratch is allocated in 64-dword or 256-dword blocks.
+  unsigned ScratchAlignShift =
+      STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
   // We need to program the hardware with the amount of scratch memory that
   // is used by the entire wave.  ProgInfo.ScratchSize is the amount of
   // scratch memory used per thread.
-  ProgInfo.ScratchBlocks =
-      alignTo(ProgInfo.ScratchSize * STM.getWavefrontSize(),
-              1ULL << ScratchAlignShift) >>
-      ScratchAlignShift;
+  ProgInfo.ScratchBlocks = divideCeil(
+      ProgInfo.ScratchSize * STM.getWavefrontSize(), 1ULL << ScratchAlignShift);
 
   if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
     ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
@@ -946,6 +945,7 @@
 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
                                          const SIProgramInfo &CurrentProgramInfo) {
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
 
   if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
@@ -957,7 +957,10 @@
     OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
 
     OutStreamer->emitInt32(R_00B860_COMPUTE_TMPRING_SIZE);
-    OutStreamer->emitInt32(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks));
+    OutStreamer->emitInt32(
+        STM.getGeneration() >= AMDGPUSubtarget::GFX11
+            ? S_00B860_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
+            : S_00B860_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
 
     // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
     // 0" comment but I don't see a corresponding field in the register spec.
@@ -966,14 +969,18 @@
     OutStreamer->emitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
                               S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
     OutStreamer->emitInt32(R_0286E8_SPI_TMPRING_SIZE);
-    OutStreamer->emitIntValue(
-        S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
+    OutStreamer->emitInt32(
+        STM.getGeneration() >= AMDGPUSubtarget::GFX11
+            ? S_0286E8_WAVESIZE_GFX11Plus(CurrentProgramInfo.ScratchBlocks)
+            : S_0286E8_WAVESIZE_PreGFX11(CurrentProgramInfo.ScratchBlocks));
   }
 
   if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
     OutStreamer->emitInt32(R_00B02C_SPI_SHADER_PGM_RSRC2_PS);
-    OutStreamer->emitInt32(
-        S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
+    unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
+                                ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
+                                : CurrentProgramInfo.LDSBlocks;
+    OutStreamer->emitInt32(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
     OutStreamer->emitInt32(R_0286CC_SPI_PS_INPUT_ENA);
     OutStreamer->emitInt32(MFI->getPSInputEnable());
     OutStreamer->emitInt32(R_0286D0_SPI_PS_INPUT_ADDR);
@@ -1017,7 +1024,10 @@
   // ScratchSize is in bytes, 16 aligned.
   MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
   if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
-    MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
+    unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
+                                ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
+                                : CurrentProgramInfo.LDSBlocks;
+    MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
     MD->setSpiPsInputEna(MFI->getPSInputEnable());
     MD->setSpiPsInputAddr(MFI->getPSInputAddr());
   }
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1650,6 +1650,7 @@
                              const SMLoc &IDLoc);
   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
                           const SMLoc &IDLoc);
+  bool validateExeczVcczOperands(const OperandVector &Operands);
   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
   unsigned getConstantBusLimit(unsigned Opcode) const;
   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
@@ -4495,6 +4496,22 @@
   return true;
 }
 
+bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
+  if (!isGFX11Plus())
+    return true;
+  for (auto& Operand: Operands) {
+    if (!Operand->isReg())
+      continue;
+    unsigned Reg = Operand->getReg();
+    if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
+      Error(getRegLoc(Reg, Operands),
+        "execz and vccz are not supported on this GPU");
+      return false;
+    }
+  }
+  return true;
+}
+
 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                           const SMLoc &IDLoc,
                                           const OperandVector &Operands) {
@@ -4609,6 +4626,9 @@
   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
     return false;
   }
+  if (!validateExeczVcczOperands(Operands)) {
+    return false;
+  }
 
   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
     return false;
@@ -5088,7 +5108,7 @@
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
       SharedVGPRCount = Val;
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
-                       COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val,
+                       COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
                        ValRange);
     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
       PARSE_BITS_ENTRY(
@@ -5586,7 +5606,7 @@
   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
     return isGFX9Plus();
 
-  // GFX10 has 2 more SGPRs 104 and 105.
+  // GFX10+ has 2 more SGPRs 104 and 105.
   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
     return hasSGPR104_SGPR105();
 
@@ -5595,8 +5615,9 @@
   case AMDGPU::SRC_SHARED_LIMIT:
   case AMDGPU::SRC_PRIVATE_BASE:
   case AMDGPU::SRC_PRIVATE_LIMIT:
-  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
     return isGFX9Plus();
+  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+    return isGFX9Plus() && !isGFX11Plus();
   case AMDGPU::TBA:
   case AMDGPU::TBA_LO:
   case AMDGPU::TBA_HI:
@@ -5619,7 +5640,7 @@
 
   if (isSI() || isGFX10Plus()) {
     // No flat_scr on SI.
-    // On GFX10 flat scratch is not a valid register operand and can only be
+    // On GFX10Plus flat scratch is not a valid register operand and can only be
     // accessed with s_setreg/s_getreg.
     switch (RegNo) {
     case AMDGPU::FLAT_SCR:
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -201,9 +201,6 @@
   SIFrameLowering FrameLowering;
 
 public:
-  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
-  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
-
   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                const GCNTargetMachine &TM);
   ~GCNSubtarget() override;
@@ -266,9 +263,19 @@
     return (Generation)Gen;
   }
 
+  unsigned getMaxWaveScratchSize() const {
+    // See COMPUTE_TMPRING_SIZE.WAVESIZE.
+    if (getGeneration() < GFX11) {
+      // 13-bit field in units of 256-dword.
+      return (256 * 4) * ((1 << 13) - 1);
+    }
+    // 15-bit field in units of 64-dword.
+    return (64 * 4) * ((1 << 15) - 1);
+  }
+
   /// Return the number of high bits known to be zero for a frame index.
   unsigned getKnownHighZeroBitsForFrameIndex() const {
-    return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
+    return countLeadingZeros(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
   }
 
   int getLDSBankCount() const {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -301,7 +301,7 @@
   uint32_t Encoded_pad = Encoded_s_code_end;
 
   // Instruction cache line size in bytes.
-  const unsigned Log2CacheLineSize = 6;
+  const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
   const unsigned CacheLineSize = 1u << Log2CacheLineSize;
 
   // Extra padding amount in bytes to support prefetch mode 3.
@@ -456,7 +456,7 @@
                 compute_pgm_rsrc1,
                 amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
     PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
-                amdhsa::COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT);
+                amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
   }
   PRINT_FIELD(
       OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
@@ -824,7 +824,7 @@
   uint32_t Encoded_pad = Encoded_s_code_end;
 
   // Instruction cache line size in bytes.
-  const unsigned Log2CacheLineSize = 6;
+  const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
   const unsigned CacheLineSize = 1u << Log2CacheLineSize;
 
   // Extra padding amount in bytes to support prefetch mode 3.
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -1036,10 +1036,12 @@
 #define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
 
 #define R_00B860_COMPUTE_TMPRING_SIZE                                   0x00B860
-#define   S_00B860_WAVESIZE(x)                                        (((x) & 0x1FFF) << 12)
+#define   S_00B860_WAVESIZE_PreGFX11(x)                               (((x) & 0x1FFF) << 12)
+#define   S_00B860_WAVESIZE_GFX11Plus(x)                              (((x) & 0x7FFF) << 12)
 
 #define R_0286E8_SPI_TMPRING_SIZE                                       0x0286E8
-#define   S_0286E8_WAVESIZE(x)                                        (((x) & 0x1FFF) << 12)
+#define   S_0286E8_WAVESIZE_PreGFX11(x)                               (((x) & 0x1FFF) << 12)
+#define   S_0286E8_WAVESIZE_GFX11Plus(x)                              (((x) & 0x7FFF) << 12)
 
 #define R_028B54_VGT_SHADER_STAGES_EN                                 0x028B54
 #define   S_028B54_HS_W32_EN(x)                                       (((x) & 0x1) << 21)
diff --git a/llvm/test/MC/AMDGPU/gfx10-constant-bus.s b/llvm/test/MC/AMDGPU/gfx10-constant-bus.s
--- a/llvm/test/MC/AMDGPU/gfx10-constant-bus.s
+++ b/llvm/test/MC/AMDGPU/gfx10-constant-bus.s
@@ -1,63 +1,72 @@
-// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s
-// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefix=GFX10-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX10 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck -check-prefixes=GCN-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX11 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck -check-prefixes=GCN-ERR,GFX11-ERR --implicit-check-not=error: %s
 
 //-----------------------------------------------------------------------------------------
 // On GFX10 we can use two scalar operands (except for 64-bit shift instructions)
 
 v_add_f32 v0, s0, s1
-// GFX10: v_add_f32_e64 v0, s0, s1        ; encoding: [0x00,0x00,0x03,0xd5,0x00,0x02,0x00,0x00]
+// GCN: v_add_f32_e64 v0, s0, s1        ; encoding: [0x00,0x00,0x03,0xd5,0x00,0x02,0x00,0x00]
 
 v_madak_f32 v0, s0, v1, 42.42
 // GFX10: v_madak_f32 v0, s0, v1, 0x4229ae14 ; encoding: [0x00,0x02,0x00,0x42,0x14,0xae,0x29,0x42]
+// GFX11-ERR: error: instruction not supported on this GPU
 
 v_med3_f32 v0, s0, s0, s1
 // GFX10: v_med3_f32 v0, s0, s0, s1       ; encoding: [0x00,0x00,0x57,0xd5,0x00,0x00,0x04,0x00]
+// GFX11: v_med3_f32 v0, s0, s0, s1       ; encoding: [0x00,0x00,0x1f,0xd6,0x00,0x00,0x04,0x00]
 
 //-----------------------------------------------------------------------------------------
 // 64-bit shift instructions can use only one scalar value input
 
 v_ashrrev_i64 v[0:1], 0x100, s[0:1]
-// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+// GCN-ERR: error: invalid operand (violates constant bus restrictions)
 
 v_ashrrev_i64 v[0:1], s2, s[0:1]
-// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+// GCN-ERR: error: invalid operand (violates constant bus restrictions)
 
 //-----------------------------------------------------------------------------------------
 // v_div_fmas implicitly reads VCC, so only one scalar operand is possible
 
 v_div_fmas_f32 v5, s3, s3, s3
 // GFX10: v_div_fmas_f32 v5, s3, s3, s3   ; encoding: [0x05,0x00,0x6f,0xd5,0x03,0x06,0x0c,0x00]
+// GFX11: v_div_fmas_f32 v5, s3, s3, s3   ; encoding: [0x05,0x00,0x37,0xd6,0x03,0x06,0x0c,0x00]
 
 v_div_fmas_f32 v5, s3, s3, s2
-// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+// GCN-ERR: error: invalid operand (violates constant bus restrictions)
 
 v_div_fmas_f32 v5, s3, 0x123, v3
-// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+// GCN-ERR: error: invalid operand (violates constant bus restrictions)
 
 v_div_fmas_f64 v[5:6], 0x12345678, 0x12345678, 0x12345678
 // GFX10: v_div_fmas_f64 v[5:6], 0x12345678, 0x12345678, 0x12345678 ; encoding: [0x05,0x00,0x70,0xd5,0xff,0xfe,0xfd,0x03,0x78,0x56,0x34,0x12]
+// GFX11: v_div_fmas_f64 v[5:6], 0x12345678, 0x12345678, 0x12345678 ; encoding: [0x05,0x00,0x38,0xd6,0xff,0xfe,0xfd,0x03,0x78,0x56,0x34,0x12]
 
 v_div_fmas_f64 v[5:6], v[1:2], s[2:3], v[3:4]
 // GFX10: v_div_fmas_f64 v[5:6], v[1:2], s[2:3], v[3:4] ; encoding: [0x05,0x00,0x70,0xd5,0x01,0x05,0x0c,0x04]
+// GFX11: v_div_fmas_f64 v[5:6], v[1:2], s[2:3], v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0x01,0x05,0x0c,0x04]
 
 v_div_fmas_f64 v[5:6], v[1:2], s[2:3], 0x123456
-// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+// GCN-ERR: error: invalid operand (violates constant bus restrictions)
 
 //-----------------------------------------------------------------------------------------
 // v_mad_u64_u32 has operands of different sizes.
 // When these operands are literals, they are counted as 2 scalar values even if literals are identical.
 
 v_lshlrev_b64 v[5:6], 0x3f717273, 0x3f717273
-// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+// GCN-ERR: error: invalid operand (violates constant bus restrictions)
 
 v_mad_u64_u32 v[5:6], s12, v1, 0x12345678, 0x12345678
 // GFX10: v_mad_u64_u32 v[5:6], s12, v1, 0x12345678, 0x12345678 ; encoding: [0x05,0x0c,0x76,0xd5,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
+// GFX11: v_mad_u64_u32 v[5:6], s12, v1, 0x12345678, 0x12345678 ; encoding: [0x05,0x0c,0xfe,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
 
 v_mad_u64_u32 v[5:6], s12, s1, 0x12345678, 0x12345678
-// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
+// GCN-ERR: error: invalid operand (violates constant bus restrictions)
 
 //-----------------------------------------------------------------------------------------
 // null is free
 
 v_bfe_u32 v5, s1, s2, null
 // GFX10: v_bfe_u32 v5, s1, s2, null      ; encoding: [0x05,0x00,0x48,0xd5,0x01,0x04,0xf4,0x01]
+// GFX11: v_bfe_u32 v5, s1, s2, null      ; encoding: [0x05,0x00,0x10,0xd6,0x01,0x04,0xf0,0x01]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_operands.s b/llvm/test/MC/AMDGPU/gfx11_asm_operands.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_operands.s
@@ -0,0 +1,144 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1030 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefix=GFX11 %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX11-ERR %s
+
+// On GFX11+, EXECZ and VCCZ are no longer allowed to be used as sources to SALU and VALU instructions.
+// The inline constants are removed. VCCZ and EXECZ still exist and can be use for conditional branches.
+// LDS_DIRECT and POPS_EXITING_WAVE_ID are also no longer allowed.
+
+//---------------------------------------------------------------------------//
+// EXECZ
+//---------------------------------------------------------------------------//
+
+s_cbranch_execz 0x100
+// GFX10: encoding: [0x00,0x01,0x88,0xbf]
+// GFX11: encoding: [0x00,0x01,0xa5,0xbf]
+
+s_add_i32 s0, execz, s2
+// GFX10: encoding: [0xfc,0x02,0x00,0x81]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+s_add_i32 s0, src_execz, s2
+// GFX10: encoding: [0xfc,0x02,0x00,0x81]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+s_add_i32 s0, s1, execz
+// GFX10: encoding: [0x01,0xfc,0x00,0x81]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+s_add_i32 s0, s1, src_execz
+// GFX10: encoding: [0x01,0xfc,0x00,0x81]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+v_add_f64 v[0:1], execz, v[2:3]
+// GFX10: encoding: [0x00,0x00,0x64,0xd5,0xfc,0x04,0x02,0x00]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+v_add_f64 v[0:1], src_execz, v[2:3]
+// GFX10: encoding: [0x00,0x00,0x64,0xd5,0xfc,0x04,0x02,0x00]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+v_add_f64 v[0:1], v[1:2], execz
+// GFX10: encoding: [0x00,0x00,0x64,0xd5,0x01,0xf9,0x01,0x00]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+v_add_f64 v[0:1], v[1:2], src_execz
+// GFX10: encoding: [0x00,0x00,0x64,0xd5,0x01,0xf9,0x01,0x00]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+//---------------------------------------------------------------------------//
+// VCCZ
+//---------------------------------------------------------------------------//
+
+s_cbranch_vccz 0x100
+// GFX10: encoding: [0x00,0x01,0x86,0xbf]
+// GFX11: encoding: [0x00,0x01,0xa3,0xbf]
+
+s_add_i32 s0, vccz, s2
+// GFX10: encoding: [0xfb,0x02,0x00,0x81]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+s_add_i32 s0, src_vccz, s2
+// GFX10: encoding: [0xfb,0x02,0x00,0x81]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+s_add_i32 s0, s1, vccz
+// GFX10: encoding: [0x01,0xfb,0x00,0x81]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+s_add_i32 s0, s1, src_vccz
+// GFX10: encoding: [0x01,0xfb,0x00,0x81]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+v_add_f64 v[0:1], vccz, v[2:3]
+// GFX10: encoding: [0x00,0x00,0x64,0xd5,0xfb,0x04,0x02,0x00]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+v_add_f64 v[0:1], src_vccz, v[2:3]
+// GFX10: encoding: [0x00,0x00,0x64,0xd5,0xfb,0x04,0x02,0x00]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+v_add_f64 v[0:1], v[1:2], vccz
+// GFX10: encoding: [0x00,0x00,0x64,0xd5,0x01,0xf7,0x01,0x00]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+v_add_f64 v[0:1], v[1:2], src_vccz
+// GFX10: encoding: [0x00,0x00,0x64,0xd5,0x01,0xf7,0x01,0x00]
+// GFX11-ERR: error: execz and vccz are not supported on this GPU
+
+//---------------------------------------------------------------------------//
+// LDS_DIRECT
+//---------------------------------------------------------------------------//
+
+v_readfirstlane_b32 s0, lds_direct
+// GFX10: encoding: [0xfe,0x04,0x00,0x7e]
+// GFX11-ERR: error: lds_direct is not supported on this GPU
+
+v_readfirstlane_b32 s0, src_lds_direct
+// GFX10: encoding: [0xfe,0x04,0x00,0x7e]
+// GFX11-ERR: error: lds_direct is not supported on this GPU
+
+v_mov_b32 v0, lds_direct
+// GFX10: encoding: [0xfe,0x02,0x00,0x7e]
+// GFX11-ERR: error: lds_direct is not supported on this GPU
+
+v_mov_b32 v0, src_lds_direct
+// GFX10: encoding: [0xfe,0x02,0x00,0x7e]
+// GFX11-ERR: error: lds_direct is not supported on this GPU
+
+//---------------------------------------------------------------------------//
+// POPS_EXITING_WAVE_ID
+//---------------------------------------------------------------------------//
+
+s_add_i32 s0, src_pops_exiting_wave_id, s1
+// GFX10: encoding: [0xef,0x01,0x00,0x81]
+// GFX11-ERR: error: register not available on this GPU
+
+s_add_i32 s0, s1, src_pops_exiting_wave_id
+// GFX10: encoding: [0x01,0xef,0x00,0x81]
+// GFX11-ERR: error: register not available on this GPU
+
+s_add_i32 s0, pops_exiting_wave_id, s1
+// GFX10: encoding: [0xef,0x01,0x00,0x81]
+// GFX11-ERR: error: register not available on this GPU
+
+s_add_i32 s0, s1, pops_exiting_wave_id
+// GFX10: encoding: [0x01,0xef,0x00,0x81]
+// GFX11-ERR: error: register not available on this GPU
+
+v_add_co_u32 v0, s0, pops_exiting_wave_id, v1
+// GFX10: encoding: [0x00,0x00,0x0f,0xd7,0xef,0x02,0x02,0x00]
+// GFX11-ERR: error: register not available on this GPU
+
+v_add_co_u32 v0, s0, src_pops_exiting_wave_id, v1
+// GFX10: encoding: [0x00,0x00,0x0f,0xd7,0xef,0x02,0x02,0x00]
+// GFX11-ERR: error: register not available on this GPU
+
+v_add_co_u32 v0, s0, v1, pops_exiting_wave_id
+// GFX10: encoding: [0x00,0x00,0x0f,0xd7,0x01,0xdf,0x01,0x00]
+// GFX11-ERR: error: register not available on this GPU
+
+v_add_co_u32 v0, s0, v1, src_pops_exiting_wave_id
+// GFX10: encoding: [0x00,0x00,0x0f,0xd7,0x01,0xdf,0x01,0x00]
+// GFX11-ERR: error: register not available on this GPU
+
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
@@ -0,0 +1,213 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 --amdhsa-code-object-version=3 -filetype=obj < %s > %t
+// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// READOBJ: Section Headers
+// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
+// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        0000c0 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64
+
+// READOBJ: Relocation section '.rela.rodata' at offset
+// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
+// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
+// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
+
+// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
+// READOBJ:      0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
+// READOBJ-NEXT: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
+// READOBJ-NEXT: 0000000000000200  0 FUNC    LOCAL  PROTECTED 2 special_sgpr
+// READOBJ-NEXT: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
+// READOBJ-NEXT: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
+// READOBJ-NEXT: 0000000000000080 64 OBJECT  LOCAL  DEFAULT   3 special_sgpr.kd
+
+// OBJDUMP: Contents of section .rodata
+// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
+// minimal
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
+// complete
+// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e040000 00000000
+// special_sgpr
+// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx1100"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100"
+
+.p2align 8
+.type minimal,@function
+minimal:
+  s_endpgm
+
+.p2align 8
+.type complete,@function
+complete:
+  s_endpgm
+
+.p2align 8
+.type special_sgpr,@function
+special_sgpr:
+  s_endpgm
+
+.rodata
+// ASM: .rodata
+
+// Test that only specifying required directives is allowed, and that defaulted
+// values are omitted.
+.p2align 6
+.amdhsa_kernel minimal
+  .amdhsa_next_free_vgpr 0
+  .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel minimal
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM: .end_amdhsa_kernel
+
+// Test that we can specify all available directives with non-default values.
+.p2align 6
+.amdhsa_kernel complete
+  .amdhsa_group_segment_fixed_size 1
+  .amdhsa_private_segment_fixed_size 1
+  .amdhsa_kernarg_size 8
+  .amdhsa_user_sgpr_dispatch_ptr 1
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_user_sgpr_kernarg_segment_ptr 1
+  .amdhsa_user_sgpr_dispatch_id 1
+  .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_wavefront_size32 1
+  .amdhsa_enable_private_segment 1
+  .amdhsa_system_sgpr_workgroup_id_x 0
+  .amdhsa_system_sgpr_workgroup_id_y 1
+  .amdhsa_system_sgpr_workgroup_id_z 1
+  .amdhsa_system_sgpr_workgroup_info 1
+  .amdhsa_system_vgpr_workitem_id 1
+  .amdhsa_next_free_vgpr 9
+  .amdhsa_next_free_sgpr 27
+  .amdhsa_reserve_vcc 0
+  .amdhsa_float_round_mode_32 1
+  .amdhsa_float_round_mode_16_64 1
+  .amdhsa_float_denorm_mode_32 1
+  .amdhsa_float_denorm_mode_16_64 0
+  .amdhsa_dx10_clamp 0
+  .amdhsa_ieee_mode 0
+  .amdhsa_fp16_overflow 1
+  .amdhsa_workgroup_processor_mode 1
+  .amdhsa_memory_ordered 1
+  .amdhsa_forward_progress 1
+  .amdhsa_exception_fp_ieee_invalid_op 1
+  .amdhsa_exception_fp_denorm_src 1
+  .amdhsa_exception_fp_ieee_div_zero 1
+  .amdhsa_exception_fp_ieee_overflow 1
+  .amdhsa_exception_fp_ieee_underflow 1
+  .amdhsa_exception_fp_ieee_inexact 1
+  .amdhsa_exception_int_div_zero 1
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel complete
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_kernarg_size 8
+// ASM-NEXT: .amdhsa_user_sgpr_count 9
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
+// ASM-NEXT: .amdhsa_wavefront_size32 1
+// ASM-NEXT: .amdhsa_enable_private_segment 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
+// ASM-NEXT: .amdhsa_next_free_vgpr 9
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 1
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_dx10_clamp 0
+// ASM-NEXT: .amdhsa_ieee_mode 0
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
+// ASM-NEXT: .amdhsa_memory_ordered 1
+// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_shared_vgpr_count 0
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
+
+// Test that we are including special SGPR usage in the granulated count.
+.p2align 6
+.amdhsa_kernel special_sgpr
+  // Same next_free_sgpr as "complete", but...
+  .amdhsa_next_free_sgpr 27
+  // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from
+  // 3 granules to 4
+
+  .amdhsa_reserve_vcc 0
+
+  .amdhsa_float_denorm_mode_16_64 0
+  .amdhsa_dx10_clamp 0
+  .amdhsa_ieee_mode 0
+  .amdhsa_next_free_vgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel special_sgpr
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_dx10_clamp 0
+// ASM-NEXT: .amdhsa_ieee_mode 0
+// ASM: .end_amdhsa_kernel
+
+.section .foo
+
+.byte .amdgcn.gfx_generation_number
+// ASM: .byte 11
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v7, s10
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 8
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 11
+
+.set .amdgcn.next_free_vgpr, 0
+.set .amdgcn.next_free_sgpr, 0
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v16, s3
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 17
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 4
diff --git a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
--- a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
@@ -1,4 +1,9 @@
 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefix=GCN %s
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefix=GFX11 %s
 
 # GCN: warning: invalid instruction encoding
 0xdf,0x00,0x00,0x02
+
+# this is buffer_atomic_csub_u32 v5, off, s[8:11], s3 offset:4095. Invalid without glc
+# GFX11: warning: invalid instruction encoding
+0xff,0x0f,0xdc,0xe0,0x00,0x05,0x02,0x03