diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1542,7 +1542,7 @@ bool validateOpSel(const MCInst &Inst); bool validateDPP(const MCInst &Inst, const OperandVector &Operands); bool validateVccOperand(unsigned Reg) const; - bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); + bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); bool validateAGPRLdSt(const MCInst &Inst) const; bool validateVGPRAlign(const MCInst &Inst) const; @@ -1715,6 +1715,7 @@ switch (OperandType) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_AC_INT32: @@ -1723,6 +1724,7 @@ case AMDGPU::OPERAND_REG_IMM_V2FP32: case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: case AMDGPU::OPERAND_REG_IMM_V2INT32: + case AMDGPU::OPERAND_KIMM32: return &APFloat::IEEEsingle(); case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_IMM_FP64: @@ -1732,6 +1734,7 @@ return &APFloat::IEEEdouble(); case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: @@ -1742,6 +1745,7 @@ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_KIMM16: return &APFloat::IEEEhalf(); default: llvm_unreachable("unsupported fp type"); @@ -2017,12 +2021,14 @@ case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case 
AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_AC_INT32: case AMDGPU::OPERAND_REG_INLINE_AC_FP32: case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: @@ -2036,7 +2042,9 @@ case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: case AMDGPU::OPERAND_REG_IMM_V2FP32: case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: - case AMDGPU::OPERAND_REG_IMM_V2INT32: { + case AMDGPU::OPERAND_REG_IMM_V2INT32: + case AMDGPU::OPERAND_KIMM32: + case AMDGPU::OPERAND_KIMM16: { bool lost; APFloat FPLiteral(APFloat::IEEEdouble(), Literal); // Convert literal to single precision @@ -2062,6 +2070,7 @@ switch (OpTy) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_AC_INT32: @@ -2101,6 +2110,7 @@ case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_INT16: @@ -2128,6 +2138,14 @@ Inst.addOperand(MCOperand::createImm(Val)); return; } + case AMDGPU::OPERAND_KIMM32: + Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); + setImmKindNone(); + return; + case AMDGPU::OPERAND_KIMM16: + Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); + setImmKindNone(); + return; default: llvm_unreachable("invalid operand size"); } @@ -3250,7 +3268,8 @@ SIInstrFlags::SDWA)) { // Check special imm operands (used by madmk, etc) if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { - ++NumLiterals; + LiteralSize = 4; } SmallDenseSet<unsigned> SGPRsUsed; @@ 
-3290,7 +3309,7 @@ // An instruction may use only one literal. // This has been validated on the previous step. - // See validateVOP3Literal. + // See validateVOPLiteral. // This literal may be used as more than one operand. // If all these operands are of the same size, // this literal counts as one scalar value. @@ -3981,26 +4000,29 @@ (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); } -// VOP3 literal is only allowed in GFX10+ and only one can be used -bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, - const OperandVector &Operands) { +// One unique literal can be used. VOP3 literal is only allowed in GFX10+ +bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, + const OperandVector &Operands) { unsigned Opcode = Inst.getOpcode(); const MCInstrDesc &Desc = MII.get(Opcode); - if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) + const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); + if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && + ImmIdx == -1) return true; const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); - const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; unsigned NumExprs = 0; unsigned NumLiterals = 0; uint32_t LiteralValue; for (int OpIdx : OpIndices) { - if (OpIdx == -1) break; + if (OpIdx == -1) + continue; const MCOperand &MO = Inst.getOperand(OpIdx); if (!MO.isImm() && !MO.isExpr()) @@ -4030,7 +4052,7 @@ if (!NumLiterals) return true; - if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { + if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { Error(getLitLoc(Operands), "literal operands are not supported"); return false; } @@ -4202,7 +4224,7 @@ "only one literal operand is allowed"); return false; } - 
if (!validateVOP3Literal(Inst, Operands)) { + if (!validateVOPLiteral(Inst, Operands)) { return false; } if (!validateConstantBusLimitations(Inst, Operands)) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -87,6 +87,7 @@ DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const; + DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const; DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; DecodeStatus convertMIMGInst(MCInst &MI) const; @@ -150,9 +151,11 @@ static MCOperand decodeIntImmed(unsigned Imm); static MCOperand decodeFPImmed(OpWidthTy Width, unsigned Imm); + MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const; MCOperand decodeLiteralConstant() const; - MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val, + bool MandatoryLiteral = false) const; MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const; MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -26,6 +26,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/AMDHSAKernelDescriptor.h" using namespace llvm; @@ -264,6 +265,34 @@ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm)); } +static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm, + uint64_t Addr, const 
void *Decoder) { + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm)); +} + +static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm)); +} + +static DecodeStatus decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand( + Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true)); +} + +static DecodeStatus decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); + return addOperand( + Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true)); +} + static bool IsAGPROperand(const MCInst &Inst, int OpIdx, const MCRegisterInfo *MRI) { if (OpIdx < 0) @@ -626,6 +655,11 @@ } } + int ImmLitIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm); + if (Res && ImmLitIdx != -1) + Res = convertFMAanyK(MI, ImmLitIdx); + // if the opcode was not recognized we'll assume a Size of 4 bytes // (unless there are fewer bytes left) Size = Res ? 
(MaxInstBytesNum - Bytes.size()) @@ -810,6 +844,24 @@ return MCDisassembler::Success; } +DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI, + int ImmLitIdx) const { + assert(HasLiteral && "Should have decoded a literal"); + const MCInstrDesc &Desc = MCII->get(MI.getOpcode()); + unsigned DescNumOps = Desc.getNumOperands(); + assert(DescNumOps == MI.getNumOperands()); + for (unsigned I = 0; I < DescNumOps; ++I) { + auto &Op = MI.getOperand(I); + auto OpType = Desc.OpInfo[I].OperandType; + bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED || + OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED); + if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST && + IsDeferredOp) + Op.setImm(Literal); + } + return MCDisassembler::Success; +} + const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const { return getContext().getRegisterInfo()-> getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]); @@ -1019,6 +1071,18 @@ return decodeDstOp(OPW512, Val); } +// Decode Literals for insts which always have a literal in the encoding +MCOperand +AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { + if (HasLiteral) { + if (Literal != Val) + return errOperand(Val, "More than one unique literal is illegal"); + } + HasLiteral = true; + Literal = Val; + return MCOperand::createImm(Literal); +} + MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { // For now all literal constants are supposed to be unsigned integer // ToDo: deal with signed/unsigned 64-bit integer constants @@ -1232,7 +1296,8 @@ return (TTmpMin <= Val && Val <= TTmpMax)? 
Val - TTmpMin : -1; } -MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const { +MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, + bool MandatoryLiteral) const { using namespace AMDGPU::EncValues; assert(Val < 1024); // enum10 @@ -1261,8 +1326,13 @@ if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) return decodeFPImmed(Width, Val); - if (Val == LITERAL_CONST) - return decodeLiteralConstant(); + if (Val == LITERAL_CONST) { + if (MandatoryLiteral) + // Keep a sentinel value for deferred setting + return MCOperand::createImm(LITERAL_CONST); + else + return decodeLiteralConstant(); + } switch (Width) { case OPW32: diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -605,6 +605,7 @@ switch (OpTy) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_AC_INT32: @@ -631,6 +632,7 @@ case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_FP16: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: printImmediate16(Op.getImm(), STI, O); break; case AMDGPU::OPERAND_REG_IMM_V2INT16: diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -233,6 +233,7 @@ switch (OpInfo.OperandType) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case 
AMDGPU::OPERAND_REG_INLINE_AC_INT32: @@ -255,6 +256,7 @@ case AMDGPU::OPERAND_REG_INLINE_AC_INT16: return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI); case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_FP16: // FIXME Is this correct? What do inline immediates do on SI for f16 src @@ -277,6 +279,9 @@ uint32_t Encoding = getLit16Encoding(Lo16, STI); return Encoding; } + case AMDGPU::OPERAND_KIMM32: + case AMDGPU::OPERAND_KIMM16: + return MO.getImm(); default: llvm_unreachable("invalid operand size"); } @@ -341,7 +346,13 @@ (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])) return; - // Check for additional literals in SRC0/1/2 (Op 1/2/3) + // Do not print literals from SISrc Operands for insts with mandatory literals + int ImmLitIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm); + if (ImmLitIdx != -1) + return; + + // Check for additional literals for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) { // Check if this operand should be encoded as [SV]Src diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -139,64 +139,67 @@ } namespace AMDGPU { - enum OperandType : unsigned { - /// Operands with register or 32-bit immediate - OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET, - OPERAND_REG_IMM_INT64, - OPERAND_REG_IMM_INT16, - OPERAND_REG_IMM_FP32, - OPERAND_REG_IMM_FP64, - OPERAND_REG_IMM_FP16, - OPERAND_REG_IMM_V2FP16, - OPERAND_REG_IMM_V2INT16, - OPERAND_REG_IMM_V2INT32, - OPERAND_REG_IMM_V2FP32, - - /// Operands with register or inline constant - OPERAND_REG_INLINE_C_INT16, - OPERAND_REG_INLINE_C_INT32, - OPERAND_REG_INLINE_C_INT64, - OPERAND_REG_INLINE_C_FP16, - OPERAND_REG_INLINE_C_FP32, - OPERAND_REG_INLINE_C_FP64, - OPERAND_REG_INLINE_C_V2INT16, - OPERAND_REG_INLINE_C_V2FP16, - 
OPERAND_REG_INLINE_C_V2INT32, - OPERAND_REG_INLINE_C_V2FP32, - - /// Operands with an AccVGPR register or inline constant - OPERAND_REG_INLINE_AC_INT16, - OPERAND_REG_INLINE_AC_INT32, - OPERAND_REG_INLINE_AC_FP16, - OPERAND_REG_INLINE_AC_FP32, - OPERAND_REG_INLINE_AC_FP64, - OPERAND_REG_INLINE_AC_V2INT16, - OPERAND_REG_INLINE_AC_V2FP16, - OPERAND_REG_INLINE_AC_V2INT32, - OPERAND_REG_INLINE_AC_V2FP32, - - OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32, - OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32, - - OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16, - OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32, - - OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16, - OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32, - - OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32, - OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, - - // Operand for source modifiers for VOP instructions - OPERAND_INPUT_MODS, - - // Operand for SDWA instructions - OPERAND_SDWA_VOPC_DST, - - /// Operand with 32-bit immediate that uses the constant bus. - OPERAND_KIMM32, - OPERAND_KIMM16 - }; +enum OperandType : unsigned { + /// Operands with register or 32-bit immediate + OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET, + OPERAND_REG_IMM_INT64, + OPERAND_REG_IMM_INT16, + OPERAND_REG_IMM_FP32, + OPERAND_REG_IMM_FP64, + OPERAND_REG_IMM_FP16, + OPERAND_REG_IMM_FP16_DEFERRED, + OPERAND_REG_IMM_FP32_DEFERRED, + OPERAND_REG_IMM_V2FP16, + OPERAND_REG_IMM_V2INT16, + OPERAND_REG_IMM_V2INT32, + OPERAND_REG_IMM_V2FP32, + + /// Operands with register or inline constant + OPERAND_REG_INLINE_C_INT16, + OPERAND_REG_INLINE_C_INT32, + OPERAND_REG_INLINE_C_INT64, + OPERAND_REG_INLINE_C_FP16, + OPERAND_REG_INLINE_C_FP32, + OPERAND_REG_INLINE_C_FP64, + OPERAND_REG_INLINE_C_V2INT16, + OPERAND_REG_INLINE_C_V2FP16, + OPERAND_REG_INLINE_C_V2INT32, + OPERAND_REG_INLINE_C_V2FP32, + + /// Operand with 32-bit immediate that uses the constant bus. 
+ OPERAND_KIMM32, + OPERAND_KIMM16, + + /// Operands with an AccVGPR register or inline constant + OPERAND_REG_INLINE_AC_INT16, + OPERAND_REG_INLINE_AC_INT32, + OPERAND_REG_INLINE_AC_FP16, + OPERAND_REG_INLINE_AC_FP32, + OPERAND_REG_INLINE_AC_FP64, + OPERAND_REG_INLINE_AC_V2INT16, + OPERAND_REG_INLINE_AC_V2FP16, + OPERAND_REG_INLINE_AC_V2INT32, + OPERAND_REG_INLINE_AC_V2FP32, + + OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32, + OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32, + + OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16, + OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32, + + OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16, + OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32, + + OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32, + OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, + + // Operand for source modifiers for VOP instructions + OPERAND_INPUT_MODS, + + // Operand for SDWA instructions + OPERAND_SDWA_VOPC_DST + +}; } // Input operand modifiers bit-masks diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3405,6 +3405,7 @@ switch (OperandType) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_IMM_V2FP32: @@ -3443,6 +3444,7 @@ // This suffers the same problem as the scalar 16-bit cases. 
return AMDGPU::isInlinableIntLiteralV216(Imm); case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_FP16: { if (isInt<16>(Imm) || isUInt<16>(Imm)) { @@ -3836,6 +3838,7 @@ break; case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: break; case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1173,6 +1173,7 @@ let OperandType = "OPERAND_KIMM"#vt.Size; let PrintMethod = "printU"#vt.Size#"ImmOperand"; let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass"); + let DecoderMethod = "decodeOperand_f"#vt.Size#"kimm"; } // 32-bit VALU immediate operand that uses the constant bus. diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -1019,6 +1019,30 @@ let DecoderMethod = "DecodeVS_128RegisterClass"; } +//===----------------------------------------------------------------------===// +// VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use +// with FMAMK/FMAAK +//===----------------------------------------------------------------------===// + +multiclass SIRegOperand32_Deferred <string rc, string MatchName, string opType, string rc_suffix = "_32"> { + let OperandNamespace = "AMDGPU" in { + def _f16_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> { + let OperandType = opType#"_FP16_DEFERRED"; + let ParserMatchClass = RegImmMatcher<MatchName#"F16">; + let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred"; + } + + def _f32_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> { + let OperandType = opType#"_FP32_DEFERRED"; + let ParserMatchClass = RegImmMatcher<MatchName#"F32">; + let DecoderMethod = "decodeOperand_" # rc # "_32_Deferred"; + } + } +} + +defm VSrc 
: SIRegOperand32_Deferred<"VS", "VSrc", "OPERAND_REG_IMM">; + //===----------------------------------------------------------------------===// // VRegSrc_* Operands with a VGPR //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -789,6 +789,7 @@ switch (OpInfo.OperandType) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_AC_INT32: @@ -797,6 +798,8 @@ case AMDGPU::OPERAND_REG_IMM_V2FP32: case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: + case AMDGPU::OPERAND_KIMM32: + case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4 return 4; case AMDGPU::OPERAND_REG_IMM_INT64: @@ -808,6 +811,7 @@ case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1574,8 +1574,10 @@ unsigned OpType = Desc.OpInfo[OpNo].OperandType; switch (OpType) { case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: case AMDGPU::OPERAND_REG_IMM_V2FP16: case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP32: diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td 
b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -270,12 +270,11 @@ class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); field dag Ins32 = !if(!eq(vt.Size, 32), - (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm), - (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm)); + (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm), + (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm)); + field string Asm32 = "$vdst, $src0, $src1, $imm"; field bit HasExt = 0; let IsSingle = 1; - - field string Asm32 = "$vdst, $src0, $src1, $imm"; } def VOP_MADAK_F16 : VOP_MADAK <f16>; @@ -283,11 +282,10 @@ class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); - field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1); + field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1); + field string Asm32 = "$vdst, $src0, $imm, $src1"; field bit HasExt = 0; let IsSingle = 1; - - field string Asm32 = "$vdst, $src0, $imm, $src1"; } def VOP_MADMK_F16 : VOP_MADMK <f16>; diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_err.s b/llvm/test/MC/AMDGPU/gfx10_asm_err.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_err.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_err.s @@ -274,6 +274,26 @@ // GFX6-7: error: dpp variant of this instruction is not supported // GFX8-9: error: not a valid operand +//===----------------------------------------------------------------------===// +// VOP2 +//===----------------------------------------------------------------------===// + +v_fmaak_f32 v0, 0xff32ff, v0, 0x11213141 +// GFX6-9: error: instruction not supported on this GPU +// GFX10: error: only one literal operand is allowed + +v_fmamk_f32 v0, 0xff32ff, 0x11213141, v0 +// GFX6-9: error: instruction not supported on this GPU +// GFX10: error: only one literal operand is allowed 
+ +v_fmaak_f32 v0, 0xff32, v0, 0x1122 +// GFX6-9: error: instruction not supported on this GPU +// GFX10: error: only one literal operand is allowed + +v_fmamk_f32 v0, 0xff32, 0x1122, v0 +// GFX6-9: error: instruction not supported on this GPU +// GFX10: error: only one literal operand is allowed + //===----------------------------------------------------------------------===// // VOP2 E64. //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s @@ -10229,9 +10229,15 @@ v_fmamk_f32 v5, v1, 0x11213141, v255 // GFX10: encoding: [0x01,0xff,0x0b,0x58,0x41,0x31,0x21,0x11] +v_fmamk_f32 v5, 0x11213141, 0x11213141, v255 +// GFX10: encoding: [0xff,0xfe,0x0b,0x58,0x41,0x31,0x21,0x11] + v_fmaak_f32 v5, v1, v2, 0x11213141 // GFX10: encoding: [0x01,0x05,0x0a,0x5a,0x41,0x31,0x21,0x11] +v_fmaak_f32 v5, 0x11213141, v2, 0x11213141 +// GFX10: encoding: [0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] + v_fmaak_f32 v255, v1, v2, 0x11213141 // GFX10: encoding: [0x01,0x05,0xfe,0x5b,0x41,0x31,0x21,0x11] @@ -11969,6 +11975,9 @@ v_fmamk_f16 v255, v1, 0x1121, v3 // GFX10: encoding: [0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00] +v_fmamk_f16 v255, 0x1121, 0x1121, v3 +// GFX10: encoding: [0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00] + v_fmamk_f16 v5, v255, 0x1121, v3 // GFX10: encoding: [0xff,0x07,0x0a,0x6e,0x21,0x11,0x00,0x00] @@ -12014,6 +12023,9 @@ v_fmaak_f16 v5, v1, v255, 0x1121 // GFX10: encoding: [0x01,0xff,0x0b,0x70,0x21,0x11,0x00,0x00] +v_fmaak_f16 v5, 0x1121, v255, 0x1121 +// GFX10: encoding: [0xff,0xfe,0x0b,0x70,0x21,0x11,0x00,0x00] + v_fmaak_f16 v5, v1, v2, 0xa1b1 // GFX10: encoding: [0x01,0x05,0x0a,0x70,0xb1,0xa1,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s --- a/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop2.s @@ 
-2337,6 +2337,9 @@ v_madmk_f16 v5, v1, 0x1121, v255 // CHECK: [0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00] +v_madmk_f16 v5, 0x1121, 0x1121, v255 +// CHECK: [0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00] + v_madak_f16 v5, v1, v2, 0x1121 // CHECK: [0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00] @@ -2367,6 +2370,9 @@ v_madak_f16 v5, v1, v2, 0xa1b1 // CHECK: [0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00] +v_madak_f16 v5, 0x1121, v2, 0x1121 +// CHECK: [0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00] + v_add_u16 v5, v1, v2 // CHECK: [0x01,0x05,0x0a,0x4c] diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -843,6 +843,20 @@ // NOGCN: error: invalid operand (violates constant bus restrictions) v_madak_f32 v0, scc, v0, 0x11213141 +// NOGCN: error: only one literal operand is allowed +v_madak_f32 v0, 0xff32ff, v0, 0x11213141 + +// NOGCN: error: only one literal operand is allowed +v_madmk_f32 v0, 0xff32ff, 0x11213141, v0 + +// NOSICI: error: instruction not supported on this GPU +// NOGFX89: error: only one literal operand is allowed +v_madak_f16 v0, 0xff32, v0, 0x1122 + +// NOSICI: error: instruction not supported on this GPU +// NOGFX89: error: only one literal operand is allowed +v_madmk_f16 v0, 0xff32, 0x1122, v0 + // NOSICIVI: error: register not available on this GPU // NOGFX9: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], private_base, private_limit diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s --- a/llvm/test/MC/AMDGPU/vop2.s +++ b/llvm/test/MC/AMDGPU/vop2.s @@ -270,6 +270,14 @@ // VI: v_madak_f32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42] v_madak_f32 v1, v2, v3, 64.0 +// SICI: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x42,0x41,0x31,0x21,0x11] +// VI: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11] +v_madak_f32 v0, 
0x11213141, v0, 0x11213141 + +// SICI: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x40,0x41,0x31,0x21,0x11] +// VI: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11] +v_madmk_f32 v0, 0x11213141, 0x11213141, v0 + // SICI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00] // VI: v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00] v_bcnt_u32_b32_e64 v1, v2, v3 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -73781,6 +73781,9 @@ # GFX10: v_fmaak_f16 v5, -1, v2, 0x1121 ; encoding: [0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00] 0xc1,0x04,0x0a,0x70,0x21,0x11,0x00,0x00 +# GFX10: v_fmaak_f16 v5, 0x1121, v2, 0x1121 ; encoding: [0xff,0x04,0x0a,0x70,0x21,0x11,0x00,0x00] +0xff,0x04,0x0a,0x70,0x21,0x11,0x00,0x00 + # GFX10: v_fmaak_f32 v5, -1, v2, 0x11213141 ; encoding: [0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] 0xc1,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11 @@ -73796,6 +73799,9 @@ # GFX10: v_fmaak_f32 v5, 0, v2, 0x11213141 ; encoding: [0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] 0x80,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11 +# GFX10: v_fmaak_f32 v5, 0x11213141, v2, 0x11213141 ; encoding: [0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11] +0xff,0x04,0x0a,0x5a,0x41,0x31,0x21,0x11 + # GFX10: v_fmaak_f16 v5, 0.5, v2, 0x1121 ; encoding: [0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00] 0xf0,0x04,0x0a,0x70,0x21,0x11,0x00,0x00 @@ -74150,6 +74156,9 @@ # GFX10: v_fmamk_f16 v255, v1, 0x1121, v3 ; encoding: [0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00] 0x01,0x07,0xfe,0x6f,0x21,0x11,0x00,0x00 +# GFX10: v_fmamk_f16 v255, 0x1121, 0x1121, v3 ; encoding: [0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00] +0xff,0x06,0xfe,0x6f,0x21,0x11,0x00,0x00 + # GFX10: v_fmamk_f32 v255, v1, 0x11213141, v3 ; encoding: 
[0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11] 0x01,0x07,0xfe,0x59,0x41,0x31,0x21,0x11 @@ -74159,6 +74168,9 @@ # GFX10: v_fmamk_f32 v5, -1, 0x11213141, v3 ; encoding: [0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11] 0xc1,0x06,0x0a,0x58,0x41,0x31,0x21,0x11 +# GFX10: v_fmamk_f32 v5, 0x11213141, 0x11213141, v3 ; encoding: [0xff,0x06,0x0a,0x58,0x41,0x31,0x21,0x11] +0xff,0x06,0x0a,0x58,0x41,0x31,0x21,0x11 + # GFX10: v_fmamk_f16 v5, -4.0, 0x1121, v3 ; encoding: [0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00] 0xf7,0x06,0x0a,0x6e,0x21,0x11,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt @@ -32160,6 +32160,9 @@ # CHECK: v_madmk_f32 v5, v1, 0x11213141, v255 ; encoding: [0x01,0xff,0x0b,0x2e,0x41,0x31,0x21,0x11] 0x01,0xff,0x0b,0x2e,0x41,0x31,0x21,0x11 +# CHECK: v_madmk_f32 v0, 0x11213141, 0x11213141, v0 ; encoding: [0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11] +0xff,0x00,0x00,0x2e,0x41,0x31,0x21,0x11 + # CHECK: v_madak_f32 v5, v1, v2, 0x11213141 ; encoding: [0x01,0x05,0x0a,0x30,0x41,0x31,0x21,0x11] 0x01,0x05,0x0a,0x30,0x41,0x31,0x21,0x11 @@ -32187,6 +32190,9 @@ # CHECK: v_madak_f32 v5, v1, v2, 0xa1b1c1d1 ; encoding: [0x01,0x05,0x0a,0x30,0xd1,0xc1,0xb1,0xa1] 0x01,0x05,0x0a,0x30,0xd1,0xc1,0xb1,0xa1 +# CHECK: v_madak_f32 v0, 0x11213141, v0, 0x11213141 ; encoding: [0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11] +0xff,0x00,0x00,0x30,0x41,0x31,0x21,0x11 + # CHECK: v_add_co_u32_e32 v5, vcc, v1, v2 ; encoding: [0x01,0x05,0x0a,0x32] 0x01,0x05,0x0a,0x32 @@ -33783,6 +33789,9 @@ # CHECK: v_madmk_f16 v5, v1, 0x1121, v255 ; encoding: [0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00] 0x01,0xff,0x0b,0x48,0x21,0x11,0x00,0x00 +# CHECK: v_madmk_f16 v5, 0x1121, 0x1121, v255 ; encoding: [0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00] +0xff,0xfe,0x0b,0x48,0x21,0x11,0x00,0x00 + # CHECK: v_madak_f16 v5, v1, v2, 0x1121 ; encoding: 
[0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00] 0x01,0x05,0x0a,0x4a,0x21,0x11,0x00,0x00 @@ -33810,6 +33819,9 @@ # CHECK: v_madak_f16 v5, v1, v2, 0xa1b1 ; encoding: [0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00] 0x01,0x05,0x0a,0x4a,0xb1,0xa1,0x00,0x00 +# CHECK: v_madak_f16 v5, 0x1121, v2, 0x1121 ; encoding: [0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00] +0xff,0x04,0x0a,0x4a,0x21,0x11,0x00,0x00 + # CHECK: v_add_u16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4c] 0x01,0x05,0x0a,0x4c