diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -15,8 +15,10 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
 #define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/Support/DataExtractor.h"
 #include <memory>
 
@@ -27,6 +29,54 @@
 class MCSubtargetInfo;
 class Twine;
 
+// Exposes an interface expected by autogenerated code in
+// FixedLenDecoderEmitter
+class DecoderUInt128 {
+private:
+  uint64_t Lo = 0;
+  uint64_t Hi = 0;
+
+public:
+  DecoderUInt128() = default;
+  DecoderUInt128(uint64_t Lo, uint64_t Hi = 0) : Lo(Lo), Hi(Hi) {}
+  operator bool() const { return Lo || Hi; }
+  void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits) {
+    assert(numBits && numBits <= 64);
+    assert(SubBits >> 1 >> (numBits - 1) == 0);
+    assert(bitPosition < 128);
+    if (bitPosition < 64) {
+      Lo |= SubBits << bitPosition;
+      Hi |= SubBits >> 1 >> (63 - bitPosition);
+    } else {
+      Hi |= SubBits << (bitPosition - 64);
+    }
+  }
+  uint64_t extractBitsAsZExtValue(unsigned numBits,
+                                  unsigned bitPosition) const {
+    assert(numBits && numBits <= 64);
+    assert(bitPosition < 128);
+    uint64_t Val;
+    if (bitPosition < 64)
+      Val = Lo >> bitPosition | Hi << 1 << (63 - bitPosition);
+    else
+      Val = Hi >> (bitPosition - 64);
+    return Val & ((uint64_t(2) << (numBits - 1)) - 1);
+  }
+  DecoderUInt128 operator&(const DecoderUInt128 &RHS) const {
+    return DecoderUInt128(Lo & RHS.Lo, Hi & RHS.Hi);
+  }
+  DecoderUInt128 operator~() const { return DecoderUInt128(~Lo, ~Hi); }
+  bool operator==(const DecoderUInt128 &RHS) {
+    return Lo == RHS.Lo && Hi == RHS.Hi;
+  }
+  bool operator!=(const DecoderUInt128 &RHS) {
+    return Lo != RHS.Lo || Hi != RHS.Hi;
+  }
+  friend raw_ostream &operator<<(raw_ostream &OS, const DecoderUInt128 &RHS) {
+    return OS << APInt(128, {RHS.Lo, RHS.Hi});
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // AMDGPUDisassembler
 //===----------------------------------------------------------------------===//
@@ -57,8 +107,21 @@
 
   MCOperand errOperand(unsigned V, const Twine& ErrMsg) const;
 
-  DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst,
-                             uint64_t Address) const;
+  template <typename InsnType>
+  DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst,
+                             uint64_t Address) const {
+    assert(MI.getOpcode() == 0);
+    assert(MI.getNumOperands() == 0);
+    MCInst TmpInst;
+    HasLiteral = false;
+    const auto SavedBytes = Bytes;
+    if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
+      MI = TmpInst;
+      return MCDisassembler::Success;
+    }
+    Bytes = SavedBytes;
+    return MCDisassembler::Fail;
+  }
 
   Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
                                        ArrayRef<uint8_t> Bytes,
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -388,23 +388,6 @@
   return Res;
 }
 
-DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
-                                               MCInst &MI,
-                                               uint64_t Inst,
-                                               uint64_t Address) const {
-  assert(MI.getOpcode() == 0);
-  assert(MI.getNumOperands() == 0);
-  MCInst TmpInst;
-  HasLiteral = false;
-  const auto SavedBytes = Bytes;
-  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
-    MI = TmpInst;
-    return MCDisassembler::Success;
-  }
-  Bytes = SavedBytes;
-  return MCDisassembler::Fail;
-}
-
 // The disassembler is greedy, so we need to check FI operand value to
 // not parse a dpp if the correct literal is not set. For dpp16 the
 // autogenerated decoder checks the dpp literal
@@ -439,7 +422,7 @@
       const uint64_t QW = eatBytes<uint64_t>(Bytes);
 
       if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
-        Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address);
+        Res = tryDecodeInst<uint64_t>(DecoderTableGFX10_B64, MI, QW, Address);
         if (Res) {
           if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
               == -1)
@@ -450,26 +433,26 @@
         }
       }
 
-      Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
+      Res = tryDecodeInst<uint64_t>(DecoderTableDPP864, MI, QW, Address);
       if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
         break;
 
       MI = MCInst(); // clear
 
-      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
+      Res = tryDecodeInst<uint64_t>(DecoderTableDPP64, MI, QW, Address);
       if (Res) break;
 
-      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
+      Res = tryDecodeInst<uint64_t>(DecoderTableSDWA64, MI, QW, Address);
       if (Res) { IsSDWA = true;  break; }
 
-      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
+      Res = tryDecodeInst<uint64_t>(DecoderTableSDWA964, MI, QW, Address);
       if (Res) { IsSDWA = true;  break; }
 
-      Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
+      Res = tryDecodeInst<uint64_t>(DecoderTableSDWA1064, MI, QW, Address);
       if (Res) { IsSDWA = true;  break; }
 
       if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
-        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
+        Res = tryDecodeInst<uint64_t>(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
         if (Res)
           break;
       }
@@ -478,7 +461,7 @@
       // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
       // table first so we print the correct name.
       if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
-        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
+        Res = tryDecodeInst<uint64_t>(DecoderTableGFX9_DL64, MI, QW, Address);
         if (Res)
           break;
       }
@@ -490,48 +473,51 @@
     // Try decode 32-bit instruction
     if (Bytes.size() < 4) break;
     const uint32_t DW = eatBytes<uint32_t>(Bytes);
-    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
+    Res = tryDecodeInst<uint64_t>(DecoderTableGFX832, MI, DW, Address);
     if (Res) break;
 
-    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
+    Res = tryDecodeInst<uint64_t>(DecoderTableAMDGPU32, MI, DW, Address);
     if (Res) break;
 
-    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
+    Res = tryDecodeInst<uint64_t>(DecoderTableGFX932, MI, DW, Address);
     if (Res) break;
 
     if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
-      Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address);
+      Res = tryDecodeInst<uint64_t>(DecoderTableGFX90A32, MI, DW, Address);
       if (Res)
         break;
     }
 
     if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
-      Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address);
+      Res = tryDecodeInst<uint64_t>(DecoderTableGFX10_B32, MI, DW, Address);
       if (Res) break;
     }
 
-    Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
+    Res = tryDecodeInst<uint64_t>(DecoderTableGFX1032, MI, DW, Address);
     if (Res) break;
 
     if (Bytes.size() < 4) break;
     const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
 
     if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
-      Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address);
+      Res = tryDecodeInst<uint64_t>(DecoderTableGFX90A64, MI, QW, Address);
       if (Res)
         break;
     }
 
-    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
+    Res = tryDecodeInst<uint64_t>(DecoderTableGFX864, MI, QW, Address);
     if (Res) break;
 
-    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
+    Res = tryDecodeInst<uint64_t>(DecoderTableAMDGPU64, MI, QW, Address);
     if (Res) break;
 
-    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
+    Res = tryDecodeInst<uint64_t>(DecoderTableGFX964, MI, QW, Address);
     if (Res) break;
 
-    Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
+    Res = tryDecodeInst<uint64_t>(DecoderTableGFX1064, MI, QW, Address);
+    if (Res) break;
+
+
   } while (false);
 
   if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||