diff --git a/llvm/test/TableGen/BitOffsetDecoder.td b/llvm/test/TableGen/BitOffsetDecoder.td
--- a/llvm/test/TableGen/BitOffsetDecoder.td
+++ b/llvm/test/TableGen/BitOffsetDecoder.td
@@ -57,8 +57,27 @@
 
 }
 
-// CHECK: tmp = fieldFromInstruction(insn, 8, 7);
-// CHECK: tmp = fieldFromInstruction(insn, 8, 8) << 3;
-// CHECK: insertBits(tmp, fieldFromInstruction(insn, 8, 4), 7, 4);
-// CHECK: insertBits(tmp, fieldFromInstruction(insn, 12, 4), 3, 4);
-// CHECK: tmp = fieldFromInstruction(insn, 8, 8) << 4;
+// Equivalent to...
+// Tmp = 0
+// Tmp |= Inst[8+7-1:8] << 0
+// CHECK: // ExtractorID 1
+// CHECK-NEXT: 5, 0, 0, 8, 7, 0,
+
+// Equivalent to...
+// Tmp = 0
+// Tmp |= Inst[8+8-1:8] << 3
+// CHECK: // ExtractorID 2
+// CHECK-NEXT: 5, 0, 0, 8, 8, 3,
+
+// Equivalent to...
+// Tmp = 0
+// Tmp |= Inst[8+4-1:8] << 7
+// Tmp |= Inst[12+4-1:12] << 3
+// CHECK: // ExtractorID 3
+// CHECK-NEXT: 8, 0, 0, 8, 4, 7, 12, 4, 3,
+
+// Equivalent to...
+// Tmp = 0
+// Tmp |= Inst[8+8-1:8] << 4
+// CHECK: // ExtractorID 4
+// CHECK-NEXT: 5, 0, 0, 8, 8, 4,
diff --git a/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td b/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td
--- a/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td
+++ b/llvm/test/TableGen/FixedLenDecoderEmitter/InitValue.td
@@ -39,8 +39,20 @@
 
 }
 
-// CHECK: tmp = fieldFromInstruction(insn, 9, 7) << 1;
-// CHECK: tmp = 0x1;
-// CHECK: insertBits(tmp, fieldFromInstruction(insn, 9, 7), 1, 7);
-// CHECK: tmp = 0x100000000;
-// CHECK: insertBits(tmp, fieldFromInstruction(insn, 8, 7), 25, 7);
+// Equivalent to...
+// Tmp = 0
+// Tmp |= Inst[9+7-1:9] << 1
+// CHECK: // ExtractorID 1
+// CHECK-NEXT: 5, 0, 0, 9, 7, 1,
+
+// Equivalent to...
+// Tmp = 1
+// Tmp |= Inst[9+7-1:9] << 1
+// CHECK: // ExtractorID 2
+// CHECK-NEXT: 5, 0, 1, 9, 7, 1,
+
+// Equivalent to...
+// Tmp = 0x100000000
+// Tmp |= Inst[8+7-1:8] << 25
+// CHECK: // ExtractorID 3
+// CHECK-NEXT: 5, 1, 0, 8, 7, 25,
diff --git a/llvm/test/TableGen/VarLenDecoder.td b/llvm/test/TableGen/VarLenDecoder.td
--- a/llvm/test/TableGen/VarLenDecoder.td
+++ b/llvm/test/TableGen/VarLenDecoder.td
@@ -47,9 +47,9 @@
 
 // CHECK:      MCD::OPC_ExtractField, 3, 5,  // Inst{7-3} ...
 // CHECK-NEXT: MCD::OPC_FilterValue, 8, 4, 0, 0, // Skip to: 12
-// CHECK-NEXT: MCD::OPC_Decode, [[#OPCODE:]], 1, 0, // Opcode: FOO16
+// CHECK-NEXT: MCD::OPC_Decode, [[#OPCODE:]], 1, 1, // Opcode: FOO16
 // CHECK-NEXT: MCD::OPC_FilterValue, 9, 4, 0, 0, // Skip to: 21
-// CHECK-NEXT: MCD::OPC_Decode, [[#OPCODE+1]], 1, 1, // Opcode: FOO32
+// CHECK-NEXT: MCD::OPC_Decode, [[#OPCODE+1]], 1, 2, // Opcode: FOO32
 // CHECK-NEXT: MCD::OPC_Fail,
 
 // Instruction length table
@@ -57,24 +57,51 @@
 // CHECK-NEXT: 43,
 // CHECK-NEXT: };
 
-// CHECK:      case 0:
-// CHECK-NEXT: tmp = fieldFromInstruction(insn, 8, 3);
-// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; }
-// CHECK-NEXT: tmp = fieldFromInstruction(insn, 0, 3);
-// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; }
-// CHECK-NEXT: tmp = fieldFromInstruction(insn, 11, 16);
-// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
-// CHECK-NEXT: return S;
-// CHECK-NEXT: case 1:
-// CHECK-NEXT: tmp = fieldFromInstruction(insn, 8, 3);
-// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; }
-// CHECK-NEXT: tmp = fieldFromInstruction(insn, 0, 3);
-// CHECK-NEXT: if (DecodeRegClassRegisterClass(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { return MCDisassembler::Fail; }
-// CHECK-NEXT: tmp = 0x0;
-// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 11, 16), 16, 16);
-// CHECK-NEXT: insertBits(tmp, fieldFromInstruction(insn, 27, 16), 0, 16);
-// CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
-// CHECK-NEXT: return S;
+// The sequence of decoder methods used.
+// This identical sequence of decoder methods appears
+// in both instructions.
+// CHECK: // DecoderMethodSequenceID 1
+// CHECK-NEXT: 0, // DecodeRegClassRegisterClass
+// CHECK-NEXT: 0, // DecodeRegClassRegisterClass
+// CHECK-NEXT: 1, // DefaultDecodeImm
+
+// The sequence of bit extractors used.
+// Notice how both instructions start with the same bit extractions
+// to decode the registers, but the bit extractor for the immediate operand
+// afterwards is different.
+// CHECK: // DecoderExtractorSequenceID 1
+// CHECK-NEXT: 1, 2, 3, 0,
+// CHECK-NEXT: // DecoderExtractorSequenceID 2
+// CHECK-NEXT: 1, 2, 4, 0,
+
+// The description of the bit extraction operations used.
+// Extractors 1 and 2 are bit extractions for the registers.
+// Extractors 3 and 4 describe the two ways immediate operands are decoded.
+// ---
+// Equivalent to...
+// Tmp = 0
+// Tmp |= Inst[8+3-1:8] << 0
+// CHECK: // ExtractorID 1
+// CHECK-NEXT: 5, 0, 0, 8, 3, 0,
+// ---
+// Equivalent to...
+// Tmp = 0
+// Tmp |= Inst[0+3-1:0] << 0
+// CHECK-NEXT: // ExtractorID 2
+// CHECK-NEXT: 5, 0, 0, 0, 3, 0,
+// ---
+// Equivalent to...
+// Tmp = 0
+// Tmp |= Inst[11+16-1:11] << 0
+// CHECK-NEXT: // ExtractorID 3
+// CHECK-NEXT: 5, 0, 0, 11, 16, 0,
+// ---
+// Equivalent to...
+// Tmp = 0
+// Tmp |= Inst[11+16-1:11] << 16
+// Tmp |= Inst[27+16-1:27] << 0
+// CHECK-NEXT: // ExtractorID 4
+// CHECK-NEXT: 8, 0, 0, 11, 16, 16, 27, 16, 0,
 
 // CHECK-LABEL: case MCD::OPC_ExtractField: {
 // CHECK: makeUp(insn, Start + Len);
diff --git a/llvm/test/TableGen/trydecode-emission.td b/llvm/test/TableGen/trydecode-emission.td
--- a/llvm/test/TableGen/trydecode-emission.td
+++ b/llvm/test/TableGen/trydecode-emission.td
@@ -36,8 +36,9 @@
 // CHECK:      /* 0 */       MCD::OPC_ExtractField, 4, 4,  // Inst{7-4} ...
 // CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 18, 0, 0, // Skip to: 26
 // CHECK-NEXT: /* 8 */       MCD::OPC_CheckField, 2, 2, 0, 7, 0, 0, // Skip to: 22
-// CHECK-NEXT: /* 15 */      MCD::OPC_TryDecode, {{[0-9]+}}, 1, 0, 0, 0, 0, // Opcode: InstB, skip to: 22
-// CHECK-NEXT: /* 22 */      MCD::OPC_Decode, {{[0-9]+}}, 1, 1, // Opcode: InstA
+// CHECK-NEXT: /* 15 */      MCD::OPC_TryDecode, {{[0-9]+}}, 1, 1, 0, 0, 0, // Opcode: InstB, skip to: 22
+// CHECK-NEXT: /* 22 */      MCD::OPC_Decode, {{[0-9]+}}, 1, 2, // Opcode: InstA
 // CHECK-NEXT: /* 26 */      MCD::OPC_Fail,
 
-// CHECK: if (DecodeInstB(MI, insn, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; }
+// CHECK: // DecoderMethodSequenceID 1
+// CHECK-NEXT: 0, // DecodeInstB
diff --git a/llvm/test/TableGen/trydecode-emission2.td b/llvm/test/TableGen/trydecode-emission2.td
--- a/llvm/test/TableGen/trydecode-emission2.td
+++ b/llvm/test/TableGen/trydecode-emission2.td
@@ -35,10 +35,12 @@
 // CHECK-NEXT: /* 8 */       MCD::OPC_ExtractField, 5, 3,  // Inst{7-5} ...
 // CHECK-NEXT: /* 11 */      MCD::OPC_FilterValue, 0, 28, 0, 0, // Skip to: 44
 // CHECK-NEXT: /* 16 */      MCD::OPC_CheckField, 0, 2, 3, 7, 0, 0, // Skip to: 30
-// CHECK-NEXT: /* 23 */      MCD::OPC_TryDecode, {{[0-9]+}}, 1, 0, 0, 0, 0, // Opcode: InstB, skip to: 30
+// CHECK-NEXT: /* 23 */      MCD::OPC_TryDecode, {{[0-9]+}}, 1, 1, 0, 0, 0, // Opcode: InstB, skip to: 30
 // CHECK-NEXT: /* 30 */      MCD::OPC_CheckField, 3, 2, 0, 7, 0, 0, // Skip to: 44
-// CHECK-NEXT: /* 37 */      MCD::OPC_TryDecode, {{[0-9]+}}, 1, 1, 0, 0, 0, // Opcode: InstA, skip to: 44
+// CHECK-NEXT: /* 37 */      MCD::OPC_TryDecode, {{[0-9]+}}, 1, 2, 0, 0, 0, // Opcode: InstA, skip to: 44
 // CHECK-NEXT: /* 44 */      MCD::OPC_Fail,
 
-// CHECK: if (DecodeInstB(MI, insn, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; }
-// CHECK: if (DecodeInstA(MI, insn, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; }
+// CHECK: // DecoderMethodSequenceID 1
+// CHECK-NEXT: 0, // DecodeInstB
+// CHECK-NEXT: // DecoderMethodSequenceID 2
+// CHECK-NEXT: 1, // DecodeInstA
diff --git a/llvm/test/TableGen/trydecode-emission3.td b/llvm/test/TableGen/trydecode-emission3.td
--- a/llvm/test/TableGen/trydecode-emission3.td
+++ b/llvm/test/TableGen/trydecode-emission3.td
@@ -37,8 +37,9 @@
 // CHECK:      /* 0 */       MCD::OPC_ExtractField, 4, 4,  // Inst{7-4} ...
 // CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 18, 0, 0, // Skip to: 26
 // CHECK-NEXT: /* 8 */       MCD::OPC_CheckField, 2, 2, 0, 7, 0, 0, // Skip to: 22
-// CHECK-NEXT: /* 15 */      MCD::OPC_TryDecode, {{[0-9]+}}, 1, 0, 0, 0, 0, // Opcode: InstB, skip to: 22
-// CHECK-NEXT: /* 22 */      MCD::OPC_Decode, {{[0-9]+}}, 1, 1, // Opcode: InstA
+// CHECK-NEXT: /* 15 */      MCD::OPC_TryDecode, {{[0-9]+}}, 1, 1, 0, 0, 0, // Opcode: InstB, skip to: 22
+// CHECK-NEXT: /* 22 */      MCD::OPC_Decode, {{[0-9]+}}, 1, 2, // Opcode: InstA
 // CHECK-NEXT: /* 26 */      MCD::OPC_Fail,
 
-// CHECK: if (DecodeInstBOp(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; }
+// CHECK: // DecoderMethodSequenceID 1
+// CHECK-NEXT: 0, // DecodeInstBOp
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -18,12 +18,14 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/CachedHashString.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/UniqueVector.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
@@ -33,6 +35,9 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
+
+#include "Types.h"
+
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -83,17 +88,86 @@
   const_iterator end() const   { return Fields.end();   }
 };
 
+// After determining the opcode of an instruction, the bits of the instruction
+// are read and used to build the operands of an MCInst. This conversion is done
+// by a data-driven state machine.
+// This state machine works by executing a sequence of commands ("codelets").
+// These commands generally extract bits from the instruction, then pass those
+// bits to DecoderMethods. These DecoderMethods convert the bits into
+// MCOperands.
+enum class DecoderCodeletID : unsigned {
+  DispatchExtractor,     // Pop the next Extractor ID and run its extraction.
+  DispatchDecoderMethod, // Pop the next DecoderMethod ID and call that method.
+  DispatchCompleteDecoderMethod, // NOTE(review): handling not shown; confirm.
+  CopyInsnToTmp, // Presumably copies the raw insn into tmp; TODO confirm.
+  Terminator,    // Written after the codelets of every emitted sequence.
+};
+
+raw_ostream &operator<<(raw_ostream &OS, DecoderCodeletID ID) {
+  // Print the codelet as its underlying integer value.
+  return OS << static_cast<unsigned>(ID);
+}
+
+typedef SmallVector<DecoderCodeletID, 16> DecoderCodeletIDVector;
+typedef SmallVector<unsigned, 16> DecoderMethodIDVector;
+typedef SmallVector<unsigned, 16> DecoderExtractorIDVector;
+typedef UniqueVector<DecoderCodeletIDVector> DecoderCodeletSequenceSet;
+typedef UniqueVector<DecoderMethodIDVector> DecoderMethodSequenceSet;
+typedef UniqueVector<DecoderExtractorIDVector> DecoderExtractorSequenceSet;
+struct PerDecoderInfo {
+  // Index of the codelet sequence that implements this decoder. The decoding
+  // state machine runs the codelets of that sequence one after another, in
+  // order.
+  unsigned DecoderCodeletSequenceID;
+  // Index of the sequence of decoder method calls made by this decoder. Each
+  // DispatchDecoderMethod codelet executed by the state machine pops one
+  // DecoderMethod ID off this sequence and calls the corresponding method.
+  unsigned DecoderMethodSequenceID;
+  // Index of the sequence of bit extraction commands issued by this decoder.
+  // Each DispatchExtractor codelet executed by the state machine pops one
+  // Extractor ID off this sequence and performs the bit extraction operations
+  // that the popped extractor specifies.
+  unsigned DecoderExtractorSequenceID;
+
+  // Comparisons required for storage in a UniqueVector.
+  bool operator==(const PerDecoderInfo &RHS) const {
+    // Equal only when all three sequence IDs match.
+    return DecoderCodeletSequenceID == RHS.DecoderCodeletSequenceID &&
+           DecoderMethodSequenceID == RHS.DecoderMethodSequenceID &&
+           DecoderExtractorSequenceID == RHS.DecoderExtractorSequenceID;
+  }
+  bool operator<(const PerDecoderInfo &RHS) const {
+    // Lexicographic: codelet, then method, then extractor sequence ID.
+    if (DecoderCodeletSequenceID != RHS.DecoderCodeletSequenceID)
+      return DecoderCodeletSequenceID < RHS.DecoderCodeletSequenceID;
+    if (DecoderMethodSequenceID != RHS.DecoderMethodSequenceID)
+      return DecoderMethodSequenceID < RHS.DecoderMethodSequenceID;
+    return DecoderExtractorSequenceID < RHS.DecoderExtractorSequenceID;
+  }
+};
+
 typedef std::vector<uint8_t> DecoderTable;
 typedef uint32_t DecoderFixup;
 typedef std::vector<DecoderFixup> FixupList;
 typedef std::vector<FixupList> FixupScopeList;
 typedef SmallSetVector<CachedHashString, 16> PredicateSet;
-typedef SmallSetVector<CachedHashString, 16> DecoderSet;
+typedef UniqueVector<PerDecoderInfo> DecoderMap;
+typedef SmallSetVector<CachedHashString, 16> DecoderMethodSet;
+typedef UniqueVector<SmallVector<uint32_t, 16>> DecoderExtractorSet;
 struct DecoderTableInfo {
   DecoderTable Table;
   FixupScopeList FixupStack;
   PredicateSet Predicates;
-  DecoderSet Decoders;
+  // For compression purposes, the operations of the decoder state machine are
+  // de-duplicated as much as possible. Set data structures are used to keep
+  // track of unique operations, and IDs are used by the state machine to refer
+  // to items in these sets.
+  DecoderMap Decoders;
+  DecoderMethodSet DecoderMethods;
+  DecoderExtractorSet DecoderExtractors;
+  DecoderCodeletSequenceSet DecoderCodeletSequences;
+  DecoderMethodSequenceSet DecoderMethodSequences;
+  DecoderExtractorSequenceSet DecoderExtractorSequences;
 };
 
 struct EncodingAndInst {
@@ -148,9 +222,13 @@
   void emitPredicateFunction(formatted_raw_ostream &OS,
                              PredicateSet &Predicates,
                              unsigned Indentation) const;
-  void emitDecoderFunction(formatted_raw_ostream &OS,
-                           DecoderSet &Decoders,
-                           unsigned Indentation) const;
+  void emitDecoderFunction(
+      formatted_raw_ostream &OS, const DecoderMap &Decoders,
+      const DecoderMethodSet &DecoderMethods,
+      const DecoderExtractorSet &DecoderExtractors,
+      const DecoderCodeletSequenceSet DecoderCodeletSequences,
+      const DecoderMethodSequenceSet DecoderMethodSequences,
+      const DecoderExtractorSequenceSet DecoderExtractorSequences) const;
 
   // run - Output the code emitter
   void run(raw_ostream &o);
@@ -526,14 +604,24 @@
   void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
                                const Filter &Best) const;
 
-  void emitBinaryParser(raw_ostream &o, unsigned &Indentation,
+  void emitBinaryParser(SmallVector<DecoderCodeletID, 16> &CodeletIDs,
+                        SmallVector<SmallString<256>, 16> &DecoderMethods,
+                        SmallVector<SmallVector<uint32_t, 16>, 16> &Extractors,
                         const OperandInfo &OpInfo,
                         bool &OpHasCompleteDecoder) const;
 
-  void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc,
-                   bool &HasCompleteDecoder) const;
-  unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc,
-                           bool &HasCompleteDecoder) const;
+  void emitDecoder(SmallVector<DecoderCodeletID, 16> &CodeletIDs,
+                   SmallVector<SmallString<256>, 16> &DecoderMethods,
+                   SmallVector<SmallVector<uint32_t, 16>, 16> &Extractors,
+                   unsigned Opc, bool &HasCompleteDecoder) const;
+
+  unsigned
+  getDecoderIndex(DecoderMap &Decoders, DecoderMethodSet &UniqueDecoderMethods,
+                  DecoderExtractorSet &UniqueExtractors,
+                  DecoderCodeletSequenceSet &UniqueDecoderCodeletSequences,
+                  DecoderMethodSequenceSet &UniqueDecoderMethodSequences,
+                  DecoderExtractorSequenceSet &UniqueDecoderExtractorSequences,
+                  unsigned Opc, bool &HasCompleteDecoder) const;
 
   // Assign a single filter and run with it.
   void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
@@ -991,37 +1079,416 @@
   OS.indent(Indentation) << "}\n\n";
 }
 
-void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
-                                         DecoderSet &Decoders,
-                                         unsigned Indentation) const {
-  // The decoder function is just a big switch statement based on the
-  // input decoder index.
-  OS.indent(Indentation) << "template <typename InsnType>\n";
-  OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
-    << " unsigned Idx, InsnType insn, MCInst &MI,\n";
-  OS.indent(Indentation)
-      << "                                   uint64_t "
-      << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n";
-  Indentation += 2;
-  OS.indent(Indentation) << "DecodeComplete = true;\n";
+// Emits " // <first line of Str>", plus "..." when further lines were cut.
+static void emitDecoderMethodComment(raw_ostream &OS, StringRef Str) {
+  OS << " // " << Str.split('\n').first << (Str.contains('\n') ? "..." : "");
+}
+
+void DecoderEmitter::emitDecoderFunction(
+    formatted_raw_ostream &OS, const DecoderMap &Decoders,
+    const DecoderMethodSet &DecoderMethods,
+    const DecoderExtractorSet &DecoderExtractors,
+    const DecoderCodeletSequenceSet DecoderCodeletSequences,
+    const DecoderMethodSequenceSet DecoderMethodSequences,
+    const DecoderExtractorSequenceSet DecoderExtractorSequences) const {
+  OS << "// Utility function for decoding plain immediates. Used as the default DecoderMethod.\n";
+  OS << "template<class InsnType>\n";
+  OS << "static DecodeStatus DefaultDecodeImm(MCInst &MI, InsnType insn, uint64_t Address, const MCDisassembler *Decoder) {\n";
+  OS << "  MI.addOperand(MCOperand::createImm(insn));\n";
+  OS << "  return DecodeStatus::Success;\n";
+  OS << "}\n\n";
+  // Output some useful mappings
+  OS << "// The ID of the sequence of commands (\"codelets\") to execute to implement each decoder.\n";
+  OS << "// Execution of the decoder consists of executing the codelets in the sequence in order.\n";
+  OS << "// This indirection table allows two decoders to share the same sequence of codelets, which allows removing redundancies.\n";
+  OS << "static const unsigned DecoderIDToDecoderCodeletSequenceID[] = {\n";
+  // DecoderID 0 is the null decoder, so it returns a null sequence ID.
+  OS << "  0,";
+  for (size_t DecoderIdx = 0; DecoderIdx < Decoders.size(); DecoderIdx++) {
+    if (DecoderIdx % 10 == 0) {
+      OS << "\n ";
+    }
+    OS << " " << Decoders[DecoderIdx + 1].DecoderCodeletSequenceID << ",";
+  }
+  OS << "\n};\n\n";
+  OS << "// The ID of the list of decoder methods called by each decoder.\n";
+  OS << "// This indirection table allows two decoders to share the same sequence of decoder methods, which allows removing redundancies.\n";
+  OS << "static const unsigned DecoderIDToDecoderMethodSequenceID[] = {\n";
+  // DecoderID 0 is the null decoder, so it returns a null sequence ID.
+  OS << "  0,";
+  for (size_t DecoderIdx = 0; DecoderIdx < Decoders.size(); DecoderIdx++) {
+    if (DecoderIdx % 10 == 0) {
+      OS << "\n ";
+    }
+    OS << " " << Decoders[DecoderIdx + 1].DecoderMethodSequenceID << ",";
+  }
+  OS << "\n};\n\n";
+  OS << "// The ID of the list of bit extraction operations done by each decoder.\n";
+  OS << "// This indirection table allows two decoders to share the same sequence of extractions, which allows removing redundancies.\n";
+  OS << "static const unsigned DecoderIDToDecoderExtractorSequenceID[] = {\n";
+  // DecoderID 0 is the null decoder, so it returns a null sequence ID.
+  OS << "  0,";
+  for (size_t DecoderIdx = 0; DecoderIdx < Decoders.size(); DecoderIdx++) {
+    if (DecoderIdx % 10 == 0) {
+      OS << "\n ";
+    }
+    OS << " " << Decoders[DecoderIdx + 1].DecoderExtractorSequenceID << ",";
+  }
+  OS << "\n};\n\n";
+
+  // Lookup table for the IDs of decoder methods associated to a sequence ID.
+  OS << "// The ID of the DecoderMethods to call for each decoder method sequence.\n";
+  OS << "// Every time a decoder's execution sees a DispatchDecoderMethod codelet,\n";
+  OS << "// it executes the next DecoderMethod in its sequence of decoder methods.\n";
+  OS << "static const unsigned Flat_DecoderMethodSequenceIDToDecoderMethodIDs[] = "
+        "{\n";
+  std::vector<unsigned> DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex;
+  // Sequence IDs start at 1, so write a null value for ID 0.
+  DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.push_back(0);
+  OS << "  ~0U,\n";
+  // Push the start for the first iteration.
+  DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.push_back(1);
+  // Now write the non-null sequence IDs' method IDs.
+  for (size_t DecoderMethodSequenceID = 1;
+       DecoderMethodSequenceID <= DecoderMethodSequences.size();
+       DecoderMethodSequenceID++) {
+    OS << "  // DecoderMethodSequenceID " << DecoderMethodSequenceID << "\n";
+    const auto &DecoderMethodIDs =
+        DecoderMethodSequences[DecoderMethodSequenceID];
+    for (size_t DecoderMethodIdx = 0;
+         DecoderMethodIdx < DecoderMethodIDs.size(); DecoderMethodIdx++) {
+      OS << "  " << DecoderMethodIDs[DecoderMethodIdx] << ",";
+      emitDecoderMethodComment(
+          OS, DecoderMethods[DecoderMethodIDs[DecoderMethodIdx]]);
+      OS << "\n";
+    }
+    // Push the start for the next iteration.
+    DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.push_back(
+        DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.back() +
+        DecoderMethodIDs.size());
+  }
+  // Pop the start for the past-the-end iteration that never really happened.
+  DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.pop_back();
+  OS << "};\n";
+  OS << "\n";
+  OS << "// Gets the index for the data of a given DecoderMethodSequenceID in the Flat_DecoderMethodSequenceIDToDecoderMethodIDs array.\n";
+  OS << "static const unsigned "
+        "DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex[] = {";
+  for (size_t DecoderMethodSequenceID = 0;
+       DecoderMethodSequenceID < DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex.size();
+       DecoderMethodSequenceID++) {
+    if (DecoderMethodSequenceID % 10 == 0) {
+      OS << "\n ";
+    }
+    OS << " "
+       << DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex
+              [DecoderMethodSequenceID]
+       << ",";
+  }
+  OS << "\n};\n";
+  OS << "\n";
+
+  // This dispatcher allows calling decoder methods in a generic way, which
+  // allows the decoder to be more compressed and more table-driven.
+  OS << "// Executes the next decoder method in the decoder method sequence, and increments the sequence's iterator to the following entry.\n";
+  OS << "template <typename InsnType>\n"
+        "static DecodeStatus DispatchDecoderMethod(const unsigned "
+        "*&NextDecoderMethod, MCInst &MI, const InsnType &tmp, uint64_t "
+        "Address, const MCDisassembler *Decoder) {\n"
+        "  const unsigned CurrDecoderMethod = *NextDecoderMethod++;\n"
+        "  switch (CurrDecoderMethod) {\n"
+        "  default:\n"
+        "    llvm_unreachable(\"Unknown DecoderMethod ID.\");\n";
+  for (size_t DecoderMethodIdx = 0; DecoderMethodIdx < DecoderMethods.size();
+       DecoderMethodIdx++) {
+    OS << "  case " << DecoderMethodIdx << ":\n"
+       << "    LLVM_DEBUG(dbgs() << \"Calling DecoderMethod "
+       << DecoderMethodIdx << " \\\"\" << "
+       << "R\"(" << DecoderMethods[DecoderMethodIdx].val() << ")\""
+       << " << \"\\\" with input \" << tmp << \"\\n\");\n"
+       << "    return " << DecoderMethods[DecoderMethodIdx].val()
+       << "(MI, tmp, Address, Decoder);\n";
+  }
+  OS << "  }\n"
+        "}\n\n";
+
+  // Lookup table for the IDs of extractors associated to a sequence ID.
+  // +1 in size to guarantee a sentinel value (to catch bugs).
+  OS << "// The ID of the extractors to call for each extractor method sequence.\n";
+  OS << "// Every time a decoder's execution sees a DispatchExtractor codelet,\n";
+  OS << "// it executes the next Extractor in its sequence of extractors.\n";
+  StringRef ExtractorIDType = getMinimalTypeForRange(DecoderExtractors.size());
+  OS << "using ExtractorIDType = " << ExtractorIDType << ";\n";
+  OS << "static const " << ExtractorIDType
+     << " Flat_DecoderExtractorSequenceIDToExtractorIDs[] = {\n";
+  std::vector<unsigned> DecoderExtractorSequenceIDToFlatExtractorIDsIndex;
+  // Sequence IDs start at 1, so write a null value for ID 0.
+  DecoderExtractorSequenceIDToFlatExtractorIDsIndex.push_back(0);
+  OS << "  0,\n";
+  // Push the start for the first iteration.
+  DecoderExtractorSequenceIDToFlatExtractorIDsIndex.push_back(1);
+  // Now write the non-null sequence IDs' extractor IDs.
+  for (size_t DecoderExtractorSequenceID = 1;
+       DecoderExtractorSequenceID <= DecoderExtractorSequences.size();
+       DecoderExtractorSequenceID++) {
+    OS << "  // DecoderExtractorSequenceID " << DecoderExtractorSequenceID << "\n";
+    OS << "  ";
+    const auto &ExtractorIDs =
+        DecoderExtractorSequences[DecoderExtractorSequenceID];
+    for (size_t ExtractorIdx = 0; ExtractorIdx < ExtractorIDs.size();
+         ExtractorIdx++) {
+      OS << ExtractorIDs[ExtractorIdx];
+      OS << ", ";
+    }
+    // Push the start for the next iteration.
+    DecoderExtractorSequenceIDToFlatExtractorIDsIndex.push_back(
+        DecoderExtractorSequenceIDToFlatExtractorIDsIndex.back() +
+        ExtractorIDs.size());
+    OS << "0,\n";
+    // Account for final 0.
+    DecoderExtractorSequenceIDToFlatExtractorIDsIndex.back() += 1;
+  }
+  // Pop the start for the past-the-end iteration that never really happened.
+  DecoderExtractorSequenceIDToFlatExtractorIDsIndex.pop_back();
+  OS << "};\n";
+  OS << "\n";
+  OS << "// Gets the index for the data of a given DecoderExtractorSequenceID in the Flat_DecoderExtractorSequenceIDToExtractorIDs array.\n";
+  OS << "static const unsigned "
+        "DecoderExtractorSequenceIDToFlatExtractorIDsIndex[] = {";
+  for (size_t DecoderExtractorSequenceID = 0;
+       DecoderExtractorSequenceID < DecoderExtractorSequenceIDToFlatExtractorIDsIndex.size();
+       DecoderExtractorSequenceID++) {
+    if (DecoderExtractorSequenceID % 10 == 0) {
+      OS << "\n ";
+    }
+    OS << " "
+       << DecoderExtractorSequenceIDToFlatExtractorIDsIndex
+              [DecoderExtractorSequenceID]
+       << ",";
+  }
+  OS << "\n};\n";
+  OS << "\n";
+
+  OS << "// The specification of the bit extraction commands for each bit extractor.\n";
+  OS << "// Specifies eg. the operands for the shifts and masks that extract bits.\n";
+  OS << "static const uint8_t Flat_ExtractorIDToExtractor[] = {\n";
+  std::vector<unsigned> ExtractorIDToFlatExtractorIndex;
+  // Extractor IDs start at 1, so write a null value for ID 0.
+  ExtractorIDToFlatExtractorIndex.push_back(0);
+  OS << "  0,\n";
+  ExtractorIDToFlatExtractorIndex.push_back(1);
+  for (size_t ExtractorIdx = 0; ExtractorIdx < DecoderExtractors.size();
+       ExtractorIdx++) {
+    unsigned ExtractorID = unsigned(ExtractorIdx) + 1;
+    OS << "  // ExtractorID " << ExtractorID << "\n";
+    const auto &Extractor = DecoderExtractors[ExtractorID];
+    unsigned BytesOutputtedForExtractor = 0;
+    assert(isUInt<8>(Extractor.size()) && "Too large");
+    OS << "  " << Extractor.size() << ",";
+    BytesOutputtedForExtractor++;
+    for (const uint32_t ExtractorCmd : Extractor) {
+      SmallString<16> CmdBytes;
+      raw_svector_ostream S(CmdBytes);
+      encodeULEB128(ExtractorCmd, S);
+      for (const uint8_t Byte : S.str()) {
+        OS << " " << (unsigned)Byte << ",";
+        BytesOutputtedForExtractor++;
+      }
+    }
+    // Push the start for the next iteration.
+    ExtractorIDToFlatExtractorIndex.push_back(
+        ExtractorIDToFlatExtractorIndex.back() + BytesOutputtedForExtractor);
+    OS << "\n";
+  }
+  // Pop the start for the past-the-end iteration that never really happened.
+  ExtractorIDToFlatExtractorIndex.pop_back();
+  OS << "};\n";
+  OS << "\n";
+  OS << "// Gets the index for the data of a given Extractor in the Flat_ExtractorIDToExtractor array.\n";
+  OS << "static const unsigned "
+        "ExtractorIDToFlatExtractorIndex[] = {";
+  for (size_t ExtractorID = 0;
+       ExtractorID < ExtractorIDToFlatExtractorIndex.size(); ExtractorID++) {
+    if (ExtractorID % 10 == 0) {
+      OS << "\n ";
+    }
+    OS << " " << ExtractorIDToFlatExtractorIndex[ExtractorID] << ",";
+  }
+  OS << "\n};\n";
+  OS << "\n";
+
+  // Lookup table for the IDs of codelets associated to a sequence ID.
+  // +1 in size to guarantee a sentinel value (to catch bugs).
+  OS << "// The list of codelets to execute for each codelet sequence.\n";
+  OS << "// Executing a decoder consists of running all codelets in its sequence.\n";
+  StringRef CodeletIDType =
+      getMinimalTypeForRange((unsigned)DecoderCodeletID::Terminator);
+  OS << "using CodeletIDType = " << CodeletIDType << ";\n";
+  OS << "static const CodeletIDType Flat_DecoderCodeletSequenceIDToCodeletIDs[] = "
+        "{\n";
+  std::vector<unsigned> DecoderCodeletSequenceIDToFlatCodeletIDsIndex;
+  // Sequence IDs start at 1, so write a null value for ID 0.
+  DecoderCodeletSequenceIDToFlatCodeletIDsIndex.push_back(0);
+  OS << "  " << DecoderCodeletID::Terminator << ",\n";
+  // Push the start for the first iteration.
+  DecoderCodeletSequenceIDToFlatCodeletIDsIndex.push_back(1);
+  // Now write the non-null sequence IDs' codelet IDs.
+  for (size_t DecoderCodeletSequenceID = 1;
+       DecoderCodeletSequenceID <= DecoderCodeletSequences.size();
+       DecoderCodeletSequenceID++) {
+    OS << "  // CodeletSequenceID " << DecoderCodeletSequenceID << "\n";
+    OS << "  ";
+    const auto &CodeletIDs = DecoderCodeletSequences[DecoderCodeletSequenceID];
+    for (size_t CodeletIdx = 0; CodeletIdx < CodeletIDs.size(); CodeletIdx++) {
+      OS << CodeletIDs[CodeletIdx];
+      OS << ", ";
+    }
+    // Push the start for the next iteration.
+    DecoderCodeletSequenceIDToFlatCodeletIDsIndex.push_back(
+        DecoderCodeletSequenceIDToFlatCodeletIDsIndex.back() +
+        CodeletIDs.size());
+    OS << DecoderCodeletID::Terminator << ",\n";
+    // Account for final terminator.
+    DecoderCodeletSequenceIDToFlatCodeletIDsIndex.back() += 1;
+  }
+  // Pop the start for the past-the-end iteration that never really happened.
+  DecoderCodeletSequenceIDToFlatCodeletIDsIndex.pop_back();
+  OS << "};\n";
+  OS << "\n";
+  OS << "// Gets the index for the data of a given DecoderCodeletSequenceID in the Flat_DecoderCodeletSequenceIDToCodeletIDs array.\n";
+  OS << "static const unsigned "
+        "DecoderCodeletSequenceIDToFlatCodeletIDsIndex[] = {";
+  for (size_t DecoderCodeletSequenceID = 0;
+       DecoderCodeletSequenceID <
+       DecoderCodeletSequenceIDToFlatCodeletIDsIndex.size();
+       DecoderCodeletSequenceID++) {
+    if (DecoderCodeletSequenceID % 10 == 0) {
+      OS << "\n ";
+    }
+    OS << " "
+       << DecoderCodeletSequenceIDToFlatCodeletIDsIndex
+              [DecoderCodeletSequenceID]
+       << ",";
+  }
+  OS << "\n};\n";
+  OS << "\n";
+
+  // This dispatcher allows doing bit extraction operations in a generic way,
+  // which allows the decoder to be more compressed and more table-driven.
+  OS << "// Executes the next bit extractor in the extractor sequence, and increments the sequence's iterator to the following entry.\n";
+  OS << "template <typename InsnType>\n"
+        "static void DispatchExtractor(const ExtractorIDType "
+        "*&NextExtractor, const InsnType &insn, InsnType &tmp) {\n"
+        "  const ExtractorIDType ExtractorID = *NextExtractor++;\n"
+        "  LLVM_DEBUG(dbgs() << \"Executing Extractor \" << ExtractorID << \":\\n\");\n"
+        "  const uint8_t *Extractor = &Flat_ExtractorIDToExtractor[ExtractorIDToFlatExtractorIndex[ExtractorID]];\n"
+        "  // Number of tokens that make up this extractor, minus the Len.\n"
+        "  uint32_t ExtractorLen = Extractor[0];\n"
+        "  assert(ExtractorLen != 0 && \"Ran an empty extractor?\");\n"
+        "  const uint8_t *ExtractorStart = &Extractor[1];\n"
+        "  // Initial value.\n"
+        "  unsigned WordLen0, WordLen1;\n"
+        "  uint32_t Word0 = decodeULEB128(ExtractorStart, &WordLen0);\n"
+        "  uint32_t Word1 = decodeULEB128(ExtractorStart + WordLen0, &WordLen1);\n"
+        "  tmp = Make_64(Word0, Word1);\n"
+        "  LLVM_DEBUG(dbgs() << \"  Extractor: tmp = \" << tmp << \"\\n\");\n"
+        "  ExtractorLen -= 2;\n"
+        "  // Now execute every extraction command one-by-one.\n"
+        "  for (const uint8_t *CurrExtractorCmd = &ExtractorStart[WordLen0 + WordLen1];\n"
+        "       ExtractorLen > 0;\n"
+        "       ExtractorLen -= 3) {\n"
+        "    unsigned CmdLen;\n"
+        "    const uint32_t Base = decodeULEB128(CurrExtractorCmd, &CmdLen);\n"
+        "    CurrExtractorCmd += CmdLen;\n"
+        "    const uint32_t Width = decodeULEB128(CurrExtractorCmd, &CmdLen);\n"
+        "    CurrExtractorCmd += CmdLen;\n"
+        "    const uint32_t Offset = decodeULEB128(CurrExtractorCmd, &CmdLen);\n"
+        "    CurrExtractorCmd += CmdLen;\n"
+        "    InsnType ExtractedBits = fieldFromInstruction(insn, Base, Width) << Offset;\n"
+        "    LLVM_DEBUG(dbgs() << \"  Extractor: tmp |= \" << ExtractedBits\n"
+        "      << \" // fieldFromInstruction(\"\n"
+        "      << \"insn=\" << insn \n"
+        "      << \", Base=\" << Base << \", Width=\" << Width << \")\"\n"
+        "      << \" << Offset=\" << Offset << \"\\n\");\n"
+        "    tmp |= ExtractedBits;\n"
+        "  }\n"
+        "}\n\n";
+
+  OS << "template <typename InsnType>\n";
+  OS << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
+     << " unsigned Idx, const InsnType &insn, MCInst &MI,\n";
+  OS << "                                   uint64_t "
+     << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n";
+  OS << "  DecodeComplete = true;\n";
   // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
   // It would be better for emitBinaryParser to use a 64-bit tmp whenever
   // possible but fall back to an InsnType-sized tmp for truly large fields.
-  OS.indent(Indentation) << "using TmpType = "
-                            "std::conditional_t<std::is_integral<InsnType>::"
-                            "value, InsnType, uint64_t>;\n";
-  OS.indent(Indentation) << "TmpType tmp;\n";
-  OS.indent(Indentation) << "switch (Idx) {\n";
-  OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n";
-  unsigned Index = 0;
-  for (const auto &Decoder : Decoders) {
-    OS.indent(Indentation) << "case " << Index++ << ":\n";
-    OS << Decoder;
-    OS.indent(Indentation+2) << "return S;\n";
-  }
-  OS.indent(Indentation) << "}\n";
-  Indentation -= 2;
-  OS.indent(Indentation) << "}\n\n";
+  OS << "  using TmpType = "
+        "std::conditional_t<std::is_integral<InsnType>::"
+        "value, InsnType, uint64_t>;\n";
+  OS << "  // Grab all the pieces of the state machine for this decoder.\n";
+  OS << "  const unsigned DecoderCodeletSequenceID = "
+        "DecoderIDToDecoderCodeletSequenceID[Idx];\n";
+  OS << "  const unsigned DecoderMethodSequenceID = "
+        "DecoderIDToDecoderMethodSequenceID[Idx];\n";
+  OS << "  const unsigned DecoderExtractorSequenceID = "
+        "DecoderIDToDecoderExtractorSequenceID[Idx];\n";
+  OS << "  const CodeletIDType *NextCodelet = "
+        "&Flat_DecoderCodeletSequenceIDToCodeletIDs["
+        "DecoderCodeletSequenceIDToFlatCodeletIDsIndex["
+        "DecoderCodeletSequenceID]];\n";
+  OS << "  const unsigned *NextDecoderMethod = "
+        "&Flat_DecoderMethodSequenceIDToDecoderMethodIDs["
+        "DecoderMethodSequenceIDToFlatDecoderMethodIDsIndex["
+        "DecoderMethodSequenceID]];\n";
+  OS << "  const ExtractorIDType *NextExtractor = "
+        "&Flat_DecoderExtractorSequenceIDToExtractorIDs["
+        "DecoderExtractorSequenceIDToFlatExtractorIDsIndex["
+        "DecoderExtractorSequenceID]];\n";
+  OS << "  DecodeComplete = true;\n";
+  OS << "  // \"tmp\" is the single register used by the decoder state "
+        "machine.\n";
+  OS << "  // It's used as storage to pass data between different codelets "
+        "being executed.\n";
+  OS << "  TmpType tmp;\n";
+  OS << "  while (*NextCodelet != " << DecoderCodeletID::Terminator << ") {\n";
+  OS << "    const unsigned CodeletID = *NextCodelet;\n";
+  OS << "    switch (CodeletID) {\n";
+  OS << "    default:\n";
+  OS << "      llvm_unreachable(\"Invalid Codelet ID\");\n";
+  OS << "    case " << DecoderCodeletID::DispatchExtractor << ":\n";
+  OS << "      // DispatchExtractor: Calls a generic function that extracts "
+        "bits from the encoding and stores them in \"tmp\"\n";
+  OS << "      DispatchExtractor(NextExtractor, insn, tmp);\n";
+  OS << "      break;\n";
+  OS << "    case " << DecoderCodeletID::DispatchDecoderMethod << ":\n";
+  OS << "      // DispatchDecoderMethod: Calls a generic dispatch function "
+        "that calls a method to converts the extracted bits into MCInst "
+        "operands.\n";
+  OS << "      " << GuardPrefix
+     << "DispatchDecoderMethod(NextDecoderMethod, MI, tmp, Address, Decoder)"
+     << GuardPostfix << " {\n";
+  OS << "        DecodeComplete = false;\n";
+  OS << "        return MCDisassembler::Fail;\n";
+  OS << "      }\n";
+  OS << "      break;\n";
+  OS << "    case " << DecoderCodeletID::DispatchCompleteDecoderMethod << ":\n";
+  OS << "      // DispatchCompleteDecoderMethod: Calls a decoder method while requiring it to be complete (and therefore can't fail.)\n";
+  OS << "      " << GuardPrefix
+     << "DispatchDecoderMethod(NextDecoderMethod, MI, tmp, Address, Decoder)"
+     << GuardPostfix << "\n";
+  OS << "        return MCDisassembler::Fail;\n";
+  OS << "      break;\n";
+  OS << "    case " << DecoderCodeletID::CopyInsnToTmp << ":\n";
+  OS << "      // CopyInsnToTmp: Copies all of \"insn\" into the \"tmp\" variable.\n";
+  OS << "      // Mainly used for DecoderMethods that are run on the whole instruction.\n";
+  OS << "      tmp = insn;\n";
+  OS << "      break;\n";
+  OS << "    }\n";
+  OS << "    NextCodelet++;\n";
+  OS << "  }\n";
+  OS << "  return S;\n";
+  OS << "}\n\n";
 }
 
 // Populates the field of the insn given the start position and the number of
@@ -1139,91 +1606,124 @@
   return Num;
 }
 
-void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
-                                     const OperandInfo &OpInfo,
-                                     bool &OpHasCompleteDecoder) const {
-  const std::string &Decoder = OpInfo.Decoder;
-
-  bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0;
-
-  if (UseInsertBits) {
-    o.indent(Indentation) << "tmp = 0x";
-    o.write_hex(OpInfo.InitValue);
-    o << ";\n";
-  }
+void FilterChooser::emitBinaryParser(
+    SmallVector<DecoderCodeletID, 16> &CodeletIDs,
+    SmallVector<SmallString<256>, 16> &DecoderMethods,
+    SmallVector<SmallVector<uint32_t, 16>, 16> &Extractors,
+    const OperandInfo &OpInfo, bool &OpHasCompleteDecoder) const {
+  // Queue the extractor first; the decoder method below consumes its result.
+  {
+    SmallVector<uint32_t, 16> Extractor;
+    // Split InitValue into 32-bit words (high first) so entries stay 32-bit.
+    Extractor.push_back(Hi_32(OpInfo.InitValue));
+    Extractor.push_back(Lo_32(OpInfo.InitValue));
+
+    for (const EncodingField &EF : OpInfo) {
+      Extractor.push_back(EF.Base);
+      Extractor.push_back(EF.Width);
+      Extractor.push_back(EF.Offset);
+    }
 
-  for (const EncodingField &EF : OpInfo) {
-    o.indent(Indentation);
-    if (UseInsertBits)
-      o << "insertBits(tmp, ";
-    else
-      o << "tmp = ";
-    o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')';
-    if (UseInsertBits)
-      o << ", " << EF.Offset << ", " << EF.Width << ')';
-    else if (EF.Offset != 0)
-      o << " << " << EF.Offset;
-    o << ";\n";
+    CodeletIDs.push_back(DecoderCodeletID::DispatchExtractor);
+    Extractors.push_back(std::move(Extractor));
   }
 
-  if (Decoder != "") {
+  const std::string &UserDecoder = OpInfo.Decoder;
+  StringRef DecoderMethod;
+  if (UserDecoder != "") {
     OpHasCompleteDecoder = OpInfo.HasCompleteDecoder;
-    o.indent(Indentation) << Emitter->GuardPrefix << Decoder
-      << "(MI, tmp, Address, Decoder)"
-      << Emitter->GuardPostfix
-      << " { " << (OpHasCompleteDecoder ? "" : "DecodeComplete = false; ")
-      << "return MCDisassembler::Fail; }\n";
+    DecoderMethod = UserDecoder;
   } else {
     OpHasCompleteDecoder = true;
-    o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n";
+    // Wrapped in lambda to conform to the syntax of decoder methods.
+    DecoderMethod = "DefaultDecodeImm";
   }
+
+  CodeletIDs.push_back(OpHasCompleteDecoder
+                           ? DecoderCodeletID::DispatchCompleteDecoderMethod
+                           : DecoderCodeletID::DispatchDecoderMethod);
+  DecoderMethods.push_back(DecoderMethod);
 }
 
-void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation,
-                                unsigned Opc, bool &HasCompleteDecoder) const {
+void FilterChooser::emitDecoder(
+    SmallVector<DecoderCodeletID, 16> &CodeletIDs,
+    SmallVector<SmallString<256>, 16> &DecoderMethods,
+    SmallVector<SmallVector<uint32_t, 16>, 16> &Extractors, unsigned Opc,
+    bool &HasCompleteDecoder) const {
   HasCompleteDecoder = true;
 
   for (const auto &Op : Operands.find(Opc)->second) {
     // If a custom instruction decoder was specified, use that.
     if (Op.numFields() == 0 && !Op.Decoder.empty()) {
       HasCompleteDecoder = Op.HasCompleteDecoder;
-      OS.indent(Indentation) << Emitter->GuardPrefix << Op.Decoder
-        << "(MI, insn, Address, Decoder)"
-        << Emitter->GuardPostfix
-        << " { " << (HasCompleteDecoder ? "" : "DecodeComplete = false; ")
-        << "return MCDisassembler::Fail; }\n";
+      CodeletIDs.push_back(DecoderCodeletID::CopyInsnToTmp); // tmp = insn
+      CodeletIDs.push_back(HasCompleteDecoder
+                               ? DecoderCodeletID::DispatchCompleteDecoderMethod
+                               : DecoderCodeletID::DispatchDecoderMethod);
+      DecoderMethods.emplace_back(Op.Decoder);
       break;
     }
 
     bool OpHasCompleteDecoder;
-    emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder);
+    emitBinaryParser(CodeletIDs, DecoderMethods, Extractors, Op,
+                     OpHasCompleteDecoder);
     if (!OpHasCompleteDecoder)
       HasCompleteDecoder = false;
   }
 }
 
-unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders,
-                                        unsigned Opc,
-                                        bool &HasCompleteDecoder) const {
-  // Build up the predicate string.
-  SmallString<256> Decoder;
-  // FIXME: emitDecoder() function can take a buffer directly rather than
-  // a stream.
-  raw_svector_ostream S(Decoder);
-  unsigned I = 4;
-  emitDecoder(S, I, Opc, HasCompleteDecoder);
-
-  // Using the full decoder string as the key value here is a bit
-  // heavyweight, but is effective. If the string comparisons become a
-  // performance concern, we can implement a mangling of the predicate
-  // data easily enough with a map back to the actual string. That's
-  // overkill for now, though.
+unsigned FilterChooser::getDecoderIndex(
+    DecoderMap &Decoders, DecoderMethodSet &UniqueDecoderMethods,
+    DecoderExtractorSet &UniqueExtractors,
+    DecoderCodeletSequenceSet &UniqueDecoderCodeletSequences,
+    DecoderMethodSequenceSet &UniqueDecoderMethodSequences,
+    DecoderExtractorSequenceSet &UniqueDecoderExtractorSequences, unsigned Opc,
+    bool &HasCompleteDecoder) const {
+  // Build the codelets, methods and extractors that decode this instruction.
+  DecoderCodeletIDVector DecoderCodeletIDs;
+  SmallVector<SmallString<256>, 16> DecoderMethods;
+  SmallVector<SmallVector<uint32_t, 16>, 16> Extractors;
+  emitDecoder(DecoderCodeletIDs, DecoderMethods, Extractors, Opc,
+              HasCompleteDecoder);
+
+  // Add new sequence or get a reference to a previously inserted identical one.
+  const unsigned CodeletSequenceID =
+      UniqueDecoderCodeletSequences.insert(DecoderCodeletIDs);
+
+  // Convert method strings to the corresponding method ID sequence.
+  DecoderMethodIDVector DecoderMethodIDs;
+  for (const auto &DecoderMethod : DecoderMethods) {
+    // Make sure the method exists in the table.
+    UniqueDecoderMethods.insert(CachedHashString(DecoderMethod));
+    // Get the unique ID of the method from its location in the table.
+    DecoderMethodSet::const_iterator P =
+        find(UniqueDecoderMethods, DecoderMethod);
+    // Append the unique ID of the method to the decoder's methods.
+    DecoderMethodIDs.push_back((unsigned)(P - UniqueDecoderMethods.begin()));
+  }
 
-  // Make sure the predicate is in the table.
-  Decoders.insert(CachedHashString(Decoder));
-  // Now figure out the index for when we write out the table.
-  DecoderSet::const_iterator P = find(Decoders, Decoder.str());
-  return (unsigned)(P - Decoders.begin());
+  // Add new sequence or get a reference to a previously inserted identical one.
+  const unsigned MethodSequenceID =
+      UniqueDecoderMethodSequences.insert(DecoderMethodIDs);
+
+  // Convert extractors to the corresponding extractor ID sequence.
+  DecoderExtractorIDVector ExtractorIDs;
+  for (const auto &Extractor : Extractors) {
+    // Make sure the extractor exists in the table.
+    const unsigned ExtractorID = UniqueExtractors.insert(Extractor);
+    // Append the unique ID of the extractor to the decoder's extractors.
+    ExtractorIDs.push_back(ExtractorID);
+  }
+
+  // Add new sequence or get a reference to a previously inserted identical one.
+  const unsigned ExtractorSequenceID =
+      UniqueDecoderExtractorSequences.insert(ExtractorIDs);
+
+  // Add new decoder or get a reference to previously inserted identical one.
+  const PerDecoderInfo Decoder{CodeletSequenceID, MethodSequenceID,
+                               ExtractorSequenceID};
+  const unsigned DecoderID = Decoders.insert(Decoder);
+  return DecoderID;
 }
 
 bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
@@ -1442,8 +1942,10 @@
   emitSoftFailTableEntry(TableInfo, Opc.EncodingID);
 
   bool HasCompleteDecoder;
-  unsigned DIdx =
-      getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder);
+  unsigned DIdx = getDecoderIndex(
+      TableInfo.Decoders, TableInfo.DecoderMethods, TableInfo.DecoderExtractors,
+      TableInfo.DecoderCodeletSequences, TableInfo.DecoderMethodSequences,
+      TableInfo.DecoderExtractorSequences, Opc.EncodingID, HasCompleteDecoder);
 
   // Produce OPC_Decode or OPC_TryDecode opcode based on the information
   // whether the instruction decoder is complete or not. If it is complete
@@ -2676,7 +3178,10 @@
   emitPredicateFunction(OS, TableInfo.Predicates, 0);
 
   // Emit the decoder function.
-  emitDecoderFunction(OS, TableInfo.Decoders, 0);
+  emitDecoderFunction(
+      OS, TableInfo.Decoders, TableInfo.DecoderMethods,
+      TableInfo.DecoderExtractors, TableInfo.DecoderCodeletSequences,
+      TableInfo.DecoderMethodSequences, TableInfo.DecoderExtractorSequences);
 
   // Emit the main entry point for the decoder, decodeInstruction().
   emitDecodeInstruction(OS, IsVarLenInst);